<?php
/**
 * Content Improvements — storage helpers and server-side segmentation
 */

// Stop immediately when ABSPATH is not defined, i.e. the file was not loaded through WordPress.
if ( ! defined('ABSPATH') ) { exit; }

if (!function_exists('aeo_impr_normalize_text')) {
    /**
     * Reduce a text or HTML fragment to a canonical single-line plain-text form.
     *
     * The result is what the segmentation helpers compare and hash, so two
     * fragments that differ only in markup, entities, invisible characters or
     * whitespace layout normalize to the same string.
     *
     * @param mixed $text Value to normalize; it is cast to string first.
     * @return string Plain text with single ASCII spaces between words and no
     *                leading/trailing whitespace. Empty string when the
     *                whitespace regex fails (preg_replace() returned null).
     */
    function aeo_impr_normalize_text($text) {
        // Ensure we're working with a string
        $text = (string) $text;

        // 1. Decode all HTML entities to their character equivalents (e.g., '&amp;' -> '&')
        $text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

        // 2. Strip any remaining HTML tags
        $text = wp_strip_all_tags($text, true);

        // 3. Remove the BOM character and other zero-width characters using their byte sequences
        $text = str_replace(
            ["\xEF\xBB\xBF", "\xE2\x80\x8B", "\xE2\x80\x8C", "\xE2\x80\x8D"],
            '',
            $text
        );

        // 4. Collapse every whitespace run to one ASCII space. The /u modifier lets \s
        //    cover Unicode spaces such as the no-break space produced by '&nbsp;'.
        $collapsed = preg_replace('/\s+/u', ' ', $text);
        if ($collapsed === null) {
            return '';
        }

        // 5. Trim AFTER collapsing: trim() only knows ASCII whitespace, so trimming first
        //    would leave a stray space wherever the text started or ended with a
        //    no-break space (and would turn '<p>&nbsp;</p>' into ' ' instead of '').
        return trim($collapsed);
    }
}

if (!function_exists('aeo_impr_allowed_html')) {
    /**
     * Build the tag/attribute allow-list handed to wp_kses() by aeo_impr_sanitize_html().
     *
     * Every listed tag is allowed without attributes, except <a>, which may
     * carry href, title, rel and target.
     *
     * @return array Map of tag name => map of allowed attribute names.
     */
    function aeo_impr_allowed_html() {
        $no_attributes = array();

        // Tags listed before and after the link entry; the union below keeps this order.
        $inline_tags = array('p', 'br', 'strong', 'em', 'b', 'i', 'u', 'code');
        $structural_tags = array(
            'ul', 'ol', 'li',
            'table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td',
            'caption', 'col', 'colgroup',
            'blockquote',
            'h2', 'h3', 'h4',
        );

        $link = array(
            'a' => array_fill_keys(array('href', 'title', 'rel', 'target'), true),
        );

        return array_fill_keys($inline_tags, $no_attributes)
            + $link
            + array_fill_keys($structural_tags, $no_attributes);
    }
}

if (!function_exists('aeo_impr_sanitize_html')) {
    /**
     * Filter an HTML fragment through wp_kses() using the plugin's allow-list.
     *
     * @param mixed $html Candidate markup.
     * @return string Filtered markup, or '' when $html is not a non-empty string.
     */
    function aeo_impr_sanitize_html($html) {
        $has_markup = is_string($html) && $html !== '';

        return $has_markup ? wp_kses($html, aeo_impr_allowed_html()) : '';
    }
}

if (!function_exists('aeo_impr_get_inner_html')) {
    /**
     * Serialize the children of a DOM node back to an HTML string.
     *
     * @param mixed $node Node whose children should be serialized.
     * @return string Concatenated saveHTML() output of each child, or '' when
     *                $node is falsy, has no childNodes, or has no owner document.
     */
    function aeo_impr_get_inner_html($node) {
        $serializable = $node && isset($node->childNodes) && is_object($node->ownerDocument);
        if (!$serializable) {
            return '';
        }

        $document = $node->ownerDocument;
        $pieces = array();
        foreach ($node->childNodes as $child) {
            $pieces[] = $document->saveHTML($child);
        }

        return implode('', $pieces);
    }
}

if (!function_exists('aeo_impr_sanitize_hints')) {
    /**
     * Keep only the known structural hint flags and coerce each to a boolean.
     *
     * @param mixed $hints Hint map, expected to be an array.
     * @return array Subset of 'steps', 'comparison' and 'list' (in that order)
     *               for the keys that are set and non-null in $hints; empty
     *               array when $hints is not an array.
     */
    function aeo_impr_sanitize_hints($hints) {
        $clean = array();

        if (is_array($hints)) {
            foreach (array('steps', 'comparison', 'list') as $flag) {
                if (!isset($hints[$flag])) {
                    continue;
                }
                $clean[$flag] = (bool) $hints[$flag];
            }
        }

        return $clean;
    }
}

if (!function_exists('aeo_impr_detect_structural_hints')) {
    /**
     * Heuristically flag whether a text segment reads like steps, a comparison, or a list.
     *
     * The checks are keyword/punctuation based and run on a lower-cased copy of
     * the text, so they only recognise the English cues spelled out below.
     *
     * @param mixed $text Segment text; cast to string.
     * @return array {
     *     @type bool $steps      Two or more step markers ("step 3", ordinals/sequencers, "1. foo").
     *     @type bool $comparison A comparison cue word, or number/model-heavy text with a joiner word.
     *     @type bool $list       A list trigger plus two or more commas, or two or more semicolons.
     * }
     */
    function aeo_impr_detect_structural_hints($text) {
        $t = strtolower((string) $text);

        // preg_match_all() already returns the number of full matches (or false on error).
        $step_matches = (int) preg_match_all('/\bstep\s*\d+\b/', $t);
        $ord_matches = (int) preg_match_all('/\b(first|second|third|fourth|fifth|sixth|seventh|eighth|ninth|tenth|next|then|finally|last|lastly)\b/', $t);
        $num_matches = (int) preg_match_all('/(?:^|\s)\d{1,2}[.)-]\s+[a-z]/', $t);
        $steps = ($step_matches + $ord_matches + $num_matches) >= 2;

        $comparison_cue = (bool) preg_match('/\b(vs\.?|versus|compared to|compared with|compare|comparison|difference between|differences between|pros and cons|pros\/cons|advantages and disadvantages|better|worse|faster|slower|cheaper|more expensive|less expensive|higher|lower|larger|smaller|heavier|lighter|on the other hand|in contrast|whereas)\b/', $t);
        $number_count = (int) preg_match_all('/\b\d+(?:\.\d+)?\b/', $t);
        $model_token_count = (int) preg_match_all('/\b[a-z]{1,6}\d{1,4}\b/', $t);
        $compare_joiner = (bool) preg_match('/\b(and|or|both|either|neither|than|while)\b/', $t);
        $comparison = $comparison_cue || (($number_count >= 4 || $model_token_count >= 2) && $compare_joiner);

        // "e.g." is matched outside the \b(...)\b group: a trailing \b after the final
        // dot only succeeds when a word character follows immediately, so the usual
        // "e.g. apples" (dot followed by a space) would never count as a trigger.
        $list_trigger = (bool) preg_match('/\b(?:include|includes|including|such as|for example|examples)\b|\be\.g\./', $t)
            || strpos($t, ':') !== false;
        $comma_count = substr_count($t, ',');
        $semicolon_count = substr_count($t, ';');
        $list = ($list_trigger && $comma_count >= 2) || $semicolon_count >= 2;

        return array(
            'steps' => $steps,
            'comparison' => $comparison,
            'list' => $list,
        );
    }
}

if (!function_exists('aeo_impr_is_long_segment')) {
    /**
     * Tell whether a segment is longer than a character limit.
     *
     * Length is measured with mb_strlen() (UTF-8) when available and with
     * strlen() (bytes) otherwise.
     *
     * @param mixed $text  Segment text; cast to string.
     * @param int   $limit Threshold; the segment is "long" only when strictly above it.
     * @return bool False for an empty string, otherwise length > $limit.
     */
    function aeo_impr_is_long_segment($text, $limit = 300) {
        $text = (string) $text;
        if ($text === '') {
            return false;
        }

        $length = function_exists('mb_strlen') ? mb_strlen($text, 'UTF-8') : strlen($text);

        return $length > $limit;
    }
}

if (!function_exists('aeo_impr_hash_text')) {
    /**
     * Compute the SHA-1 hex digest of a text after normalizing it.
     *
     * @param mixed $text Raw text; passed through aeo_impr_normalize_text() first.
     * @return string 40-character hexadecimal SHA-1 digest of the normalized text.
     */
    function aeo_impr_hash_text($text) {
        $normalized = aeo_impr_normalize_text($text);

        // Prefer the hash extension; sha1() is the fallback when hash() is unavailable.
        return function_exists('hash') ? hash('sha1', $normalized) : sha1($normalized);
    }
}

if (!function_exists('aeo_impr_is_gutenberg_content')) {
    /**
     * Report whether the content contains blocks according to has_blocks().
     *
     * @param mixed $content Content forwarded unchanged to has_blocks().
     * @return bool False when has_blocks() does not exist; otherwise its result as a boolean.
     */
    function aeo_impr_is_gutenberg_content($content) {
        if (!function_exists('has_blocks')) {
            return false;
        }

        return (bool) has_blocks($content);
    }
}

if (!function_exists('aeo_impr_collect_paragraphs_from_blocks')) {
    /**
     * Walk a parsed block tree depth-first and append one segment per supported text block.
     *
     * Supported block names are core/paragraph, core/heading, core/list-item and
     * core/pullquote. A block whose normalized text is empty is skipped and does
     * not consume an index. Inner blocks are visited for every block, supported
     * or not.
     *
     * @param mixed $blocks Array of block arrays (anything else is ignored).
     * @param array $items  Output list, modified in place; each entry has the keys
     *                      index, text, hash, blockType, html, hints and is_long.
     * @param int   $index  Running segment counter, modified in place.
     * @return void
     */
    function aeo_impr_collect_paragraphs_from_blocks($blocks, &$items, &$index) {
        if (!is_array($blocks)) {
            return;
        }

        $supported_blocks = array('core/paragraph', 'core/heading', 'core/list-item', 'core/pullquote');

        foreach ($blocks as $block) {
            $name = $block['blockName'] ?? '';

            if (in_array($name, $supported_blocks, true)) {
                // Source markup, in order of preference: innerHTML, joined string
                // chunks of innerContent, then the content / value attributes.
                if (!empty($block['innerHTML'])) {
                    $raw = $block['innerHTML'];
                } elseif (!empty($block['innerContent']) && is_array($block['innerContent'])) {
                    $raw = '';
                    foreach ($block['innerContent'] as $chunk) {
                        if (is_string($chunk)) {
                            $raw .= $chunk;
                        }
                    }
                } elseif (!empty($block['attrs']['content'])) {
                    $raw = $block['attrs']['content'];
                } elseif (!empty($block['attrs']['value'])) {
                    $raw = $block['attrs']['value'];
                } else {
                    $raw = '';
                }

                $plain = wp_specialchars_decode(wp_strip_all_tags($raw, true));
                $text = aeo_impr_normalize_text($plain);

                if ($text !== '') {
                    $has_markup = is_string($raw) && $raw !== '';

                    $items[] = array(
                        'index'     => $index,
                        'text'      => $text,
                        'hash'      => aeo_impr_hash_text($text),
                        'blockType' => $name,
                        'html'      => $has_markup ? aeo_impr_sanitize_html($raw) : '',
                        'hints'     => aeo_impr_detect_structural_hints($text),
                        'is_long'   => aeo_impr_is_long_segment($text),
                    );
                    $index++;
                }
            }

            // Descend into nested blocks regardless of whether this block was collected.
            if (!empty($block['innerBlocks'])) {
                aeo_impr_collect_paragraphs_from_blocks($block['innerBlocks'], $items, $index);
            }
        }
    }
}

if (!function_exists('aeo_impr_segment_content_gutenberg')) {
    /**
     * Segment block-editor content into text items.
     *
     * @param string $post_content Raw post content handed to parse_blocks().
     * @return array {
     *     @type string $editor Always 'gutenberg'.
     *     @type array  $items  Segments gathered by aeo_impr_collect_paragraphs_from_blocks();
     *                          empty when parse_blocks() does not exist.
     * }
     */
    function aeo_impr_segment_content_gutenberg($post_content) {
        $segments = array();

        if (function_exists('parse_blocks')) {
            $position = 0;
            $tree = parse_blocks($post_content);
            aeo_impr_collect_paragraphs_from_blocks($tree, $segments, $position);
        }

        return array(
            'editor' => 'gutenberg',
            'items'  => $segments,
        );
    }
}

if (!function_exists('aeo_impr_segment_content_classic')) {
    /**
     * Segment classic-editor content into text items.
     *
     * The content is rendered through the 'the_content' filter and its whitespace
     * runs are collapsed. With the DOM extension available, every p, li and h1-h6
     * element becomes a segment (blockquote itself is not selected, so its inner
     * paragraphs are not duplicated). Without it, the markup is split on the
     * closing tags of those elements and no per-segment HTML is kept.
     *
     * @param string $post_content Raw post content.
     * @return array {
     *     @type string $editor Always 'classic'.
     *     @type array  $items  Segments with the keys index, text, hash, html, hints, is_long.
     * }
     */
    function aeo_impr_segment_content_classic($post_content) {
        $items = array();
        $index = 0;

        // Shared by both parsing strategies: record one non-empty segment.
        $append = function ($text, $segment_html) use (&$items, &$index) {
            $items[] = array(
                'index' => $index,
                'text'  => $text,
                'hash'  => aeo_impr_hash_text($text),
                'html'  => $segment_html,
                'hints' => aeo_impr_detect_structural_hints($text),
                'is_long' => aeo_impr_is_long_segment($text),
            );
            $index++;
        };

        $rendered = (string) apply_filters('the_content', $post_content);
        $html = preg_replace('/\s+/', ' ', $rendered);

        $dom_available = class_exists('DOMDocument')
            && class_exists('DOMXPath')
            && function_exists('libxml_use_internal_errors');

        if (!$dom_available) {
            // Fallback: split by closing tags of supported elements
            foreach (preg_split('#</(p|li|h[1-6])\s*>#i', $html) as $part) {
                $text = aeo_impr_normalize_text(wp_strip_all_tags($part, true));
                if ($text !== '') {
                    $append($text, '');
                }
            }

            return array(
                'editor' => 'classic',
                'items'  => $items,
            );
        }

        $dom = new DOMDocument();
        // Collect libxml errors internally so malformed HTML does not emit warnings,
        // then restore whatever error mode was active before.
        $previous_mode = libxml_use_internal_errors(true);
        $dom->loadHTML('<?xml encoding="UTF-8">' . $html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
        libxml_clear_errors();
        libxml_use_internal_errors($previous_mode);

        $xpath = new DOMXPath($dom);
        $nodes = $xpath->query('//p | //li | //h1 | //h2 | //h3 | //h4 | //h5 | //h6');

        if ($nodes) {
            foreach ($nodes as $node) {
                $text = aeo_impr_normalize_text($node->textContent);
                if ($text === '') {
                    continue;
                }

                $inner_html = aeo_impr_get_inner_html($node);
                $append($text, $inner_html !== '' ? aeo_impr_sanitize_html($inner_html) : '');
            }
        }

        return array(
            'editor' => 'classic',
            'items'  => $items,
        );
    }
}

if (!function_exists('aeo_impr_segment_post_content')) {
    /**
     * Segment a post's content with the strategy matching how it was authored.
     *
     * @param int $post_id Post to load via get_post().
     * @return array Result of the Gutenberg or classic segmenter. When the post
     *               cannot be loaded or its post_type is not 'post', an empty
     *               result labelled 'classic' is returned.
     */
    function aeo_impr_segment_post_content($post_id) {
        $post = get_post($post_id);

        $is_supported = $post && $post->post_type === 'post';
        if (!$is_supported) {
            return array('editor' => 'classic', 'items' => array());
        }

        $content = $post->post_content;

        return aeo_impr_is_gutenberg_content($content)
            ? aeo_impr_segment_content_gutenberg($content)
            : aeo_impr_segment_content_classic($content);
    }
}

if (!function_exists('aeo_impr_sanitize_item')) {
    /**
     * Normalize one improvement record into the canonical stored shape.
     *
     * Notes on the resulting record:
     * - 'id' is kept when it is a string, otherwise a new one is generated.
     * - 'type' is always 'paragraph' and 'status' is always 'pending'.
     * - 'updated_at' is always the current UTC time; 'created_at' defaults to it.
     * - 'suggestedHtml' / 'structureType' / 'applyMode' also accept their
     *   snake_case spellings (suggested_html, structure_type, apply_mode).
     * - 'applyMode' ends up 'manual' only when that was requested AND the item
     *   carries no suggested text or HTML; in every other case it is 'auto'.
     * - Locator fields clientId/blockType are kept only for the 'gutenberg' editor.
     *
     * @param mixed $item Raw improvement record (normally an associative array).
     * @return array Sanitized record with the keys id, type, editor, locator,
     *               originalText, suggestedText, suggestedHtml, structureType,
     *               applyMode, score, reason, status, created_at, updated_at.
     */
    function aeo_impr_sanitize_item($item) {
        $timestamp = gmdate('c');

        // Identifier: keep a supplied string id, otherwise mint a fresh one.
        if (isset($item['id']) && is_string($item['id'])) {
            $id = sanitize_text_field($item['id']);
        } elseif (function_exists('wp_generate_uuid4')) {
            $id = wp_generate_uuid4();
        } else {
            $id = uniqid('aeo_', true);
        }

        // Editor: anything other than the two known values falls back to 'classic'.
        $editor = $item['editor'] ?? '';
        if (!in_array($editor, array('gutenberg', 'classic'), true)) {
            $editor = 'classic';
        }

        $original_text = isset($item['originalText']) ? wp_kses_post($item['originalText']) : '';
        $suggested_text = isset($item['suggestedText']) ? wp_kses_post($item['suggestedText']) : '';
        $score = isset($item['score']) ? floatval($item['score']) : 0.0;
        $reason = isset($item['reason']) ? sanitize_text_field($item['reason']) : '';
        $created_at = isset($item['created_at']) ? sanitize_text_field($item['created_at']) : $timestamp;

        // Suggested HTML (camelCase key wins over snake_case); derive the plain
        // suggested text from it when no text was supplied.
        $raw_html = (string) ($item['suggestedHtml'] ?? ($item['suggested_html'] ?? ''));
        $suggested_html = $raw_html !== '' ? aeo_impr_sanitize_html($raw_html) : '';
        if ($suggested_text === '' && $suggested_html !== '') {
            $suggested_text = wp_strip_all_tags($suggested_html);
        }

        // Structure type: lower-case, map aliases, then restrict to the known set.
        $structure_input = $item['structureType'] ?? ($item['structure_type'] ?? null);
        $structure = $structure_input === null ? '' : strtolower(sanitize_text_field($structure_input));
        $structure_aliases = array(
            'ordered_list'   => 'numbered_list',
            'unordered_list' => 'bulleted_list',
            'bullet_list'    => 'bulleted_list',
            'bullets'        => 'bulleted_list',
        );
        if (isset($structure_aliases[$structure])) {
            $structure = $structure_aliases[$structure];
        }
        if (!in_array($structure, array('paragraph', 'bulleted_list', 'numbered_list', 'table'), true)) {
            $structure = 'paragraph';
        }

        // Apply mode: 'manual' survives only when requested and nothing is suggested.
        $mode_input = $item['applyMode'] ?? ($item['apply_mode'] ?? null);
        $apply_mode = $mode_input === null ? '' : sanitize_text_field($mode_input);
        $has_suggestion = $suggested_text !== '' || $suggested_html !== '';
        if ($apply_mode !== 'manual' || $has_suggestion) {
            $apply_mode = 'auto';
        }

        // Locator
        $locator_input = (isset($item['locator']) && is_array($item['locator'])) ? $item['locator'] : array();
        $locator = array();
        if ($editor === 'gutenberg') {
            foreach (array('clientId', 'blockType') as $field) {
                if (!empty($locator_input[$field]) && is_string($locator_input[$field])) {
                    $locator[$field] = sanitize_text_field($locator_input[$field]);
                }
            }
        }
        if (isset($locator_input['index'])) {
            $locator['index'] = intval($locator_input['index']);
        }
        if (!empty($locator_input['hash']) && is_string($locator_input['hash'])) {
            $locator['hash'] = sanitize_text_field($locator_input['hash']);
        }

        return array(
            'id'            => $id,
            'type'          => 'paragraph',
            'editor'        => $editor,
            'locator'       => $locator,
            'originalText'  => $original_text,
            'suggestedText' => $suggested_text,
            'suggestedHtml' => $suggested_html,
            'structureType' => $structure,
            'applyMode'     => $apply_mode,
            'score'         => $score,
            'reason'        => $reason,
            'status'        => 'pending',
            'created_at'    => $created_at,
            'updated_at'    => $timestamp,
        );
    }
}

if (!function_exists('aeo_impr_get')) {
    /**
     * Load the improvements stored for a post, sanitized on the way out.
     *
     * @param int $post_id Post whose '_aeo_improvements' meta is read.
     * @return array List of records run through aeo_impr_sanitize_item();
     *               non-array entries are dropped, and a non-array meta value
     *               yields an empty list.
     */
    function aeo_impr_get($post_id) {
        $stored = get_post_meta($post_id, '_aeo_improvements', true);
        if (!is_array($stored)) {
            return array();
        }

        // array_values() re-indexes after filtering so the result is a plain list.
        $records = array_values(array_filter($stored, 'is_array'));

        return array_map('aeo_impr_sanitize_item', $records);
    }
}

if (!function_exists('aeo_impr_overwrite')) {
    /**
     * Replace the improvements stored for a post with a sanitized copy of $items.
     *
     * @param int   $post_id Post whose '_aeo_improvements' meta is written.
     * @param array $items   Raw improvement records; entries that are not arrays
     *                       are skipped, matching what aeo_impr_get() does on read.
     * @return array The sanitized records (unslashed), in the order they were stored.
     */
    function aeo_impr_overwrite($post_id, array $items) {
        $san = array();
        foreach ($items as $item) {
            if (!is_array($item)) {
                continue;
            }
            $san[] = aeo_impr_sanitize_item($item);
        }

        // update_post_meta() unslashes the value it receives, so the data is slashed
        // first; otherwise literal backslashes in the stored text/HTML would be lost.
        update_post_meta($post_id, '_aeo_improvements', wp_slash($san));

        return $san;
    }
}

if (!function_exists('aeo_impr_clear')) {
    /**
     * Remove every stored improvement for a post.
     *
     * @param int $post_id Post whose '_aeo_improvements' meta is deleted.
     * @return void
     */
    function aeo_impr_clear($post_id) {
        $meta_key = '_aeo_improvements';
        delete_post_meta($post_id, $meta_key);
    }
}

if (!function_exists('aeo_impr_parse_segments_from_request')) {
    /**
     * Decode and sanitize a client-supplied segment payload.
     *
     * @param mixed $raw_segments JSON string (unslashed before decoding) or an
     *                            already-decoded array.
     * @return array|null Null when the input does not resolve to an array.
     *                    Otherwise array('editor' => 'gutenberg'|'classic'|'',
     *                    'items' => list), where each item always has index,
     *                    text, hash and is_long, plus html, hints, blockType
     *                    and clientId only when they are non-empty.
     */
    function aeo_impr_parse_segments_from_request($raw_segments) {
        // Accept JSON string or array
        $payload = null;
        if (is_string($raw_segments)) {
            $payload = json_decode(wp_unslash($raw_segments), true);
        } elseif (is_array($raw_segments)) {
            $payload = $raw_segments;
        }
        if (!is_array($payload)) {
            return null;
        }

        $editor = isset($payload['editor']) ? sanitize_text_field($payload['editor']) : '';
        if (!in_array($editor, array('gutenberg', 'classic'), true)) {
            $editor = '';
        }

        $incoming = (isset($payload['items']) && is_array($payload['items'])) ? $payload['items'] : array();
        $segments = array();

        foreach ($incoming as $entry) {
            if (!is_array($entry)) {
                continue;
            }

            // Text is normalized first because the hash and length fallbacks depend on it.
            $text = isset($entry['text']) ? aeo_impr_normalize_text($entry['text']) : '';

            $segment = array(
                'index' => isset($entry['index']) ? intval($entry['index']) : null,
                'text'  => $text,
                'hash'  => isset($entry['hash']) ? sanitize_text_field($entry['hash']) : aeo_impr_hash_text($text),
                'is_long' => isset($entry['is_long']) ? (bool) $entry['is_long'] : aeo_impr_is_long_segment($text),
            );

            $html = isset($entry['html']) ? aeo_impr_sanitize_html((string) $entry['html']) : '';
            if ($html !== '') {
                $segment['html'] = $html;
            }

            $hints = isset($entry['hints']) ? aeo_impr_sanitize_hints($entry['hints']) : array();
            if (!empty($hints)) {
                $segment['hints'] = $hints;
            }

            foreach (array('blockType', 'clientId') as $field) {
                if (!empty($entry[$field])) {
                    $segment[$field] = sanitize_text_field($entry[$field]);
                }
            }

            $segments[] = $segment;
        }

        return array(
            'editor' => $editor,
            'items'  => $segments,
        );
    }
}

if (!function_exists('aeo_impr_is_pro_active')) {
    /**
     * Decide whether the license counts as active.
     *
     * Defers to aeo_is_license_active() when that function exists; otherwise
     * reads the 'aeo_license_status' option and compares it, case-insensitively,
     * with 'active'.
     *
     * @return bool True when the license is considered active.
     */
    function aeo_impr_is_pro_active() {
        $checker = 'aeo_is_license_active';
        if (function_exists($checker)) {
            return (bool) call_user_func($checker);
        }

        $status = get_option('aeo_license_status');

        // Non-string option values (including a missing option) never count as active.
        return is_string($status) && strtolower($status) === 'active';
    }
}

// ================= AJAX: Improvements API (Phase 3) =================
if ( ! function_exists( 'aeo_ajax_get_improvements' ) ) {
    /**
     * AJAX handler (action "aeo_get_improvements"): return the stored improvements of a post.
     *
     * Reads post_id from $_POST, verifies the 'aeo_editor_nonce' nonce, the
     * 'edit_post' capability and that the post type is 'post', then responds
     * with JSON: { improvements: [...] } on success or { message } on error.
     *
     * @return void
     */
    function aeo_ajax_get_improvements() {
        check_ajax_referer( 'aeo_editor_nonce', 'nonce' );

        $post_id = 0;
        if ( isset( $_POST['post_id'] ) ) {
            $post_id = absint( wp_unslash( $_POST['post_id'] ) );
        }

        if ( $post_id < 1 ) {
            wp_send_json_error( array( 'message' => 'Missing Post ID.' ) );
        }
        if ( ! current_user_can( 'edit_post', $post_id ) ) {
            wp_send_json_error( array( 'message' => 'Permission denied.' ) );
        }
        if ( 'post' !== get_post_type( $post_id ) ) {
            wp_send_json_error( array( 'message' => 'Unsupported post type.' ) );
        }

        wp_send_json_success( array( 'improvements' => aeo_impr_get( $post_id ) ) );
    }
    add_action( 'wp_ajax_aeo_get_improvements', 'aeo_ajax_get_improvements' );
}

if ( ! function_exists( 'aeo_ajax_apply_improvement' ) ) {
    /**
     * AJAX handler (action "aeo_apply_improvement"): drop one improvement from a post's stored list.
     *
     * This handler does not touch the post content; it only removes the record
     * whose id matches $_POST['id'] from the '_aeo_improvements' meta.
     * NOTE(review): presumably the editor applies the text change client-side
     * before calling this — confirm against the caller.
     *
     * Responds with JSON { removed: bool, remaining: int } on success, or
     * { message } when parameters, permissions or the post type are invalid.
     *
     * @return void
     */
    function aeo_ajax_apply_improvement() {
        check_ajax_referer( 'aeo_editor_nonce', 'nonce' );
        $post_id = isset($_POST['post_id']) ? absint( wp_unslash( $_POST['post_id'] ) ) : 0;
        $id      = isset($_POST['id']) ? sanitize_text_field( wp_unslash( $_POST['id'] ) ) : '';
        if ( $post_id < 1 || $id === '' ) { wp_send_json_error( array( 'message' => 'Missing parameters.' ) ); }
        if ( ! current_user_can( 'edit_post', $post_id ) ) { wp_send_json_error( array( 'message' => 'Permission denied.' ) ); }
        if ( 'post' !== get_post_type( $post_id ) ) { wp_send_json_error( array( 'message' => 'Unsupported post type.' ) ); }

        $items = aeo_impr_get( $post_id );
        $new   = array();
        $removed = false;
        foreach ( $items as $it ) {
            if ( isset($it['id']) && $it['id'] === $id ) { $removed = true; continue; }
            $new[] = $it;
        }

        if ( $removed ) {
            // update_post_meta() unslashes its value; slash first so backslashes in the
            // remaining items' text/HTML are not stripped every time one item is removed.
            update_post_meta( $post_id, '_aeo_improvements', wp_slash( $new ) );
        }
        wp_send_json_success( array( 'removed' => $removed, 'remaining' => count( $new ) ) );
    }
    add_action( 'wp_ajax_aeo_apply_improvement', 'aeo_ajax_apply_improvement' );
}

if ( ! function_exists( 'aeo_ajax_reject_improvement' ) ) {
    /**
     * AJAX handler (action "aeo_reject_improvement"): discard one improvement from a post's stored list.
     *
     * Removes the record whose id matches $_POST['id'] from the
     * '_aeo_improvements' meta and leaves the post content untouched.
     *
     * Responds with JSON { removed: bool, remaining: int } on success, or
     * { message } when parameters, permissions or the post type are invalid.
     *
     * @return void
     */
    function aeo_ajax_reject_improvement() {
        check_ajax_referer( 'aeo_editor_nonce', 'nonce' );
        $post_id = isset($_POST['post_id']) ? absint( wp_unslash( $_POST['post_id'] ) ) : 0;
        $id      = isset($_POST['id']) ? sanitize_text_field( wp_unslash( $_POST['id'] ) ) : '';
        if ( $post_id < 1 || $id === '' ) { wp_send_json_error( array( 'message' => 'Missing parameters.' ) ); }
        if ( ! current_user_can( 'edit_post', $post_id ) ) { wp_send_json_error( array( 'message' => 'Permission denied.' ) ); }
        if ( 'post' !== get_post_type( $post_id ) ) { wp_send_json_error( array( 'message' => 'Unsupported post type.' ) ); }

        $items = aeo_impr_get( $post_id );
        $new   = array();
        $removed = false;
        foreach ( $items as $it ) {
            if ( isset($it['id']) && $it['id'] === $id ) { $removed = true; continue; }
            $new[] = $it;
        }

        if ( $removed ) {
            // update_post_meta() unslashes its value; slash first so backslashes in the
            // remaining items' text/HTML are not stripped every time one item is removed.
            update_post_meta( $post_id, '_aeo_improvements', wp_slash( $new ) );
        }
        wp_send_json_success( array( 'removed' => $removed, 'remaining' => count( $new ) ) );
    }
    add_action( 'wp_ajax_aeo_reject_improvement', 'aeo_ajax_reject_improvement' );
}
