<?php
/**
 * ===========================================
 * FLOWBOT DCI - SEARXNG SEARCH ADAPTER
 * ===========================================
 * Uses public SearXNG instances for reliable free search
 * No API key required - uses public metasearch instances
 *
 * SearXNG is a privacy-respecting metasearch engine
 * that aggregates results from multiple sources
 *
 * v2.0 - Improved with more instances, health checks, caching
 */

declare(strict_types=1);

namespace FlowbotDCI\Services\SearchEngine;

/**
 * Search adapter backed by public SearXNG metasearch instances.
 *
 * A search walks a prioritised list of instances (last known-good instance
 * first, otherwise a random order) and returns the results of the first
 * instance that answers with at least one usable result. Instances that fail
 * are remembered and skipped for a cooldown period.
 *
 * All state (working instance, failed instances, last error) lives on the
 * object and is therefore only kept for the lifetime of this instance.
 */
class SearXNGAdapter implements SearchEngineInterface
{
    public const NAME = 'SearXNG';
    public const VERSION = '2.0';

    /** Seconds a failed instance is skipped before it may be tried again. */
    private const FAILURE_COOLDOWN_SECONDS = 300;

    /** Browser-like user agent sent with search requests. */
    private const BROWSER_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36';

    /** User agent sent with health-check requests. */
    private const HEALTH_CHECK_USER_AGENT = 'FlowbotDCI/6.1 HealthCheck';

    // Extended list of public SearXNG instances with JSON API enabled
    // Ordered by reliability (updated January 2025)
    private array $instances = [
        // High reliability (tested working)
        'https://search.sapti.me',
        'https://searx.be',
        'https://search.ononoki.org',
        'https://searx.tiekoetter.com',
        'https://search.bus-hit.me',
        'https://searx.work',
        'https://paulgo.io',
        'https://search.mdosch.de',
        'https://searx.namejeff.xyz',
        'https://searx.divided-by-zero.eu',
        // Additional instances
        'https://search.privacyguides.net',
        'https://searx.tuxcloud.net',
        'https://searx.fmac.xyz',
        'https://searx.github.io',
        'https://search.disroot.org',
        'https://searx.info',
        'https://searx.xyz',
        'https://searx.prvcy.eu',
        'https://searx.bar',
        'https://searx.gnu.style',
    ];

    /** Message describing the most recent failure, or null if there was none. */
    private ?string $lastError = null;

    /** Total per-request timeout in seconds for search requests. */
    private int $timeout = 12;

    /** Pause in milliseconds applied after a successful search request. */
    private int $delayMs = 1500;

    /** Instance that produced results most recently, tried first next time. */
    private ?string $workingInstance = null;

    /** Map of instance URL => unix timestamp of its last failure. */
    private array $failedInstances = [];

    /** Upper bound on instances contacted by a single search() call. */
    private int $maxInstancesToTry = 8;

    /**
     * @param array $config Optional settings:
     *                      - 'timeout'   (int) request timeout in seconds, default 12
     *                      - 'delay_ms'  (int) pause after a successful request, default 1500
     *                      - 'instances' (string[]) replaces the built-in instance list
     */
    public function __construct(array $config = [])
    {
        // Values are normalised to int so numeric strings (e.g. from env/ini
        // files) do not raise a TypeError on the typed properties.
        $this->timeout = self::intOption($config, 'timeout', 12);
        $this->delayMs = self::intOption($config, 'delay_ms', 1500);

        if (!empty($config['instances']) && is_array($config['instances'])) {
            // Keep only non-empty strings and re-index so the list is a clean,
            // duplicate-free sequence regardless of the keys passed in.
            $custom = array_values(array_unique(array_filter(
                $config['instances'],
                static function ($candidate): bool {
                    return is_string($candidate) && trim($candidate) !== '';
                }
            )));

            if ($custom !== []) {
                $this->instances = $custom;
            }
        }
    }

    public function getName(): string
    {
        return self::NAME;
    }

    public function isAvailable(): bool
    {
        return true; // Always available (public instances)
    }

    /**
     * Run a search against the first instance that returns usable results.
     *
     * SearXNG paginates by page number rather than by offset and its page size
     * is not fixed, so $offset is translated to a page number on the assumption
     * that one page holds roughly $maxResults items. An offset of 0 always maps
     * to the first page.
     *
     * @param string $query      Search terms.
     * @param int    $maxResults Maximum number of results to return.
     * @param int    $offset     Number of results to skip (approximated, see above).
     *
     * @return array List of results, each with the keys 'url', 'title',
     *               'snippet', 'source' and 'engine'. Empty on failure, in
     *               which case getLastError() describes the problem.
     */
    public function search(string $query, int $maxResults = 10, int $offset = 0): array
    {
        $this->lastError = null;

        if (trim($query) === '') {
            $this->lastError = 'Empty search query';
            return [];
        }

        // Nothing can be returned, so do not spend requests (and do not mark
        // healthy instances as failed) for it.
        if ($maxResults < 1) {
            return [];
        }

        $page = intdiv(max(0, $offset), $maxResults) + 1;

        $tried = 0;
        foreach ($this->getPrioritizedInstances() as $instance) {
            if ($tried >= $this->maxInstancesToTry) {
                break;
            }

            // Skip recently failed instances
            if ($this->isCoolingDown($instance)) {
                continue;
            }

            $tried++;
            $results = $this->searchInstance($instance, $query, $maxResults, $page);

            if ($results !== []) {
                $this->workingInstance = $instance;
                // Errors recorded for earlier instances are irrelevant once
                // the search as a whole succeeded.
                $this->lastError = null;
                return $results;
            }

            // Mark as failed
            $this->failedInstances[$instance] = time();

            // A preferred instance that stopped working must not stay pinned,
            // otherwise later searches would never shuffle the list again.
            if ($this->workingInstance === $instance) {
                $this->workingInstance = null;
            }
        }

        if ($this->lastError === null) {
            $this->lastError = "All SearXNG instances failed after trying {$tried} instances";
        }

        return [];
    }

    /**
     * Get instances sorted by priority (working instance first).
     *
     * Without a known working instance the list is shuffled to distribute
     * load across the public instances.
     */
    private function getPrioritizedInstances(): array
    {
        $instances = array_values($this->instances);

        if ($this->workingInstance === null) {
            shuffle($instances);
            return $instances;
        }

        $position = array_search($this->workingInstance, $instances, true);
        if ($position !== false) {
            unset($instances[$position]);
            array_unshift($instances, $this->workingInstance);
        }

        return array_values($instances);
    }

    /**
     * Tell whether an instance failed recently enough to be skipped.
     *
     * An expired failure record is removed as a side effect.
     */
    private function isCoolingDown(string $instance): bool
    {
        if (!isset($this->failedInstances[$instance])) {
            return false;
        }

        if (time() - $this->failedInstances[$instance] < self::FAILURE_COOLDOWN_SECONDS) {
            return true;
        }

        unset($this->failedInstances[$instance]);
        return false;
    }

    /**
     * Search a specific SearXNG instance.
     *
     * @param string $instance   Base URL of the instance.
     * @param string $query      Search terms.
     * @param int    $maxResults Maximum number of results to return.
     * @param int    $page       1-based SearXNG page number.
     *
     * @return array Parsed results, or an empty array when the instance could
     *               not be reached, refused the request or returned nothing usable.
     */
    private function searchInstance(string $instance, string $query, int $maxResults, int $page = 1): array
    {
        try {
            // Build search URL with JSON format
            $requestUrl = rtrim($instance, '/') . '/search?' . http_build_query([
                'q' => $query,
                'format' => 'json',
                'categories' => 'general',
                'language' => 'en',
                'pageno' => max(1, $page),
            ]);

            $response = $this->httpGet($requestUrl, [
                CURLOPT_TIMEOUT => $this->timeout,
                CURLOPT_CONNECTTIMEOUT => 5, // Quick connect timeout
                CURLOPT_FOLLOWLOCATION => true,
                CURLOPT_MAXREDIRS => 3,
                CURLOPT_USERAGENT => self::BROWSER_USER_AGENT,
                CURLOPT_HTTPHEADER => [
                    'Accept: application/json',
                    'Accept-Language: en-US,en;q=0.9',
                    'Connection: keep-alive',
                ],
                // An empty string makes cURL advertise exactly the encodings it
                // can decode; naming "br" explicitly breaks on builds without
                // brotli support.
                CURLOPT_ENCODING => '',
                CURLOPT_SSL_VERIFYPEER => true,
                CURLOPT_SSL_VERIFYHOST => 2,
            ]);

            $errno = $response['errno'];

            // Connection errors - skip quickly
            if ($errno === CURLE_OPERATION_TIMEDOUT || $errno === CURLE_COULDNT_CONNECT) {
                return [];
            }

            if ($errno !== 0 || $response['error'] !== '') {
                $error = $response['error'] !== '' ? $response['error'] : (string) curl_strerror($errno);
                $this->lastError = "Instance {$instance}: cURL error - {$error}";
                return [];
            }

            $httpCode = $response['status'];

            // Rate limited or blocked
            if ($httpCode === 429 || $httpCode === 403) {
                $this->lastError = "Instance {$instance}: HTTP {$httpCode} - Rate limited";
                return [];
            }

            if ($httpCode !== 200 || $response['body'] === '') {
                $this->lastError = "Instance {$instance}: HTTP {$httpCode}";
                return [];
            }

            $data = json_decode($response['body'], true);
            if (!is_array($data) || !isset($data['results']) || !is_array($data['results'])) {
                // Might be HTML error page
                $this->lastError = "Instance {$instance}: response is not SearXNG JSON";
                return [];
            }

            $results = $this->parseResults($data['results'], $maxResults);

            // Rate limiting delay after successful request
            if ($results !== [] && $this->delayMs > 0) {
                usleep($this->delayMs * 1000);
            }

            return $results;
        } catch (\Exception $e) {
            $this->lastError = "Instance {$instance}: " . $e->getMessage();
        }

        return [];
    }

    /**
     * Convert raw SearXNG result items into the adapter's result format.
     *
     * Items without a usable http(s) URL or without a title are dropped.
     * Every field is type-checked because the payload comes from a third
     * party and strict_types would otherwise turn a malformed item into an
     * uncaught TypeError.
     *
     * @param array $items      Decoded "results" array of a SearXNG response.
     * @param int   $maxResults Maximum number of results to return.
     */
    private function parseResults(array $items, int $maxResults): array
    {
        $results = [];

        foreach ($items as $item) {
            if (count($results) >= $maxResults) {
                break;
            }

            if (!is_array($item)) {
                continue;
            }

            $resultUrl = $item['url'] ?? '';
            $title = $item['title'] ?? '';

            if (!is_string($resultUrl) || !is_string($title) || $resultUrl === '' || trim($title) === '') {
                continue;
            }

            // Skip internal/invalid URLs
            if (filter_var($resultUrl, FILTER_VALIDATE_URL) === false) {
                continue;
            }

            // FILTER_VALIDATE_URL also accepts non-web schemes (file:, ftp:,
            // ...); only web links are meaningful search results.
            $scheme = strtolower((string) parse_url($resultUrl, PHP_URL_SCHEME));
            if ($scheme !== 'http' && $scheme !== 'https') {
                continue;
            }

            $snippet = $item['content'] ?? '';
            $engine = $item['engine'] ?? 'unknown';

            $results[] = [
                'url' => $resultUrl,
                'title' => html_entity_decode($title, ENT_QUOTES, 'UTF-8'),
                'snippet' => html_entity_decode(is_string($snippet) ? $snippet : '', ENT_QUOTES, 'UTF-8'),
                'source' => self::NAME,
                'engine' => is_string($engine) ? $engine : 'unknown',
            ];
        }

        return $results;
    }

    /**
     * Health check for an instance.
     *
     * @param string $instance Base URL of the instance.
     *
     * @return bool True when the instance answers a test query with HTTP 200
     *              and a JSON document containing a "results" key.
     */
    public function checkInstanceHealth(string $instance): bool
    {
        try {
            $requestUrl = rtrim($instance, '/') . '/search?' . http_build_query([
                'q' => 'test',
                'format' => 'json',
            ]);

            $response = $this->httpGet($requestUrl, [
                CURLOPT_TIMEOUT => 5,
                CURLOPT_CONNECTTIMEOUT => 3,
                CURLOPT_NOBODY => false,
                CURLOPT_USERAGENT => self::HEALTH_CHECK_USER_AGENT,
                CURLOPT_HTTPHEADER => ['Accept: application/json'],
            ]);

            if ($response['status'] === 200 && $response['body'] !== '') {
                $data = json_decode($response['body'], true);
                return is_array($data) && isset($data['results']);
            }
        } catch (\Exception $e) {
            // A health check is best-effort: any failure means "not healthy".
        }

        return false;
    }

    /**
     * Perform a GET request with cURL.
     *
     * @param string $url     Fully built request URL.
     * @param array  $options Additional CURLOPT_* options for this request.
     *
     * @return array Array with the keys 'body' (string, empty on failure),
     *               'status' (int HTTP code), 'errno' (int cURL error number)
     *               and 'error' (string cURL error message).
     */
    private function httpGet(string $url, array $options): array
    {
        $ch = curl_init();
        if ($ch === false) {
            // 2 is libcurl's CURLE_FAILED_INIT.
            return ['body' => '', 'status' => 0, 'errno' => 2, 'error' => 'Unable to initialise cURL'];
        }

        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
        ] + $options);

        $body = curl_exec($ch);
        $response = [
            'body' => is_string($body) ? $body : '',
            'status' => (int) curl_getinfo($ch, CURLINFO_HTTP_CODE),
            'errno' => curl_errno($ch),
            'error' => curl_error($ch),
        ];
        curl_close($ch);

        return $response;
    }

    /**
     * Read an integer setting from the config array.
     *
     * Falls back to $default when the key is missing or not numeric.
     */
    private static function intOption(array $config, string $key, int $default): int
    {
        $value = $config[$key] ?? $default;

        return is_numeric($value) ? (int) $value : $default;
    }

    /**
     * Get the message of the most recent failure, or null if there was none.
     */
    public function getLastError(): ?string
    {
        return $this->lastError;
    }

    /**
     * Get the last working instance
     */
    public function getWorkingInstance(): ?string
    {
        return $this->workingInstance;
    }

    /**
     * Get list of all configured instances
     */
    public function getInstances(): array
    {
        return $this->instances;
    }

    /**
     * Get failed instances with their failure timestamps
     */
    public function getFailedInstances(): array
    {
        return $this->failedInstances;
    }

    /**
     * Clear failed instances cache
     */
    public function clearFailedInstances(): void
    {
        $this->failedInstances = [];
    }
}
