<?php
/**
 * ============================================
 * FLOWBOT DCI - INFINITE MODE HANDLER v1.0
 * ============================================
 * Continuous crawling without end condition.
 *
 * Features:
 * - Continuous URL discovery
 * - Periodic search query rotation
 * - Progressive depth expansion
 * - Checkpoint every N URLs
 * - Memory-efficient with garbage collection
 * ============================================
 */

declare(strict_types=1);

namespace FlowbotDCI\Services\Crawler;

use FlowbotDCI\Core\Database;
use PDO;
use Generator;

class InfiniteMode
{
    public const VERSION = '1.0';

    /**
     * Config keys used as modulo divisors in start(); each must be an int >= 1.
     */
    private const INTERVAL_KEYS = [
        'checkpoint_interval',
        'query_rotation_interval',
        'depth_expansion_interval',
        'gc_interval',
    ];

    /**
     * Query parameter names (exact match, lowercase) ignored for deduplication.
     * Parameters starting with "utm_" are ignored as well.
     */
    private const TRACKING_PARAMS = ['ref', 'source', 'fbclid', 'gclid'];

    /**
     * Configuration
     */
    private array $config = [
        'checkpoint_interval' => 100,
        'max_memory_mb' => 512,
        'query_rotation_interval' => 500,
        'depth_expansion_interval' => 200,
        'gc_interval' => 50,
        'pause_on_errors' => 10,
        'restart_delay_seconds' => 60,
    ];

    /**
     * State tracking
     */
    private array $state = [
        'status' => 'idle',
        'urls_processed' => 0,
        'urls_discovered' => 0,
        'current_depth' => 1,
        'current_query_index' => 0,
        'errors_count' => 0,
        'last_checkpoint' => null,
        'start_time' => null,
        'pause_until' => null,
    ];

    /**
     * Search queries to rotate through (always a 0-indexed list)
     */
    private array $queries = [];

    /**
     * Discovered URLs queue (FIFO)
     */
    private array $urlQueue = [];

    /**
     * Seen URLs (for deduplication), keyed by md5 of the normalized URL
     */
    private array $seenUrls = [];

    /**
     * Database connection
     */
    private ?PDO $pdo = null;

    /**
     * Job ID
     */
    private ?string $jobId = null;

    /**
     * Constructor
     *
     * Merges the given options over the defaults. The interval options are
     * cast to int and clamped to at least 1 because start() uses them as
     * modulo divisors (a value of 0 would raise DivisionByZeroError).
     *
     * @param array $config Overrides for the default configuration
     */
    public function __construct(array $config = [])
    {
        $this->config = array_merge($this->config, $config);

        foreach (self::INTERVAL_KEYS as $key) {
            $this->config[$key] = max(1, (int) $this->config[$key]);
        }
    }

    /**
     * Set database connection
     *
     * @param Database $database Wrapper whose getConnection() result is stored
     */
    public function setDatabase(Database $database): self
    {
        $this->pdo = $database->getConnection();
        return $this;
    }

    /**
     * Set job ID for checkpointing
     */
    public function setJobId(string $jobId): self
    {
        $this->jobId = $jobId;
        return $this;
    }

    /**
     * Configure infinite mode
     *
     * Recognised keys and their enforced minimums:
     * checkpoint_interval (10), max_memory_mb (128),
     * query_rotation_interval (50), depth_expansion_interval (1),
     * gc_interval (1), max_depth (1). Unknown keys are ignored.
     *
     * @param array $options Option name => value
     */
    public function configure(array $options): self
    {
        $minimums = [
            'checkpoint_interval' => 10,
            'max_memory_mb' => 128,
            'query_rotation_interval' => 50,
            'depth_expansion_interval' => 1,
            'gc_interval' => 1,
            'max_depth' => 1,
        ];

        foreach ($minimums as $key => $minimum) {
            if (isset($options[$key])) {
                $this->config[$key] = max($minimum, (int) $options[$key]);
            }
        }

        return $this;
    }

    /**
     * Set search queries for rotation
     *
     * Non-string and blank entries are dropped, and the result is reindexed
     * so getNextQuery() can address it by position.
     *
     * @param array $queries Candidate query strings
     */
    public function setQueries(array $queries): self
    {
        $this->queries = array_values(array_filter(
            $queries,
            static fn($q): bool => is_string($q) && trim($q) !== ''
        ));
        return $this;
    }

    /**
     * Add seed URLs
     *
     * Seeds are queued at depth 0 with source "seed". Non-string, blank and
     * already-seen URLs are skipped.
     *
     * @param array $urls Seed URLs
     */
    public function addSeedUrls(array $urls): self
    {
        $this->enqueueAll($urls, 0, 'seed');
        return $this;
    }

    /**
     * Start infinite mode generator
     *
     * Yields event arrays whose 'type' is one of: memory_warning, paused,
     * query_rotation, search_request, idle, url, checkpoint, depth_expanded,
     * stopped. The loop runs while the status is "running"; when it ends a
     * final checkpoint is saved and a "stopped" event is yielded.
     *
     * @return Generator
     */
    public function start(): Generator
    {
        $this->state['status'] = 'running';
        $this->state['start_time'] = microtime(true);
        $this->state['last_checkpoint'] = time();

        // Try to restore from checkpoint
        if ($this->jobId !== null) {
            $this->restoreCheckpoint();
        }

        while ($this->state['status'] === 'running') {
            // Check memory usage
            if ($this->checkMemoryLimit()) {
                yield [
                    'type' => 'memory_warning',
                    'message' => 'Memory limit approaching, running garbage collection',
                    'memory_mb' => memory_get_usage(true) / 1024 / 1024,
                ];
                $this->runGarbageCollection();
            }

            // Check if paused (auto-pause set by reportError())
            if ($this->state['pause_until'] !== null && time() < $this->state['pause_until']) {
                yield [
                    'type' => 'paused',
                    'message' => 'Paused due to errors',
                    'resume_at' => date('Y-m-d H:i:s', (int) $this->state['pause_until']),
                ];
                sleep(5);
                continue;
            }

            // Get next URL to process
            $urlData = $this->getNextUrl();

            if (!$urlData) {
                // No URLs in queue, try to discover more
                if (!empty($this->queries)) {
                    $query = $this->getNextQuery();
                    yield [
                        'type' => 'query_rotation',
                        'query' => $query,
                        'index' => $this->state['current_query_index'],
                    ];

                    // Yield request to search for more URLs
                    yield [
                        'type' => 'search_request',
                        'query' => $query,
                        'depth' => $this->state['current_depth'],
                    ];

                    // If the search added nothing new, move on to the next
                    // query; otherwise the same query would be re-requested
                    // on every iteration while the queue stays empty.
                    if (empty($this->urlQueue)) {
                        $this->rotateQuery();
                    }
                } else {
                    // No queries and no URLs, wait
                    yield [
                        'type' => 'idle',
                        'message' => 'Waiting for URLs to process',
                    ];
                    sleep(5);
                }
                continue;
            }

            // Yield URL for processing
            yield [
                'type' => 'url',
                'url' => $urlData['url'],
                'depth' => $urlData['depth'],
                'source' => $urlData['source'] ?? 'queue',
            ];

            $this->state['urls_processed']++;
            $processed = $this->state['urls_processed'];

            // Checkpoint at intervals
            if ($processed % $this->config['checkpoint_interval'] === 0) {
                $this->saveCheckpoint();
                yield [
                    'type' => 'checkpoint',
                    'urls_processed' => $this->state['urls_processed'],
                    'urls_in_queue' => count($this->urlQueue),
                ];
            }

            // Query rotation at intervals
            if (!empty($this->queries) &&
                $processed % $this->config['query_rotation_interval'] === 0) {
                $this->rotateQuery();
            }

            // Depth expansion at intervals; only report it when the depth
            // actually changed (it does not once max_depth is reached)
            if ($processed % $this->config['depth_expansion_interval'] === 0) {
                $previousDepth = $this->state['current_depth'];
                $this->expandDepth();
                if ($this->state['current_depth'] !== $previousDepth) {
                    yield [
                        'type' => 'depth_expanded',
                        'new_depth' => $this->state['current_depth'],
                    ];
                }
            }

            // Garbage collection at intervals
            if ($processed % $this->config['gc_interval'] === 0) {
                $this->runGarbageCollection();
            }
        }

        // Final checkpoint
        $this->saveCheckpoint();

        yield [
            'type' => 'stopped',
            'total_processed' => $this->state['urls_processed'],
            'total_discovered' => $this->state['urls_discovered'],
            'runtime_seconds' => microtime(true) - $this->state['start_time'],
        ];
    }

    /**
     * Add discovered URLs from crawler
     *
     * Increments the urls_discovered counter once per URL that was actually
     * queued (non-string, blank and already-seen URLs are skipped).
     *
     * @param array $urls  Discovered URLs
     * @param int   $depth Depth recorded for the queued URLs
     */
    public function addDiscoveredUrls(array $urls, int $depth = 0): void
    {
        $this->state['urls_discovered'] += $this->enqueueAll($urls, $depth, 'discovered');
    }

    /**
     * Report error (for auto-pause logic)
     *
     * After 'pause_on_errors' reported errors, sets a pause lasting
     * 'restart_delay_seconds' and resets the error counter.
     */
    public function reportError(): void
    {
        $this->state['errors_count']++;

        if ($this->state['errors_count'] >= $this->config['pause_on_errors']) {
            $this->state['pause_until'] = time() + $this->config['restart_delay_seconds'];
            $this->state['errors_count'] = 0;
        }
    }

    /**
     * Stop infinite mode
     *
     * Sets the status to "stopped" and saves a checkpoint.
     */
    public function stop(): void
    {
        $this->state['status'] = 'stopped';
        $this->saveCheckpoint();
    }

    /**
     * Pause infinite mode
     *
     * Sets the status to "paused" and saves a checkpoint.
     *
     * NOTE: start() only loops while the status is "running", so an active
     * generator leaves its loop after this call, saves a final checkpoint and
     * yields the "stopped" event.
     */
    public function pause(): void
    {
        $this->state['status'] = 'paused';
        $this->saveCheckpoint();
    }

    /**
     * Resume infinite mode
     *
     * Sets the status back to "running" and clears any error pause.
     */
    public function resume(): void
    {
        $this->state['status'] = 'running';
        $this->state['pause_until'] = null;
    }

    /**
     * Get current state
     *
     * @return array The tracked state plus queue_size, seen_urls_count,
     *               memory_mb and runtime_seconds (0 before start())
     */
    public function getState(): array
    {
        return array_merge($this->state, [
            'queue_size' => count($this->urlQueue),
            'seen_urls_count' => count($this->seenUrls),
            'memory_mb' => memory_get_usage(true) / 1024 / 1024,
            'runtime_seconds' => $this->state['start_time']
                ? microtime(true) - $this->state['start_time']
                : 0,
        ]);
    }

    /**
     * Queue every usable URL of a list.
     *
     * @param array  $urls   Candidate URLs; non-string and blank entries are skipped
     * @param int    $depth  Depth recorded for the queued URLs
     * @param string $source Source label recorded for the queued URLs
     *
     * @return int Number of URLs that were newly queued
     */
    private function enqueueAll(array $urls, int $depth, string $source): int
    {
        $added = 0;

        foreach ($urls as $url) {
            // addToQueue() is strictly typed; guard against mixed input
            if (!is_string($url) || trim($url) === '') {
                continue;
            }
            if ($this->addToQueue($url, $depth, $source)) {
                $added++;
            }
        }

        return $added;
    }

    /**
     * Add URL to queue
     *
     * @return bool True if queued, false if its normalized form was already seen
     */
    private function addToQueue(string $url, int $depth, string $source = 'discovered'): bool
    {
        $urlHash = md5($this->normalizeUrl($url));

        if (isset($this->seenUrls[$urlHash])) {
            return false;
        }

        $this->seenUrls[$urlHash] = true;
        $this->urlQueue[] = [
            'url' => $url,
            'depth' => $depth,
            'source' => $source,
            'added_at' => time(),
        ];

        return true;
    }

    /**
     * Get next URL from queue
     *
     * @return array|null The oldest queued entry, or null when the queue is empty
     */
    private function getNextUrl(): ?array
    {
        if (empty($this->urlQueue)) {
            return null;
        }

        // Get and remove first item (FIFO)
        return array_shift($this->urlQueue);
    }

    /**
     * Get next search query
     *
     * @return string The query at the current rotation index (wrapping
     *                around), or '' when no queries are set
     */
    private function getNextQuery(): string
    {
        if (empty($this->queries)) {
            return '';
        }

        return $this->queries[$this->state['current_query_index'] % count($this->queries)];
    }

    /**
     * Rotate to next query
     */
    private function rotateQuery(): void
    {
        $this->state['current_query_index']++;
    }

    /**
     * Expand depth limit
     *
     * Increments the current depth by one until 'max_depth' (default 10).
     */
    private function expandDepth(): void
    {
        $maxDepth = $this->config['max_depth'] ?? 10;
        if ($this->state['current_depth'] < $maxDepth) {
            $this->state['current_depth']++;
        }
    }

    /**
     * Check if memory limit is approaching
     *
     * @return bool True when usage exceeds 80% of 'max_memory_mb'
     */
    private function checkMemoryLimit(): bool
    {
        $currentMb = memory_get_usage(true) / 1024 / 1024;
        $limitMb = $this->config['max_memory_mb'];

        return $currentMb > ($limitMb * 0.8); // 80% threshold
    }

    /**
     * Run garbage collection and trim caches
     */
    private function runGarbageCollection(): void
    {
        // Trim seen URLs if too large (keep the 50000 most recently added)
        if (count($this->seenUrls) > 100000) {
            $this->seenUrls = array_slice($this->seenUrls, -50000, null, true);
        }

        // Force garbage collection
        gc_collect_cycles();
    }

    /**
     * Normalize URL for deduplication
     *
     * Drops the fragment, the http(s) scheme, a leading "www.", trailing
     * slashes of the path and tracking parameters (utm_*, ref, source,
     * fbclid, gclid). Only the host part is lowercased; path and remaining
     * query parameters keep their case and order.
     *
     * @param string $url Raw URL
     *
     * @return string Key used for duplicate detection (not a fetchable URL)
     */
    private function normalizeUrl(string $url): string
    {
        $url = trim($url);

        // The fragment does not identify a different resource
        $fragmentPos = strpos($url, '#');
        if ($fragmentPos !== false) {
            $url = substr($url, 0, $fragmentPos);
        }

        // Separate the query so trailing slashes and parameters can be
        // handled independently
        $query = '';
        $queryPos = strpos($url, '?');
        if ($queryPos !== false) {
            $query = substr($url, $queryPos + 1);
            $url = substr($url, 0, $queryPos);
        }

        $url = (string) preg_replace('#^https?://#i', '', $url);
        $url = (string) preg_replace('#^www\.#i', '', $url);

        // Host names are case-insensitive, paths are not
        $pathPos = strpos($url, '/');
        if ($pathPos === false) {
            $url = strtolower($url);
        } else {
            $url = strtolower(substr($url, 0, $pathPos)) . rtrim(substr($url, $pathPos), '/');
        }

        // Remove common tracking parameters by name
        $kept = [];
        foreach (explode('&', $query) as $pair) {
            if ($pair === '') {
                continue;
            }
            $name = strtolower(explode('=', $pair, 2)[0]);
            if (strncmp($name, 'utm_', 4) === 0 || in_array($name, self::TRACKING_PARAMS, true)) {
                continue;
            }
            $kept[] = $pair;
        }

        return $kept === [] ? $url : $url . '?' . implode('&', $kept);
    }

    /**
     * Save checkpoint to database
     *
     * Stores the state, the first 1000 queued entries and the queries as
     * JSON in crawler_checkpoints. Does nothing without a database connection
     * and job ID. Failures are logged and otherwise ignored (best effort).
     */
    private function saveCheckpoint(): void
    {
        if ($this->pdo === null || $this->jobId === null || $this->jobId === '') {
            return;
        }

        try {
            $checkpoint = [
                'state' => $this->state,
                'queue_sample' => array_slice($this->urlQueue, 0, 1000), // Save first 1000
                'queries' => $this->queries,
            ];

            // Substitute invalid UTF-8 instead of failing, and throw on any
            // other encoding error rather than storing a bogus value
            $payload = json_encode($checkpoint, JSON_THROW_ON_ERROR | JSON_INVALID_UTF8_SUBSTITUTE);

            $stmt = $this->pdo->prepare("
                INSERT INTO crawler_checkpoints (job_id, checkpoint_data, created_at)
                VALUES (?, ?, NOW())
                ON DUPLICATE KEY UPDATE
                    checkpoint_data = VALUES(checkpoint_data),
                    created_at = NOW()
            ");
            $stmt->execute([$this->jobId, $payload]);

            $this->state['last_checkpoint'] = time();

        } catch (\Exception $e) {
            // Checkpointing is best effort: keep crawling, but leave a trace
            error_log(sprintf(
                '[InfiniteMode] Failed to save checkpoint for job %s: %s',
                $this->jobId,
                $e->getMessage()
            ));
        }
    }

    /**
     * Restore from checkpoint
     *
     * Loads the newest checkpoint row for the job and restores counters,
     * queued entries and queries. The status, start_time and last_checkpoint
     * of the current run are kept. Restored queue entries are registered as
     * seen and entries already seen (e.g. seeds) are not queued twice.
     *
     * @return bool True if a checkpoint was found and applied
     */
    private function restoreCheckpoint(): bool
    {
        if ($this->pdo === null || $this->jobId === null || $this->jobId === '') {
            return false;
        }

        try {
            $stmt = $this->pdo->prepare("
                SELECT checkpoint_data
                FROM crawler_checkpoints
                WHERE job_id = ?
                ORDER BY created_at DESC
                LIMIT 1
            ");
            $stmt->execute([$this->jobId]);
            // Do not depend on the connection's default fetch mode
            $row = $stmt->fetch(PDO::FETCH_ASSOC);

            if (!is_array($row) || !isset($row['checkpoint_data'])) {
                return false;
            }

            $checkpoint = json_decode((string) $row['checkpoint_data'], true);
            if (!is_array($checkpoint) || $checkpoint === []) {
                return false;
            }

            // Restore state (known keys only, except per-run values)
            if (isset($checkpoint['state']) && is_array($checkpoint['state'])) {
                $saved = array_intersect_key($checkpoint['state'], $this->state);
                unset($saved['status'], $saved['start_time'], $saved['last_checkpoint']);
                $this->state = array_merge($this->state, $saved);
                $this->state['status'] = 'running'; // Always set to running on restore
            }

            // Restore queue sample
            if (isset($checkpoint['queue_sample']) && is_array($checkpoint['queue_sample'])) {
                foreach ($checkpoint['queue_sample'] as $urlData) {
                    if (!is_array($urlData)
                        || !isset($urlData['url'], $urlData['depth'])
                        || !is_string($urlData['url'])) {
                        continue;
                    }

                    $urlHash = md5($this->normalizeUrl($urlData['url']));
                    if (isset($this->seenUrls[$urlHash])) {
                        continue;
                    }

                    $this->seenUrls[$urlHash] = true;
                    $this->urlQueue[] = $urlData;
                }
            }

            // Restore queries (filtered and reindexed by setQueries())
            if (isset($checkpoint['queries']) && is_array($checkpoint['queries'])) {
                $this->setQueries($checkpoint['queries']);
            }

            return true;

        } catch (\Exception $e) {
            return false;
        }
    }
}
