-- =====================================================
-- FLOWB0T NEXUS DATABASE SCHEMA
-- Database: digupdog_FEED
-- Version: 1.0.0
-- Generated: 2026-01-25
-- =====================================================

-- =====================================================
-- SECTION 1: NEXUS TABLES (8 new tables)
-- =====================================================

-- 1. JOBS TABLE (Main crawler jobs)
CREATE TABLE IF NOT EXISTS nexus_jobs (
    -- One row per crawl job: its configuration, current status and running counters.
    id                  BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    job_uuid            CHAR(36) NOT NULL UNIQUE,
    user_id             BIGINT UNSIGNED DEFAULT NULL,
    name                VARCHAR(255) NOT NULL,
    description         TEXT,

    -- Job Configuration
    job_type            ENUM('search','domain','sitemap','direct','mixed') NOT NULL,
    search_terms        LONGTEXT,           -- JSON array of search terms
    target_domains      LONGTEXT,           -- JSON array of target domains
    direct_urls         LONGTEXT,           -- JSON array of direct URLs

    -- Crawler Settings
    max_depth           INT DEFAULT 3,
    max_pages           INT DEFAULT 10000,
    max_concurrent      INT DEFAULT 100,
    request_timeout     INT DEFAULT 10,     -- presumably seconds, like the default_request_timeout setting; confirm
    delay_between       FLOAT DEFAULT 0.5,  -- presumably seconds, like the default_delay_between setting; confirm
    respect_robots      TINYINT(1) DEFAULT 1,
    follow_redirects    TINYINT(1) DEFAULT 1,

    -- Search Settings
    search_provider     ENUM('bing','google','duckduckgo','all') DEFAULT 'bing',
    search_pages        INT DEFAULT 10,
    -- JSON defaults are written as parenthesized expressions: MySQL rejects a
    -- bare literal default on a JSON column (error 1101) and only accepts an
    -- expression default (MySQL 8.0.13+); MariaDB 10.2.1+ accepts this form too.
    search_types        JSON DEFAULT ('["web","images","videos"]'),

    -- Filter Settings
    relevance_threshold INT DEFAULT 2,
    min_content_length  INT DEFAULT 100,
    allowed_domains     LONGTEXT,           -- JSON array (whitelist)
    blocked_domains     LONGTEXT,           -- JSON array (blacklist)
    allowed_languages   JSON DEFAULT ('["en","pt","es"]'),
    content_types       JSON DEFAULT ('["article","blog","news"]'),

    -- Import Settings
    auto_import         TINYINT(1) DEFAULT 1,
    import_to_pinfeeds  TINYINT(1) DEFAULT 1,
    create_users        TINYINT(1) DEFAULT 1,
    dedupe_by           ENUM('url','title','content_hash','all') DEFAULT 'url',

    -- Status & Progress
    status              ENUM('pending','queued','running','paused','completed','failed','cancelled') DEFAULT 'pending',
    progress_percent    DECIMAL(5,2) DEFAULT 0.00,
    current_phase       INT DEFAULT 0,

    -- Statistics (all counters start at 0)
    total_seeds         INT DEFAULT 0,
    total_queued        INT DEFAULT 0,
    total_processed     INT DEFAULT 0,
    total_successful    INT DEFAULT 0,
    total_failed        INT DEFAULT 0,
    total_skipped       INT DEFAULT 0,
    total_imported      INT DEFAULT 0,
    total_duplicates    INT DEFAULT 0,

    -- Timing
    started_at          DATETIME DEFAULT NULL,
    paused_at           DATETIME DEFAULT NULL,
    completed_at        DATETIME DEFAULT NULL,
    estimated_completion DATETIME DEFAULT NULL,

    -- Metadata
    priority            INT DEFAULT 5,
    created_at          DATETIME DEFAULT CURRENT_TIMESTAMP,
    -- No DEFAULT is declared, so updated_at stays NULL until the row is first updated.
    updated_at          DATETIME ON UPDATE CURRENT_TIMESTAMP,

    INDEX idx_status (status),
    INDEX idx_user_id (user_id),
    INDEX idx_created_at (created_at),
    INDEX idx_priority_status (priority DESC, status)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;

-- 2. JOB QUEUE TABLE (URLs to process)
CREATE TABLE IF NOT EXISTS nexus_queue (
    -- Per-job URL work list. A (job_id, url_hash) pair can appear only once,
    -- and every row is deleted together with its parent nexus_jobs row.
    id                  BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    job_id              BIGINT UNSIGNED NOT NULL,
    url_hash            CHAR(64) NOT NULL,      -- presumably a hex digest of url; confirm the algorithm with the writer
    url                 VARCHAR(2048) NOT NULL,
    domain              VARCHAR(255) DEFAULT NULL,

    -- Origin of the queued URL
    source_type         ENUM('seed','search','discovered') DEFAULT 'seed',
    search_term         VARCHAR(500),
    parent_url_hash     CHAR(64),
    depth               INTEGER DEFAULT 0,

    -- Processing state
    priority            INTEGER DEFAULT 5,
    status              ENUM('pending','processing','completed','failed','skipped') DEFAULT 'pending',
    worker_id           VARCHAR(100),
    retry_count         INTEGER DEFAULT 0,
    last_error          TEXT,

    -- Timestamps (scheduled_for defaults to the insert time)
    scheduled_for       DATETIME DEFAULT CURRENT_TIMESTAMP,
    started_at          DATETIME,
    completed_at        DATETIME,

    PRIMARY KEY (id),
    UNIQUE KEY unique_job_url (job_id, url_hash),
    KEY idx_job_status (job_id, status),
    KEY idx_status_scheduled (status, scheduled_for),
    KEY idx_domain (domain),

    FOREIGN KEY (job_id) REFERENCES nexus_jobs (id) ON DELETE CASCADE
) ENGINE = InnoDB
  DEFAULT CHARACTER SET = utf8mb4
  COLLATE = utf8mb4_unicode_ci;

-- 3. RESULTS TABLE (Crawled content)
CREATE TABLE IF NOT EXISTS nexus_results (
    -- Crawl output: at most one row per (job_id, url_hash). A row is removed by
    -- cascade when either its job or its originating queue entry is deleted.
    id                  BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    job_id              BIGINT UNSIGNED NOT NULL,
    queue_id            BIGINT UNSIGNED NOT NULL,
    url_hash            CHAR(64) NOT NULL,
    url                 VARCHAR(2048) NOT NULL,

    -- HTTP response
    http_status         INTEGER,
    content_type        VARCHAR(100),
    content_length      INTEGER,
    response_time_ms    INTEGER,
    final_url           VARCHAR(2048),

    -- Content extracted from the page
    title               VARCHAR(500),
    description         TEXT,
    content             LONGTEXT,
    content_hash        CHAR(64),
    thumbnail           VARCHAR(2048),
    favicon             VARCHAR(500),
    canonical_url       VARCHAR(2048),

    -- Page metadata
    author              VARCHAR(255),
    publish_date        DATETIME,
    language            VARCHAR(10),
    word_count          INTEGER,
    reading_time        INTEGER,

    -- Relevance scoring
    relevance_score     DECIMAL(5,2),
    matched_terms       JSON,

    -- Link counters (start at 0)
    internal_links      INTEGER DEFAULT 0,
    external_links      INTEGER DEFAULT 0,
    image_links         INTEGER DEFAULT 0,
    video_links         INTEGER DEFAULT 0,

    -- Import bookkeeping
    import_status       ENUM('pending','imported','skipped','failed') DEFAULT 'pending',
    imported_to         JSON,               -- Which tables imported to
    pinfeeds_id         BIGINT UNSIGNED,

    -- Timestamps (crawled_at defaults to the insert time)
    crawled_at          DATETIME DEFAULT CURRENT_TIMESTAMP,
    imported_at         DATETIME,

    PRIMARY KEY (id),
    UNIQUE KEY unique_job_url (job_id, url_hash),
    -- NOTE(review): job_id is also the leading column of unique_job_url, so
    -- idx_job_id may be redundant; confirm nothing hints it before dropping.
    KEY idx_job_id (job_id),
    KEY idx_import_status (import_status),
    KEY idx_relevance (relevance_score DESC),

    FOREIGN KEY (job_id) REFERENCES nexus_jobs (id) ON DELETE CASCADE,
    FOREIGN KEY (queue_id) REFERENCES nexus_queue (id) ON DELETE CASCADE
) ENGINE = InnoDB
  DEFAULT CHARACTER SET = utf8mb4
  COLLATE = utf8mb4_unicode_ci;

-- 4. DISCOVERED LINKS TABLE
CREATE TABLE IF NOT EXISTS nexus_discovered_links (
    -- Link edges found during a crawl: one row per (job, source hash, target hash).
    -- Rows are deleted together with their parent nexus_jobs row.
    id                  BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    job_id              BIGINT UNSIGNED NOT NULL,
    source_url_hash     CHAR(64) NOT NULL,
    target_url_hash     CHAR(64) NOT NULL,
    target_url          VARCHAR(2048) NOT NULL,
    link_type           ENUM('internal','external') NOT NULL,
    anchor_text         VARCHAR(500) DEFAULT NULL,
    added_to_queue      BOOLEAN DEFAULT FALSE,
    discovered_at       DATETIME DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (id),
    UNIQUE KEY unique_source_target (job_id, source_url_hash, target_url_hash),
    -- NOTE(review): job_id already leads unique_source_target, so idx_job_id
    -- may be redundant; confirm nothing hints it before dropping.
    KEY idx_job_id (job_id),

    FOREIGN KEY (job_id) REFERENCES nexus_jobs (id) ON DELETE CASCADE
) ENGINE = InnoDB
  DEFAULT CHARACTER SET = utf8mb4
  COLLATE = utf8mb4_unicode_ci;

-- 5. EXPORTS TABLE
CREATE TABLE IF NOT EXISTS nexus_exports (
    -- Export requests for a job's results. Rows are deleted together with
    -- their parent nexus_jobs row.
    id                  BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    job_id              BIGINT UNSIGNED NOT NULL,
    user_id             BIGINT UNSIGNED,

    -- What to export
    export_type         ENUM('csv','json','xml','sql','xlsx') NOT NULL,
    export_scope        ENUM('all','successful','imported','failed') DEFAULT 'successful',

    -- Progress
    status              ENUM('pending','processing','completed','failed') DEFAULT 'pending',
    progress_percent    DECIMAL(5,2) DEFAULT 0.00,

    -- Produced file
    file_path           VARCHAR(500) DEFAULT NULL,
    file_size           BIGINT DEFAULT NULL,
    total_records       INTEGER DEFAULT 0,

    -- Timestamps (created_at defaults to the insert time)
    started_at          DATETIME,
    completed_at        DATETIME,
    expires_at          DATETIME,

    created_at          DATETIME DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (id),
    KEY idx_job_id (job_id),
    KEY idx_status (status),

    FOREIGN KEY (job_id) REFERENCES nexus_jobs (id) ON DELETE CASCADE
) ENGINE = InnoDB
  DEFAULT CHARACTER SET = utf8mb4
  COLLATE = utf8mb4_unicode_ci;

-- 6. LOGS TABLE
CREATE TABLE IF NOT EXISTS nexus_logs (
    -- Log entries, optionally tied to a job. job_id is nullable and carries no
    -- FOREIGN KEY, so log rows are not removed by the cascade when a job is deleted.
    id                  BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
    job_id              BIGINT UNSIGNED,
    log_level           ENUM('DEBUG','INFO','WARNING','ERROR','CRITICAL') NOT NULL,
    category            VARCHAR(50) NOT NULL,
    message             TEXT NOT NULL,
    context             JSON,
    -- Millisecond precision; defaults to the insert time.
    created_at          DATETIME(3) DEFAULT CURRENT_TIMESTAMP(3),

    PRIMARY KEY (id),
    KEY idx_job_id (job_id),
    -- NOTE(review): no index leads with created_at alone; if retention cleanup
    -- filters only by age, confirm whether a dedicated index is wanted.
    KEY idx_level_created (log_level, created_at),
    KEY idx_category (category)
) ENGINE = InnoDB
  DEFAULT CHARACTER SET = utf8mb4
  COLLATE = utf8mb4_unicode_ci;

-- 7. SETTINGS TABLE
CREATE TABLE IF NOT EXISTS nexus_settings (
    -- Key/value configuration store: setting_key is unique, the value is kept
    -- as text and setting_type records how it should be interpreted.
    id                  INTEGER UNSIGNED NOT NULL AUTO_INCREMENT,
    setting_key         VARCHAR(100) NOT NULL,
    setting_value       LONGTEXT,
    setting_type        ENUM('string','int','float','bool','json') DEFAULT 'string',
    description         VARCHAR(500) DEFAULT NULL,
    -- No DEFAULT is declared, so updated_at stays NULL until the row is first updated.
    updated_at          DATETIME ON UPDATE CURRENT_TIMESTAMP,

    PRIMARY KEY (id),
    UNIQUE KEY setting_key (setting_key)
) ENGINE = InnoDB
  DEFAULT CHARACTER SET = utf8mb4
  COLLATE = utf8mb4_unicode_ci;

-- 8. SESSIONS TABLE (for multi-user)
CREATE TABLE IF NOT EXISTS nexus_sessions (
    -- One row per login session. session_token is unique and expires_at is mandatory.
    id                  BIGINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    user_id             BIGINT UNSIGNED NOT NULL,
    -- The UNIQUE constraint already creates an index on session_token, so no
    -- additional lookup index is declared for it below.
    session_token       CHAR(64) NOT NULL UNIQUE,
    ip_address          VARCHAR(45),
    user_agent          VARCHAR(500),
    -- Defaults to the insert time; there is no ON UPDATE clause, so later
    -- changes must be written explicitly.
    last_activity       DATETIME DEFAULT CURRENT_TIMESTAMP,
    expires_at          DATETIME NOT NULL,

    INDEX idx_user_id (user_id),
    INDEX idx_expires (expires_at)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;

-- =====================================================
-- SECTION 2: DEFAULT SETTINGS
-- =====================================================
-- Seed the default settings. Rows are matched on the unique setting_key; when a
-- key already exists only its setting_value is replaced with the value listed
-- here (setting_type and description are left untouched).
-- NOTE(review): re-running this script therefore resets any setting_value that
-- was changed after installation; confirm that is intended.
INSERT INTO nexus_settings
    (setting_key, setting_value, setting_type, description)
VALUES
    ('max_concurrent_jobs',     '10',      'int',   'Maximum concurrent running jobs'),
    ('max_concurrent_workers',  '500',     'int',   'Maximum concurrent crawler workers'),
    ('default_request_timeout', '10',      'int',   'Default HTTP request timeout in seconds'),
    ('default_delay_between',   '0.5',     'float', 'Default delay between requests in seconds'),
    ('max_queue_size',          '1000000', 'int',   'Maximum URLs in queue per job'),
    ('auto_cleanup_days',       '30',      'int',   'Auto-delete completed jobs after X days'),
    ('export_retention_days',   '7',       'int',   'Auto-delete export files after X days'),
    ('log_retention_days',      '14',      'int',   'Auto-delete logs after X days')
ON DUPLICATE KEY UPDATE
    setting_value = VALUES(setting_value);

-- =====================================================
-- SECTION 3: VERIFICATION QUERIES
-- =====================================================

-- Verify all NEXUS tables were created
-- Run this after executing the schema:
-- SHOW TABLES LIKE 'nexus_%';

-- Expected result:
-- nexus_discovered_links
-- nexus_exports
-- nexus_jobs
-- nexus_logs
-- nexus_queue
-- nexus_results
-- nexus_sessions
-- nexus_settings

-- Verify settings were inserted:
-- SELECT * FROM nexus_settings;
