# syncio/app/queue.py
import json
import threading
import time
import uuid
from dataclasses import dataclass
from datetime import datetime, timedelta, time as dt_time
from typing import Any, Dict, List, Optional
from .config import Config


@dataclass
class MigrationJob:
    """Serializable representation of a migration job."""
    job_id: str
    created_at: datetime
    run_at: datetime
    tables: Optional[List[str]]
    send_email: bool
    dry_run: Optional[bool]
    read_limit: Optional[int]
    force_full: bool
    status: str
    started_at: Optional[datetime]
    finished_at: Optional[datetime]
    report: Optional[Dict[str, Any]]
    error: Optional[str]
    queue_sequence: int
    schedule_id: Optional[str]

    def to_dict(self) -> Dict[str, Any]:
        return {
            'job_id': self.job_id,
            'created_at': self.created_at.isoformat(),
            'run_at': self.run_at.isoformat(),
            'tables': self.tables,
            'send_email': self.send_email,
            'dry_run': self.dry_run,
            'read_limit': self.read_limit,
            'force_full': self.force_full,
            'status': self.status,
            'started_at': self.started_at.isoformat() if self.started_at else None,
            'finished_at': self.finished_at.isoformat() if self.finished_at else None,
            'report': self.report,
            'error': self.error,
            'queue_sequence': self.queue_sequence,
            'schedule_id': self.schedule_id,
        }

@dataclass
class MigrationSchedule:
    """Serializable representation of a schedule."""
    schedule_id: str
    created_at: datetime
    updated_at: datetime
    name: Optional[str]
    schedule_type: str
    enabled: bool
    catch_up_missed_runs: bool
    initial_force_full: bool
    tables: Optional[List[str]]
    send_email: bool
    dry_run: Optional[bool]
    read_limit: Optional[int]
    interval_seconds: Optional[int]
    daily_time: Optional[str]
    start_at: Optional[datetime]
    next_run_at: datetime
    last_enqueued_at: Optional[datetime]
    last_job_id: Optional[str]

    def to_dict(self) -> Dict[str, Any]:
        return {
            'schedule_id': self.schedule_id,
            'created_at': self.created_at.isoformat(),
            'updated_at': self.updated_at.isoformat(),
            'name': self.name,
            'schedule_type': self.schedule_type,
            'enabled': self.enabled,
            'catch_up_missed_runs': self.catch_up_missed_runs,
            'initial_force_full': self.initial_force_full,
            'tables': self.tables,
            'send_email': self.send_email,
            'dry_run': self.dry_run,
            'read_limit': self.read_limit,
            'interval_seconds': self.interval_seconds,
            'daily_time': self.daily_time,
            'start_at': self.start_at.isoformat() if self.start_at else None,
            'next_run_at': self.next_run_at.isoformat(),
            'last_enqueued_at': self.last_enqueued_at.isoformat() if self.last_enqueued_at else None,
            'last_job_id': self.last_job_id,
        }

class MigrationJobQueue:
    """Persistent migration queue and scheduler on top of PostgreSQL."""

    JOBS_TABLE = 'migration_jobs'
    SCHEDULES_TABLE = 'migration_schedules'

    def __init__(self):
        self.lock = threading.Lock()
        self.condition = threading.Condition(self.lock)
        self.engine = None
        self.schema_ready = False
        self.worker_started = False
        self.worker = None

    def start(self):
        """Lazily start the worker thread."""
        with self.condition:
            if self.worker_started:
                return
            self.worker_started = True
            self.worker = threading.Thread(target=self._worker_loop, daemon=True)
            self.worker.start()
            self.condition.notify_all()
    def enqueue(
        self,
        tables: Optional[List[str]] = None,
        send_email: bool = True,
        dry_run: Optional[bool] = None,
        read_limit: Optional[int] = None,
        force_full: bool = False,
        run_at: Optional[datetime] = None,
        delay_seconds: Optional[int] = None,
        schedule_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        self.start()
        self._ensure_schema()
        scheduled_at = self._resolve_run_at(run_at=run_at, delay_seconds=delay_seconds)
        job_id = str(uuid.uuid4())
        created_at = datetime.now()
        sql = self._text(f"""
            INSERT INTO {self._qualified_table(self.JOBS_TABLE)}
                (job_id, schedule_id, created_at, run_at, tables_json, send_email, dry_run, read_limit, force_full, status)
            VALUES
                (:job_id, :schedule_id, :created_at, :run_at, CAST(:tables_json AS jsonb), :send_email, :dry_run, :read_limit, :force_full, 'queued')
            RETURNING *
        """)
        with self._get_engine().connect() as conn:
            row = conn.execute(sql, {
                'job_id': job_id,
                'schedule_id': schedule_id,
                'created_at': created_at,
                'run_at': scheduled_at,
                'tables_json': json.dumps(tables) if tables is not None else None,
                'send_email': send_email,
                'dry_run': dry_run,
                'read_limit': read_limit,
                'force_full': force_full,
            }).mappings().first()
            conn.commit()
        with self.condition:
            self.condition.notify_all()
        return self._job_from_row(row).to_dict()
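
    # Illustrative call (table names here are placeholders): enqueue a dry run
    # for two tables, starting one minute from now:
    #   migration_queue.enqueue(tables=['users', 'orders'], dry_run=True,
    #                           delay_seconds=60)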
    def list_jobs(self, limit: int = 100) -> List[Dict[str, Any]]:
        self.start()
        self._ensure_schema()
        sql = self._text(f"""
            SELECT *
            FROM {self._qualified_table(self.JOBS_TABLE)}
            ORDER BY queue_sequence DESC
            LIMIT :limit
        """)
        with self._get_engine().connect() as conn:
            rows = conn.execute(sql, {'limit': limit}).mappings().all()
        return [self._job_from_row(row).to_dict() for row in rows]

    def get_job(self, job_id: str) -> Optional[Dict[str, Any]]:
        self.start()
        self._ensure_schema()
        sql = self._text(f"SELECT * FROM {self._qualified_table(self.JOBS_TABLE)} WHERE job_id = :job_id")
        with self._get_engine().connect() as conn:
            row = conn.execute(sql, {'job_id': job_id}).mappings().first()
        return self._job_from_row(row).to_dict() if row else None
    def create_schedule(
        self,
        schedule_type: str,
        tables: Optional[List[str]] = None,
        send_email: bool = True,
        dry_run: Optional[bool] = None,
        read_limit: Optional[int] = None,
        interval_seconds: Optional[int] = None,
        daily_time: Optional[str] = None,
        start_at: Optional[datetime] = None,
        name: Optional[str] = None,
        enabled: bool = True,
        catch_up_missed_runs: bool = False,
        initial_force_full: bool = False,
    ) -> Dict[str, Any]:
        self.start()
        self._ensure_schema()
        schedule_id = str(uuid.uuid4())
        created_at = datetime.now()
        normalized_start_at = self._normalize_datetime(start_at)
        next_run_at = self._resolve_next_schedule_run_at(
            schedule_type=schedule_type,
            interval_seconds=interval_seconds,
            daily_time=daily_time,
            start_at=normalized_start_at,
            reference=created_at,
        )
        sql = self._text(f"""
            INSERT INTO {self._qualified_table(self.SCHEDULES_TABLE)}
                (
                    schedule_id, created_at, updated_at, name, schedule_type, enabled,
                    catch_up_missed_runs, initial_force_full, tables_json, send_email, dry_run, read_limit, interval_seconds,
                    daily_time, start_at, next_run_at
                )
            VALUES
                (
                    :schedule_id, :created_at, :updated_at, :name, :schedule_type, :enabled,
                    :catch_up_missed_runs, :initial_force_full,
                    CAST(:tables_json AS jsonb), :send_email, :dry_run, :read_limit, :interval_seconds,
                    :daily_time, :start_at, :next_run_at
                )
            RETURNING *
        """)
        with self._get_engine().connect() as conn:
            row = conn.execute(sql, {
                'schedule_id': schedule_id,
                'created_at': created_at,
                'updated_at': created_at,
                'name': name,
                'schedule_type': schedule_type,
                'enabled': enabled,
                'catch_up_missed_runs': catch_up_missed_runs,
                'initial_force_full': initial_force_full,
                'tables_json': json.dumps(tables) if tables is not None else None,
                'send_email': send_email,
                'dry_run': dry_run,
                'read_limit': read_limit,
                'interval_seconds': interval_seconds,
                'daily_time': daily_time,
                'start_at': normalized_start_at,
                'next_run_at': next_run_at,
            }).mappings().first()
            conn.commit()
        with self.condition:
            self.condition.notify_all()
        return self._schedule_from_row(row).to_dict()
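
    # Illustrative call: run daily at 02:30 local time, without e-mail, and
    # catch up runs missed while the service was down:
    #   migration_queue.create_schedule(schedule_type='daily', daily_time='02:30',
    #                                   send_email=False, catch_up_missed_runs=True)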
    def list_schedules(self) -> List[Dict[str, Any]]:
        self.start()
        self._ensure_schema()
        sql = self._text(f"SELECT * FROM {self._qualified_table(self.SCHEDULES_TABLE)} ORDER BY next_run_at, created_at")
        with self._get_engine().connect() as conn:
            rows = conn.execute(sql).mappings().all()
        return [self._schedule_from_row(row).to_dict() for row in rows]

    def get_schedule(self, schedule_id: str) -> Optional[Dict[str, Any]]:
        self.start()
        self._ensure_schema()
        sql = self._text(f"SELECT * FROM {self._qualified_table(self.SCHEDULES_TABLE)} WHERE schedule_id = :schedule_id")
        with self._get_engine().connect() as conn:
            row = conn.execute(sql, {'schedule_id': schedule_id}).mappings().first()
        return self._schedule_from_row(row).to_dict() if row else None
    def get_status(self) -> Dict[str, Any]:
        self.start()
        self._ensure_schema()
        with self._get_engine().connect() as conn:
            running_job = conn.execute(self._text(f"""
                SELECT *
                FROM {self._qualified_table(self.JOBS_TABLE)}
                WHERE status = 'running'
                ORDER BY started_at DESC
                LIMIT 1
            """)).mappings().first()
            queued_jobs = conn.execute(self._text(f"""
                SELECT COUNT(*)
                FROM {self._qualified_table(self.JOBS_TABLE)}
                WHERE status = 'queued'
            """)).scalar()
            schedules = conn.execute(self._text(f"""
                SELECT COUNT(*)
                FROM {self._qualified_table(self.SCHEDULES_TABLE)}
                WHERE enabled = TRUE
            """)).scalar()
        return {
            'running': running_job is not None,
            'running_job': self._job_from_row(running_job).to_dict() if running_job else None,
            'queued_jobs': int(queued_jobs or 0),
            'enabled_schedules': int(schedules or 0),
        }
    def _get_engine(self):
        if self.engine is None:
            from sqlalchemy import create_engine
            self.engine = create_engine(Config.POSTGRES_CONNECTION_STRING)
        return self.engine

    def _text(self, sql: str):
        from sqlalchemy import text
        return text(sql)

    def _quote_identifier(self, identifier: str) -> str:
        return '"' + identifier.replace('"', '""') + '"'

    def _qualified_table(self, table_name: str) -> str:
        return f'{self._quote_identifier(Config.REPLICATOR_SCHEMA)}.{self._quote_identifier(table_name)}'
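
    # e.g. _quote_identifier('my"schema') returns '"my""schema"': embedded double
    # quotes are doubled, per the SQL rule for quoted identifiers, so schema and
    # table names can be interpolated into the DDL/DML strings above safely.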
    def _ensure_schema(self):
        if self.schema_ready:
            return
        with self.lock:
            # Double-checked locking: only the first caller pays for the DDL.
            if self.schema_ready:
                return
            with self._get_engine().connect() as conn:
                conn.execute(self._text(
                    f'CREATE SCHEMA IF NOT EXISTS {self._quote_identifier(Config.REPLICATOR_SCHEMA)}'
                ))
                conn.execute(self._text(f"""
                    CREATE TABLE IF NOT EXISTS {self._qualified_table(self.JOBS_TABLE)} (
                        job_id text PRIMARY KEY,
                        schedule_id text NULL,
                        created_at timestamp NOT NULL,
                        run_at timestamp NOT NULL,
                        tables_json jsonb NULL,
                        send_email boolean NOT NULL DEFAULT TRUE,
                        dry_run boolean NULL,
                        read_limit integer NULL,
                        force_full boolean NOT NULL DEFAULT FALSE,
                        status text NOT NULL,
                        started_at timestamp NULL,
                        finished_at timestamp NULL,
                        report_json jsonb NULL,
                        error text NULL,
                        queue_sequence bigint GENERATED ALWAYS AS IDENTITY
                    )
                """))
                conn.execute(self._text(f"""
                    CREATE INDEX IF NOT EXISTS idx_{self.JOBS_TABLE}_status_run_at
                    ON {self._qualified_table(self.JOBS_TABLE)} (status, run_at, queue_sequence)
                """))
                conn.execute(self._text(f"""
                    CREATE TABLE IF NOT EXISTS {self._qualified_table(self.SCHEDULES_TABLE)} (
                        schedule_id text PRIMARY KEY,
                        created_at timestamp NOT NULL,
                        updated_at timestamp NOT NULL,
                        name text NULL,
                        schedule_type text NOT NULL,
                        enabled boolean NOT NULL DEFAULT TRUE,
                        catch_up_missed_runs boolean NOT NULL DEFAULT FALSE,
                        initial_force_full boolean NOT NULL DEFAULT FALSE,
                        tables_json jsonb NULL,
                        send_email boolean NOT NULL DEFAULT TRUE,
                        dry_run boolean NULL,
                        read_limit integer NULL,
                        interval_seconds integer NULL,
                        daily_time text NULL,
                        start_at timestamp NULL,
                        next_run_at timestamp NOT NULL,
                        last_enqueued_at timestamp NULL,
                        last_job_id text NULL
                    )
                """))
                # Idempotent in-place migrations for tables created by older
                # versions that predate these columns.
                conn.execute(self._text(f"""
                    ALTER TABLE {self._qualified_table(self.SCHEDULES_TABLE)}
                    ADD COLUMN IF NOT EXISTS catch_up_missed_runs boolean NOT NULL DEFAULT FALSE
                """))
                conn.execute(self._text(f"""
                    ALTER TABLE {self._qualified_table(self.SCHEDULES_TABLE)}
                    ADD COLUMN IF NOT EXISTS initial_force_full boolean NOT NULL DEFAULT FALSE
                """))
                conn.execute(self._text(f"""
                    ALTER TABLE {self._qualified_table(self.JOBS_TABLE)}
                    ADD COLUMN IF NOT EXISTS force_full boolean NOT NULL DEFAULT FALSE
                """))
                conn.execute(self._text(f"""
                    CREATE INDEX IF NOT EXISTS idx_{self.SCHEDULES_TABLE}_enabled_next_run_at
                    ON {self._qualified_table(self.SCHEDULES_TABLE)} (enabled, next_run_at)
                """))
                conn.commit()
            self.schema_ready = True
    def _resolve_run_at(
        self,
        run_at: Optional[datetime] = None,
        delay_seconds: Optional[int] = None,
    ) -> datetime:
        if run_at and delay_seconds is not None:
            raise ValueError("Specify either run_at or delay_seconds")
        if delay_seconds is not None:
            return datetime.now() + timedelta(seconds=delay_seconds)
        if run_at is None:
            return datetime.now()
        return self._normalize_datetime(run_at)
    def _resolve_next_schedule_run_at(
        self,
        schedule_type: str,
        interval_seconds: Optional[int],
        daily_time: Optional[str],
        start_at: Optional[datetime],
        reference: datetime,
    ) -> datetime:
        normalized_reference = self._normalize_datetime(reference)
        normalized_start_at = self._normalize_datetime(start_at) if start_at else None
        baseline = normalized_start_at or normalized_reference
        if schedule_type == 'interval':
            if not interval_seconds or interval_seconds <= 0:
                raise ValueError("interval_seconds must be greater than 0 for interval schedule")
            return baseline if baseline > normalized_reference else normalized_reference + timedelta(seconds=interval_seconds)
        if schedule_type == 'daily':
            if not daily_time:
                raise ValueError("daily_time is required for daily schedule")
            parsed_time = self._parse_daily_time(daily_time)
            candidate_date = baseline.date()
            candidate = datetime.combine(candidate_date, parsed_time)
            if normalized_start_at and candidate < normalized_start_at:
                candidate = datetime.combine(normalized_start_at.date(), parsed_time)
            if candidate <= normalized_reference:
                candidate = candidate + timedelta(days=1)
            return candidate
        raise ValueError("schedule_type must be one of: interval, daily")
    def _next_schedule_run_from_row(self, row: Dict[str, Any], reference: datetime) -> datetime:
        schedule_type = row['schedule_type']
        if schedule_type == 'interval':
            return reference + timedelta(seconds=int(row['interval_seconds']))
        if schedule_type == 'daily':
            parsed_time = self._parse_daily_time(row['daily_time'])
            candidate = datetime.combine(reference.date(), parsed_time)
            if candidate <= reference:
                candidate += timedelta(days=1)
            return candidate
        raise ValueError(f"Unsupported schedule_type: {schedule_type}")
    def _parse_daily_time(self, raw_value: str) -> dt_time:
        parts = raw_value.split(':')
        if len(parts) not in (2, 3):
            raise ValueError("daily_time must be HH:MM or HH:MM:SS")
        hour = int(parts[0])
        minute = int(parts[1])
        second = int(parts[2]) if len(parts) == 3 else 0
        return dt_time(hour=hour, minute=minute, second=second)
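
    # e.g. _parse_daily_time('02:30') -> time(2, 30) and
    # _parse_daily_time('02:30:15') -> time(2, 30, 15); malformed values raise
    # ValueError from either the length check or int().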
    def _normalize_datetime(self, value: Optional[datetime]) -> Optional[datetime]:
        if value is None:
            return None
        if value.tzinfo is not None:
            # Columns are naive timestamps compared against datetime.now(), so
            # convert aware values to local time and drop the tzinfo.
            return value.astimezone().replace(tzinfo=None)
        return value
    def _worker_loop(self):
        self._ensure_schema()
        while True:
            try:
                self._materialize_due_schedules()
                job_row = self._claim_next_due_job()
                if job_row:
                    self._execute_job(self._job_from_row(job_row))
                    continue
            except Exception:
                # Transient failures (e.g. the database being briefly unreachable)
                # must not kill the daemon thread; back off and retry.
                time.sleep(1.0)
                continue
            # Nothing due: sleep until the next poll, or until enqueue() /
            # create_schedule() signals new work via the condition variable.
            with self.condition:
                self.condition.wait(timeout=Config.QUEUE_POLL_SECONDS)
    def _materialize_due_schedules(self):
        now = datetime.now()
        grace_cutoff = now - timedelta(seconds=Config.SCHEDULE_GRACE_SECONDS)
        with self._get_engine().connect() as conn:
            due_rows = conn.execute(self._text(f"""
                SELECT *
                FROM {self._qualified_table(self.SCHEDULES_TABLE)}
                WHERE enabled = TRUE
                  AND next_run_at <= :now
                ORDER BY next_run_at, created_at
                FOR UPDATE
            """), {'now': now}).mappings().all()
            for row in due_rows:
                # A run is "missed" if it is older than the grace window; missed
                # runs only produce a job when catch_up_missed_runs is set.
                missed_run = row['next_run_at'] < grace_cutoff
                next_run_at = self._next_schedule_run_from_row(row, now)
                job_id = row['last_job_id']
                force_full = bool(row.get('initial_force_full', False))
                if not missed_run or row['catch_up_missed_runs']:
                    job_id = str(uuid.uuid4())
                    conn.execute(self._text(f"""
                        INSERT INTO {self._qualified_table(self.JOBS_TABLE)}
                            (job_id, schedule_id, created_at, run_at, tables_json, send_email, dry_run, read_limit, force_full, status)
                        VALUES
                            (
                                :job_id, :schedule_id, :created_at, :run_at, CAST(:tables_json AS jsonb),
                                :send_email, :dry_run, :read_limit, :force_full, 'queued'
                            )
                    """), {
                        'job_id': job_id,
                        'schedule_id': row['schedule_id'],
                        'created_at': now,
                        'run_at': now,
                        'tables_json': json.dumps(row['tables_json']) if row['tables_json'] is not None else None,
                        'send_email': row['send_email'],
                        'dry_run': row['dry_run'],
                        'read_limit': row['read_limit'],
                        'force_full': force_full,
                    })
                conn.execute(self._text(f"""
                    UPDATE {self._qualified_table(self.SCHEDULES_TABLE)}
                    SET updated_at = :updated_at,
                        last_enqueued_at = :last_enqueued_at,
                        last_job_id = :last_job_id,
                        initial_force_full = CASE
                            WHEN :reset_initial_force_full THEN FALSE
                            ELSE initial_force_full
                        END,
                        next_run_at = :next_run_at
                    WHERE schedule_id = :schedule_id
                """), {
                    'updated_at': now,
                    'last_enqueued_at': now if (not missed_run or row['catch_up_missed_runs']) else row['last_enqueued_at'],
                    'last_job_id': job_id,
                    'reset_initial_force_full': (not missed_run or row['catch_up_missed_runs']) and force_full,
                    'next_run_at': next_run_at,
                    'schedule_id': row['schedule_id'],
                })
            conn.commit()
        if due_rows:
            with self.condition:
                self.condition.notify_all()
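
    # The claim below is a single UPDATE over a locking CTE: FOR UPDATE SKIP
    # LOCKED means that if several workers poll at once, each due job is handed
    # to exactly one of them, and rows already locked by another worker are
    # skipped rather than blocking the poll.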
    def _claim_next_due_job(self) -> Optional[Dict[str, Any]]:
        now = datetime.now()
        sql = self._text(f"""
            WITH next_job AS (
                SELECT job_id
                FROM {self._qualified_table(self.JOBS_TABLE)}
                WHERE status = 'queued'
                  AND run_at <= :now
                ORDER BY run_at, queue_sequence
                LIMIT 1
                FOR UPDATE SKIP LOCKED
            )
            UPDATE {self._qualified_table(self.JOBS_TABLE)} job
            SET status = 'running',
                started_at = :now,
                finished_at = NULL,
                error = NULL,
                report_json = NULL
            FROM next_job
            WHERE job.job_id = next_job.job_id
            RETURNING job.*
        """)
        with self._get_engine().connect() as conn:
            row = conn.execute(sql, {'now': now}).mappings().first()
            conn.commit()
        return row
    def _execute_job(self, job: MigrationJob):
        migrator = None
        try:
            from .migrator import DatabaseMigrator
            config = Config()
            migrator = DatabaseMigrator(config)
            report = migrator.run_migration(
                table_names=job.tables,
                send_email=False,  # notification is sent below, after the status is recorded
                dry_run=job.dry_run,
                read_limit=job.read_limit,
                force_full=job.force_full,
            )
            migrator.cleanup_old_logs(days_to_keep=7)
            self._finish_job(job.job_id, status='completed', report=report, error=None)
            if job.send_email:
                migrator.send_notification(report)
        except Exception as exc:
            self._finish_job(job.job_id, status='failed', report=None, error=str(exc))
            if job.send_email and migrator is not None:
                try:
                    migrator.send_failure_notification(
                        error=str(exc),
                        table_names=job.tables,
                        job_id=job.job_id,
                    )
                except Exception:
                    pass
    def _finish_job(
        self,
        job_id: str,
        status: str,
        report: Optional[Dict[str, Any]],
        error: Optional[str],
    ):
        with self._get_engine().connect() as conn:
            conn.execute(self._text(f"""
                UPDATE {self._qualified_table(self.JOBS_TABLE)}
                SET status = :status,
                    finished_at = :finished_at,
                    report_json = CAST(:report_json AS jsonb),
                    error = :error
                WHERE job_id = :job_id
            """), {
                'status': status,
                'finished_at': datetime.now(),
                'report_json': json.dumps(report) if report is not None else None,
                'error': error,
                'job_id': job_id,
            })
            conn.commit()
    def _job_from_row(self, row: Optional[Dict[str, Any]]) -> Optional[MigrationJob]:
        if row is None:
            return None
        tables = row['tables_json']
        if isinstance(tables, str):
            tables = json.loads(tables)
        report = row['report_json']
        if isinstance(report, str):
            report = json.loads(report)
        return MigrationJob(
            job_id=row['job_id'],
            created_at=row['created_at'],
            run_at=row['run_at'],
            tables=tables,
            send_email=row['send_email'],
            dry_run=row['dry_run'],
            read_limit=row['read_limit'],
            force_full=row.get('force_full', False),
            status=row['status'],
            started_at=row['started_at'],
            finished_at=row['finished_at'],
            report=report,
            error=row['error'],
            queue_sequence=row['queue_sequence'],
            schedule_id=row['schedule_id'],
        )
    def _schedule_from_row(self, row: Optional[Dict[str, Any]]) -> Optional[MigrationSchedule]:
        if row is None:
            return None
        tables = row['tables_json']
        if isinstance(tables, str):
            tables = json.loads(tables)
        return MigrationSchedule(
            schedule_id=row['schedule_id'],
            created_at=row['created_at'],
            updated_at=row['updated_at'],
            name=row['name'],
            schedule_type=row['schedule_type'],
            enabled=row['enabled'],
            catch_up_missed_runs=row['catch_up_missed_runs'],
            initial_force_full=row.get('initial_force_full', False),
            tables=tables,
            send_email=row['send_email'],
            dry_run=row['dry_run'],
            read_limit=row['read_limit'],
            interval_seconds=row['interval_seconds'],
            daily_time=row['daily_time'],
            start_at=row['start_at'],
            next_run_at=row['next_run_at'],
            last_enqueued_at=row['last_enqueued_at'],
            last_job_id=row['last_job_id'],
        )

migration_queue = MigrationJobQueue()
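
# Minimal usage sketch (illustrative; the import path assumes the package layout
# syncio/app/queue.py, and Config must supply POSTGRES_CONNECTION_STRING,
# REPLICATOR_SCHEMA, QUEUE_POLL_SECONDS and SCHEDULE_GRACE_SECONDS, as the code
# above requires):
#
#   from syncio.app.queue import migration_queue
#
#   job = migration_queue.enqueue(tables=['users'], delay_seconds=60)
#   print(job['status'])          # 'queued'
#   migration_queue.create_schedule(schedule_type='interval', interval_seconds=3600)
#   print(migration_queue.get_status()['queued_jobs'])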