This commit is contained in:
brusnitsyn
2026-06-10 16:53:03 +09:00
parent ff810e59bc
commit b5d1f61a82
9 changed files with 640 additions and 151 deletions

View File

@@ -387,6 +387,13 @@ class MigrationJobQueue:
CREATE INDEX IF NOT EXISTS idx_{self.SCHEDULES_TABLE}_enabled_next_run_at
ON {self._qualified_table(self.SCHEDULES_TABLE)} (enabled, next_run_at)
"""))
conn.execute(self._text(f"""
UPDATE {self._qualified_table(self.JOBS_TABLE)}
SET status = 'failed',
finished_at = :now,
error = 'Прервано перезапуском воркера'
WHERE status = 'running'
"""), {'now': datetime.now()})
conn.commit()
self.schema_ready = True
@@ -477,6 +484,8 @@ class MigrationJobQueue:
self._execute_job(self._job_from_row(job_row))
continue
except Exception:
import logging
logging.getLogger(__name__).exception("Необработанная ошибка в _worker_loop")
time.sleep(1.0)
continue
@@ -577,7 +586,14 @@ class MigrationJobQueue:
return row
def _execute_job(self, job: MigrationJob):
import logging
logger = logging.getLogger(__name__)
migrator = None
final_status = 'failed'
final_report = None
final_error = 'Неизвестная ошибка'
try:
from .migrator import DatabaseMigrator
@@ -591,11 +607,14 @@ class MigrationJobQueue:
force_full=job.force_full,
)
migrator.cleanup_old_logs(days_to_keep=7)
self._finish_job(job.job_id, status='completed', report=report, error=None)
final_status = 'completed'
final_report = report
final_error = None
if job.send_email:
migrator.send_notification(report)
except Exception as exc:
self._finish_job(job.job_id, status='failed', report=None, error=str(exc))
final_error = str(exc)
logger.exception(f"Ошибка выполнения job {job.job_id}")
if job.send_email and migrator is not None:
try:
migrator.send_failure_notification(
@@ -605,6 +624,47 @@ class MigrationJobQueue:
)
except Exception:
pass
finally:
if migrator is not None:
try:
migrator.logger.close()
except Exception:
pass
try:
self._finish_job(
job.job_id,
status=final_status,
report=final_report,
error=final_error,
)
except Exception:
logger.exception(f"Не удалось финализировать job {job.job_id}, принудительный сброс")
self._force_fail_job(job.job_id, 'Ошибка финализации job')
def _force_fail_job(self, job_id: str, error: str):
"""Аварийная финализация job через новое соединение вне пула."""
try:
from sqlalchemy import create_engine
from sqlalchemy.pool import NullPool
engine = create_engine(Config.POSTGRES_CONNECTION_STRING, poolclass=NullPool)
try:
with engine.connect() as conn:
conn.execute(self._text(f"""
UPDATE {self._qualified_table(self.JOBS_TABLE)}
SET status = 'failed',
finished_at = :now,
error = :error
WHERE job_id = :job_id
"""), {'now': datetime.now(), 'error': error, 'job_id': job_id})
conn.commit()
finally:
engine.dispose()
except Exception:
import logging
logging.getLogger(__name__).exception(
f"Критическая ошибка: не удалось аварийно завершить job {job_id}"
)
def _finish_job(
self,