Небольшие изменения

This commit is contained in:
brusnitsyn
2026-05-29 06:47:29 +09:00
parent 7a13ff3b74
commit ff810e59bc
4 changed files with 284 additions and 88 deletions

View File

@@ -21,6 +21,8 @@ ENABLE_TIMESCALE=false
DRY_RUN=false DRY_RUN=false
READ_LIMIT=0 READ_LIMIT=0
CHUNK_SIZE=5000 CHUNK_SIZE=5000
INCREMENTAL_CHUNK_SIZE=5000
FULL_LOAD_CHUNK_SIZE=5000
WRITE_CHUNK_SIZE=5000 WRITE_CHUNK_SIZE=5000
CREATE_FOREIGN_KEYS=true CREATE_FOREIGN_KEYS=true
QUEUE_POLL_SECONDS=1 QUEUE_POLL_SECONDS=1

View File

@@ -100,6 +100,8 @@ class Config:
# Настройки миграции # Настройки миграции
CHUNK_SIZE = int(os.getenv('CHUNK_SIZE', '5000')) CHUNK_SIZE = int(os.getenv('CHUNK_SIZE', '5000'))
WRITE_CHUNK_SIZE = int(os.getenv('WRITE_CHUNK_SIZE', str(CHUNK_SIZE))) WRITE_CHUNK_SIZE = int(os.getenv('WRITE_CHUNK_SIZE', str(CHUNK_SIZE)))
INCREMENTAL_CHUNK_SIZE = int(os.getenv('INCREMENTAL_CHUNK_SIZE', str(CHUNK_SIZE * 2)))
FULL_LOAD_CHUNK_SIZE = int(os.getenv('FULL_LOAD_CHUNK_SIZE', str(CHUNK_SIZE * 4)))
BATCH_SIZE = 10 # Через сколько чанков выводить прогресс BATCH_SIZE = 10 # Через сколько чанков выводить прогресс
REPLICATOR_SCHEMA = os.getenv('REPLICATOR_SCHEMA', 'replicator') REPLICATOR_SCHEMA = os.getenv('REPLICATOR_SCHEMA', 'replicator')
STATE_TABLE = 'migration_state' STATE_TABLE = 'migration_state'

View File

@@ -37,15 +37,16 @@ class MigrationLogger:
def setup_logging(self): def setup_logging(self):
"""Настройка системы логирования""" """Настройка системы логирования"""
logging.basicConfig( fmt = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
level=self.config.LOG_LEVEL, self.logger = logging.getLogger(f"migration.{self.timestamp}")
format='%(asctime)s - %(levelname)s - %(message)s', self.logger.setLevel(self.config.LOG_LEVEL)
handlers=[ self.logger.propagate = False
logging.FileHandler(self.log_file, encoding='utf-8'), file_handler = logging.FileHandler(self.log_file, encoding='utf-8')
logging.StreamHandler() # Вывод в консоль file_handler.setFormatter(fmt)
] stream_handler = logging.StreamHandler()
) stream_handler.setFormatter(fmt)
self.logger = logging.getLogger(__name__) self.logger.addHandler(file_handler)
self.logger.addHandler(stream_handler)
def log_info(self, message: str): def log_info(self, message: str):
"""Логирование информационного сообщения""" """Логирование информационного сообщения"""

View File

@@ -1,12 +1,15 @@
import os import os
import re import re
import time import time
import csv
import io
from datetime import datetime from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple from typing import Any, Dict, List, Optional, Tuple
import pandas as pd import pandas as pd
from sqlalchemy import create_engine, inspect, text from sqlalchemy import create_engine, inspect, text
from sqlalchemy.exc import DBAPIError, OperationalError from sqlalchemy.exc import DBAPIError, OperationalError
from sqlalchemy.pool import NullPool
from sqlalchemy.sql import sqltypes from sqlalchemy.sql import sqltypes
from .config import Config, TableMigrationConfig from .config import Config, TableMigrationConfig
@@ -50,12 +53,8 @@ class DatabaseMigrator:
connect_args={ connect_args={
'charset': self.config.MSSQL_CHARSET, 'charset': self.config.MSSQL_CHARSET,
'login_timeout': self.config.MSSQL_CONNECT_TIMEOUT, 'login_timeout': self.config.MSSQL_CONNECT_TIMEOUT,
'timeout': self.config.MSSQL_CONNECT_TIMEOUT,
}, },
pool_pre_ping=True, poolclass=NullPool,
pool_recycle=self.config.MSSQL_POOL_RECYCLE,
pool_size=self.config.MSSQL_POOL_SIZE,
max_overflow=self.config.MSSQL_MAX_OVERFLOW,
) )
def reconnect_mssql_engine(self): def reconnect_mssql_engine(self):
@@ -72,10 +71,10 @@ class DatabaseMigrator:
def is_retryable_mssql_error(self, exception: Exception) -> bool: def is_retryable_mssql_error(self, exception: Exception) -> bool:
"""Определение временной ошибки MSSQL/pymssql, при которой есть смысл повторить таблицу.""" """Определение временной ошибки MSSQL/pymssql, при которой есть смысл повторить таблицу."""
if isinstance(exception, (OperationalError, DBAPIError)):
message = str(exception).lower() message = str(exception).lower()
retry_markers = ( retry_markers = (
'dbprocess is dead', 'dbprocess is dead',
'unexpected eof',
'adaptive server connection failed', 'adaptive server connection failed',
'server closed the connection unexpectedly', 'server closed the connection unexpectedly',
'connection reset', 'connection reset',
@@ -85,14 +84,32 @@ class DatabaseMigrator:
'closed connection', 'closed connection',
'not enabled', 'not enabled',
'08s01', '08s01',
'20002',
'20017',
) )
return any(marker in message for marker in retry_markers)
return False if any(marker in message for marker in retry_markers):
return True
return isinstance(exception, (OperationalError, DBAPIError))
def migrate_table_once(self, table_config: TableMigrationConfig, force_full: bool = False) -> bool: def migrate_table_once(self, table_config: TableMigrationConfig, force_full: bool = False) -> bool:
"""Один проход миграции таблицы без retry-обертки.""" """Один проход миграции таблицы без retry-обертки."""
if force_full: if force_full:
self.logger.log_info(f"Force full reload для таблицы {table_config.source_table}") self.logger.log_info(f"Force full reload для таблицы {table_config.source_table}")
is_initial_force_full = (
table_config.mode == 'incremental'
and table_config.initial_load_mode == 'full_then_incremental'
and not self.table_exists(table_config.pg_table)
and self.get_last_watermark(table_config.pg_table).get('last_x_datetime') is None
)
if is_initial_force_full:
self.logger.log_info(
f"Первый force_full для {table_config.source_table}: "
"загрузка выполняется без SQLAlchemy"
)
success = self.migrate_full_table_without_sqlalchemy(table_config)
else:
success = self.migrate_full_table(table_config) success = self.migrate_full_table(table_config)
if success and table_config.mode == 'incremental' and table_config.life_table: if success and table_config.mode == 'incremental' and table_config.life_table:
upper_bound = self.get_incremental_upper_bound(table_config) upper_bound = self.get_incremental_upper_bound(table_config)
@@ -453,7 +470,7 @@ class DatabaseMigrator:
sql, sql,
self.src_engine, self.src_engine,
params=params, params=params,
chunksize=self.config.CHUNK_SIZE, chunksize=self.config.INCREMENTAL_CHUNK_SIZE,
) )
def read_full_chunks( def read_full_chunks(
@@ -464,9 +481,9 @@ class DatabaseMigrator:
"""Чтение полной таблицы чанками с опциональным лимитом для проверки.""" """Чтение полной таблицы чанками с опциональным лимитом для проверки."""
if read_limit: if read_limit:
sql = text(f"SELECT TOP {int(read_limit)} * FROM {self.quote_mssql_identifier(table_name)}") sql = text(f"SELECT TOP {int(read_limit)} * FROM {self.quote_mssql_identifier(table_name)}")
return pd.read_sql_query(sql, self.src_engine, chunksize=self.config.CHUNK_SIZE) return pd.read_sql_query(sql, self.src_engine, chunksize=self.config.FULL_LOAD_CHUNK_SIZE)
return pd.read_sql_table(table_name, self.src_engine, chunksize=self.config.CHUNK_SIZE) return pd.read_sql_table(table_name, self.src_engine, chunksize=self.config.FULL_LOAD_CHUNK_SIZE)
def write_dataframe_batch( def write_dataframe_batch(
self, self,
@@ -488,6 +505,80 @@ class DatabaseMigrator:
method='multi', method='multi',
) )
def read_full_chunks_without_sqlalchemy(
self,
table_name: str,
read_limit: Optional[int] = None,
):
"""Чтение полной таблицы чанками через DBAPI-курсор (без pandas.read_sql_*)."""
src_connection = self.src_engine.raw_connection()
cursor = src_connection.cursor()
top_clause = f"TOP {int(read_limit)} " if read_limit else ""
sql = f"SELECT {top_clause}* FROM {self.quote_mssql_identifier(table_name)}"
try:
cursor.execute(sql)
columns = [column[0] for column in cursor.description]
while True:
rows = cursor.fetchmany(self.config.FULL_LOAD_CHUNK_SIZE)
if not rows:
break
yield pd.DataFrame.from_records(rows, columns=columns)
finally:
try:
cursor.close()
finally:
src_connection.close()
def write_dataframe_batch_without_sqlalchemy(
self,
chunk: pd.DataFrame,
table_name: str,
):
"""Batch-запись DataFrame в PostgreSQL через COPY (без pandas.to_sql)."""
if chunk.empty:
return
if self.config.DRY_RUN:
self.logger.log_info(f"DRY RUN: пропущена запись {len(chunk)} строк в {table_name}")
return
buffer = io.StringIO()
chunk.to_csv(
buffer,
index=False,
header=False,
sep='\t',
na_rep='\\N',
date_format='%Y-%m-%d %H:%M:%S.%f',
quoting=csv.QUOTE_MINIMAL,
escapechar='\\',
)
buffer.seek(0)
quoted_columns = ', '.join(
self.quote_identifier(column_name)
for column_name in chunk.columns
)
copy_sql = (
f"COPY {self.qualify_table_name(table_name)} ({quoted_columns}) "
"FROM STDIN WITH (FORMAT csv, DELIMITER E'\\t', NULL '\\N', ESCAPE '\\', QUOTE '\"')"
)
dst_connection = self.dst_engine.raw_connection()
cursor = dst_connection.cursor()
try:
cursor.copy_expert(copy_sql, buffer)
dst_connection.commit()
except Exception:
dst_connection.rollback()
raise
finally:
try:
cursor.close()
finally:
dst_connection.close()
def prepare_incremental_chunk( def prepare_incremental_chunk(
self, self,
chunk: pd.DataFrame, chunk: pd.DataFrame,
@@ -571,6 +662,7 @@ class DatabaseMigrator:
chunk: pd.DataFrame, chunk: pd.DataFrame,
table_name: str, table_name: str,
primary_key: List[str], primary_key: List[str],
staging_table: Optional[str] = None,
): ):
"""Batch delete в PostgreSQL через staging-таблицу с ключами.""" """Batch delete в PostgreSQL через staging-таблицу с ключами."""
if chunk.empty: if chunk.empty:
@@ -588,7 +680,8 @@ class DatabaseMigrator:
if missing_columns: if missing_columns:
raise ValueError(f"Для удаления из {table_name} не найдены ключевые поля: {missing_columns}") raise ValueError(f"Для удаления из {table_name} не найдены ключевые поля: {missing_columns}")
staging_table = f"_stg_delete_{table_name}_{int(time.time() * 1000)}" own_staging = staging_table is None
staging = staging_table or f"_stg_delete_{table_name}_{int(time.time() * 1000)}"
key_chunk = chunk[primary_key].drop_duplicates() key_chunk = chunk[primary_key].drop_duplicates()
join_condition = ' AND '.join([ join_condition = ' AND '.join([
f"target.{self.quote_identifier(column)} = source.{self.quote_identifier(column)}" f"target.{self.quote_identifier(column)} = source.{self.quote_identifier(column)}"
@@ -596,19 +689,30 @@ class DatabaseMigrator:
]) ])
try: try:
self.write_dataframe_batch(key_chunk, staging_table, if_exists='replace') if self.table_exists(staging):
with self.dst_engine.connect() as conn:
conn.execute(text(f'TRUNCATE TABLE {self.quote_identifier(staging)}'))
conn.commit()
else:
key_chunk.iloc[0:0].to_sql(staging, self.dst_engine, if_exists='replace', index=False)
with self.dst_engine.connect() as conn:
conn.execute(text(f'ALTER TABLE {self.quote_identifier(staging)} SET UNLOGGED'))
conn.commit()
self.write_dataframe_batch_without_sqlalchemy(key_chunk, staging)
sql = f""" sql = f"""
DELETE FROM {self.quote_identifier(table_name)} AS target DELETE FROM {self.quote_identifier(table_name)} AS target
USING {self.quote_identifier(staging_table)} AS source USING {self.quote_identifier(staging)} AS source
WHERE {join_condition} WHERE {join_condition}
""" """
with self.dst_engine.connect() as conn: with self.dst_engine.connect() as conn:
conn.execute(text(sql)) conn.execute(text(sql))
conn.execute(text(f'DROP TABLE IF EXISTS {self.quote_identifier(staging_table)}')) if own_staging:
conn.execute(text(f'DROP TABLE IF EXISTS {self.quote_identifier(staging)}'))
conn.commit() conn.commit()
except Exception: except Exception:
if own_staging:
with self.dst_engine.connect() as conn: with self.dst_engine.connect() as conn:
conn.execute(text(f'DROP TABLE IF EXISTS {self.quote_identifier(staging_table)}')) conn.execute(text(f'DROP TABLE IF EXISTS {self.quote_identifier(staging)}'))
conn.commit() conn.commit()
raise raise
@@ -617,6 +721,7 @@ class DatabaseMigrator:
chunk: pd.DataFrame, chunk: pd.DataFrame,
table_name: str, table_name: str,
primary_key: List[str], primary_key: List[str],
staging_table: Optional[str] = None,
): ):
"""Batch upsert через staging-таблицу.""" """Batch upsert через staging-таблицу."""
if self.config.DRY_RUN: if self.config.DRY_RUN:
@@ -627,8 +732,8 @@ class DatabaseMigrator:
self.write_dataframe_batch(chunk, table_name, if_exists='append') self.write_dataframe_batch(chunk, table_name, if_exists='append')
return return
chunk = self.deduplicate_incremental_chunk(chunk, primary_key) own_staging = staging_table is None
staging_table = f"_stg_{table_name}_{int(time.time() * 1000)}" staging = staging_table or f"_stg_{table_name}_{int(time.time() * 1000)}"
columns = list(chunk.columns) columns = list(chunk.columns)
quoted_columns = ', '.join([self.quote_identifier(column) for column in columns]) quoted_columns = ', '.join([self.quote_identifier(column) for column in columns])
conflict_columns = ', '.join([self.quote_identifier(column) for column in primary_key]) conflict_columns = ', '.join([self.quote_identifier(column) for column in primary_key])
@@ -644,20 +749,31 @@ class DatabaseMigrator:
conflict_action = "DO NOTHING" conflict_action = "DO NOTHING"
try: try:
self.write_dataframe_batch(chunk, staging_table, if_exists='replace') if self.table_exists(staging):
with self.dst_engine.connect() as conn:
conn.execute(text(f'TRUNCATE TABLE {self.quote_identifier(staging)}'))
conn.commit()
else:
chunk.iloc[0:0].to_sql(staging, self.dst_engine, if_exists='replace', index=False)
with self.dst_engine.connect() as conn:
conn.execute(text(f'ALTER TABLE {self.quote_identifier(staging)} SET UNLOGGED'))
conn.commit()
self.write_dataframe_batch_without_sqlalchemy(chunk, staging)
sql = f""" sql = f"""
INSERT INTO {self.quote_identifier(table_name)} ({quoted_columns}) INSERT INTO {self.quote_identifier(table_name)} ({quoted_columns})
SELECT {quoted_columns} SELECT {quoted_columns}
FROM {self.quote_identifier(staging_table)} FROM {self.quote_identifier(staging)}
ON CONFLICT ({conflict_columns}) {conflict_action} ON CONFLICT ({conflict_columns}) {conflict_action}
""" """
with self.dst_engine.connect() as conn: with self.dst_engine.connect() as conn:
conn.execute(text(sql)) conn.execute(text(sql))
conn.execute(text(f'DROP TABLE IF EXISTS {self.quote_identifier(staging_table)}')) if own_staging:
conn.execute(text(f'DROP TABLE IF EXISTS {self.quote_identifier(staging)}'))
conn.commit() conn.commit()
except Exception: except Exception:
if own_staging:
with self.dst_engine.connect() as conn: with self.dst_engine.connect() as conn:
conn.execute(text(f'DROP TABLE IF EXISTS {self.quote_identifier(staging_table)}')) conn.execute(text(f'DROP TABLE IF EXISTS {self.quote_identifier(staging)}'))
conn.commit() conn.commit()
raise raise
@@ -1040,10 +1156,6 @@ class DatabaseMigrator:
pk_columns = self.get_mssql_primary_key(table_name) pk_columns = self.get_mssql_primary_key(table_name)
foreign_keys = self.get_mssql_foreign_keys(table_name) foreign_keys = self.get_mssql_foreign_keys(table_name)
# Очищаем целевую таблицу
self.logger.log_info(f"Очистка целевой таблицы {pg_table}")
self.truncate_table(pg_table)
# Читаем данные # Читаем данные
self.logger.log_info(f"Чтение данных из {table_name}") self.logger.log_info(f"Чтение данных из {table_name}")
chunks = self.read_full_chunks(table_name, read_limit=read_limit) chunks = self.read_full_chunks(table_name, read_limit=read_limit)
@@ -1054,11 +1166,84 @@ class DatabaseMigrator:
for chunk_num, chunk in enumerate(chunks, 1): for chunk_num, chunk in enumerate(chunks, 1):
if first_chunk: if first_chunk:
self.write_dataframe_batch(chunk, pg_table, if_exists='fail') # Дропаем целевую таблицу только после успешного чтения первого чанка,
first_chunk = False # чтобы не уничтожить данные при недоступном MSSQL
self.logger.log_info(f"Очистка целевой таблицы {pg_table}")
self.truncate_table(pg_table)
chunk.iloc[0:0].to_sql(pg_table, self.dst_engine, if_exists='replace', index=False)
self.logger.log_info(f"Таблица {pg_table} создана") self.logger.log_info(f"Таблица {pg_table} создана")
else: first_chunk = False
self.write_dataframe_batch(chunk, pg_table, if_exists='append') self.write_dataframe_batch_without_sqlalchemy(chunk, pg_table)
total_rows += len(chunk)
if chunk_num % self.config.BATCH_SIZE == 0:
self.logger.log_progress(table_name, chunk_num, total_rows)
self.logger.log_info(f"Всего загружено строк: {total_rows}")
if total_rows > 0:
self.sync_target_schema(table_name, pg_table)
self.create_timescale_hypertable(table_config)
if self.can_create_primary_key(table_config, pk_columns):
self.logger.log_info(f"Создание первичного ключа для {pg_table}")
self.create_pg_primary_key(pg_table, pk_columns)
if indexes:
self.logger.log_info(f"Создание {len(indexes)} индексов для {pg_table}")
self.create_pg_indexes(pg_table, indexes)
if foreign_keys:
self.logger.log_info(f"Создание {len(foreign_keys)} внешних ключей для {pg_table}")
self.create_pg_foreign_keys(pg_table, foreign_keys)
self.logger.log_info(f"Обновление статистики для {pg_table}")
if total_rows > 1000000:
self.vacuum_analyze_table(pg_table)
else:
self.analyze_table(pg_table)
self.logger.log_table_success(table_name, total_rows)
return True
except Exception as e:
if self.is_retryable_mssql_error(e):
raise
self.logger.log_table_failure(table_name, str(e))
return False
def migrate_full_table_without_sqlalchemy(
self,
table_config: TableMigrationConfig,
read_limit: Optional[int] = None,
) -> bool:
"""Полная миграция одной таблицы без SQLAlchemy в этапе чтения/заливки данных."""
table_name = table_config.source_table
pg_table = table_config.pg_table
self.logger.log_table_start(table_name)
try:
indexes = self.get_mssql_indexes(table_name)
pk_columns = self.get_mssql_primary_key(table_name)
foreign_keys = self.get_mssql_foreign_keys(table_name)
self.logger.log_info(f"Чтение данных из {table_name} без SQLAlchemy")
chunks = self.read_full_chunks_without_sqlalchemy(table_name, read_limit=read_limit)
first_chunk = True
total_rows = 0
for chunk_num, chunk in enumerate(chunks, 1):
if first_chunk:
# Дропаем целевую таблицу только после успешного чтения первого чанка,
# чтобы не уничтожить данные при недоступном MSSQL
self.logger.log_info(f"Очистка целевой таблицы {pg_table}")
self.truncate_table(pg_table)
self.write_dataframe_batch(chunk.iloc[0:0], pg_table, if_exists='fail')
self.logger.log_info(f"Таблица {pg_table} создана")
first_chunk = False
self.write_dataframe_batch_without_sqlalchemy(chunk, pg_table)
total_rows += len(chunk) total_rows += len(chunk)
if chunk_num % self.config.BATCH_SIZE == 0: if chunk_num % self.config.BATCH_SIZE == 0:
@@ -1068,26 +1253,20 @@ class DatabaseMigrator:
if total_rows > 0: if total_rows > 0:
self.sync_target_schema(table_name, pg_table) self.sync_target_schema(table_name, pg_table)
if total_rows > 0:
self.create_timescale_hypertable(table_config) self.create_timescale_hypertable(table_config)
# Создаем первичный ключ
if self.can_create_primary_key(table_config, pk_columns): if self.can_create_primary_key(table_config, pk_columns):
self.logger.log_info(f"Создание первичного ключа для {pg_table}") self.logger.log_info(f"Создание первичного ключа для {pg_table}")
self.create_pg_primary_key(pg_table, pk_columns) self.create_pg_primary_key(pg_table, pk_columns)
# Создаем индексы
if indexes: if indexes:
self.logger.log_info(f"Создание {len(indexes)} индексов для {pg_table}") self.logger.log_info(f"Создание {len(indexes)} индексов для {pg_table}")
self.create_pg_indexes(pg_table, indexes) self.create_pg_indexes(pg_table, indexes)
# Создаем внешние ключи
if foreign_keys: if foreign_keys:
self.logger.log_info(f"Создание {len(foreign_keys)} внешних ключей для {pg_table}") self.logger.log_info(f"Создание {len(foreign_keys)} внешних ключей для {pg_table}")
self.create_pg_foreign_keys(pg_table, foreign_keys) self.create_pg_foreign_keys(pg_table, foreign_keys)
# Обновляем статистику
self.logger.log_info(f"Обновление статистики для {pg_table}") self.logger.log_info(f"Обновление статистики для {pg_table}")
if total_rows > 1000000: if total_rows > 1000000:
self.vacuum_analyze_table(pg_table) self.vacuum_analyze_table(pg_table)
@@ -1108,6 +1287,8 @@ class DatabaseMigrator:
table_name = table_config.source_table table_name = table_config.source_table
pg_table = table_config.pg_table pg_table = table_config.pg_table
self.logger.log_table_start(f"{table_name} ({table_config.read_table})") self.logger.log_table_start(f"{table_name} ({table_config.read_table})")
upsert_staging = f"_stg_upsert_{pg_table}"
delete_staging = f"_stg_delete_{pg_table}"
try: try:
if not table_config.life_table: if not table_config.life_table:
@@ -1188,18 +1369,19 @@ class DatabaseMigrator:
) )
if not delete_chunk.empty: if not delete_chunk.empty:
self.delete_dataframe_batch(delete_chunk, pg_table, table_config.primary_key) self.delete_dataframe_batch(delete_chunk, pg_table, table_config.primary_key, staging_table=delete_staging)
if write_chunk.empty: if write_chunk.empty:
pass pass
elif first_chunk: elif first_chunk:
self.write_dataframe_batch(write_chunk, pg_table, if_exists='append') write_chunk.iloc[0:0].to_sql(pg_table, self.dst_engine, if_exists='replace', index=False)
self.write_dataframe_batch_without_sqlalchemy(write_chunk, pg_table)
self.create_timescale_hypertable(table_config) self.create_timescale_hypertable(table_config)
if self.can_create_primary_key(table_config, table_config.primary_key): if self.can_create_primary_key(table_config, table_config.primary_key):
self.create_pg_primary_key(pg_table, table_config.primary_key) self.create_pg_primary_key(pg_table, table_config.primary_key)
first_chunk = False first_chunk = False
elif table_config.primary_key: elif table_config.primary_key:
self.upsert_dataframe_batch(write_chunk, pg_table, table_config.primary_key) self.upsert_dataframe_batch(write_chunk, pg_table, table_config.primary_key, staging_table=upsert_staging)
else: else:
self.write_dataframe_batch(write_chunk, pg_table, if_exists='append') self.write_dataframe_batch(write_chunk, pg_table, if_exists='append')
@@ -1260,6 +1442,15 @@ class DatabaseMigrator:
self.logger.log_table_failure(table_name, str(e)) self.logger.log_table_failure(table_name, str(e))
return False return False
finally:
try:
with self.dst_engine.connect() as conn:
conn.execute(text(f'DROP TABLE IF EXISTS {self.quote_identifier(upsert_staging)}'))
conn.execute(text(f'DROP TABLE IF EXISTS {self.quote_identifier(delete_staging)}'))
conn.commit()
except Exception as cleanup_error:
self.logger.log_warning(f"Не удалось очистить staging-таблицы для {pg_table}: {cleanup_error}")
def run_migration( def run_migration(
self, self,
table_names: Optional[List[str]] = None, table_names: Optional[List[str]] = None,