v2026.06.3
This commit is contained in:
@@ -11,9 +11,7 @@ RUN apk add --no-cache \
|
||||
freetds \
|
||||
freetds-dev \
|
||||
linux-headers \
|
||||
postgresql-dev \
|
||||
python3-dev \
|
||||
tzdata
|
||||
postgresql-dev
|
||||
|
||||
COPY req.txt .
|
||||
RUN pip install --upgrade pip \
|
||||
@@ -40,7 +38,9 @@ RUN pip install --upgrade pip \
|
||||
|
||||
COPY app ./app
|
||||
COPY main.py .
|
||||
COPY sql ./sql
|
||||
|
||||
# Фикс совместимости OpenSSL 3 с SQL Server 2016
|
||||
COPY docker/openssl.cnf /etc/ssl/openssl.cnf
|
||||
|
||||
RUN mkdir -p logs
|
||||
|
||||
|
||||
@@ -856,6 +856,20 @@ class DatabaseMigrator:
|
||||
else:
|
||||
conflict_action = "DO NOTHING"
|
||||
|
||||
try:
|
||||
target_types = {
|
||||
col['name']: str(col['type'])
|
||||
for col in inspect(self.dst_engine).get_columns(table_name)
|
||||
}
|
||||
except Exception:
|
||||
target_types = {}
|
||||
|
||||
select_expr = ', '.join(
|
||||
f'CAST({self.quote_identifier(col)} AS {target_types[col]})'
|
||||
if col in target_types else self.quote_identifier(col)
|
||||
for col in columns
|
||||
)
|
||||
|
||||
try:
|
||||
if self.table_exists(staging):
|
||||
with self.dst_engine.connect() as conn:
|
||||
@@ -869,7 +883,7 @@ class DatabaseMigrator:
|
||||
self.write_dataframe_batch_without_sqlalchemy(chunk, staging)
|
||||
sql = f"""
|
||||
INSERT INTO {self.quote_identifier(table_name)} ({quoted_columns})
|
||||
SELECT {quoted_columns}
|
||||
SELECT {select_expr}
|
||||
FROM {self.quote_identifier(staging)}
|
||||
ON CONFLICT ({conflict_columns}) {conflict_action}
|
||||
"""
|
||||
@@ -1403,9 +1417,37 @@ class DatabaseMigrator:
|
||||
self.logger.log_table_success(table_name, 0)
|
||||
return True
|
||||
|
||||
if (
|
||||
last_watermark['last_x_datetime'] is None
|
||||
and target_exists
|
||||
and table_config.primary_key
|
||||
and table_config.life_table
|
||||
):
|
||||
pg_count = self._get_pg_row_count(pg_table)
|
||||
if pg_count > 0:
|
||||
self.logger.log_info(
|
||||
f"Таблица {pg_table} содержит {pg_count} строк без watermark — "
|
||||
f"автоопределение watermark из {table_config.life_table}"
|
||||
)
|
||||
detected = self._get_watermark_for_pg_data(table_config, find_min=True)
|
||||
if detected['last_x_datetime'] is not None:
|
||||
self.save_watermark(
|
||||
pg_table,
|
||||
detected['last_x_datetime'],
|
||||
detected['last_sequence_value'],
|
||||
pg_count,
|
||||
'success',
|
||||
)
|
||||
last_watermark = detected
|
||||
self.logger.log_info(f"Watermark установлен автоматически: {last_watermark}")
|
||||
else:
|
||||
self.logger.log_warning(
|
||||
f"Не удалось определить watermark для {pg_table} из существующих данных, "
|
||||
f"начинаем инкрементальную миграцию с нуля"
|
||||
)
|
||||
|
||||
if (
|
||||
table_config.initial_load_mode == 'full_then_incremental'
|
||||
and not target_exists
|
||||
and last_watermark['last_x_datetime'] is None
|
||||
):
|
||||
self.logger.log_info(
|
||||
@@ -1641,11 +1683,11 @@ class DatabaseMigrator:
|
||||
return '0x' + value.hex()
|
||||
return "'" + str(value).replace("'", "''") + "'"
|
||||
|
||||
def _get_watermark_for_pg_data(self, table_config: TableMigrationConfig) -> Dict[str, Any]:
|
||||
def _get_watermark_for_pg_data(self, table_config: TableMigrationConfig, find_min: bool = False) -> Dict[str, Any]:
|
||||
"""Поиск watermark в MSSQL Life_ для строк, уже имеющихся в PostgreSQL.
|
||||
|
||||
Читает PK из PG потоком (без загрузки всей таблицы в память), батчами
|
||||
запрашивает MSSQL Life_ и находит максимальную (datetime, sequence) пару
|
||||
запрашивает MSSQL Life_ и находит минимальную или максимальную (datetime, sequence) пару
|
||||
среди событий, относящихся к уже реплицированным строкам.
|
||||
"""
|
||||
empty: Dict[str, Any] = {'last_x_datetime': None, 'last_sequence_value': None}
|
||||
@@ -1656,9 +1698,10 @@ class DatabaseMigrator:
|
||||
|
||||
LOOKUP_BATCH = 500
|
||||
|
||||
order_parts = [f"{self.quote_mssql_identifier(table_config.datetime_column)} DESC"]
|
||||
direction = 'ASC' if find_min else 'DESC'
|
||||
order_parts = [f"{self.quote_mssql_identifier(table_config.datetime_column)} {direction}"]
|
||||
if table_config.sequence_column:
|
||||
order_parts.append(f"{self.quote_mssql_identifier(table_config.sequence_column)} DESC")
|
||||
order_parts.append(f"{self.quote_mssql_identifier(table_config.sequence_column)} {direction}")
|
||||
order_clause = ', '.join(order_parts)
|
||||
|
||||
select_parts = [
|
||||
@@ -1722,15 +1765,18 @@ class DatabaseMigrator:
|
||||
batch_dt = row['max_dt']
|
||||
batch_seq = row['max_seq'] if table_config.sequence_column else None
|
||||
|
||||
if (
|
||||
is_better = (
|
||||
max_datetime is None
|
||||
or batch_dt > max_datetime
|
||||
or (
|
||||
batch_dt == max_datetime
|
||||
and batch_seq is not None
|
||||
and (max_sequence is None or batch_seq > max_sequence)
|
||||
)
|
||||
):
|
||||
or (find_min and (
|
||||
batch_dt < max_datetime
|
||||
or (batch_dt == max_datetime and batch_seq is not None and (max_sequence is None or batch_seq < max_sequence))
|
||||
))
|
||||
or (not find_min and (
|
||||
batch_dt > max_datetime
|
||||
or (batch_dt == max_datetime and batch_seq is not None and (max_sequence is None or batch_seq > max_sequence))
|
||||
))
|
||||
)
|
||||
if is_better:
|
||||
max_datetime = batch_dt
|
||||
max_sequence = batch_seq
|
||||
|
||||
@@ -1740,9 +1786,10 @@ class DatabaseMigrator:
|
||||
f"(строки {total_rows - len(batch) + 1}–{total_rows}): {e}"
|
||||
)
|
||||
|
||||
direction_label = 'минимальный' if find_min else 'максимальный'
|
||||
self.logger.log_info(
|
||||
f"Обработано {total_rows} PK из {table_config.pg_table} "
|
||||
f"в {batch_num} батчах, watermark: {max_datetime}"
|
||||
f"в {batch_num} батчах, {direction_label} watermark: {max_datetime}"
|
||||
)
|
||||
|
||||
return {'last_x_datetime': max_datetime, 'last_sequence_value': max_sequence}
|
||||
|
||||
@@ -494,7 +494,7 @@ class MigrationJobQueue:
|
||||
|
||||
def _materialize_due_schedules(self):
|
||||
now = datetime.now()
|
||||
grace_cutoff = now - timedelta(seconds=Config.SCHEDULE_GRACE_SECONDS)
|
||||
grace_cutoff = now - timedelta(seconds=max(Config.SCHEDULE_GRACE_SECONDS, Config.QUEUE_POLL_SECONDS))
|
||||
with self._get_engine().connect() as conn:
|
||||
due_rows = conn.execute(self._text(f"""
|
||||
SELECT *
|
||||
|
||||
400
docker/openssl.cnf
Normal file
400
docker/openssl.cnf
Normal file
@@ -0,0 +1,400 @@
|
||||
#
|
||||
# OpenSSL example configuration file.
|
||||
# See doc/man5/config.pod for more info.
|
||||
#
|
||||
# This is mostly being used for generation of certificate requests,
|
||||
# but may be used for auto loading of providers
|
||||
|
||||
# Note that you can include other files from the main configuration
|
||||
# file using the .include directive.
|
||||
#.include filename
|
||||
|
||||
# This definition stops the following lines choking if HOME isn't
|
||||
# defined.
|
||||
HOME = .
|
||||
|
||||
# Use this in order to automatically load providers.
|
||||
openssl_conf = default_conf
|
||||
|
||||
# Comment out the next line to ignore configuration errors
|
||||
config_diagnostics = 1
|
||||
|
||||
# Extra OBJECT IDENTIFIER info:
|
||||
# oid_file = $ENV::HOME/.oid
|
||||
oid_section = new_oids
|
||||
|
||||
# To use this configuration file with the "-extfile" option of the
|
||||
# "openssl x509" utility, name here the section containing the
|
||||
# X.509v3 extensions to use:
|
||||
# extensions =
|
||||
# (Alternatively, use a configuration file that has only
|
||||
# X.509v3 extensions in its main [= default] section.)
|
||||
|
||||
[ new_oids ]
|
||||
# We can add new OIDs in here for use by 'ca', 'req' and 'ts'.
|
||||
# Add a simple OID like this:
|
||||
# testoid1=1.2.3.4
|
||||
# Or use config file substitution like this:
|
||||
# testoid2=${testoid1}.5.6
|
||||
|
||||
# Policies used by the TSA examples.
|
||||
tsa_policy1 = 1.2.3.4.1
|
||||
tsa_policy2 = 1.2.3.4.5.6
|
||||
tsa_policy3 = 1.2.3.4.5.7
|
||||
|
||||
# For FIPS
|
||||
# Optionally include a file that is generated by the OpenSSL fipsinstall
|
||||
# application. This file contains configuration data required by the OpenSSL
|
||||
# fips provider. It contains a named section e.g. [fips_sect] which is
|
||||
# referenced from the [provider_sect] below.
|
||||
# Refer to the OpenSSL security policy for more information.
|
||||
# .include fipsmodule.cnf
|
||||
|
||||
[openssl_init]
|
||||
providers = provider_sect
|
||||
|
||||
# List of providers to load
|
||||
[provider_sect]
|
||||
default = default_sect
|
||||
# The fips section name should match the section name inside the
|
||||
# included fipsmodule.cnf.
|
||||
# fips = fips_sect
|
||||
|
||||
# If no providers are activated explicitly, the default one is activated implicitly.
|
||||
# See man 7 OSSL_PROVIDER-default for more details.
|
||||
#
|
||||
# If you add a section explicitly activating any other provider(s), you most
|
||||
# probably need to explicitly activate the default provider, otherwise it
|
||||
# becomes unavailable in openssl. As a consequence applications depending on
|
||||
# OpenSSL may not work correctly which could lead to significant system
|
||||
# problems including inability to remotely access the system.
|
||||
[default_sect]
|
||||
# activate = 1
|
||||
|
||||
|
||||
####################################################################
|
||||
[ ca ]
|
||||
default_ca = CA_default # The default ca section
|
||||
|
||||
####################################################################
|
||||
[ CA_default ]
|
||||
|
||||
dir = ./demoCA # Where everything is kept
|
||||
certs = $dir/certs # Where the issued certs are kept
|
||||
crl_dir = $dir/crl # Where the issued crl are kept
|
||||
database = $dir/index.txt # database index file.
|
||||
#unique_subject = no # Set to 'no' to allow creation of
|
||||
# several certs with same subject.
|
||||
new_certs_dir = $dir/newcerts # default place for new certs.
|
||||
|
||||
certificate = $dir/cacert.pem # The CA certificate
|
||||
serial = $dir/serial # The current serial number
|
||||
crlnumber = $dir/crlnumber # the current crl number
|
||||
# must be commented out to leave a V1 CRL
|
||||
crl = $dir/crl.pem # The current CRL
|
||||
private_key = $dir/private/cakey.pem # The private key
|
||||
|
||||
x509_extensions = usr_cert # The extensions to add to the cert
|
||||
|
||||
# Comment out the following two lines for the "traditional"
|
||||
# (and highly broken) format.
|
||||
name_opt = ca_default # Subject Name options
|
||||
cert_opt = ca_default # Certificate field options
|
||||
|
||||
# Extension copying option: use with caution.
|
||||
# copy_extensions = copy
|
||||
|
||||
# Extensions to add to a CRL. Note: Netscape communicator chokes on V2 CRLs
|
||||
# so this is commented out by default to leave a V1 CRL.
|
||||
# crlnumber must also be commented out to leave a V1 CRL.
|
||||
# crl_extensions = crl_ext
|
||||
|
||||
default_days = 365 # how long to certify for
|
||||
default_crl_days= 30 # how long before next CRL
|
||||
default_md = default # use public key default MD
|
||||
preserve = no # keep passed DN ordering
|
||||
|
||||
# A few different ways of specifying how similar the request should look
|
||||
# For type CA, the listed attributes must be the same, and the optional
|
||||
# and supplied fields are just that :-)
|
||||
policy = policy_match
|
||||
|
||||
# For the CA policy
|
||||
[ policy_match ]
|
||||
countryName = match
|
||||
stateOrProvinceName = match
|
||||
organizationName = match
|
||||
organizationalUnitName = optional
|
||||
commonName = supplied
|
||||
emailAddress = optional
|
||||
|
||||
# For the 'anything' policy
|
||||
# At this point in time, you must list all acceptable 'object'
|
||||
# types.
|
||||
[ policy_anything ]
|
||||
countryName = optional
|
||||
stateOrProvinceName = optional
|
||||
localityName = optional
|
||||
organizationName = optional
|
||||
organizationalUnitName = optional
|
||||
commonName = supplied
|
||||
emailAddress = optional
|
||||
|
||||
####################################################################
|
||||
[ req ]
|
||||
default_bits = 2048
|
||||
default_keyfile = privkey.pem
|
||||
distinguished_name = req_distinguished_name
|
||||
attributes = req_attributes
|
||||
x509_extensions = v3_ca # The extensions to add to the self signed cert
|
||||
|
||||
# Passwords for private keys if not present they will be prompted for
|
||||
# input_password = secret
|
||||
# output_password = secret
|
||||
|
||||
# This sets a mask for permitted string types. There are several options.
|
||||
# default: PrintableString, T61String, BMPString.
|
||||
# pkix : PrintableString, BMPString (PKIX recommendation before 2004)
|
||||
# utf8only: only UTF8Strings (PKIX recommendation after 2004).
|
||||
# nombstr : PrintableString, T61String (no BMPStrings or UTF8Strings).
|
||||
# MASK:XXXX a literal mask value.
|
||||
# WARNING: ancient versions of Netscape crash on BMPStrings or UTF8Strings.
|
||||
string_mask = utf8only
|
||||
|
||||
# req_extensions = v3_req # The extensions to add to a certificate request
|
||||
|
||||
[ req_distinguished_name ]
|
||||
countryName = Country Name (2 letter code)
|
||||
countryName_default = AU
|
||||
countryName_min = 2
|
||||
countryName_max = 2
|
||||
|
||||
stateOrProvinceName = State or Province Name (full name)
|
||||
stateOrProvinceName_default = Some-State
|
||||
|
||||
localityName = Locality Name (eg, city)
|
||||
|
||||
0.organizationName = Organization Name (eg, company)
|
||||
0.organizationName_default = Internet Widgits Pty Ltd
|
||||
|
||||
# we can do this but it is not needed normally :-)
|
||||
#1.organizationName = Second Organization Name (eg, company)
|
||||
#1.organizationName_default = World Wide Web Pty Ltd
|
||||
|
||||
organizationalUnitName = Organizational Unit Name (eg, section)
|
||||
#organizationalUnitName_default =
|
||||
|
||||
commonName = Common Name (e.g. server FQDN or YOUR name)
|
||||
commonName_max = 64
|
||||
|
||||
emailAddress = Email Address
|
||||
emailAddress_max = 64
|
||||
|
||||
# SET-ex3 = SET extension number 3
|
||||
|
||||
[ req_attributes ]
|
||||
challengePassword = A challenge password
|
||||
challengePassword_min = 4
|
||||
challengePassword_max = 20
|
||||
|
||||
unstructuredName = An optional company name
|
||||
|
||||
[ usr_cert ]
|
||||
|
||||
# These extensions are added when 'ca' signs a request.
|
||||
|
||||
# This goes against PKIX guidelines but some CAs do it and some software
|
||||
# requires this to avoid interpreting an end user certificate as a CA.
|
||||
|
||||
basicConstraints=CA:FALSE
|
||||
|
||||
# This is typical in keyUsage for a client certificate.
|
||||
# keyUsage = nonRepudiation, digitalSignature, keyEncipherment
|
||||
|
||||
# PKIX recommendations harmless if included in all certificates.
|
||||
subjectKeyIdentifier=hash
|
||||
authorityKeyIdentifier=keyid,issuer
|
||||
|
||||
# This stuff is for subjectAltName and issuerAltname.
|
||||
# Import the email address.
|
||||
# subjectAltName=email:copy
|
||||
# An alternative to produce certificates that aren't
|
||||
# deprecated according to PKIX.
|
||||
# subjectAltName=email:move
|
||||
|
||||
# Copy subject details
|
||||
# issuerAltName=issuer:copy
|
||||
|
||||
# This is required for TSA certificates.
|
||||
# extendedKeyUsage = critical,timeStamping
|
||||
|
||||
[ v3_req ]
|
||||
|
||||
# Extensions to add to a certificate request
|
||||
|
||||
basicConstraints = CA:FALSE
|
||||
keyUsage = nonRepudiation, digitalSignature, keyEncipherment
|
||||
|
||||
[ v3_ca ]
|
||||
|
||||
|
||||
# Extensions for a typical CA
|
||||
|
||||
|
||||
# PKIX recommendation.
|
||||
|
||||
subjectKeyIdentifier=hash
|
||||
|
||||
authorityKeyIdentifier=keyid:always,issuer
|
||||
|
||||
basicConstraints = critical,CA:true
|
||||
|
||||
# Key usage: this is typical for a CA certificate. However since it will
|
||||
# prevent it being used as a test self-signed certificate it is best
|
||||
# left out by default.
|
||||
# keyUsage = cRLSign, keyCertSign
|
||||
|
||||
# Include email address in subject alt name: another PKIX recommendation
|
||||
# subjectAltName=email:copy
|
||||
# Copy issuer details
|
||||
# issuerAltName=issuer:copy
|
||||
|
||||
# DER hex encoding of an extension: beware experts only!
|
||||
# obj=DER:02:03
|
||||
# Where 'obj' is a standard or added object
|
||||
# You can even override a supported extension:
|
||||
# basicConstraints= critical, DER:30:03:01:01:FF
|
||||
|
||||
[ crl_ext ]
|
||||
|
||||
# CRL extensions.
|
||||
# Only issuerAltName and authorityKeyIdentifier make any sense in a CRL.
|
||||
|
||||
# issuerAltName=issuer:copy
|
||||
authorityKeyIdentifier=keyid:always
|
||||
|
||||
[ proxy_cert_ext ]
|
||||
# These extensions should be added when creating a proxy certificate
|
||||
|
||||
# This goes against PKIX guidelines but some CAs do it and some software
|
||||
# requires this to avoid interpreting an end user certificate as a CA.
|
||||
|
||||
basicConstraints=CA:FALSE
|
||||
|
||||
# This is typical in keyUsage for a client certificate.
|
||||
# keyUsage = nonRepudiation, digitalSignature, keyEncipherment
|
||||
|
||||
# PKIX recommendations harmless if included in all certificates.
|
||||
subjectKeyIdentifier=hash
|
||||
authorityKeyIdentifier=keyid,issuer
|
||||
|
||||
# This stuff is for subjectAltName and issuerAltname.
|
||||
# Import the email address.
|
||||
# subjectAltName=email:copy
|
||||
# An alternative to produce certificates that aren't
|
||||
# deprecated according to PKIX.
|
||||
# subjectAltName=email:move
|
||||
|
||||
# Copy subject details
|
||||
# issuerAltName=issuer:copy
|
||||
|
||||
# This really needs to be in place for it to be a proxy certificate.
|
||||
proxyCertInfo=critical,language:id-ppl-anyLanguage,pathlen:3,policy:foo
|
||||
|
||||
####################################################################
|
||||
[ tsa ]
|
||||
|
||||
default_tsa = tsa_config1 # the default TSA section
|
||||
|
||||
[ tsa_config1 ]
|
||||
|
||||
# These are used by the TSA reply generation only.
|
||||
dir = ./demoCA # TSA root directory
|
||||
serial = $dir/tsaserial # The current serial number (mandatory)
|
||||
crypto_device = builtin # OpenSSL engine to use for signing
|
||||
signer_cert = $dir/tsacert.pem # The TSA signing certificate
|
||||
# (optional)
|
||||
certs = $dir/cacert.pem # Certificate chain to include in reply
|
||||
# (optional)
|
||||
signer_key = $dir/private/tsakey.pem # The TSA private key (optional)
|
||||
signer_digest = sha256 # Signing digest to use. (Optional)
|
||||
default_policy = tsa_policy1 # Policy if request did not specify it
|
||||
# (optional)
|
||||
other_policies = tsa_policy2, tsa_policy3 # acceptable policies (optional)
|
||||
digests = sha1, sha256, sha384, sha512 # Acceptable message digests (mandatory)
|
||||
accuracy = secs:1, millisecs:500, microsecs:100 # (optional)
|
||||
clock_precision_digits = 0 # number of digits after dot. (optional)
|
||||
ordering = yes # Is ordering defined for timestamps?
|
||||
# (optional, default: no)
|
||||
tsa_name = yes # Must the TSA name be included in the reply?
|
||||
# (optional, default: no)
|
||||
ess_cert_id_chain = no # Must the ESS cert id chain be included?
|
||||
# (optional, default: no)
|
||||
ess_cert_id_alg = sha256 # algorithm to compute certificate
|
||||
# identifier (optional, default: sha256)
|
||||
|
||||
[insta] # CMP using Insta Demo CA
|
||||
# Message transfer
|
||||
server = pki.certificate.fi:8700
|
||||
# proxy = # set this as far as needed, e.g., http://192.168.1.1:8080
|
||||
# tls_use = 0
|
||||
path = pkix/
|
||||
|
||||
# Server authentication
|
||||
recipient = "/C=FI/O=Insta Demo/CN=Insta Demo CA" # or set srvcert or issuer
|
||||
ignore_keyusage = 1 # quirk needed to accept Insta CA cert not including digitalsignature
|
||||
unprotected_errors = 1 # quirk needed to accept negative responses possibly not protected
|
||||
extracertsout = insta.extracerts.pem
|
||||
|
||||
# Client authentication
|
||||
ref = 3078 # user identification
|
||||
secret = pass:insta # can be used for both client and server side
|
||||
|
||||
# Generic message options
|
||||
cmd = ir # default operation, can be overridden on cmd line with, e.g., kur
|
||||
|
||||
# Certificate enrollment
|
||||
subject = "/CN=openssl-cmp-test"
|
||||
newkey = insta.priv.pem
|
||||
out_trusted = apps/insta.ca.crt # does not include keyUsage digitalSignature
|
||||
certout = insta.cert.pem
|
||||
|
||||
[pbm] # Password-based protection for Insta CA
|
||||
# Server and client authentication
|
||||
ref = $insta::ref # 3078
|
||||
secret = $insta::secret # pass:insta
|
||||
|
||||
[signature] # Signature-based protection for Insta CA
|
||||
# Server authentication
|
||||
trusted = $insta::out_trusted # apps/insta.ca.crt
|
||||
|
||||
# Client authentication
|
||||
secret = # disable PBM
|
||||
key = $insta::newkey # insta.priv.pem
|
||||
cert = $insta::certout # insta.cert.pem
|
||||
|
||||
[ir]
|
||||
cmd = ir
|
||||
|
||||
[cr]
|
||||
cmd = cr
|
||||
|
||||
[kur]
|
||||
# Certificate update
|
||||
cmd = kur
|
||||
oldcert = $insta::certout # insta.cert.pem
|
||||
|
||||
[rr]
|
||||
# Certificate revocation
|
||||
cmd = rr
|
||||
oldcert = $insta::certout # insta.cert.pem
|
||||
|
||||
[default_conf]
|
||||
ssl_conf = ssl_sect
|
||||
|
||||
[ssl_sect]
|
||||
system_default = system_default_sect
|
||||
|
||||
[system_default_sect]
|
||||
MinProtocol = TLSv1.2
|
||||
CipherString = DEFAULT@SECLEVEL=0
|
||||
136
docs/replication.md
Normal file
136
docs/replication.md
Normal file
@@ -0,0 +1,136 @@
|
||||
# Схемы репликации
|
||||
|
||||
## Конфигурация таблиц в БД
|
||||
|
||||
Хранится в `replicator.migration_tables`. Заполняется автоматически из `DEFAULT_TABLE_MIGRATIONS` при первом запуске, если таблица пуста.
|
||||
|
||||
### Поля таблицы `replicator.migration_tables`
|
||||
|
||||
| Поле | Тип | Описание |
|
||||
|---|---|---|
|
||||
| `source_table` | text PK | Имя таблицы в MSSQL |
|
||||
| `target_table` | text | Имя таблицы в PostgreSQL (если `NULL` — берётся `source_table`) |
|
||||
| `mode` | text | Схема репликации: `full` или `incremental` |
|
||||
| `initial_load_mode` | text | Режим первого запуска: `full_then_incremental` |
|
||||
| `life_table` | text | Имя Life_-таблицы в MSSQL (только для `incremental`) |
|
||||
| `datetime_column` | text | Колонка даты в Life_-таблице (обычно `x_DateTime`) |
|
||||
| `sequence_column` | text | Колонка последовательности в Life_-таблице (например `LPULifeID`) |
|
||||
| `order_columns_json` | jsonb | Порядок сортировки при инкрементальном чтении, например `["x_DateTime","LPULifeID"]` |
|
||||
| `operation_column` | text | Колонка типа операции в Life_-таблице (обычно `x_Operation`) |
|
||||
| `delete_operations_json` | jsonb | Значения operation_column, означающие удаление (по умолчанию `["d"]`) |
|
||||
| `upsert_operations_json` | jsonb | Значения operation_column, означающие вставку/обновление (по умолчанию `["i","u"]`) |
|
||||
| `primary_key_json` | jsonb | Первичный ключ целевой таблицы, например `["LPUID"]` |
|
||||
| `exclude_columns_json` | jsonb | Колонки Life_-таблицы, которые не нужно реплицировать (например служебные `x_DateTime`, `x_Operation`, `LPULifeID`) |
|
||||
| `timescale` | boolean | Использовать TimescaleDB hypertable |
|
||||
| `timescale_time_column` | text | Колонка времени для TimescaleDB |
|
||||
| `enabled` | boolean | Включена ли репликация данной таблицы |
|
||||
|
||||
### Состояние репликации `replicator.migration_state`
|
||||
|
||||
| Поле | Описание |
|
||||
|---|---|
|
||||
| `table_name` | Имя таблицы PostgreSQL |
|
||||
| `last_x_datetime` | Последняя обработанная дата из Life_ |
|
||||
| `last_sequence_value` | Последнее обработанное значение sequence |
|
||||
| `last_run_at` | Время последнего запуска |
|
||||
| `rows_copied` | Количество скопированных строк |
|
||||
| `status` | `success` / `failed` |
|
||||
| `error` | Текст ошибки, если `status = failed` |
|
||||
|
||||
---
|
||||
|
||||
## Схема 1: `mode = 'full'`
|
||||
|
||||
Полная перезапись таблицы при каждом запуске. Нет watermark, нет состояния.
|
||||
|
||||
**Когда использовать:** небольшие справочники, которые меняются редко и не имеют Life_-таблицы.
|
||||
|
||||
**Пример записи в БД:**
|
||||
```sql
|
||||
INSERT INTO replicator.migration_tables (
|
||||
source_table, mode, initial_load_mode,
|
||||
datetime_column, enabled
|
||||
) VALUES (
|
||||
'oms_LPU', 'full', 'full_then_incremental',
|
||||
'x_DateTime', true
|
||||
);
|
||||
```
|
||||
|
||||
**Поведение каждого запуска:**
|
||||
1. Читает всю таблицу из MSSQL
|
||||
2. Перезаписывает в PostgreSQL
|
||||
|
||||
---
|
||||
|
||||
## Схема 2: `mode = 'incremental'`
|
||||
|
||||
Инкрементальная репликация через Life_-таблицу. Отслеживает watermark `(last_x_datetime, last_sequence_value)`.
|
||||
|
||||
**Когда использовать:** большие таблицы с журналом изменений (Life_).
|
||||
|
||||
**Пример записи в БД:**
|
||||
```sql
|
||||
INSERT INTO replicator.migration_tables (
|
||||
source_table, target_table, mode, initial_load_mode,
|
||||
life_table, datetime_column, sequence_column,
|
||||
order_columns_json, operation_column,
|
||||
delete_operations_json, upsert_operations_json,
|
||||
primary_key_json, exclude_columns_json,
|
||||
enabled
|
||||
) VALUES (
|
||||
'Oms_LPU', 'oms_lpu', 'incremental', 'full_then_incremental',
|
||||
'Life_oms_LPU', 'x_DateTime', 'LPULifeID',
|
||||
'["x_DateTime","LPULifeID"]', 'x_Operation',
|
||||
'["d"]', '["i","u"]',
|
||||
'["LPUID"]', '["LPULifeID","x_Operation","x_DateTime","x_Seance","x_User"]',
|
||||
true
|
||||
);
|
||||
```
|
||||
|
||||
### Поведение каждого запуска
|
||||
|
||||
```
|
||||
1. В Life_ нет новых данных (upper_bound = NULL)
|
||||
→ Ничего не делать, watermark сохраняется как есть
|
||||
|
||||
2. Watermark NULL + таблица существует с данными
|
||||
→ Автодетекция: ищет МИНИМАЛЬНЫЙ (x_DateTime, sequence)
|
||||
в Life_ для всех PK уже имеющихся в PostgreSQL
|
||||
→ Сохраняет как watermark
|
||||
→ Переходит к шагу 4
|
||||
|
||||
3. Watermark всё ещё NULL + initial_load_mode = 'full_then_incremental'
|
||||
→ Полная загрузка из source_table (не из Life_)
|
||||
→ После успеха сохраняет upper_bound как watermark
|
||||
|
||||
4. Watermark есть → инкрементальная миграция
|
||||
→ Читает из Life_: x_DateTime > watermark AND x_DateTime <= upper_bound
|
||||
→ Разбивает на upsert (операции из upsert_operations) и delete (из delete_operations)
|
||||
→ Применяет через staging-таблицу в PostgreSQL
|
||||
→ Обновляет watermark до максимального обработанного значения
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Принудительная перезагрузка (`force_full`)
|
||||
|
||||
Запускается через API или вручную. Игнорирует watermark.
|
||||
|
||||
| Условие | Поведение |
|
||||
|---|---|
|
||||
| Первый `force_full` для `full_then_incremental` таблицы без watermark и без целевой таблицы | Быстрая загрузка через `COPY` без SQLAlchemy |
|
||||
| Все остальные случаи | Обычная полная загрузка |
|
||||
| После успеха на `incremental` таблице | Сохраняет `upper_bound` как watermark |
|
||||
|
||||
---
|
||||
|
||||
## Сводная матрица поведения
|
||||
|
||||
| Ситуация | Поведение |
|
||||
|---|---|
|
||||
| `mode = full` | Всегда полная перезапись |
|
||||
| `mode = incremental`, нет данных в Life_ | Пропуск |
|
||||
| `mode = incremental`, нет watermark, таблица с данными | Автодетекция min watermark → инкрементальная |
|
||||
| `mode = incremental`, нет watermark, таблица пустая или отсутствует | Полная загрузка → watermark = upper_bound |
|
||||
| `mode = incremental`, watermark есть | Инкрементальная от watermark до upper_bound |
|
||||
| `force_full` | Полная перезагрузка → watermark = upper_bound |
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user