nex_basse/backend/scripts/convert_sql.py

177 lines
7.1 KiB
Python

import re
import sys
# Line prefixes (after stripping whitespace) that are dropped from the dump:
# comments / versioned directives, session SETs and table lock statements.
_SKIPPED_PREFIXES = ('/*', '--', 'SET ', 'LOCK TABLES', 'UNLOCK TABLES')

# MySQL integer type keywords mapped to the PostgreSQL type used in their place.
_PG_INTEGER_TYPES = {
    'tinyint': 'SMALLINT',
    'smallint': 'SMALLINT',
    'mediumint': 'INTEGER',
    'int': 'INTEGER',
    'bigint': 'BIGINT',
}

# Matches a whole integer type keyword plus an optional display width such as
# "(11)". The word boundaries keep "int" from matching inside "bigint", and the
# look-behind keeps a quoted column that is *named* like a type untouched.
_MYSQL_INTEGER_TYPE = re.compile(
    r'(?<!")\b(tinyint|smallint|mediumint|bigint|int)\b(?:\(\d+\))?',
    re.IGNORECASE,
)


def _strip_mysql_noise(content):
    """Return *content* without blank lines, comment lines and SET/LOCK lines."""
    kept = []
    for line in content.splitlines():
        stripped = line.strip()
        if not stripped or stripped.startswith(_SKIPPED_PREFIXES):
            continue
        kept.append(line)
    return '\n'.join(kept)


def _convert_column_line(line):
    """Rewrite one column definition line from MySQL to PostgreSQL syntax."""
    # Integer family: drop display widths and map each keyword as a whole
    # word, so "bigint(20)" becomes BIGINT rather than "bigINTEGER".
    line = _MYSQL_INTEGER_TYPE.sub(
        lambda m: _PG_INTEGER_TYPES[m.group(1).lower()], line
    )
    # datetime -> TIMESTAMP (a column named "datetime" keeps its name)
    line = re.sub(r'(?<!")\bdatetime\b', 'TIMESTAMP', line, flags=re.IGNORECASE)
    # Normalise varchar(N) to upper case
    line = re.sub(r'varchar\(\d+\)', lambda m: m.group(0).upper(), line, flags=re.IGNORECASE)
    # Remove MySQL specific column attributes
    line = re.sub(r'\s+CHARACTER\s+SET\s+[\w]+', '', line, flags=re.IGNORECASE)
    line = re.sub(r'\s+COLLATE\s+[\w]+', '', line, flags=re.IGNORECASE)
    # AUTO_INCREMENT -> SERIAL / BIGSERIAL
    # e.g. "id" INTEGER NOT NULL AUTO_INCREMENT -> "id" SERIAL
    if 'AUTO_INCREMENT' in line:
        line = re.sub(r'("[\w]+")\s+INTEGER\s+NOT\s+NULL\s+AUTO_INCREMENT', r'\1 SERIAL', line, flags=re.IGNORECASE)
        line = re.sub(r'("[\w]+")\s+bigint\s+NOT\s+NULL\s+AUTO_INCREMENT', r'\1 BIGSERIAL', line, flags=re.IGNORECASE)
        # Remove AUTO_INCREMENT if still present (i.e. not matched above)
        line = re.sub(r'\s+AUTO_INCREMENT', '', line, flags=re.IGNORECASE)
    # Remove inline COMMENT '...'
    line = re.sub(r"\s+COMMENT\s+'[^']*'", "", line, flags=re.IGNORECASE)
    # Remove ON UPDATE CURRENT_TIMESTAMP
    line = re.sub(r'\s+ON\s+UPDATE\s+CURRENT_TIMESTAMP', '', line, flags=re.IGNORECASE)
    # Upper-case the json type keyword (but not a column named "json")
    line = re.sub(r'(?<!")\bjson\b', 'JSON', line, flags=re.IGNORECASE)
    return line


def convert_mysql_to_pg(input_file, output_file):
    """Convert a MySQL SQL dump file into PostgreSQL-flavoured SQL.

    The dump is processed line by line: comment/SET/LOCK lines are dropped,
    backticks become double quotes, column definitions get their types and
    attributes rewritten, plain KEY lines are turned into CREATE INDEX
    statements and FOREIGN KEY constraints into ALTER TABLE statements, both
    appended after all other output.

    Args:
        input_file: Path of the UTF-8 encoded MySQL dump to read.
        output_file: Path the converted SQL is written to (UTF-8, overwritten).
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # 1. Pre-process: remove comments and session statements.
    # 2. Global replacement: backticks to double quotes.
    content = _strip_mysql_noise(content).replace('`', '"')

    # 3. Line-by-line processing for schema definitions
    final_lines = []
    current_table = None
    deferred_indexes = []
    deferred_fks = []

    for line in content.splitlines():
        stripped = line.strip()

        # INSERT lines carry row data: only fix string escaping and never run
        # the schema rewrites below over them, so data that happens to contain
        # text like "USING BTREE" or "FOREIGN KEY" is left intact.
        if 'INSERT INTO' in line:
            line = line.replace(r'\"', '"')
            line = line.replace(r"\'", "''")
            final_lines.append(line)
            continue

        # Track current table
        table_match = re.match(r'CREATE TABLE "(\w+)"', stripped)
        if table_match:
            current_table = table_match.group(1)

        # A column definition starts with a quoted identifier.
        if stripped.startswith('"'):
            line = _convert_column_line(line)

        # UNIQUE KEY "name" (...) -> CONSTRAINT "name" UNIQUE (...)
        if 'UNIQUE KEY' in line:
            line = re.sub(r'UNIQUE KEY\s+"(\w+)"\s+(\(.*\))', r'CONSTRAINT "\1" UNIQUE \2', line, flags=re.IGNORECASE)

        # KEY "name" (...) -> deferred CREATE INDEX. The pattern is anchored at
        # the start of the line, so PRIMARY/UNIQUE/FOREIGN KEY lines never match.
        # MySQL: KEY "idx_name" ("col1", "col2")
        # Postgres: CREATE INDEX "idx_name" ON "table_name" ("col1", "col2");
        if re.search(r'^\s*KEY\s+"', line):
            key_match = re.search(r'^\s*KEY\s+"(\w+)"\s+(\(.*\))', line)
            if key_match and current_table:
                idx_name = key_match.group(1)
                idx_cols = key_match.group(2)
                deferred_indexes.append(f'CREATE INDEX "{idx_name}" ON "{current_table}" {idx_cols};')
                continue  # Skip this line in CREATE TABLE
            # Fallback if the regex fails: comment it out to avoid a syntax error
            line = "-- " + line

        # Foreign key cleanup
        if 'FOREIGN KEY' in line:
            # Remove db.table references like "nex_docus"."users" -> "users"
            line = re.sub(r'"[\w]+"\."([\w]+)"', r'"\1"', line)
            # Fix "users" -> "sys_user"
            line = line.replace('"users"', '"sys_user"')
            # Fix sys_user PK reference (id -> user_id)
            if 'REFERENCES "sys_user"' in line:
                line = line.replace('("id")', '("user_id")')
            # Defer the constraint (without its trailing comma) to an ALTER TABLE
            constraint_def = line.strip().rstrip(',')
            if current_table:
                deferred_fks.append(f'ALTER TABLE "{current_table}" ADD {constraint_def};')
                continue  # Skip adding to CREATE TABLE

        # Remove USING BTREE
        line = re.sub(r'\s+USING\s+BTREE', '', line, flags=re.IGNORECASE)

        # End of table definition: drop the MySQL table options
        if stripped.startswith((') ENGINE=', ') DEFAULT CHARSET=')):
            line = ');'
        elif ') ENGINE=' in line:
            line = re.sub(r'\)\s*ENGINE=[^;]+;', ');', line, flags=re.IGNORECASE)

        final_lines.append(line)

    # Append deferred indexes
    if deferred_indexes:
        final_lines.append("\n-- Deferred Indexes")
        final_lines.extend(deferred_indexes)

    # Append deferred FKs
    if deferred_fks:
        final_lines.append("\n-- Deferred Foreign Keys")
        final_lines.extend(deferred_fks)

    content = '\n'.join(final_lines)

    # Removing KEY / FOREIGN KEY lines can leave a dangling comma right before
    # the closing ");" of a CREATE TABLE; drop it.
    content = re.sub(r',\s*\);', ');', content)

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(content)
if __name__ == '__main__':
    # Script entry point: expects exactly two positional arguments, the dump
    # to read and the file to write; anything else prints usage and exits 1.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python convert_sql.py <input_file> <output_file>")
        sys.exit(1)
    source_path, target_path = cli_args
    convert_mysql_to_pg(source_path, target_path)