177 lines
7.1 KiB
Python
177 lines
7.1 KiB
Python
import re
|
|
import sys
|
|
|
|
def _drop_dump_noise(lines):
    """Return *lines* without comments, blank lines and MySQL session statements."""
    skipped_prefixes = ('/*', '--', 'SET ', 'LOCK TABLES', 'UNLOCK TABLES')
    kept = []
    for line in lines:
        stripped = line.strip()
        if not stripped or stripped.startswith(skipped_prefixes):
            continue
        kept.append(line)
    return kept


def _quote_identifiers(line):
    """Turn MySQL backtick quoting into double quotes.

    For an INSERT statement only the part before `` VALUES`` (table and
    column names) is rewritten, so backticks stored inside the inserted
    string values are left intact.
    """
    if line.lstrip().startswith('INSERT INTO'):
        head, sep, values = line.partition(' VALUES')
        return head.replace('`', '"') + sep + values
    return line.replace('`', '"')


def _escape_insert_literals(line):
    """Rewrite MySQL backslash-escaped quotes (``\\"`` and ``\\'``) for PostgreSQL."""
    return line.replace(r'\"', '"').replace(r"\'", "''")


def _convert_column_def(line):
    """Rewrite the MySQL types and attributes of one column definition line."""
    flags = re.IGNORECASE

    # Integer family. Every pattern is anchored with \b so that the plain
    # "int" rules cannot fire inside bigint/smallint/mediumint.
    line = re.sub(r'\btinyint(\(\d+\))?', 'SMALLINT', line, flags=flags)
    # Only the display width is dropped; the type names are valid as-is.
    line = re.sub(r'\b(smallint|bigint)\(\d+\)', r'\1', line, flags=flags)
    # PostgreSQL has no mediumint; INTEGER is the next wider type.
    line = re.sub(r'\bmediumint(\(\d+\))?', 'INTEGER', line, flags=flags)
    line = re.sub(r'\bint\(\d+\)', 'INTEGER', line, flags=flags)
    line = re.sub(r'\bint\b', 'INTEGER', line, flags=flags)

    line = re.sub(r'\bdatetime\b', 'TIMESTAMP', line, flags=flags)
    line = re.sub(r'varchar\(\d+\)', lambda m: m.group(0).upper(), line, flags=flags)

    # MySQL-only column attributes.
    line = re.sub(r'\s+CHARACTER\s+SET\s+[\w]+', '', line, flags=flags)
    line = re.sub(r'\s+COLLATE\s+[\w]+', '', line, flags=flags)

    # "id" INTEGER NOT NULL AUTO_INCREMENT -> "id" SERIAL (BIGSERIAL for bigint).
    if 'AUTO_INCREMENT' in line:
        line = re.sub(r'("[\w]+")\s+INTEGER\s+NOT\s+NULL\s+AUTO_INCREMENT',
                      r'\1 SERIAL', line, flags=flags)
        line = re.sub(r'("[\w]+")\s+bigint\s+NOT\s+NULL\s+AUTO_INCREMENT',
                      r'\1 BIGSERIAL', line, flags=flags)
        # Anything the two patterns above did not match just loses the keyword.
        line = re.sub(r'\s+AUTO_INCREMENT', '', line, flags=flags)

    line = re.sub(r"\s+COMMENT\s+'[^']*'", "", line, flags=flags)
    line = re.sub(r'\s+ON\s+UPDATE\s+CURRENT_TIMESTAMP', '', line, flags=flags)

    # Normalise the json type keyword to upper case.
    line = re.sub(r'\bjson\b', 'JSON', line, flags=flags)
    return line


def convert_mysql_to_pg(input_file, output_file):
    """Convert a MySQL SQL dump into PostgreSQL-flavoured SQL.

    The dump is processed line by line:

    * comment lines, blank lines and ``SET`` / ``LOCK TABLES`` /
      ``UNLOCK TABLES`` statements are dropped;
    * backtick-quoted identifiers become double-quoted identifiers;
    * column definitions get their types and MySQL-only attributes rewritten;
    * ``KEY`` lines are removed from ``CREATE TABLE`` and re-emitted at the
      end of the output as ``CREATE INDEX`` statements;
    * ``FOREIGN KEY`` constraints are removed from ``CREATE TABLE`` and
      re-emitted at the end as ``ALTER TABLE ... ADD ...`` statements, with
      references to ``"users"`` renamed to ``"sys_user"`` (and its ``"id"``
      column to ``"user_id"``);
    * ``INSERT INTO`` statements only get their quote escapes rewritten.

    The converter is line based: each column definition, key, constraint and
    INSERT statement is expected to sit on a single line.

    Args:
        input_file: Path of the UTF-8 encoded MySQL dump to read.
        output_file: Path the converted SQL is written to (UTF-8, overwritten).
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # 1. Pre-process: remove comments and session noise.
    # 2. Re-quote identifiers.
    lines = [_quote_identifiers(line) for line in _drop_dump_noise(content.splitlines())]

    # 3. Line-by-line processing for schema definitions.
    final_lines = []
    current_table = None
    deferred_indexes = []
    deferred_fks = []

    for line in lines:
        stripped = line.strip()

        # Data rows must never go through the schema rules below: their string
        # values may legitimately contain text such as "FOREIGN KEY".
        if stripped.startswith('INSERT INTO'):
            final_lines.append(_escape_insert_literals(line))
            continue

        # Track the table that keys and constraints belong to.
        table_match = re.match(r'CREATE TABLE "(\w+)"', stripped)
        if table_match:
            current_table = table_match.group(1)

        # A column definition starts with a quoted identifier.
        is_column_def = stripped.startswith('"') and 'INSERT INTO' not in line
        if is_column_def:
            line = _convert_column_def(line)

        # UNIQUE KEY "name" (...) -> CONSTRAINT "name" UNIQUE (...)
        if 'UNIQUE KEY' in line:
            line = re.sub(r'UNIQUE KEY\s+"(\w+)"\s+(\(.*\))', r'CONSTRAINT "\1" UNIQUE \2',
                          line, flags=re.IGNORECASE)

        # KEY "idx" ("a", "b") -> CREATE INDEX "idx" ON "table" ("a", "b");
        if (re.search(r'^\s*KEY\s+"', line) and 'PRIMARY' not in line
                and 'UNIQUE' not in line and 'FOREIGN' not in line):
            key_match = re.search(r'^\s*KEY\s+"(\w+)"\s+(\(.*\))', line)
            if key_match and current_table:
                idx_name = key_match.group(1)
                idx_cols = key_match.group(2)
                deferred_indexes.append(
                    f'CREATE INDEX "{idx_name}" ON "{current_table}" {idx_cols};')
                continue  # Not part of CREATE TABLE in PostgreSQL.
            # Could not parse it: comment it out rather than emit invalid SQL.
            line = "-- " + line

        if 'FOREIGN KEY' in line:
            # Drop the schema qualifier: "db"."users" -> "users".
            line = re.sub(r'"[\w]+"\."([\w]+)"', r'"\1"', line)
            # The target schema names the user table "sys_user" ...
            line = line.replace('"users"', '"sys_user"')
            # ... and its primary key "user_id" instead of "id".
            if 'REFERENCES "sys_user"' in line:
                line = line.replace('("id")', '("user_id")')

            # Deferred so that the referenced table exists when the FK is added.
            constraint_def = line.strip().rstrip(',')
            if current_table:
                deferred_fks.append(f'ALTER TABLE "{current_table}" ADD {constraint_def};')
                continue

        line = re.sub(r'\s+USING\s+BTREE', '', line, flags=re.IGNORECASE)

        # Close the table definition without MySQL table options.
        if stripped.startswith((') ENGINE=', ') DEFAULT CHARSET=')):
            line = ');'
        elif ') ENGINE=' in line:
            line = re.sub(r'\)\s*ENGINE=[^;]+;', ');', line, flags=re.IGNORECASE)

        # Lines that merely contain INSERT INTO (not at the start) still get
        # their quote escapes rewritten.
        if 'INSERT INTO' in line:
            line = _escape_insert_literals(line)

        final_lines.append(line)

    if deferred_indexes:
        final_lines.append("\n-- Deferred Indexes")
        final_lines.extend(deferred_indexes)

    if deferred_fks:
        final_lines.append("\n-- Deferred Foreign Keys")
        final_lines.extend(deferred_fks)

    content = '\n'.join(final_lines)

    # Removing KEY / CONSTRAINT lines can leave a dangling comma before ");".
    content = re.sub(r',\s*\);', ');', content)

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(content)
|
|
|
|
if __name__ == '__main__':
    # Script entry point: expects exactly two positional arguments,
    # the MySQL dump to read and the PostgreSQL file to write.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python convert_sql.py <input_file> <output_file>")
        sys.exit(1)

    source_path, target_path = cli_args
    convert_mysql_to_pg(source_path, target_path)
|