Commit b9bbad52 authored by Jacob Vosmaer

Integrate mysql-to-postgres conversion

parent bf1e976d
require 'yaml'
require 'open3'
 
module Backup
class Database
@@ -17,7 +18,7 @@ module Backup
FileUtils.mkdir_p(@db_dir) unless Dir.exists?(@db_dir)
end
 
- def dump
+ def dump(mysql_to_postgresql=false)
FileUtils.rm_f(db_file_name)
compress_rd, compress_wr = IO.pipe
compress_pid = spawn(*%W(gzip -c), in: compress_rd, out: [db_file_name, 'w', 0600])
@@ -26,7 +27,9 @@ module Backup
dump_pid = case config["adapter"]
when /^mysql/ then
$progress.print "Dumping MySQL database #{config['database']} ... "
- spawn('mysqldump', *mysql_args, config['database'], *TABLES, out: compress_wr)
+ args = mysql_args
+ args << '--compatible=postgresql' if mysql_to_postgresql
+ spawn('mysqldump', *args, config['database'], *TABLES, out: compress_wr)
when "postgresql" then
$progress.print "Dumping PostgreSQL database #{config['database']} ... "
pg_env
@@ -38,6 +41,42 @@ module Backup
 
report_success(success)
abort 'Backup failed' unless success
convert_to_postgresql if mysql_to_postgresql
end
def convert_to_postgresql
mysql_dump_gz = db_file_name + '.mysql'
psql_dump_gz = db_file_name + '.psql'
drop_indexes_sql = File.join(db_dir, 'drop_indexes.sql')
File.rename(db_file_name, mysql_dump_gz)
$progress.print "Converting MySQL database dump to Postgres ... "
statuses = Open3.pipeline(
%W(gzip -cd #{mysql_dump_gz}),
%W(python lib/support/mysql-postgresql-converter/db_converter.py - - #{drop_indexes_sql}),
%W(gzip -c),
out: [psql_dump_gz, 'w', 0600]
)
if !statuses.compact.all?(&:success?)
abort "mysql-to-postgresql-converter failed"
end
$progress.puts '[DONE]'.green
$progress.print "Splicing in 'DROP INDEX' statements ... "
statuses = Open3.pipeline(
%W(lib/support/mysql-postgresql-converter/splice_drop_indexes #{psql_dump_gz} #{drop_indexes_sql}),
%W(gzip -c),
out: [db_file_name, 'w', 0600]
)
if !statuses.compact.all?(&:success?)
abort "Failed to splice in 'DROP INDEXES' statements"
end
$progress.puts '[DONE]'.green
ensure
FileUtils.rm_f([mysql_dump_gz, psql_dump_gz, drop_indexes_sql])
end
 
def restore
@@ -35,17 +35,14 @@ First, dump your MySQL database in PostgreSQL-compatible format
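The dump command itself appears above the part of the README shown in this hunk. A plausible sketch, assuming a database named databasename and the --compatible=postgresql flag that the backup code above passes to mysqldump (add whatever connection options your setup needs):

mysqldump --compatible=postgresql databasename > databasename.mysql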
 
Then, convert it using the db_converter.py script.
 
- python db_converter.py databasename.mysql databasename.psql
+ python db_converter.py databasename.mysql - drop_indexes.sql | gzip -c > databasename.unfinished.psql.gz
 
It'll print progress to the terminal
 
Now we have a DB dump that can be imported but the dump will be slow due
- to existing indexes. We use 'ed' to edit the DB dump file and move the
- 'DROP INDEX' statements to the start of the import. Ed is not the fastest
- tool for this job if your DB dump is multiple gigabytes. (Patches to
- the converter are welcome!)
+ to existing indexes.
 
- ed -s databasename.psql < move_drop_indexes.ed
+ ./splice_drop_indexes databasename.unfinished.psql.gz drop_indexes.sql > databasename.psql
 
Next, load your new dump into a fresh PostgreSQL database using:
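The exact command is cut off here; a typical invocation, assuming a freshly created database named databasename, would be something like:

psql -f databasename.psql databasename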
 
@@ -15,7 +15,7 @@ import time
import subprocess
 
 
- def parse(input_filename, output_filename):
+ def parse(input_filename, output_filename, drop_index_filename):
"Feed it a file, and it'll output a fixed one"
 
# State storage
@@ -44,6 +44,8 @@ def parse(input_filename, output_filename):
output = open(output_filename, "w")
logging = sys.stdout
 
drop_index = open(drop_index_filename, "w")
if input_filename == "-":
input_fh = sys.stdin
else:
@@ -234,12 +236,9 @@ def parse(input_filename, output_filename):
for line in sequence_lines:
output.write("%s;\n" % line)
 
- # This line is an anchor for move_drop_indexes.ed
- output.write("\n-- Drop indexes --\n")
+ drop_index.write("-- Drop indexes --\n")
for line in drop_index_lines:
- output.write("%s;\n" % line)
- # This line is an anchor for move_drop_indexes.ed
- output.write("-- END Drop indexes --\n")
+ drop_index.write("%s;\n" % line)
 
# Write indexes out
output.write("\n-- Indexes --\n")
@@ -253,4 +252,4 @@ def parse(input_filename, output_filename):
 
 
if __name__ == "__main__":
- parse(sys.argv[1], sys.argv[2])
+ parse(sys.argv[1], sys.argv[2], sys.argv[3])
#!/bin/sh
# This script reorders database dumps generated by db_converter.py for
# efficient consumption by Postgres.
fail() {
echo "$@" 1>2
exit 1
}
db_gz=$1
drop_indexes_sql=$2
if [ -z "$db_gz" ] || [ -z "$drop_indexes_sql" ] ; then
fail "Usage: $0 database.sql.gz drop_indexes.sql"
fi
# Capture all text up to the first occurrence of 'SET CONSTRAINTS'
preamble=$(zcat "$db_gz" | sed '/SET CONSTRAINTS/q')
if [ -z "$preamble" ] ; then
fail "Could not read preamble"
fi
drop_indexes=$(cat "$drop_indexes_sql")
if [ -z "$drop_indexes" ] ; then
fail "Could not read DROP INDEXES file"
fi
# Print preamble and drop indexes
cat <<EOF
${preamble}
${drop_indexes}
EOF
# Print the rest of database.sql.gz: the awk program sets flag 'f' when it
# sees 'SET CONSTRAINTS', so every line after the first match is printed.
zcat "$db_gz" | awk 'f; /SET CONSTRAINTS/ { f = 1 }'
@@ -3,13 +3,14 @@ namespace :backup do
desc "GITLAB | Create a backup of the GitLab CI database"
task create: :environment do
configure_cron_mode
mysql_to_postgresql = (ENV['MYSQL_TO_POSTGRESQL'] == '1')
 
$progress.puts "Applying final database migrations ... ".blue
Rake::Task['db:migrate'].invoke
$progress.puts "done".green
 
$progress.puts "Dumping database ... ".blue
- Backup::Database.new.dump
+ Backup::Database.new.dump(mysql_to_postgresql)
$progress.puts "done".green
 
$progress.puts "Dumping builds ... ".blue