Commit d26b4fa5 authored by Marin Jankovski

Merge branch 'export-script' into 'master'

Final export script



See merge request !251
parents e3041b90 caebd502
--- a/README.md
+++ b/README.md
@@ -5,6 +5,15 @@
[![Dependency Status](https://gemnasium.com/gitlabhq/gitlab-ci.png)](https://gemnasium.com/gitlabhq/gitlab-ci)
[![Coverage Status](https://coveralls.io/repos/gitlabhq/gitlab-ci/badge.png?branch=master)](https://coveralls.io/r/gitlabhq/gitlab-ci)
 
+## GitLab CI 8.0
+
+GitLab CI is now integrated in GitLab. The last 'stand-alone' version of
+GitLab CI was version 7.14. The sole purpose of GitLab CI 8.0 is to help you
+migrate data from your existing (pre-8.0) CI server into GitLab 8.0.
+The migration procedure is documented [in
+GitLab](https://gitlab.com/gitlab-org/gitlab-ce/blob/8-0-stable/doc/migrate_ci_to_ce/README.md).
+
+### Information
+
-Please see the [GitLab CI page on the website](https://about.gitlab.com/gitlab-ci/) for all information.
\ No newline at end of file
+Please see the [GitLab CI page on the website](https://about.gitlab.com/gitlab-ci/) for all information.
# Rename all CI tables with a `ci_` prefix so their data can be imported into
# the GitLab database without clashing with GitLab's own tables.
class MigrateCiTables < ActiveRecord::Migration
def up
rename_table :application_settings, :ci_application_settings
rename_table :builds, :ci_builds
rename_table :commits, :ci_commits
rename_table :events, :ci_events
rename_table :jobs, :ci_jobs
rename_table :projects, :ci_projects
rename_table :runner_projects, :ci_runner_projects
rename_table :runners, :ci_runners
rename_table :services, :ci_services
rename_table :tags, :ci_tags
rename_table :taggings, :ci_taggings
rename_table :trigger_requests, :ci_trigger_requests
rename_table :triggers, :ci_triggers
rename_table :variables, :ci_variables
rename_table :web_hooks, :ci_web_hooks
end
end
# Recreate the unique taggings index under a name that matches the renamed
# ci_taggings table.
class RenameTaggingsIdx < ActiveRecord::Migration
def up
remove_index :ci_taggings, name: 'taggings_idx'
add_index :ci_taggings,
[:tag_id, :taggable_id, :taggable_type, :context, :tagger_id, :tagger_type],
unique: true, name: 'ci_taggings_idx'
end
end
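These two migrations ship with this final release; the `backup:create` task shown further down invokes `db:migrate` itself, but they can also be applied by hand before taking the export. A minimal sketch, assuming an installation from source where `RAILS_ENV` must be set explicitly:

    # Apply the pending ci_* rename migrations manually (optional; the
    # backup task below runs db:migrate on its own).
    bundle exec rake db:migrate RAILS_ENV=production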
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -11,19 +11,19 @@
#
# It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema.define(version: 20150824202238) do
+ActiveRecord::Schema.define(version: 20150921081619) do
 
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
 
create_table "application_settings", force: true do |t|
create_table "ci_application_settings", force: true do |t|
t.boolean "all_broken_builds"
t.boolean "add_pusher"
t.datetime "created_at"
t.datetime "updated_at"
end
 
create_table "builds", force: true do |t|
create_table "ci_builds", force: true do |t|
t.integer "project_id"
t.string "status"
t.datetime "finished_at"
@@ -44,12 +44,12 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.integer "trigger_request_id"
end
 
add_index "builds", ["commit_id"], name: "index_builds_on_commit_id", using: :btree
add_index "builds", ["project_id", "commit_id"], name: "index_builds_on_project_id_and_commit_id", using: :btree
add_index "builds", ["project_id"], name: "index_builds_on_project_id", using: :btree
add_index "builds", ["runner_id"], name: "index_builds_on_runner_id", using: :btree
add_index "ci_builds", ["commit_id"], name: "index_ci_builds_on_commit_id", using: :btree
add_index "ci_builds", ["project_id", "commit_id"], name: "index_ci_builds_on_project_id_and_commit_id", using: :btree
add_index "ci_builds", ["project_id"], name: "index_ci_builds_on_project_id", using: :btree
add_index "ci_builds", ["runner_id"], name: "index_ci_builds_on_runner_id", using: :btree
 
create_table "commits", force: true do |t|
create_table "ci_commits", force: true do |t|
t.integer "project_id"
t.string "ref"
t.string "sha"
@@ -62,13 +62,13 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.datetime "committed_at"
end
 
add_index "commits", ["project_id", "committed_at", "id"], name: "index_commits_on_project_id_and_committed_at_and_id", using: :btree
add_index "commits", ["project_id", "committed_at"], name: "index_commits_on_project_id_and_committed_at", using: :btree
add_index "commits", ["project_id", "sha"], name: "index_commits_on_project_id_and_sha", using: :btree
add_index "commits", ["project_id"], name: "index_commits_on_project_id", using: :btree
add_index "commits", ["sha"], name: "index_commits_on_sha", using: :btree
add_index "ci_commits", ["project_id", "committed_at", "id"], name: "index_ci_commits_on_project_id_and_committed_at_and_id", using: :btree
add_index "ci_commits", ["project_id", "committed_at"], name: "index_ci_commits_on_project_id_and_committed_at", using: :btree
add_index "ci_commits", ["project_id", "sha"], name: "index_ci_commits_on_project_id_and_sha", using: :btree
add_index "ci_commits", ["project_id"], name: "index_ci_commits_on_project_id", using: :btree
add_index "ci_commits", ["sha"], name: "index_ci_commits_on_sha", using: :btree
 
create_table "events", force: true do |t|
create_table "ci_events", force: true do |t|
t.integer "project_id"
t.integer "user_id"
t.integer "is_admin"
@@ -77,11 +77,11 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.datetime "updated_at"
end
 
add_index "events", ["created_at"], name: "index_events_on_created_at", using: :btree
add_index "events", ["is_admin"], name: "index_events_on_is_admin", using: :btree
add_index "events", ["project_id"], name: "index_events_on_project_id", using: :btree
add_index "ci_events", ["created_at"], name: "index_ci_events_on_created_at", using: :btree
add_index "ci_events", ["is_admin"], name: "index_ci_events_on_is_admin", using: :btree
add_index "ci_events", ["project_id"], name: "index_ci_events_on_project_id", using: :btree
 
create_table "jobs", force: true do |t|
create_table "ci_jobs", force: true do |t|
t.integer "project_id", null: false
t.text "commands"
t.boolean "active", default: true, null: false
@@ -95,10 +95,10 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.datetime "deleted_at"
end
 
add_index "jobs", ["deleted_at"], name: "index_jobs_on_deleted_at", using: :btree
add_index "jobs", ["project_id"], name: "index_jobs_on_project_id", using: :btree
add_index "ci_jobs", ["deleted_at"], name: "index_ci_jobs_on_deleted_at", using: :btree
add_index "ci_jobs", ["project_id"], name: "index_ci_jobs_on_project_id", using: :btree
 
create_table "projects", force: true do |t|
create_table "ci_projects", force: true do |t|
t.string "name", null: false
t.integer "timeout", default: 3600, null: false
t.datetime "created_at"
@@ -121,17 +121,17 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.text "generated_yaml_config"
end
 
create_table "runner_projects", force: true do |t|
create_table "ci_runner_projects", force: true do |t|
t.integer "runner_id", null: false
t.integer "project_id", null: false
t.datetime "created_at"
t.datetime "updated_at"
end
 
add_index "runner_projects", ["project_id"], name: "index_runner_projects_on_project_id", using: :btree
add_index "runner_projects", ["runner_id"], name: "index_runner_projects_on_runner_id", using: :btree
add_index "ci_runner_projects", ["project_id"], name: "index_ci_runner_projects_on_project_id", using: :btree
add_index "ci_runner_projects", ["runner_id"], name: "index_ci_runner_projects_on_runner_id", using: :btree
 
create_table "runners", force: true do |t|
create_table "ci_runners", force: true do |t|
t.string "token"
t.datetime "created_at"
t.datetime "updated_at"
@@ -146,7 +146,7 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.string "architecture"
end
 
create_table "services", force: true do |t|
create_table "ci_services", force: true do |t|
t.string "type"
t.string "title"
t.integer "project_id", null: false
@@ -156,19 +156,9 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.text "properties"
end
 
add_index "services", ["project_id"], name: "index_services_on_project_id", using: :btree
add_index "ci_services", ["project_id"], name: "index_ci_services_on_project_id", using: :btree
 
create_table "sessions", force: true do |t|
t.string "session_id", null: false
t.text "data"
t.datetime "created_at"
t.datetime "updated_at"
end
add_index "sessions", ["session_id"], name: "index_sessions_on_session_id", using: :btree
add_index "sessions", ["updated_at"], name: "index_sessions_on_updated_at", using: :btree
create_table "taggings", force: true do |t|
create_table "ci_taggings", force: true do |t|
t.integer "tag_id"
t.integer "taggable_id"
t.string "taggable_type"
@@ -178,17 +168,17 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.datetime "created_at"
end
 
add_index "taggings", ["tag_id", "taggable_id", "taggable_type", "context", "tagger_id", "tagger_type"], name: "taggings_idx", unique: true, using: :btree
add_index "taggings", ["taggable_id", "taggable_type", "context"], name: "index_taggings_on_taggable_id_and_taggable_type_and_context", using: :btree
add_index "ci_taggings", ["tag_id", "taggable_id", "taggable_type", "context", "tagger_id", "tagger_type"], name: "ci_taggings_idx", unique: true, using: :btree
add_index "ci_taggings", ["taggable_id", "taggable_type", "context"], name: "index_ci_taggings_on_taggable_id_and_taggable_type_and_context", using: :btree
 
create_table "tags", force: true do |t|
create_table "ci_tags", force: true do |t|
t.string "name"
t.integer "taggings_count", default: 0
end
 
add_index "tags", ["name"], name: "index_tags_on_name", unique: true, using: :btree
add_index "ci_tags", ["name"], name: "index_ci_tags_on_name", unique: true, using: :btree
 
create_table "trigger_requests", force: true do |t|
create_table "ci_trigger_requests", force: true do |t|
t.integer "trigger_id", null: false
t.text "variables"
t.datetime "created_at"
@@ -196,7 +186,7 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.integer "commit_id"
end
 
create_table "triggers", force: true do |t|
create_table "ci_triggers", force: true do |t|
t.string "token"
t.integer "project_id", null: false
t.datetime "deleted_at"
@@ -204,9 +194,9 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.datetime "updated_at"
end
 
add_index "triggers", ["deleted_at"], name: "index_triggers_on_deleted_at", using: :btree
add_index "ci_triggers", ["deleted_at"], name: "index_ci_triggers_on_deleted_at", using: :btree
 
create_table "variables", force: true do |t|
create_table "ci_variables", force: true do |t|
t.integer "project_id", null: false
t.string "key"
t.text "value"
@@ -215,13 +205,23 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.string "encrypted_value_iv"
end
 
add_index "variables", ["project_id"], name: "index_variables_on_project_id", using: :btree
add_index "ci_variables", ["project_id"], name: "index_ci_variables_on_project_id", using: :btree
 
create_table "web_hooks", force: true do |t|
create_table "ci_web_hooks", force: true do |t|
t.string "url", null: false
t.integer "project_id", null: false
t.datetime "created_at"
t.datetime "updated_at"
end
 
create_table "sessions", force: true do |t|
t.string "session_id", null: false
t.text "data"
t.datetime "created_at"
t.datetime "updated_at"
end
add_index "sessions", ["session_id"], name: "index_sessions_on_session_id", using: :btree
add_index "sessions", ["updated_at"], name: "index_sessions_on_updated_at", using: :btree
end
module Backup
class Builds
-    attr_reader :app_builds_dir, :backup_builds_dir, :backup_dir
+    attr_reader :app_builds_dir, :backup_builds_tarball, :backup_dir
 
def initialize
@app_builds_dir = File.realpath(Rails.root.join('builds'))
@backup_dir = GitlabCi.config.backup.path
-      @backup_builds_dir = File.join(GitlabCi.config.backup.path, 'builds')
+      @backup_builds_tarball = File.join(GitlabCi.config.backup.path, 'builds/builds.tar.gz')
end
 
-    # Copy builds from builds directory to backup/builds
    def dump
-      FileUtils.mkdir_p(backup_builds_dir)
-      FileUtils.cp_r(app_builds_dir, backup_dir)
+      FileUtils.mkdir_p(File.dirname(backup_builds_tarball))
+      FileUtils.rm_f(backup_builds_tarball)
+      # Use 'tar -czf -' instead of 'tar -cz' because on some systems the
+      # default behavior of tar is to talk to a tape device instead of
+      # stdin/stdout.
+      system(
+        *%W(tar -C #{app_builds_dir} -czf - -- .),
+        out: [backup_builds_tarball, 'w', 0600]
+      )
end
 
def restore
backup_existing_builds_dir
Dir.mkdir(app_builds_dir, 0700)
 
-      FileUtils.cp_r(backup_builds_dir, app_builds_dir)
+      # Use 'tar -xzf -' instead of 'tar -xz' because on some systems the
+      # default behavior of tar is to talk to a tape device instead of
+      # stdin/stdout.
+      system(
+        *%W(tar -C #{app_builds_dir} -xzf - -- .),
+        in: backup_builds_tarball
+      )
end
 
def backup_existing_builds_dir
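The `dump` and `restore` methods above drive `tar` with explicit stdout/stdin redirection. A rough shell equivalent of the two calls, with illustrative paths standing in for `app_builds_dir` and `backup_builds_tarball` (the Ruby code additionally opens the tarball with mode 0600, which plain redirection does not reproduce):

    # dump: pack the builds directory into a gzipped tarball
    tar -C /path/to/gitlab-ci/builds -czf - -- . > /path/to/backups/builds/builds.tar.gz
    # restore: unpack the tarball into a freshly created builds directory
    tar -C /path/to/gitlab-ci/builds -xzf - -- . < /path/to/backups/builds/builds.tar.gz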
require 'yaml'
+require 'open3'

module Backup
  class Database
+    # These are the final CI tables (final prior to integration in GitLab)
+    TABLES = %w{
+      ci_application_settings ci_builds ci_commits ci_events ci_jobs ci_projects
+      ci_runner_projects ci_runners ci_services ci_tags ci_taggings ci_trigger_requests
+      ci_triggers ci_variables ci_web_hooks
+    }
+
    attr_reader :config, :db_dir
 
def initialize
@@ -10,34 +18,86 @@ module Backup
FileUtils.mkdir_p(@db_dir) unless Dir.exists?(@db_dir)
end
 
-    def dump
-      success = case config["adapter"]
+    def dump(mysql_to_postgresql=false)
+      FileUtils.rm_f(db_file_name)
+      compress_rd, compress_wr = IO.pipe
+      compress_pid = spawn(*%W(gzip -1 -c), in: compress_rd, out: [db_file_name, 'w', 0600])
+      compress_rd.close
+      dump_pid = case config["adapter"]
      when /^mysql/ then
        $progress.print "Dumping MySQL database #{config['database']} ... "
-        system('mysqldump', *mysql_args, config['database'], out: db_file_name)
+        args = mysql_args
+        args << '--compatible=postgresql' if mysql_to_postgresql
+        spawn('mysqldump', *args, config['database'], *TABLES, out: compress_wr)
      when "postgresql" then
        $progress.print "Dumping PostgreSQL database #{config['database']} ... "
        pg_env
-        system('pg_dump', config['database'], out: db_file_name)
+        spawn('pg_dump', '--clean', *TABLES.map { |t| "--table=#{t}" }, config['database'], out: compress_wr)
      end
+      compress_wr.close
+      success = [compress_pid, dump_pid].all? { |pid| Process.waitpid(pid); $?.success? }
      report_success(success)
      abort 'Backup failed' unless success
+      convert_to_postgresql if mysql_to_postgresql
    end
+
+    def convert_to_postgresql
+      mysql_dump_gz = db_file_name + '.mysql'
+      psql_dump_gz = db_file_name + '.psql'
+      drop_indexes_sql = File.join(db_dir, 'drop_indexes.sql')
+      File.rename(db_file_name, mysql_dump_gz)
+
+      $progress.print "Converting MySQL database dump to Postgres ... "
+      statuses = Open3.pipeline(
+        %W(gzip -cd #{mysql_dump_gz}),
+        %W(python lib/support/mysql-postgresql-converter/db_converter.py - - #{drop_indexes_sql}),
+        %W(gzip -1 -c),
+        out: [psql_dump_gz, 'w', 0600]
+      )
+      if !statuses.compact.all?(&:success?)
+        abort "mysql-to-postgresql-converter failed"
+      end
+      $progress.puts '[DONE]'.green
+      FileUtils.rm_f(mysql_dump_gz) # save disk space during conversion
+
+      $progress.print "Splicing in 'DROP INDEX' statements ... "
+      statuses = Open3.pipeline(
+        %W(lib/support/mysql-postgresql-converter/splice_drop_indexes #{psql_dump_gz} #{drop_indexes_sql}),
+        %W(gzip -1 -c),
+        out: [db_file_name, 'w', 0600]
+      )
+      if !statuses.compact.all?(&:success?)
+        abort "Failed to splice in 'DROP INDEXES' statements"
+      end
+      $progress.puts '[DONE]'.green
+    ensure
+      FileUtils.rm_f([mysql_dump_gz, psql_dump_gz, drop_indexes_sql])
+    end
 
    def restore
-      success = case config["adapter"]
+      decompress_rd, decompress_wr = IO.pipe
+      decompress_pid = spawn(*%W(gzip -cd), out: decompress_wr, in: db_file_name)
+      decompress_wr.close
+      restore_pid = case config["adapter"]
      when /^mysql/ then
        $progress.print "Restoring MySQL database #{config['database']} ... "
-        system('mysql', *mysql_args, config['database'], in: db_file_name)
+        spawn('mysql', *mysql_args, config['database'], in: decompress_rd)
      when "postgresql" then
        $progress.print "Restoring PostgreSQL database #{config['database']} ... "
-        # Drop all tables because PostgreSQL DB dumps do not contain DROP TABLE
-        # statements like MySQL.
-        drop_all_tables
-        drop_all_postgres_sequences
        pg_env
-        system('psql', config['database'], '-f', db_file_name)
+        spawn('psql', config['database'], in: decompress_rd)
      end
+      decompress_rd.close
+      success = [decompress_pid, restore_pid].all? { |pid| Process.waitpid(pid); $?.success? }
      report_success(success)
      abort 'Restore failed' unless success
    end
@@ -45,7 +105,7 @@ module Backup
protected
 
def db_file_name
-      File.join(db_dir, 'database.sql')
+      File.join(db_dir, 'database.sql.gz')
end
 
def mysql_args
@@ -74,19 +134,5 @@ module Backup
$progress.puts '[FAILED]'.red
end
end
-
-    def drop_all_tables
-      connection = ActiveRecord::Base.connection
-      connection.tables.each do |table|
-        connection.drop_table(table)
-      end
-    end
-
-    def drop_all_postgres_sequences
-      connection = ActiveRecord::Base.connection
-      connection.execute("SELECT c.relname FROM pg_class c WHERE c.relkind = 'S';").each do |sequence|
-        connection.execute("DROP SEQUENCE #{sequence['relname']}")
-      end
-    end
end
end
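With `pg_dump --clean` now emitting its own DROP statements, the table- and sequence-dropping helpers above become dead code and are removed. For orientation, the MySQL dump stage is essentially a mysqldump-into-gzip pipeline restricted to the `TABLES` list; a sketch, with connection flags and most table names omitted and an illustrative database name:

    # Roughly what Backup::Database#dump runs for MySQL when
    # MYSQL_TO_POSTGRESQL=1 (only the first few TABLES entries shown):
    mysqldump --compatible=postgresql gitlab_ci_production \
      ci_application_settings ci_builds ci_commits \
      | gzip -1 -c > db/database.sql.gz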
--- a/lib/backup/manager.rb
+++ b/lib/backup/manager.rb
@@ -7,7 +7,7 @@ module Backup
s[:backup_created_at] = Time.now
s[:gitlab_version] = GitlabCi::VERSION
s[:tar_version] = tar_version
tar_file = "#{s[:backup_created_at].to_i}_gitlab_ci_backup.tar.gz"
tar_file = "#{s[:backup_created_at].to_i}_gitlab_ci_backup.tar"
 
Dir.chdir(GitlabCi.config.backup.path) do
File.open("#{GitlabCi.config.backup.path}/backup_information.yml",
@@ -20,7 +20,7 @@ module Backup
# create archive
$progress.print "Creating backup archive: #{tar_file} ... "
orig_umask = File.umask(0077)
-      if Kernel.system('tar', '-czf', tar_file, *backup_contents)
+      if Kernel.system('tar', '-cf', tar_file, *backup_contents)
$progress.puts "done".green
else
puts "creating archive #{tar_file} failed".red
@@ -78,11 +78,11 @@ module Backup
removed = 0
Dir.chdir(GitlabCi.config.backup.path) do
-        file_list = Dir.glob('*_gitlab_ci_backup.tar.gz')
-        file_list.map! { |f| $1.to_i if f =~ /(\d+)_gitlab_ci_backup.tar.gz/ }
+        file_list = Dir.glob('*_gitlab_ci_backup.tar')
+        file_list.map! { |f| $1.to_i if f =~ /(\d+)_gitlab_ci_backup.tar/ }
file_list.sort.each do |timestamp|
if Time.at(timestamp) < (Time.now - keep_time)
-            if Kernel.system(*%W(rm #{timestamp}_gitlab_ci_backup.tar.gz))
+            if Kernel.system(*%W(rm #{timestamp}_gitlab_ci_backup.tar))
removed += 1
end
end
@@ -99,7 +99,7 @@ module Backup
Dir.chdir(GitlabCi.config.backup.path)
 
# check for existing backups in the backup dir
file_list = Dir.glob("*_gitlab_ci_backup.tar.gz").each.map { |f| f.split(/_/).first.to_i }
file_list = Dir.glob("*_gitlab_ci_backup.tar").each.map { |f| f.split(/_/).first.to_i }
puts "no backups found" if file_list.count == 0
 
if file_list.count > 1 && ENV["BACKUP"].nil?
@@ -108,7 +108,7 @@ module Backup
exit 1
end
 
tar_file = ENV["BACKUP"].nil? ? File.join("#{file_list.first}_gitlab_ci_backup.tar.gz") : File.join(ENV["BACKUP"] + "_gitlab_ci_backup.tar.gz")
tar_file = ENV["BACKUP"].nil? ? File.join("#{file_list.first}_gitlab_ci_backup.tar") : File.join(ENV["BACKUP"] + "_gitlab_ci_backup.tar")
 
unless File.exists?(tar_file)
puts "The specified backup doesn't exist!"
@@ -117,7 +117,7 @@ module Backup
 
$progress.print "Unpacking backup ... "
 
-      unless Kernel.system(*%W(tar -xzf #{tar_file}))
+      unless Kernel.system(*%W(tar -xf #{tar_file}))
puts "unpacking backup failed".red
exit 1
else
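On the restore side, the `BACKUP` environment variable selects an archive by its timestamp prefix when more than one `*_gitlab_ci_backup.tar` file is present. A usage sketch, assuming the corresponding `backup:restore` rake task and an illustrative timestamp:

    # Restore a specific archive from the configured backup directory.
    BACKUP=1441793176 bundle exec rake backup:restore RAILS_ENV=production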
Copyright (c) 2012 Lanyrd Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
MySQL to PostgreSQL Converter
=============================
Lanyrd's MySQL to PostgreSQL conversion script. Use with care.
This script was designed for our specific database and column requirements -
notably, it doubles the lengths of VARCHARs due to a unicode size problem we
had, places indexes on all foreign keys, and presumes you're using Django
for column typing purposes.
GitLab-specific changes
-----------------------
The `gitlab` branch of this fork contains the following changes made for
GitLab.
- Guard against replacing '0000-00-00 00:00:00' inside SQL text fields.
- Replace all MySQL zero-byte string literals `\0`. This is safe as of GitLab
6.8 because the GitLab database schema contains no binary columns.
- Never set 'NOT NULL' constraints on datetimes.
- Drop sequences before creating them.
- Preserve default values of boolean (originally `tinyint(1)`) columns.
- Import all indexes.
- Import index names.
- Drop tables before creating.
- Drop indexes before creating.
How to use
----------

First, dump your MySQL database in PostgreSQL-compatible format:

    mysqldump --compatible=postgresql --default-character-set=utf8 \
      -r databasename.mysql -u root gitlabhq_production -p

Then convert it using the db_converter.py script:

    python db_converter.py databasename.mysql - drop_indexes.sql | gzip -c > databasename.unfinished.psql.gz

It will print progress to the terminal.

The resulting dump can already be imported, but the import would be slow
because the data is loaded while all indexes exist. Splice the generated
'DROP INDEX' statements into the dump to fix that:

    ./splice_drop_indexes databasename.unfinished.psql.gz drop_indexes.sql > databasename.psql

Next, load the new dump into a fresh PostgreSQL database:

    psql -f databasename.psql -d gitlabhq_production
More information
----------------
You can learn more about the move which this powered at http://lanyrd.com/blog/2012/lanyrds-big-move/ and some technical details of it at http://www.aeracode.org/2012/11/13/one-change-not-enough/.
#!/usr/bin/env python
"""
Fixes a MySQL dump made with the right format so it can be directly
imported to a new PostgreSQL database.
Dump using:
mysqldump --compatible=postgresql --default-character-set=utf8 -r databasename.mysql -u root databasename
"""
import re
import sys
import os
import time
import subprocess
def parse(input_filename, output_filename, drop_index_filename):
"Feed it a file, and it'll output a fixed one"
# State storage
if input_filename == "-":
num_lines = -1
else:
num_lines = int(subprocess.check_output(["wc", "-l", input_filename]).strip().split()[0])
tables = {}
current_table = None
creation_lines = []
enum_types = []
foreign_key_lines = []
index_lines = []
drop_index_lines = []
sequence_lines = []
cast_lines = []
num_inserts = 0
started = time.time()
# Open output file and write header. Logging file handle will be stdout
# unless we're writing output to stdout, in which case NO PROGRESS FOR YOU.
if output_filename == "-":
output = sys.stdout
logging = open(os.devnull, "w")
else:
output = open(output_filename, "w")
logging = sys.stdout
drop_index = open(drop_index_filename, "w")
if input_filename == "-":
input_fh = sys.stdin
else:
input_fh = open(input_filename)
output.write("-- Converted by db_converter\n")
output.write("START TRANSACTION;\n")
output.write("SET standard_conforming_strings=off;\n")
output.write("SET escape_string_warning=off;\n")
output.write("SET CONSTRAINTS ALL DEFERRED;\n\n")
for i, line in enumerate(input_fh):
time_taken = time.time() - started
percentage_done = (i+1) / float(num_lines)
secs_left = (time_taken / percentage_done) - time_taken
logging.write("\rLine %i (of %s: %.2f%%) [%s tables] [%s inserts] [ETA: %i min %i sec]" % (
i + 1,
num_lines,
((i+1)/float(num_lines))*100,
len(tables),
num_inserts,
secs_left // 60,
secs_left % 60,
))
logging.flush()
line = line.decode("utf8").strip().replace(r"\\", "WUBWUBREALSLASHWUB").replace(r"\0", "").replace(r"\'", "''").replace("WUBWUBREALSLASHWUB", r"\\")
# Ignore comment lines
if line.startswith("--") or line.startswith("/*") or line.startswith("LOCK TABLES") or line.startswith("DROP TABLE") or line.startswith("UNLOCK TABLES") or not line:
continue
# Outside of anything handling
if current_table is None:
# Start of a table creation statement?
if line.startswith("CREATE TABLE"):
current_table = line.split('"')[1]
tables[current_table] = {"columns": []}
creation_lines = []
# Inserting data into a table?
elif line.startswith("INSERT INTO"):
output.write(re.sub(r"([^'])'0000-00-00 00:00:00'", r"\1NULL", line.encode("utf8")) + "\n")
num_inserts += 1
# ???
else:
print "\n ! Unknown line in main body: %s" % line
# Inside-create-statement handling
else:
# Is it a column?
if line.startswith('"'):
useless, name, definition = line.strip(",").split('"',2)
try:
type, extra = definition.strip().split(" ", 1)
# This must be a tricky enum
if ')' in extra:
type, extra = definition.strip().split(")")
except ValueError:
type = definition.strip()
extra = ""
extra = re.sub("CHARACTER SET [\w\d]+\s*", "", extra.replace("unsigned", ""))
extra = re.sub("COLLATE [\w\d]+\s*", "", extra.replace("unsigned", ""))
# See if it needs type conversion
final_type = None
final_default = None
set_sequence = None
if type == "tinyint(1)":
type = "int4"
set_sequence = True
final_type = "boolean"
if "DEFAULT '0'" in extra:
final_default = "FALSE"
elif "DEFAULT '1'" in extra:
final_default = "TRUE"
elif type.startswith("int("):
type = "integer"
set_sequence = True
elif type.startswith("bigint("):
type = "bigint"
set_sequence = True
elif type == "longtext":
type = "text"
elif type == "mediumtext":
type = "text"
elif type == "tinytext":
type = "text"
elif type.startswith("varchar("):
size = int(type.split("(")[1].rstrip(")"))
type = "varchar(%s)" % (size * 2)
elif type.startswith("smallint("):
type = "int2"
set_sequence = True
elif type == "datetime":
type = "timestamp with time zone"
extra = extra.replace("NOT NULL", "")
elif type == "double":
type = "double precision"
elif type == "blob":
type = "bytea"
elif type.startswith("enum(") or type.startswith("set("):
types_str = type.split("(")[1].rstrip(")").rstrip('"')
types_arr = [type_str.strip('\'') for type_str in types_str.split(",")]
# Considered using values to make a name, but its dodgy
# enum_name = '_'.join(types_arr)
enum_name = "{0}_{1}".format(current_table, name)
if enum_name not in enum_types:
output.write("CREATE TYPE {0} AS ENUM ({1}); \n".format(enum_name, types_str));
enum_types.append(enum_name)
type = enum_name
if final_type:
cast_lines.append("ALTER TABLE \"%s\" ALTER COLUMN \"%s\" DROP DEFAULT" % (current_table, name))
cast_lines.append("ALTER TABLE \"%s\" ALTER COLUMN \"%s\" TYPE %s USING CAST(\"%s\" as %s)" % (current_table, name, final_type, name, final_type))
if final_default:
cast_lines.append("ALTER TABLE \"%s\" ALTER COLUMN \"%s\" SET DEFAULT %s" % (current_table, name, final_default))
# ID fields need sequences [if they are integers?]
if name == "id" and set_sequence is True:
sequence_lines.append("DROP SEQUENCE IF EXISTS %s_id_seq" % (current_table))
sequence_lines.append("CREATE SEQUENCE %s_id_seq" % (current_table))
sequence_lines.append("SELECT setval('%s_id_seq', max(id)) FROM %s" % (current_table, current_table))
sequence_lines.append("ALTER TABLE \"%s\" ALTER COLUMN \"id\" SET DEFAULT nextval('%s_id_seq')" % (current_table, current_table))
# Record it
creation_lines.append('"%s" %s %s' % (name, type, extra))
tables[current_table]['columns'].append((name, type, extra))
# Is it a constraint or something?
elif line.startswith("PRIMARY KEY"):
creation_lines.append(line.rstrip(","))
elif line.startswith("CONSTRAINT"):
foreign_key_lines.append("ALTER TABLE \"%s\" ADD CONSTRAINT %s DEFERRABLE INITIALLY DEFERRED" % (current_table, line.split("CONSTRAINT")[1].strip().rstrip(",")))
foreign_key_lines.append("CREATE INDEX ON \"%s\" %s" % (current_table, line.split("FOREIGN KEY")[1].split("REFERENCES")[0].strip().rstrip(",")))
elif line.startswith("UNIQUE KEY \""):
index_name = line.split('"')[1].split('"')[0]
index_columns = line.split("(")[1].split(")")[0]
index_lines.append("CREATE UNIQUE INDEX \"%s\" ON %s (%s)" % (index_name, current_table, index_columns))
drop_index_lines.append("DROP INDEX IF EXISTS \"%s\"" % index_name)
elif line.startswith("UNIQUE KEY"):
index_columns = line.split("(")[1].split(")")[0]
index_lines.append("CREATE UNIQUE INDEX ON %s (%s)" % (current_table, index_columns))
elif line.startswith("KEY \""):
index_name = line.split('"')[1].split('"')[0]
index_columns = line.split("(")[1].split(")")[0]
index_lines.append("CREATE INDEX \"%s\" ON %s (%s)" % (index_name, current_table, index_columns))
drop_index_lines.append("DROP INDEX IF EXISTS \"%s\"" % index_name)
elif line.startswith("KEY"):
index_columns = line.split("(")[1].split(")")[0]
index_lines.append("CREATE INDEX ON %s (%s)" % (current_table, index_columns))
elif line.startswith("FULLTEXT KEY"):
fulltext_keys = " || ' ' || ".join( line.split('(')[-1].split(')')[0].replace('"', '').split(',') )
index_lines.append("CREATE INDEX ON %s USING gin(to_tsvector('english', %s))" % (current_table, fulltext_keys))
# Is it the end of the table?
elif line == ");":
output.write("DROP TABLE IF EXISTS \"%s\";\n" % current_table)
output.write("CREATE TABLE \"%s\" (\n" % current_table)
for i, line in enumerate(creation_lines):
output.write(" %s%s\n" % (line, "," if i != (len(creation_lines) - 1) else ""))
output.write(');\n\n')
current_table = None
# ???
else:
print "\n ! Unknown line inside table creation: %s" % line
# Finish file
output.write("\n-- Post-data save --\n")
output.write("COMMIT;\n")
output.write("START TRANSACTION;\n")
# Write typecasts out
output.write("\n-- Typecasts --\n")
for line in cast_lines:
output.write("%s;\n" % line)
# Write FK constraints out
output.write("\n-- Foreign keys --\n")
for line in foreign_key_lines:
output.write("%s;\n" % line)
# Write sequences out
output.write("\n-- Sequences --\n")
for line in sequence_lines:
output.write("%s;\n" % line)
drop_index.write("-- Drop indexes --\n")
for line in drop_index_lines:
drop_index.write("%s;\n" % line)
# Write indexes out
output.write("\n-- Indexes --\n")
for line in index_lines:
output.write("%s;\n" % line)
# Finish file
output.write("\n")
output.write("COMMIT;\n")
print ""
if __name__ == "__main__":
parse(sys.argv[1], sys.argv[2], sys.argv[3])
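As the `__main__` block shows, the script takes three arguments: the input dump, the output file, and a path for the generated 'DROP INDEX' statements, with `-` standing for stdin/stdout (which is how `Backup::Database#convert_to_postgresql` invokes it over a pipe). A standalone invocation sketch with illustrative file names:

    # Convert a MySQL dump, writing the DROP INDEX statements to a side file.
    python db_converter.py databasename.mysql databasename.psql drop_indexes.sql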
#!/bin/sh
# This script reorders database dumps generated by db_converter.py for
# efficient consumption by Postgres.
fail() {
echo "$@" 1>&2
exit 1
}
db_gz=$1
drop_indexes_sql=$2
if [ -z "$db_gz" ] || [ -z "$drop_indexes_sql" ] ; then
fail "Usage: $0 database.sql.gz drop_indexes.sql"
fi
# Capture all text up to the first occurrence of 'SET CONSTRAINTS'
preamble=$(zcat "$db_gz" | sed '/SET CONSTRAINTS/q')
if [ -z "$preamble" ] ; then
fail "Could not read preamble"
fi
drop_indexes=$(cat "$drop_indexes_sql")
if [ -z "$drop_indexes" ] ; then
fail "Could not read DROP INDEXES file"
fi
# Print preamble and drop indexes
cat <<EOF
${preamble}
${drop_indexes}
EOF
# Print the rest of database.sql.gz: the awk program sets the flag 'f' on the
# line matching 'SET CONSTRAINTS', and the bare pattern 'f' prints each line
# once the flag is set, i.e. everything after the first match.
zcat "$db_gz" | awk 'f; /SET CONSTRAINTS/ { f = 1 }'
--- a/lib/tasks/backup.rake
+++ b/lib/tasks/backup.rake
@@ -3,9 +3,14 @@ namespace :backup do
desc "GITLAB | Create a backup of the GitLab CI database"
task create: :environment do
configure_cron_mode
+    mysql_to_postgresql = (ENV['MYSQL_TO_POSTGRESQL'] == '1')
+
+    $progress.puts "Applying final database migrations ... ".blue
+    Rake::Task['db:migrate'].invoke
+    $progress.puts "done".green
 
$progress.puts "Dumping database ... ".blue
-    Backup::Database.new.dump
+    Backup::Database.new.dump(mysql_to_postgresql)
$progress.puts "done".green
 
$progress.puts "Dumping builds ... ".blue
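With this task in place the final export is a single command; setting `MYSQL_TO_POSTGRESQL=1` makes the dump importable into a PostgreSQL-backed GitLab. A sketch, assuming an installation from source where the install path and `RAILS_ENV` shown are illustrative:

    # Create the final CI backup; the flag enables the MySQL-to-PostgreSQL
    # conversion pipeline in Backup::Database#dump.
    cd /home/gitlab_ci/gitlab-ci
    MYSQL_TO_POSTGRESQL=1 bundle exec rake backup:create RAILS_ENV=production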