Commit d26b4fa5 authored by Marin Jankovski

Merge branch 'export-script' into 'master'

Final export script



See merge request !251
parents e3041b90 caebd502
--- a/README.md
+++ b/README.md
@@ -5,6 +5,15 @@
[![Dependency Status](https://gemnasium.com/gitlabhq/gitlab-ci.png)](https://gemnasium.com/gitlabhq/gitlab-ci)
[![Coverage Status](https://coveralls.io/repos/gitlabhq/gitlab-ci/badge.png?branch=master)](https://coveralls.io/r/gitlabhq/gitlab-ci)
 
+## GitLab CI 8.0
+
+GitLab CI is now integrated in GitLab. The last 'stand-alone' version of
+GitLab CI was version 7.14. The sole purpose of GitLab CI 8.0 is to help you
+migrate data from your existing (pre-8.0) CI server into GitLab 8.0.
+The migration procedure is documented [in
+GitLab](https://gitlab.com/gitlab-org/gitlab-ce/blob/8-0-stable/doc/migrate_ci_to_ce/README.md).
+
+### Information
+
-Please see the [GitLab CI page on the website](https://about.gitlab.com/gitlab-ci/) for all information.
\ No newline at end of file
+Please see the [GitLab CI page on the website](https://about.gitlab.com/gitlab-ci/) for all information.
# Rename all CI tables with a `ci_` prefix so their data can be imported into
# the GitLab database without clashing with GitLab's own tables.
class MigrateCiTables < ActiveRecord::Migration
def up
rename_table :application_settings, :ci_application_settings
rename_table :builds, :ci_builds
rename_table :commits, :ci_commits
rename_table :events, :ci_events
rename_table :jobs, :ci_jobs
rename_table :projects, :ci_projects
rename_table :runner_projects, :ci_runner_projects
rename_table :runners, :ci_runners
rename_table :services, :ci_services
rename_table :tags, :ci_tags
rename_table :taggings, :ci_taggings
rename_table :trigger_requests, :ci_trigger_requests
rename_table :triggers, :ci_triggers
rename_table :variables, :ci_variables
rename_table :web_hooks, :ci_web_hooks
end
end
# Recreate the unique taggings index under a name that matches the renamed
# ci_taggings table.
class RenameTaggingsIdx < ActiveRecord::Migration
def up
remove_index :ci_taggings, name: 'taggings_idx'
add_index :ci_taggings,
[:tag_id, :taggable_id, :taggable_type, :context, :tagger_id, :tagger_type],
unique: true, name: 'ci_taggings_idx'
end
end
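These two migrations ship with this final release; the `backup:create` task shown further down invokes `db:migrate` itself, but they can also be applied by hand before taking the export. A minimal sketch, assuming an installation from source where `RAILS_ENV` must be set explicitly:

    # Apply the pending ci_* rename migrations manually (optional; the
    # backup task below runs db:migrate on its own).
    bundle exec rake db:migrate RAILS_ENV=production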
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -11,19 +11,19 @@
#
# It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema.define(version: 20150824202238) do
+ActiveRecord::Schema.define(version: 20150921081619) do
 
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
 
create_table "application_settings", force: true do |t|
create_table "ci_application_settings", force: true do |t|
t.boolean "all_broken_builds"
t.boolean "add_pusher"
t.datetime "created_at"
t.datetime "updated_at"
end
 
create_table "builds", force: true do |t|
create_table "ci_builds", force: true do |t|
t.integer "project_id"
t.string "status"
t.datetime "finished_at"
@@ -44,12 +44,12 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.integer "trigger_request_id"
end
 
add_index "builds", ["commit_id"], name: "index_builds_on_commit_id", using: :btree
add_index "builds", ["project_id", "commit_id"], name: "index_builds_on_project_id_and_commit_id", using: :btree
add_index "builds", ["project_id"], name: "index_builds_on_project_id", using: :btree
add_index "builds", ["runner_id"], name: "index_builds_on_runner_id", using: :btree
add_index "ci_builds", ["commit_id"], name: "index_ci_builds_on_commit_id", using: :btree
add_index "ci_builds", ["project_id", "commit_id"], name: "index_ci_builds_on_project_id_and_commit_id", using: :btree
add_index "ci_builds", ["project_id"], name: "index_ci_builds_on_project_id", using: :btree
add_index "ci_builds", ["runner_id"], name: "index_ci_builds_on_runner_id", using: :btree
 
create_table "commits", force: true do |t|
create_table "ci_commits", force: true do |t|
t.integer "project_id"
t.string "ref"
t.string "sha"
@@ -62,13 +62,13 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.datetime "committed_at"
end
 
add_index "commits", ["project_id", "committed_at", "id"], name: "index_commits_on_project_id_and_committed_at_and_id", using: :btree
add_index "commits", ["project_id", "committed_at"], name: "index_commits_on_project_id_and_committed_at", using: :btree
add_index "commits", ["project_id", "sha"], name: "index_commits_on_project_id_and_sha", using: :btree
add_index "commits", ["project_id"], name: "index_commits_on_project_id", using: :btree
add_index "commits", ["sha"], name: "index_commits_on_sha", using: :btree
add_index "ci_commits", ["project_id", "committed_at", "id"], name: "index_ci_commits_on_project_id_and_committed_at_and_id", using: :btree
add_index "ci_commits", ["project_id", "committed_at"], name: "index_ci_commits_on_project_id_and_committed_at", using: :btree
add_index "ci_commits", ["project_id", "sha"], name: "index_ci_commits_on_project_id_and_sha", using: :btree
add_index "ci_commits", ["project_id"], name: "index_ci_commits_on_project_id", using: :btree
add_index "ci_commits", ["sha"], name: "index_ci_commits_on_sha", using: :btree
 
create_table "events", force: true do |t|
create_table "ci_events", force: true do |t|
t.integer "project_id"
t.integer "user_id"
t.integer "is_admin"
@@ -77,11 +77,11 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.datetime "updated_at"
end
 
add_index "events", ["created_at"], name: "index_events_on_created_at", using: :btree
add_index "events", ["is_admin"], name: "index_events_on_is_admin", using: :btree
add_index "events", ["project_id"], name: "index_events_on_project_id", using: :btree
add_index "ci_events", ["created_at"], name: "index_ci_events_on_created_at", using: :btree
add_index "ci_events", ["is_admin"], name: "index_ci_events_on_is_admin", using: :btree
add_index "ci_events", ["project_id"], name: "index_ci_events_on_project_id", using: :btree
 
create_table "jobs", force: true do |t|
create_table "ci_jobs", force: true do |t|
t.integer "project_id", null: false
t.text "commands"
t.boolean "active", default: true, null: false
@@ -95,10 +95,10 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.datetime "deleted_at"
end
 
add_index "jobs", ["deleted_at"], name: "index_jobs_on_deleted_at", using: :btree
add_index "jobs", ["project_id"], name: "index_jobs_on_project_id", using: :btree
add_index "ci_jobs", ["deleted_at"], name: "index_ci_jobs_on_deleted_at", using: :btree
add_index "ci_jobs", ["project_id"], name: "index_ci_jobs_on_project_id", using: :btree
 
create_table "projects", force: true do |t|
create_table "ci_projects", force: true do |t|
t.string "name", null: false
t.integer "timeout", default: 3600, null: false
t.datetime "created_at"
@@ -121,17 +121,17 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.text "generated_yaml_config"
end
 
create_table "runner_projects", force: true do |t|
create_table "ci_runner_projects", force: true do |t|
t.integer "runner_id", null: false
t.integer "project_id", null: false
t.datetime "created_at"
t.datetime "updated_at"
end
 
add_index "runner_projects", ["project_id"], name: "index_runner_projects_on_project_id", using: :btree
add_index "runner_projects", ["runner_id"], name: "index_runner_projects_on_runner_id", using: :btree
add_index "ci_runner_projects", ["project_id"], name: "index_ci_runner_projects_on_project_id", using: :btree
add_index "ci_runner_projects", ["runner_id"], name: "index_ci_runner_projects_on_runner_id", using: :btree
 
create_table "runners", force: true do |t|
create_table "ci_runners", force: true do |t|
t.string "token"
t.datetime "created_at"
t.datetime "updated_at"
@@ -146,7 +146,7 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.string "architecture"
end
 
create_table "services", force: true do |t|
create_table "ci_services", force: true do |t|
t.string "type"
t.string "title"
t.integer "project_id", null: false
@@ -156,19 +156,9 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.text "properties"
end
 
add_index "services", ["project_id"], name: "index_services_on_project_id", using: :btree
add_index "ci_services", ["project_id"], name: "index_ci_services_on_project_id", using: :btree
 
create_table "sessions", force: true do |t|
t.string "session_id", null: false
t.text "data"
t.datetime "created_at"
t.datetime "updated_at"
end
add_index "sessions", ["session_id"], name: "index_sessions_on_session_id", using: :btree
add_index "sessions", ["updated_at"], name: "index_sessions_on_updated_at", using: :btree
create_table "taggings", force: true do |t|
create_table "ci_taggings", force: true do |t|
t.integer "tag_id"
t.integer "taggable_id"
t.string "taggable_type"
@@ -178,17 +168,17 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.datetime "created_at"
end
 
add_index "taggings", ["tag_id", "taggable_id", "taggable_type", "context", "tagger_id", "tagger_type"], name: "taggings_idx", unique: true, using: :btree
add_index "taggings", ["taggable_id", "taggable_type", "context"], name: "index_taggings_on_taggable_id_and_taggable_type_and_context", using: :btree
add_index "ci_taggings", ["tag_id", "taggable_id", "taggable_type", "context", "tagger_id", "tagger_type"], name: "ci_taggings_idx", unique: true, using: :btree
add_index "ci_taggings", ["taggable_id", "taggable_type", "context"], name: "index_ci_taggings_on_taggable_id_and_taggable_type_and_context", using: :btree
 
create_table "tags", force: true do |t|
create_table "ci_tags", force: true do |t|
t.string "name"
t.integer "taggings_count", default: 0
end
 
add_index "tags", ["name"], name: "index_tags_on_name", unique: true, using: :btree
add_index "ci_tags", ["name"], name: "index_ci_tags_on_name", unique: true, using: :btree
 
create_table "trigger_requests", force: true do |t|
create_table "ci_trigger_requests", force: true do |t|
t.integer "trigger_id", null: false
t.text "variables"
t.datetime "created_at"
@@ -196,7 +186,7 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.integer "commit_id"
end
 
create_table "triggers", force: true do |t|
create_table "ci_triggers", force: true do |t|
t.string "token"
t.integer "project_id", null: false
t.datetime "deleted_at"
@@ -204,9 +194,9 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.datetime "updated_at"
end
 
add_index "triggers", ["deleted_at"], name: "index_triggers_on_deleted_at", using: :btree
add_index "ci_triggers", ["deleted_at"], name: "index_ci_triggers_on_deleted_at", using: :btree
 
create_table "variables", force: true do |t|
create_table "ci_variables", force: true do |t|
t.integer "project_id", null: false
t.string "key"
t.text "value"
@@ -215,13 +205,23 @@ ActiveRecord::Schema.define(version: 20150824202238) do
t.string "encrypted_value_iv"
end
 
add_index "variables", ["project_id"], name: "index_variables_on_project_id", using: :btree
add_index "ci_variables", ["project_id"], name: "index_ci_variables_on_project_id", using: :btree
 
create_table "web_hooks", force: true do |t|
create_table "ci_web_hooks", force: true do |t|
t.string "url", null: false
t.integer "project_id", null: false
t.datetime "created_at"
t.datetime "updated_at"
end
 
create_table "sessions", force: true do |t|
t.string "session_id", null: false
t.text "data"
t.datetime "created_at"
t.datetime "updated_at"
end
add_index "sessions", ["session_id"], name: "index_sessions_on_session_id", using: :btree
add_index "sessions", ["updated_at"], name: "index_sessions_on_updated_at", using: :btree
end
module Backup
class Builds
-    attr_reader :app_builds_dir, :backup_builds_dir, :backup_dir
+    attr_reader :app_builds_dir, :backup_builds_tarball, :backup_dir
 
def initialize
@app_builds_dir = File.realpath(Rails.root.join('builds'))
@backup_dir = GitlabCi.config.backup.path
-      @backup_builds_dir = File.join(GitlabCi.config.backup.path, 'builds')
+      @backup_builds_tarball = File.join(GitlabCi.config.backup.path, 'builds/builds.tar.gz')
end
 
-    # Copy builds from builds directory to backup/builds
    def dump
-      FileUtils.mkdir_p(backup_builds_dir)
-      FileUtils.cp_r(app_builds_dir, backup_dir)
+      FileUtils.mkdir_p(File.dirname(backup_builds_tarball))
+      FileUtils.rm_f(backup_builds_tarball)
+      # Use 'tar -czf -' instead of 'tar -cz' because on some systems the
+      # default behavior of tar is to talk to a tape device instead of
+      # stdin/stdout.
+      system(
+        *%W(tar -C #{app_builds_dir} -czf - -- .),
+        out: [backup_builds_tarball, 'w', 0600]
+      )
end
 
def restore
backup_existing_builds_dir
Dir.mkdir(app_builds_dir, 0700)
 
-      FileUtils.cp_r(backup_builds_dir, app_builds_dir)
+      # Use 'tar -xzf -' instead of 'tar -xz' because on some systems the
+      # default behavior of tar is to talk to a tape device instead of
+      # stdin/stdout.
+      system(
+        *%W(tar -C #{app_builds_dir} -xzf - -- .),
+        in: backup_builds_tarball
+      )
end
 
def backup_existing_builds_dir
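The `dump` and `restore` methods above drive `tar` with explicit stdout/stdin redirection. A rough shell equivalent of the two calls, with illustrative paths standing in for `app_builds_dir` and `backup_builds_tarball` (the Ruby code additionally opens the tarball with mode 0600, which plain redirection does not reproduce):

    # dump: pack the builds directory into a gzipped tarball
    tar -C /path/to/gitlab-ci/builds -czf - -- . > /path/to/backups/builds/builds.tar.gz
    # restore: unpack the tarball into a freshly created builds directory
    tar -C /path/to/gitlab-ci/builds -xzf - -- . < /path/to/backups/builds/builds.tar.gz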
require 'yaml'
+require 'open3'

module Backup
  class Database
+    # These are the final CI tables (final prior to integration in GitLab)
+    TABLES = %w{
+      ci_application_settings ci_builds ci_commits ci_events ci_jobs ci_projects
+      ci_runner_projects ci_runners ci_services ci_tags ci_taggings ci_trigger_requests
+      ci_triggers ci_variables ci_web_hooks
+    }
+
    attr_reader :config, :db_dir
 
def initialize
@@ -10,34 +18,86 @@ module Backup
FileUtils.mkdir_p(@db_dir) unless Dir.exists?(@db_dir)
end
 
-    def dump
-      success = case config["adapter"]
+    def dump(mysql_to_postgresql=false)
+      FileUtils.rm_f(db_file_name)
+      compress_rd, compress_wr = IO.pipe
+      compress_pid = spawn(*%W(gzip -1 -c), in: compress_rd, out: [db_file_name, 'w', 0600])
+      compress_rd.close
+      dump_pid = case config["adapter"]
      when /^mysql/ then
        $progress.print "Dumping MySQL database #{config['database']} ... "
-        system('mysqldump', *mysql_args, config['database'], out: db_file_name)
+        args = mysql_args
+        args << '--compatible=postgresql' if mysql_to_postgresql
+        spawn('mysqldump', *args, config['database'], *TABLES, out: compress_wr)
      when "postgresql" then
        $progress.print "Dumping PostgreSQL database #{config['database']} ... "
        pg_env
-        system('pg_dump', config['database'], out: db_file_name)
+        spawn('pg_dump', '--clean', *TABLES.map { |t| "--table=#{t}" }, config['database'], out: compress_wr)
      end
+      compress_wr.close
+      success = [compress_pid, dump_pid].all? { |pid| Process.waitpid(pid); $?.success? }
      report_success(success)
      abort 'Backup failed' unless success
+      convert_to_postgresql if mysql_to_postgresql
    end
+
+    def convert_to_postgresql
+      mysql_dump_gz = db_file_name + '.mysql'
+      psql_dump_gz = db_file_name + '.psql'
+      drop_indexes_sql = File.join(db_dir, 'drop_indexes.sql')
+      File.rename(db_file_name, mysql_dump_gz)
+
+      $progress.print "Converting MySQL database dump to Postgres ... "
+      statuses = Open3.pipeline(
+        %W(gzip -cd #{mysql_dump_gz}),
+        %W(python lib/support/mysql-postgresql-converter/db_converter.py - - #{drop_indexes_sql}),
+        %W(gzip -1 -c),
+        out: [psql_dump_gz, 'w', 0600]
+      )
+      if !statuses.compact.all?(&:success?)
+        abort "mysql-to-postgresql-converter failed"
+      end
+      $progress.puts '[DONE]'.green
+      FileUtils.rm_f(mysql_dump_gz) # save disk space during conversion
+
+      $progress.print "Splicing in 'DROP INDEX' statements ... "
+      statuses = Open3.pipeline(
+        %W(lib/support/mysql-postgresql-converter/splice_drop_indexes #{psql_dump_gz} #{drop_indexes_sql}),
+        %W(gzip -1 -c),
+        out: [db_file_name, 'w', 0600]
+      )
+      if !statuses.compact.all?(&:success?)
+        abort "Failed to splice in 'DROP INDEXES' statements"
+      end
+      $progress.puts '[DONE]'.green
+    ensure
+      FileUtils.rm_f([mysql_dump_gz, psql_dump_gz, drop_indexes_sql])
+    end
 
    def restore
-      success = case config["adapter"]
+      decompress_rd, decompress_wr = IO.pipe
+      decompress_pid = spawn(*%W(gzip -cd), out: decompress_wr, in: db_file_name)
+      decompress_wr.close
+      restore_pid = case config["adapter"]
      when /^mysql/ then
        $progress.print "Restoring MySQL database #{config['database']} ... "
-        system('mysql', *mysql_args, config['database'], in: db_file_name)
+        spawn('mysql', *mysql_args, config['database'], in: decompress_rd)
      when "postgresql" then
        $progress.print "Restoring PostgreSQL database #{config['database']} ... "
-        # Drop all tables because PostgreSQL DB dumps do not contain DROP TABLE
-        # statements like MySQL.
-        drop_all_tables
-        drop_all_postgres_sequences
        pg_env
-        system('psql', config['database'], '-f', db_file_name)
+        spawn('psql', config['database'], in: decompress_rd)
      end
+      decompress_rd.close
+      success = [decompress_pid, restore_pid].all? { |pid| Process.waitpid(pid); $?.success? }
      report_success(success)
      abort 'Restore failed' unless success
    end
@@ -45,7 +105,7 @@ module Backup
protected
 
def db_file_name
-      File.join(db_dir, 'database.sql')
+      File.join(db_dir, 'database.sql.gz')
end
 
def mysql_args
@@ -74,19 +134,5 @@ module Backup
$progress.puts '[FAILED]'.red
end
end
-
-    def drop_all_tables
-      connection = ActiveRecord::Base.connection
-      connection.tables.each do |table|
-        connection.drop_table(table)
-      end
-    end
-
-    def drop_all_postgres_sequences
-      connection = ActiveRecord::Base.connection
-      connection.execute("SELECT c.relname FROM pg_class c WHERE c.relkind = 'S';").each do |sequence|
-        connection.execute("DROP SEQUENCE #{sequence['relname']}")
-      end
-    end
end
end
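With `pg_dump --clean` now emitting its own DROP statements, the table- and sequence-dropping helpers above become dead code and are removed. For orientation, the MySQL dump stage is essentially a mysqldump-into-gzip pipeline restricted to the `TABLES` list; a sketch, with connection flags and most table names omitted and an illustrative database name:

    # Roughly what Backup::Database#dump runs for MySQL when
    # MYSQL_TO_POSTGRESQL=1 (only the first few TABLES entries shown):
    mysqldump --compatible=postgresql gitlab_ci_production \
      ci_application_settings ci_builds ci_commits \
      | gzip -1 -c > db/database.sql.gz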
--- a/lib/backup/manager.rb
+++ b/lib/backup/manager.rb
@@ -7,7 +7,7 @@ module Backup
s[:backup_created_at] = Time.now
s[:gitlab_version] = GitlabCi::VERSION
s[:tar_version] = tar_version
tar_file = "#{s[:backup_created_at].to_i}_gitlab_ci_backup.tar.gz"
tar_file = "#{s[:backup_created_at].to_i}_gitlab_ci_backup.tar"
 
Dir.chdir(GitlabCi.config.backup.path) do
File.open("#{GitlabCi.config.backup.path}/backup_information.yml",
@@ -20,7 +20,7 @@ module Backup
# create archive
$progress.print "Creating backup archive: #{tar_file} ... "
orig_umask = File.umask(0077)
-      if Kernel.system('tar', '-czf', tar_file, *backup_contents)
+      if Kernel.system('tar', '-cf', tar_file, *backup_contents)
$progress.puts "done".green
else
puts "creating archive #{tar_file} failed".red
@@ -78,11 +78,11 @@ module Backup
removed = 0
Dir.chdir(GitlabCi.config.backup.path) do
-        file_list = Dir.glob('*_gitlab_ci_backup.tar.gz')
-        file_list.map! { |f| $1.to_i if f =~ /(\d+)_gitlab_ci_backup.tar.gz/ }
+        file_list = Dir.glob('*_gitlab_ci_backup.tar')
+        file_list.map! { |f| $1.to_i if f =~ /(\d+)_gitlab_ci_backup.tar/ }
file_list.sort.each do |timestamp|
if Time.at(timestamp) < (Time.now - keep_time)
-            if Kernel.system(*%W(rm #{timestamp}_gitlab_ci_backup.tar.gz))
+            if Kernel.system(*%W(rm #{timestamp}_gitlab_ci_backup.tar))
removed += 1
end
end
@@ -99,7 +99,7 @@ module Backup
Dir.chdir(GitlabCi.config.backup.path)
 
# check for existing backups in the backup dir
file_list = Dir.glob("*_gitlab_ci_backup.tar.gz").each.map { |f| f.split(/_/).first.to_i }
file_list = Dir.glob("*_gitlab_ci_backup.tar").each.map { |f| f.split(/_/).first.to_i }
puts "no backups found" if file_list.count == 0
 
if file_list.count > 1 && ENV["BACKUP"].nil?
@@ -108,7 +108,7 @@ module Backup
exit 1
end
 
tar_file = ENV["BACKUP"].nil? ? File.join("#{file_list.first}_gitlab_ci_backup.tar.gz") : File.join(ENV["BACKUP"] + "_gitlab_ci_backup.tar.gz")
tar_file = ENV["BACKUP"].nil? ? File.join("#{file_list.first}_gitlab_ci_backup.tar") : File.join(ENV["BACKUP"] + "_gitlab_ci_backup.tar")
 
unless File.exists?(tar_file)
puts "The specified backup doesn't exist!"
@@ -117,7 +117,7 @@ module Backup
 
$progress.print "Unpacking backup ... "
 
-      unless Kernel.system(*%W(tar -xzf #{tar_file}))
+      unless Kernel.system(*%W(tar -xf #{tar_file}))
puts "unpacking backup failed".red
exit 1
else
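On the restore side, the `BACKUP` environment variable selects an archive by its timestamp prefix when more than one `*_gitlab_ci_backup.tar` file is present. A usage sketch, assuming the corresponding `backup:restore` rake task and an illustrative timestamp:

    # Restore a specific archive from the configured backup directory.
    BACKUP=1441793176 bundle exec rake backup:restore RAILS_ENV=production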
Copyright (c) 2012 Lanyrd Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
MySQL to PostgreSQL Converter
=============================
Lanyrd's MySQL to PostgreSQL conversion script. Use with care.
This script was designed for our specific database and column requirements -
notably, it doubles the lengths of VARCHARs due to a unicode size problem we
had, places indexes on all foreign keys, and presumes you're using Django
for column typing purposes.
GitLab-specific changes
-----------------------
The `gitlab` branch of this fork contains the following changes made for
GitLab.
- Guard against replacing '0000-00-00 00:00:00' inside SQL text fields.
- Replace all MySQL zero-byte string literals `\0`. This is safe as of GitLab
6.8 because the GitLab database schema contains no binary columns.
- Never set 'NOT NULL' constraints on datetimes.
- Drop sequences before creating them.
- Preserve default values of boolean (originally `tinyint(1)`) columns.
- Import all indexes.
- Import index names.
- Drop tables before creating.
- Drop indexes before creating.
How to use
----------

First, dump your MySQL database in PostgreSQL-compatible format:

    mysqldump --compatible=postgresql --default-character-set=utf8 \
      -r databasename.mysql -u root gitlabhq_production -p

Then convert it using the db_converter.py script:

    python db_converter.py databasename.mysql - drop_indexes.sql | gzip -c > databasename.unfinished.psql.gz

It will print progress to the terminal.

The resulting dump can already be imported, but the import would be slow
because the data is loaded while all indexes exist. Splice the generated
'DROP INDEX' statements into the dump to fix that:

    ./splice_drop_indexes databasename.unfinished.psql.gz drop_indexes.sql > databasename.psql

Next, load the new dump into a fresh PostgreSQL database:

    psql -f databasename.psql -d gitlabhq_production
More information
----------------
You can learn more about the move which this powered at http://lanyrd.com/blog/2012/lanyrds-big-move/ and some technical details of it at http://www.aeracode.org/2012/11/13/one-change-not-enough/.
#!/usr/bin/env python
"""
Fixes a MySQL dump made with the right format so it can be directly
imported to a new PostgreSQL database.
Dump using:
mysqldump --compatible=postgresql --default-character-set=utf8 -r databasename.mysql -u root databasename
"""
import re
import sys
import os
import time
import subprocess
def parse(input_filename, output_filename, drop_index_filename):
"Feed it a file, and it'll output a fixed one"
# State storage
if input_filename == "-":
num_lines = -1
else:
num_lines = int(subprocess.check_output(["wc", "-l", input_filename]).strip().split()[0])
tables = {}
current_table = None
creation_lines = []
enum_types = []
foreign_key_lines = []
index_lines = []
drop_index_lines = []
sequence_lines = []
cast_lines = []
num_inserts = 0
started = time.time()
# Open output file and write header. Logging file handle will be stdout
# unless we're writing output to stdout, in which case NO PROGRESS FOR YOU.
if output_filename == "-":
output = sys.stdout
logging = open(os.devnull, "w")
else:
output = open(output_filename, "w")
logging = sys.stdout
drop_index = open(drop_index_filename, "w")
if input_filename == "-":
input_fh = sys.stdin
else:
input_fh = open(input_filename)
output.write("-- Converted by db_converter\n")
output.write("START TRANSACTION;\n")
output.write("SET standard_conforming_strings=off;\n")
output.write("SET escape_string_warning=off;\n")
output.write("SET CONSTRAINTS ALL DEFERRED;\n\n")
for i, line in enumerate(input_fh):
time_taken = time.time() - started
percentage_done = (i+1) / float(num_lines)
secs_left = (time_taken / percentage_done) - time_taken
logging.write("\rLine %i (of %s: %.2f%%) [%s tables] [%s inserts] [ETA: %i min %i sec]" % (
i + 1,
num_lines,
((i+1)/float(num_lines))*100,
len(tables),
num_inserts,
secs_left // 60,
secs_left % 60,
))
logging.flush()
line = line.decode("utf8").strip().replace(r"\\", "WUBWUBREALSLASHWUB").replace(r"\0", "").replace(r"\'", "''").replace("WUBWUBREALSLASHWUB", r"\\")
# Ignore comment lines
if line.startswith("--") or line.startswith("/*") or line.startswith("LOCK TABLES") or line.startswith("DROP TABLE") or line.startswith("UNLOCK TABLES") or not line:
continue
# Outside of anything handling
if current_table is None:
# Start of a table creation statement?
if line.startswith("CREATE TABLE"):
current_table = line.split('"')[1]
tables[current_table] = {"columns": []}
creation_lines = []
# Inserting data into a table?
elif line.startswith("INSERT INTO"):
output.write(re.sub(r"([^'])'0000-00-00 00:00:00'", r"\1NULL", line.encode("utf8")) + "\n")
num_inserts += 1
# ???
else:
print "\n ! Unknown line in main body: %s" % line
# Inside-create-statement handling
else:
# Is it a column?
if line.startswith('"'):
useless, name, definition = line.strip(",").split('"',2)
try:
type, extra = definition.strip().split(" ", 1)
# This must be a tricky enum
if ')' in extra:
type, extra = definition.strip().split(")")
except ValueError:
type = definition.strip()
extra = ""
extra = re.sub("CHARACTER SET [\w\d]+\s*", "", extra.replace("unsigned", ""))
extra = re.sub("COLLATE [\w\d]+\s*", "", extra.replace("unsigned", ""))
# See if it needs type conversion
final_type = None
final_default = None
set_sequence = None
if type == "tinyint(1)":
type = "int4"
set_sequence = True
final_type = "boolean"
if "DEFAULT '0'" in extra:
final_default = "FALSE"
elif "DEFAULT '1'" in extra:
final_default = "TRUE"
elif type.startswith("int("):
type = "integer"
set_sequence = True
elif type.startswith("bigint("):
type = "bigint"
set_sequence = True
elif type == "longtext":
type = "text"
elif type == "mediumtext":
type = "text"
elif type == "tinytext":
type = "text"
elif type.startswith("varchar("):
size = int(type.split("(")[1].rstrip(")"))
type = "varchar(%s)" % (size * 2)
elif type.startswith("smallint("):
type = "int2"
set_sequence = True
elif type == "datetime":
type = "timestamp with time zone"
extra = extra.replace("NOT NULL", "")
elif type == "double":
type = "double precision"
elif type == "blob":
type = "bytea"
elif type.startswith("enum(") or type.startswith("set("):
types_str = type.split("(")[1].rstrip(")").rstrip('"')
types_arr = [type_str.strip('\'') for type_str in types_str.split(",")]
# Considered using values to make a name, but its dodgy
# enum_name = '_'.join(types_arr)
enum_name = "{0}_{1}".format(current_table, name)
if enum_name not in enum_types:
output.write("CREATE TYPE {0} AS ENUM ({1}); \n".format(enum_name, types_str));
enum_types.append(enum_name)
type = enum_name
if final_type:
cast_lines.append("ALTER TABLE \"%s\" ALTER COLUMN \"%s\" DROP DEFAULT" % (current_table, name))
cast_lines.append("ALTER TABLE \"%s\" ALTER COLUMN \"%s\" TYPE %s USING CAST(\"%s\" as %s)" % (current_table, name, final_type, name, final_type))
if final_default:
cast_lines.append("ALTER TABLE \"%s\" ALTER COLUMN \"%s\" SET DEFAULT %s" % (current_table, name, final_default))
# ID fields need sequences [if they are integers?]
if name == "id" and set_sequence is True:
sequence_lines.append("DROP SEQUENCE IF EXISTS %s_id_seq" % (current_table))
sequence_lines.append("CREATE SEQUENCE %s_id_seq" % (current_table))
sequence_lines.append("SELECT setval('%s_id_seq', max(id)) FROM %s" % (current_table, current_table))
sequence_lines.append("ALTER TABLE \"%s\" ALTER COLUMN \"id\" SET DEFAULT nextval('%s_id_seq')" % (current_table, current_table))
# Record it
creation_lines.append('"%s" %s %s' % (name, type, extra))
tables[current_table]['columns'].append((name, type, extra))
# Is it a constraint or something?
elif line.startswith("PRIMARY KEY"):
creation_lines.append(line.rstrip(","))
elif line.startswith("CONSTRAINT"):
foreign_key_lines.append("ALTER TABLE \"%s\" ADD CONSTRAINT %s DEFERRABLE INITIALLY DEFERRED" % (current_table, line.split("CONSTRAINT")[1].strip().rstrip(",")))
foreign_key_lines.append("CREATE INDEX ON \"%s\" %s" % (current_table, line.split("FOREIGN KEY")[1].split("REFERENCES")[0].strip().rstrip(",")))
elif line.startswith("UNIQUE KEY \""):
index_name = line.split('"')[1].split('"')[0]
index_columns = line.split("(")[1].split(")")[0]
index_lines.append("CREATE UNIQUE INDEX \"%s\" ON %s (%s)" % (index_name, current_table, index_columns))
drop_index_lines.append("DROP INDEX IF EXISTS \"%s\"" % index_name)
elif line.startswith("UNIQUE KEY"):
index_columns = line.split("(")[1].split(")")[0]
index_lines.append("CREATE UNIQUE INDEX ON %s (%s)" % (current_table, index_columns))
elif line.startswith("KEY \""):
index_name = line.split('"')[1].split('"')[0]
index_columns = line.split("(")[1].split(")")[0]
index_lines.append("CREATE INDEX \"%s\" ON %s (%s)" % (index_name, current_table, index_columns))
drop_index_lines.append("DROP INDEX IF EXISTS \"%s\"" % index_name)
elif line.startswith("KEY"):
index_columns = line.split("(")[1].split(")")[0]
index_lines.append("CREATE INDEX ON %s (%s)" % (current_table, index_columns))
elif line.startswith("FULLTEXT KEY"):
fulltext_keys = " || ' ' || ".join( line.split('(')[-1].split(')')[0].replace('"', '').split(',') )
index_lines.append("CREATE INDEX ON %s USING gin(to_tsvector('english', %s))" % (current_table, fulltext_keys))
# Is it the end of the table?
elif line == ");":
output.write("DROP TABLE IF EXISTS \"%s\";\n" % current_table)
output.write("CREATE TABLE \"%s\" (\n" % current_table)
for i, line in enumerate(creation_lines):
output.write(" %s%s\n" % (line, "," if i != (len(creation_lines) - 1) else ""))
output.write(');\n\n')
current_table = None
# ???
else:
print "\n ! Unknown line inside table creation: %s" % line
# Finish file
output.write("\n-- Post-data save --\n")
output.write("COMMIT;\n")
output.write("START TRANSACTION;\n")
# Write typecasts out
output.write("\n-- Typecasts --\n")
for line in cast_lines:
output.write("%s;\n" % line)
# Write FK constraints out
output.write("\n-- Foreign keys --\n")
for line in foreign_key_lines:
output.write("%s;\n" % line)
# Write sequences out
output.write("\n-- Sequences --\n")
for line in sequence_lines:
output.write("%s;\n" % line)
drop_index.write("-- Drop indexes --\n")
for line in drop_index_lines:
drop_index.write("%s;\n" % line)
# Write indexes out
output.write("\n-- Indexes --\n")
for line in index_lines:
output.write("%s;\n" % line)
# Finish file
output.write("\n")
output.write("COMMIT;\n")
print ""
if __name__ == "__main__":
parse(sys.argv[1], sys.argv[2], sys.argv[3])
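As the `__main__` block shows, the script takes three arguments: the input dump, the output file, and a path for the generated 'DROP INDEX' statements, with `-` standing for stdin/stdout (which is how `Backup::Database#convert_to_postgresql` invokes it over a pipe). A standalone invocation sketch with illustrative file names:

    # Convert a MySQL dump, writing the DROP INDEX statements to a side file.
    python db_converter.py databasename.mysql databasename.psql drop_indexes.sql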
#!/bin/sh
# This script reorders database dumps generated by db_converter.py for
# efficient consumption by Postgres.
fail() {
echo "$@" 1>&2
exit 1
}
db_gz=$1
drop_indexes_sql=$2
if [ -z "$db_gz" ] || [ -z "$drop_indexes_sql" ] ; then
fail "Usage: $0 database.sql.gz drop_indexes.sql"
fi
# Capture all text up to the first occurrence of 'SET CONSTRAINTS'
preamble=$(zcat "$db_gz" | sed '/SET CONSTRAINTS/q')
if [ -z "$preamble" ] ; then
fail "Could not read preamble"
fi
drop_indexes=$(cat "$drop_indexes_sql")
if [ -z "$drop_indexes" ] ; then
fail "Could not read DROP INDEXES file"
fi
# Print preamble and drop indexes
cat <<EOF
${preamble}
${drop_indexes}
EOF
# Print the rest of database.sql.gz: the awk program sets the flag 'f' on the
# line matching 'SET CONSTRAINTS', and the bare pattern 'f' prints each line
# once the flag is set, i.e. everything after the first match.
zcat "$db_gz" | awk 'f; /SET CONSTRAINTS/ { f = 1 }'
--- a/lib/tasks/backup.rake
+++ b/lib/tasks/backup.rake
@@ -3,9 +3,14 @@ namespace :backup do
desc "GITLAB | Create a backup of the GitLab CI database"
task create: :environment do
configure_cron_mode
+    mysql_to_postgresql = (ENV['MYSQL_TO_POSTGRESQL'] == '1')
+
+    $progress.puts "Applying final database migrations ... ".blue
+    Rake::Task['db:migrate'].invoke
+    $progress.puts "done".green
 
$progress.puts "Dumping database ... ".blue
-    Backup::Database.new.dump
+    Backup::Database.new.dump(mysql_to_postgresql)
$progress.puts "done".green
 
$progress.puts "Dumping builds ... ".blue
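With this task in place the final export is a single command; setting `MYSQL_TO_POSTGRESQL=1` makes the dump importable into a PostgreSQL-backed GitLab. A sketch, assuming an installation from source where the install path and `RAILS_ENV` shown are illustrative:

    # Create the final CI backup; the flag enables the MySQL-to-PostgreSQL
    # conversion pipeline in Backup::Database#dump.
    cd /home/gitlab_ci/gitlab-ci
    MYSQL_TO_POSTGRESQL=1 bundle exec rake backup:create RAILS_ENV=production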