From a97dcc077c68f4f320cd7a5686b9056adfef6c09 Mon Sep 17 00:00:00 2001
From: Yorick Peterse <yorickpeterse@gmail.com>
Date: Wed, 8 Feb 2017 18:15:47 +0100
Subject: [PATCH] Add method for creating foreign keys concurrently

This method allows one to create foreign keys without blocking access to
the source table, but only on PostgreSQL.

When creating a regular foreign key the "ALTER TABLE" statement used for
this won't return until all data has been validated. This statement in
turn will acquire a lock on the source table. As a result this lock can
be held for quite a long amount of time, depending on the number of rows
and system load.

By breaking up the foreign key creation process in two steps (creation,
and validation) we can reduce the amount of locking to a minimum.
Locking is still necessary for the "ALTER TABLE" statement that adds the
constraint, but this is a fast process and so will only block access for
a few milliseconds.
---
 lib/gitlab/database/migration_helpers.rb      | 50 +++++++++++-
 .../gitlab/database/migration_helpers_spec.rb | 76 +++++++++++++++++--
 2 files changed, 119 insertions(+), 7 deletions(-)

diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb
index 0bd6e148ba8..4800a509b37 100644
--- a/lib/gitlab/database/migration_helpers.rb
+++ b/lib/gitlab/database/migration_helpers.rb
@@ -26,11 +26,59 @@ module Gitlab
         add_index(table_name, column_name, options)
       end
 
+      # Adds a foreign key with only minimal locking on the tables involved.
+      #
+      # This method only requires minimal locking when using PostgreSQL. When
+      # using MySQL this method will use Rails' default `add_foreign_key`.
+      #
+      # source - The source table containing the foreign key.
+      # target - The target table the key points to.
+      # column - The name of the column to create the foreign key on.
+      # on_delete - The action to perform when associated data is removed,
+      #             defaults to "CASCADE".
+      def add_concurrent_foreign_key(source, target, column:, on_delete: :cascade)
+        # Transactions would result in ALTER TABLE locks being held for the
+        # duration of the transaction, defeating the purpose of this method.
+        if transaction_open?
+          raise 'add_concurrent_foreign_key can not be run inside a transaction'
+        end
+
+        # While MySQL does allow disabling of foreign keys it has no equivalent
+        # of PostgreSQL's "VALIDATE CONSTRAINT". As a result we'll just fall
+        # back to the normal foreign key procedure.
+        if Database.mysql?
+          return add_foreign_key(source, target,
+                                 column: column,
+                                 on_delete: on_delete)
+        end
+
+        disable_statement_timeout
+
+        key_name = "fk_#{source}_#{target}_#{column}"
+
+        # Using NOT VALID allows us to create a key without immediately
+        # validating it. This means we keep the ALTER TABLE lock only for a
+        # short period of time. The key _is_ enforced for any newly created
+        # data.
+        execute <<-EOF.strip_heredoc
+        ALTER TABLE #{source}
+        ADD CONSTRAINT #{key_name}
+        FOREIGN KEY (#{column})
+        REFERENCES #{target} (id)
+        ON DELETE #{on_delete} NOT VALID;
+        EOF
+
+        # Validate the existing constraint. This can potentially take a very
+        # long time to complete, but fortunately does not lock the source table
+        # while running.
+        execute("ALTER TABLE #{source} VALIDATE CONSTRAINT #{key_name};")
+      end
+
       # Long-running migrations may take more than the timeout allowed by
       # the database. Disable the session's statement timeout to ensure
       # migrations don't get killed prematurely. (PostgreSQL only)
       def disable_statement_timeout
-        ActiveRecord::Base.connection.execute('SET statement_timeout TO 0') if Database.postgresql?
+        execute('SET statement_timeout TO 0') if Database.postgresql?
       end
 
       # Updates the value of a column in batches.
diff --git a/spec/lib/gitlab/database/migration_helpers_spec.rb b/spec/lib/gitlab/database/migration_helpers_spec.rb
index 7fd25b9e5bf..e94ca4fcfd2 100644
--- a/spec/lib/gitlab/database/migration_helpers_spec.rb
+++ b/spec/lib/gitlab/database/migration_helpers_spec.rb
@@ -12,15 +12,14 @@ describe Gitlab::Database::MigrationHelpers, lib: true do
   describe '#add_concurrent_index' do
     context 'outside a transaction' do
       before do
-        expect(model).to receive(:transaction_open?).and_return(false)
-
-        unless Gitlab::Database.postgresql?
-          allow_any_instance_of(Gitlab::Database::MigrationHelpers).to receive(:disable_statement_timeout)
-        end
+        allow(model).to receive(:transaction_open?).and_return(false)
       end
 
       context 'using PostgreSQL' do
-        before { expect(Gitlab::Database).to receive(:postgresql?).and_return(true) }
+        before do
+          allow(Gitlab::Database).to receive(:postgresql?).and_return(true)
+          allow(model).to receive(:disable_statement_timeout)
+        end
 
         it 'creates the index concurrently' do
           expect(model).to receive(:add_index).
@@ -59,6 +58,71 @@ describe Gitlab::Database::MigrationHelpers, lib: true do
     end
   end
 
+  describe '#add_concurrent_foreign_key' do
+    context 'inside a transaction' do
+      it 'raises an error' do
+        expect(model).to receive(:transaction_open?).and_return(true)
+
+        expect do
+          model.add_concurrent_foreign_key(:projects, :users, column: :user_id)
+        end.to raise_error(RuntimeError)
+      end
+    end
+
+    context 'outside a transaction' do
+      before do
+        allow(model).to receive(:transaction_open?).and_return(false)
+      end
+
+      context 'using MySQL' do
+        it 'creates a regular foreign key' do
+          allow(Gitlab::Database).to receive(:mysql?).and_return(true)
+
+          expect(model).to receive(:add_foreign_key).
+            with(:projects, :users, column: :user_id, on_delete: :cascade)
+
+          model.add_concurrent_foreign_key(:projects, :users, column: :user_id)
+        end
+      end
+
+      context 'using PostgreSQL' do
+        before do
+          allow(Gitlab::Database).to receive(:mysql?).and_return(false)
+        end
+
+        it 'creates a concurrent foreign key' do
+          expect(model).to receive(:disable_statement_timeout)
+          expect(model).to receive(:execute).ordered.with(/NOT VALID/)
+          expect(model).to receive(:execute).ordered.with(/VALIDATE CONSTRAINT/)
+
+          model.add_concurrent_foreign_key(:projects, :users, column: :user_id)
+        end
+      end
+    end
+  end
+
+  describe '#disable_statement_timeout' do
+    context 'using PostgreSQL' do
+      it 'disables statement timeouts' do
+        expect(Gitlab::Database).to receive(:postgresql?).and_return(true)
+
+        expect(model).to receive(:execute).with('SET statement_timeout TO 0')
+
+        model.disable_statement_timeout
+      end
+    end
+
+    context 'using MySQL' do
+      it 'does nothing' do
+        expect(Gitlab::Database).to receive(:postgresql?).and_return(false)
+
+        expect(model).not_to receive(:execute)
+
+        model.disable_statement_timeout
+      end
+    end
+  end
+
   describe '#update_column_in_batches' do
     before do
       create_list(:empty_project, 5)
-- 
GitLab