diff --git a/.rubocop.yml b/.rubocop.yml index 562197300b4d4ee63c06510420f90b2dbc13434c..2d2055d76b54e38c5d461016f7310958def4ef0c 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -21,6 +21,7 @@ AllCops: - 'lib/email_validator.rb' - 'lib/gitlab/upgrader.rb' - 'lib/gitlab/seeder.rb' + - 'lib/templates/**/*' ##################### Style ################################## diff --git a/doc/development/migration_style_guide.md b/doc/development/migration_style_guide.md index 28dedf3978cd8fb41c72a06fd6114d7ddcbbc9d6..1c13b0945820e4decb1ebbff38ef85e95c15186e 100644 --- a/doc/development/migration_style_guide.md +++ b/doc/development/migration_style_guide.md @@ -8,7 +8,10 @@ In addition, having to take a server offline for a an upgrade small or big is a big burden for most organizations. For this reason it is important that your migrations are written carefully, can be applied online and adhere to the style guide below. -It's advised to have offline migrations only in major GitLab releases. +Migrations should not require GitLab installations to be taken offline unless +_absolutely_ necessary. If a migration requires downtime this should be +clearly mentioned during the review process as well as being documented in the +monthly release post. When writing your migrations, also consider that databases might have stale data or inconsistencies and guard for that. Try to make as little assumptions as possible @@ -58,6 +61,45 @@ remove_index :namespaces, column: :name if index_exists?(:namespaces, :name) If you need to add an unique index please keep in mind there is possibility of existing duplicates. If it is possible write a separate migration for handling this situation. It can be just removing or removing with overwriting all references to these duplicates depend on situation. +When adding an index make sure to use the method `add_concurrent_index` instead +of the regular `add_index` method. The `add_concurrent_index` method +automatically creates concurrent indexes when using PostgreSQL, removing the +need for downtime. To use this method you must disable transactions by calling +the method `disable_ddl_transaction!` in the body of your migration class like +so: + +``` +class MyMigration < ActiveRecord::Migration + disable_ddl_transaction! + + def change + + end +end +``` + +## Adding Columns With Default Values + +When adding columns with default values you should use the method +`add_column_with_default`. This method ensures the table is updated without +requiring downtime. This method is not reversible so you must manually define +the `up` and `down` methods in your migration class. + +For example, to add the column `foo` to the `projects` table with a default +value of `10` you'd write the following: + +``` +class MyMigration < ActiveRecord::Migration + def up + add_column_with_default(:projects, :foo, :integer, 10) + end + + def down + remove_column(:projects, :foo) + end +end +``` + ## Testing Make sure that your migration works with MySQL and PostgreSQL with data. An empty database does not guarantee that your migration is correct. @@ -89,4 +131,4 @@ select_all("SELECT name, COUNT(id) as cnt FROM tags GROUP BY name HAVING COUNT(i execute("UPDATE taggings SET tag_id = #{origin_tag_id} WHERE tag_id IN(#{duplicate_ids.join(",")})") execute("DELETE FROM tags WHERE id IN(#{duplicate_ids.join(",")})") end -``` \ No newline at end of file +``` diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb new file mode 100644 index 0000000000000000000000000000000000000000..31a87b73fbe7d78ddee38f57fda7b04ff94123ac --- /dev/null +++ b/lib/gitlab/database/migration_helpers.rb @@ -0,0 +1,134 @@ +module Gitlab + module Database + module MigrationHelpers + # Creates a new index, concurrently when supported + # + # On PostgreSQL this method creates an index concurrently, on MySQL this + # creates a regular index. + # + # Example: + # + # add_concurrent_index :users, :some_column + # + # See Rails' `add_index` for more info on the available arguments. + def add_concurrent_index(*args) + if transaction_open? + raise 'add_concurrent_index can not be run inside a transaction, ' \ + 'you can disable transactions by calling disable_ddl_transaction! ' \ + 'in the body of your migration class' + end + + if Database.postgresql? + args << { algorithm: :concurrently } + end + + add_index(*args) + end + + # Updates the value of a column in batches. + # + # This method updates the table in batches of 5% of the total row count. + # Any data inserted while running this method (or after it has finished + # running) is _not_ updated automatically. + # + # This method _only_ updates rows where the column's value is set to NULL. + # + # table - The name of the table. + # column - The name of the column to update. + # value - The value for the column. + def update_column_in_batches(table, column, value) + quoted_table = quote_table_name(table) + quoted_column = quote_column_name(column) + quoted_value = quote(value) + processed = 0 + + total = exec_query("SELECT COUNT(*) AS count FROM #{quoted_table}"). + to_hash. + first['count']. + to_i + + # Update in batches of 5% with an upper limit of 5000 rows. + batch_size = ((total / 100.0) * 5.0).ceil + + while processed < total + start_row = exec_query(%Q{ + SELECT id + FROM #{quoted_table} + ORDER BY id ASC + LIMIT 1 OFFSET #{processed} + }).to_hash.first + + stop_row = exec_query(%Q{ + SELECT id + FROM #{quoted_table} + ORDER BY id ASC + LIMIT 1 OFFSET #{processed + batch_size} + }).to_hash.first + + query = %Q{ + UPDATE #{quoted_table} + SET #{quoted_column} = #{quoted_value} + WHERE id >= #{start_row['id']} + } + + if stop_row + query += " AND id < #{stop_row['id']}" + end + + execute(query) + + processed += batch_size + end + end + + # Adds a column with a default value without locking an entire table. + # + # This method runs the following steps: + # + # 1. Add the column with a default value of NULL. + # 2. Update all existing rows in batches. + # 3. Change the default value of the column to the specified value. + # 4. Update any remaining rows. + # + # These steps ensure a column can be added to a large and commonly used + # table without locking the entire table for the duration of the table + # modification. + # + # table - The name of the table to update. + # column - The name of the column to add. + # type - The column type (e.g. `:integer`). + # default - The default value for the column. + # allow_null - When set to `true` the column will allow NULL values, the + # default is to not allow NULL values. + def add_column_with_default(table, column, type, default:, allow_null: false) + if transaction_open? + raise 'add_column_with_default can not be run inside a transaction, ' \ + 'you can disable transactions by calling disable_ddl_transaction! ' \ + 'in the body of your migration class' + end + + transaction do + add_column(table, column, type, default: nil) + + # Changing the default before the update ensures any newly inserted + # rows already use the proper default value. + change_column_default(table, column, default) + end + + begin + transaction do + update_column_in_batches(table, column, default) + end + # We want to rescue _all_ exceptions here, even those that don't inherit + # from StandardError. + rescue Exception => error # rubocop: disable all + remove_column(table, column) + + raise error + end + + change_column_null(table, column, false) unless allow_null + end + end + end +end diff --git a/lib/templates/active_record/migration/create_table_migration.rb b/lib/templates/active_record/migration/create_table_migration.rb new file mode 100644 index 0000000000000000000000000000000000000000..27acc75dcc433d9980e92871bdc035af80044c3a --- /dev/null +++ b/lib/templates/active_record/migration/create_table_migration.rb @@ -0,0 +1,35 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class <%= migration_class_name %> < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + # When using the methods "add_concurrent_index" or "add_column_with_default" + # you must disable the use of transactions as these methods can not run in an + # existing transaction. When using "add_concurrent_index" make sure that this + # method is the _only_ method called in the migration, any other changes + # should go in a separate migration. This ensures that upon failure _only_ the + # index creation fails and can be retried or reverted easily. + # + # To disable transactions uncomment the following line and remove these + # comments: + # disable_ddl_transaction! + + def change + create_table :<%= table_name %> do |t| +<% attributes.each do |attribute| -%> +<% if attribute.password_digest? -%> + t.string :password_digest<%= attribute.inject_options %> +<% else -%> + t.<%= attribute.type %> :<%= attribute.name %><%= attribute.inject_options %> +<% end -%> +<% end -%> +<% if options[:timestamps] %> + t.timestamps null: false +<% end -%> + end +<% attributes_with_index.each do |attribute| -%> + add_index :<%= table_name %>, :<%= attribute.index_name %><%= attribute.inject_index_options %> +<% end -%> + end +end diff --git a/lib/templates/active_record/migration/migration.rb b/lib/templates/active_record/migration/migration.rb new file mode 100644 index 0000000000000000000000000000000000000000..06bdea113670ab16b0370f222bd02ab5fc42b3d1 --- /dev/null +++ b/lib/templates/active_record/migration/migration.rb @@ -0,0 +1,55 @@ +# See http://doc.gitlab.com/ce/development/migration_style_guide.html +# for more information on how to write migrations for GitLab. + +class <%= migration_class_name %> < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + # When using the methods "add_concurrent_index" or "add_column_with_default" + # you must disable the use of transactions as these methods can not run in an + # existing transaction. When using "add_concurrent_index" make sure that this + # method is the _only_ method called in the migration, any other changes + # should go in a separate migration. This ensures that upon failure _only_ the + # index creation fails and can be retried or reverted easily. + # + # To disable transactions uncomment the following line and remove these + # comments: + # disable_ddl_transaction! + +<%- if migration_action == 'add' -%> + def change +<% attributes.each do |attribute| -%> + <%- if attribute.reference? -%> + add_reference :<%= table_name %>, :<%= attribute.name %><%= attribute.inject_options %> + <%- else -%> + add_column :<%= table_name %>, :<%= attribute.name %>, :<%= attribute.type %><%= attribute.inject_options %> + <%- if attribute.has_index? -%> + add_index :<%= table_name %>, :<%= attribute.index_name %><%= attribute.inject_index_options %> + <%- end -%> + <%- end -%> +<%- end -%> + end +<%- elsif migration_action == 'join' -%> + def change + create_join_table :<%= join_tables.first %>, :<%= join_tables.second %> do |t| + <%- attributes.each do |attribute| -%> + <%= '# ' unless attribute.has_index? -%>t.index <%= attribute.index_name %><%= attribute.inject_index_options %> + <%- end -%> + end + end +<%- else -%> + def change +<% attributes.each do |attribute| -%> +<%- if migration_action -%> + <%- if attribute.reference? -%> + remove_reference :<%= table_name %>, :<%= attribute.name %><%= attribute.inject_options %> + <%- else -%> + <%- if attribute.has_index? -%> + remove_index :<%= table_name %>, :<%= attribute.index_name %><%= attribute.inject_index_options %> + <%- end -%> + remove_column :<%= table_name %>, :<%= attribute.name %>, :<%= attribute.type %><%= attribute.inject_options %> + <%- end -%> +<%- end -%> +<%- end -%> + end +<%- end -%> +end diff --git a/spec/lib/gitlab/database/migration_helpers_spec.rb b/spec/lib/gitlab/database/migration_helpers_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..ec43165bb530963c0c6b642d7d27f997e336d85f --- /dev/null +++ b/spec/lib/gitlab/database/migration_helpers_spec.rb @@ -0,0 +1,124 @@ +require 'spec_helper' + +describe Gitlab::Database::MigrationHelpers, lib: true do + let(:model) do + Class.new do + include Gitlab::Database::MigrationHelpers + + def method_missing(name, *args, &block) + ActiveRecord::Base.connection.send(name, *args, &block) + end + end.new + end + + describe '#add_concurrent_index' do + context 'outside a transaction' do + before do + expect(model).to receive(:transaction_open?).and_return(false) + end + + context 'using PostgreSQL' do + it 'creates the index concurrently' do + expect(Gitlab::Database).to receive(:postgresql?).and_return(true) + + expect(model).to receive(:add_index). + with(:users, :foo, algorithm: :concurrently) + + model.add_concurrent_index(:users, :foo) + end + end + + context 'using MySQL' do + it 'creates a regular index' do + expect(Gitlab::Database).to receive(:postgresql?).and_return(false) + + expect(model).to receive(:add_index). + with(:users, :foo) + + model.add_concurrent_index(:users, :foo) + end + end + end + + context 'inside a transaction' do + it 'raises RuntimeError' do + expect(model).to receive(:transaction_open?).and_return(true) + + expect { model.add_concurrent_index(:users, :foo) }. + to raise_error(RuntimeError) + end + end + end + + describe '#update_column_in_batches' do + before do + create_list(:empty_project, 5) + end + + it 'updates all the rows in a table' do + model.update_column_in_batches(:projects, :import_error, 'foo') + + expect(Project.where(import_error: 'foo').count).to eq(5) + end + end + + describe '#add_column_with_default' do + context 'outside of a transaction' do + before do + expect(model).to receive(:transaction_open?).and_return(false) + + expect(model).to receive(:transaction).twice.and_yield + + expect(model).to receive(:add_column). + with(:projects, :foo, :integer, default: nil) + + expect(model).to receive(:change_column_default). + with(:projects, :foo, 10) + end + + it 'adds the column while allowing NULL values' do + expect(model).to receive(:update_column_in_batches). + with(:projects, :foo, 10) + + expect(model).not_to receive(:change_column_null) + + model.add_column_with_default(:projects, :foo, :integer, + default: 10, + allow_null: true) + end + + it 'adds the column while not allowing NULL values' do + expect(model).to receive(:update_column_in_batches). + with(:projects, :foo, 10) + + expect(model).to receive(:change_column_null). + with(:projects, :foo, false) + + model.add_column_with_default(:projects, :foo, :integer, default: 10) + end + + it 'removes the added column whenever updating the rows fails' do + expect(model).to receive(:update_column_in_batches). + with(:projects, :foo, 10). + and_raise(RuntimeError) + + expect(model).to receive(:remove_column). + with(:projects, :foo) + + expect do + model.add_column_with_default(:projects, :foo, :integer, default: 10) + end.to raise_error(RuntimeError) + end + end + + context 'inside a transaction' do + it 'raises RuntimeError' do + expect(model).to receive(:transaction_open?).and_return(true) + + expect do + model.add_column_with_default(:projects, :foo, :integer, default: 10) + end.to raise_error(RuntimeError) + end + end + end +end