Commit 7bc16889 authored by Gabriel Mazetto

Refactor Storage Migration

Specs were reviewed and improved to better cover the current behavior.
Some standardization was done as well to facilitate the implementation
of the rollback functionality.

StorageMigratorWorker was extracted into the HashedStorage namespace,
where RollbackerWorker will also live.
parent b88f27c8
Showing 147 additions and 98 deletions
@@ -34,22 +34,22 @@ module Projects
 
       private
 
-      def move_folder!(old_disk_path, new_disk_path)
-        unless File.directory?(old_disk_path)
-          logger.info("Skipped attachments migration from '#{old_disk_path}' to '#{new_disk_path}', source path doesn't exist or is not a directory (PROJECT_ID=#{project.id})")
-          return
+      def move_folder!(old_path, new_path)
+        unless File.directory?(old_path)
+          logger.info("Skipped attachments migration from '#{old_path}' to '#{new_path}', source path doesn't exist or is not a directory (PROJECT_ID=#{project.id})")
+          return true
         end
 
-        if File.exist?(new_disk_path)
-          logger.error("Cannot migrate attachments from '#{old_disk_path}' to '#{new_disk_path}', target path already exist (PROJECT_ID=#{project.id})")
-          raise AttachmentMigrationError, "Target path '#{new_disk_path}' already exist"
+        if File.exist?(new_path)
+          logger.error("Cannot migrate attachments from '#{old_path}' to '#{new_path}', target path already exist (PROJECT_ID=#{project.id})")
+          raise AttachmentMigrationError, "Target path '#{new_path}' already exist"
         end
 
         # Create hashed storage base path folder
-        FileUtils.mkdir_p(File.dirname(new_disk_path))
+        FileUtils.mkdir_p(File.dirname(new_path))
 
-        FileUtils.mv(old_disk_path, new_disk_path)
-        logger.info("Migrated project attachments from '#{old_disk_path}' to '#{new_disk_path}' (PROJECT_ID=#{project.id})")
+        FileUtils.mv(old_path, new_path)
+        logger.info("Migrated project attachments from '#{old_path}' to '#{new_path}' (PROJECT_ID=#{project.id})")
 
         true
       end
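Note the standardized contract: move_folder! now returns true both on success and on the safe no-op (missing source), and raises AttachmentMigrationError on a conflict, so execute yields a consistent boolean. An illustrative caller (only the service class and its (project, old_disk_path, logger:) signature come from this diff; the surrounding code is a sketch):

    # Illustrative only: branch on the service's standardized boolean result.
    ok = Projects::HashedStorage::MigrateAttachmentsService
           .new(project, project.full_path, logger: Rails.logger)
           .execute

    Rails.logger.warn("attachments were not migrated") unless ok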
@@ -14,16 +14,26 @@ module Projects
       def execute
         # Migrate repository from Legacy to Hashed Storage
         unless project.hashed_storage?(:repository)
-          return unless HashedStorage::MigrateRepositoryService.new(project, old_disk_path, logger: logger).execute
+          return false unless migrate_repository
         end
 
         # Migrate attachments from Legacy to Hashed Storage
         unless project.hashed_storage?(:attachments)
-          HashedStorage::MigrateAttachmentsService.new(project, old_disk_path, logger: logger).execute
+          return false unless migrate_attachments
         end
 
         true
       end
+
+      private
+
+      def migrate_repository
+        HashedStorage::MigrateRepositoryService.new(project, old_disk_path, logger: logger).execute
+      end
+
+      def migrate_attachments
+        HashedStorage::MigrateAttachmentsService.new(project, old_disk_path, logger: logger).execute
+      end
     end
   end
 end
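MigrationService#execute now short-circuits: a failed repository migration returns false before attachments are touched, and true comes back only when both steps succeed or were already done. A hypothetical direct invocation (the worker further below passes project.disk_path as the old path):

    # Hypothetical invocation; mirrors what ProjectMigrateHashedStorageWorker does.
    service = Projects::HashedStorage::MigrationService.new(
      project,
      project.disk_path,
      logger: Rails.logger
    )
    service.execute # => true only if repository and attachments both migrated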
@@ -45,6 +45,8 @@
 - github_importer:github_import_stage_import_pull_requests
 - github_importer:github_import_stage_import_repository
 
+- hashed_storage:hashed_storage_migrator
 - mail_scheduler:mail_scheduler_issue_due
 - mail_scheduler:mail_scheduler_notification_service
 
@@ -131,7 +133,6 @@
 - repository_fork
 - repository_import
 - repository_remove_remote
-- storage_migrator
 - system_hook_push
 - update_merge_requests
 - upload_checksum
+# frozen_string_literal: true
+
+module HashedStorage
+  class MigratorWorker
+    include ApplicationWorker
+
+    queue_namespace :hashed_storage
+
+    # @param [Integer] start initial ID of the batch
+    # @param [Integer] finish last ID of the batch
+    def perform(start, finish)
+      migrator = Gitlab::HashedStorage::Migrator.new
+      migrator.bulk_migrate(start: start, finish: finish)
+    end
+  end
+end
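Per the commit message, RollbackerWorker will later live in this same namespace. A purely hypothetical sketch mirroring MigratorWorker above (bulk_rollback is an assumed future API; nothing below is part of this commit):

    # frozen_string_literal: true

    # Hypothetical future counterpart to MigratorWorker; NOT in this commit.
    module HashedStorage
      class RollbackerWorker
        include ApplicationWorker

        queue_namespace :hashed_storage

        # @param [Integer] start initial ID of the batch
        # @param [Integer] finish last ID of the batch
        def perform(start, finish)
          migrator = Gitlab::HashedStorage::Migrator.new
          migrator.bulk_rollback(start: start, finish: finish) # assumed API
        end
      end
    end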
@@ -4,21 +4,25 @@ class ProjectMigrateHashedStorageWorker
   include ApplicationWorker
 
   LEASE_TIMEOUT = 30.seconds.to_i
+  LEASE_KEY_SEGMENT = 'project_migrate_hashed_storage_worker'.freeze
 
+  # rubocop: disable CodeReuse/ActiveRecord
   def perform(project_id, old_disk_path = nil)
-    project = Project.find_by(id: project_id)
-    return if project.nil? || project.pending_delete?
     uuid = lease_for(project_id).try_obtain
 
     if uuid
-      ::Projects::HashedStorage::MigrationService.new(project, old_disk_path || project.full_path, logger: logger).execute
+      project = Project.find_by(id: project_id)
+      return if project.nil? || project.pending_delete?
+
+      old_disk_path ||= project.disk_path
+
+      ::Projects::HashedStorage::MigrationService.new(project, old_disk_path, logger: logger).execute
     else
-      false
+      return false
     end
-  rescue => ex
+  ensure
     cancel_lease_for(project_id, uuid) if uuid
-    raise ex
   end
+  # rubocop: enable CodeReuse/ActiveRecord
 
@@ -29,7 +33,8 @@ class ProjectMigrateHashedStorageWorker
   private
 
   def lease_key(project_id)
-    "project_migrate_hashed_storage_worker:#{project_id}"
+    # we share the same lease key for both migration and rollback so they don't run simultaneously
+    "#{LEASE_KEY_SEGMENT}:#{project_id}"
   end
 
   def cancel_lease_for(project_id, uuid)
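Neither lease_for nor the body of cancel_lease_for appears in these hunks; presumably they wrap Gitlab::ExclusiveLease, roughly as follows (a sketch assuming the standard ExclusiveLease API):

    # Sketch of the helpers referenced above, assuming Gitlab::ExclusiveLease.
    def lease_for(project_id)
      Gitlab::ExclusiveLease.new(lease_key(project_id), timeout: LEASE_TIMEOUT)
    end

    def cancel_lease_for(project_id, uuid)
      Gitlab::ExclusiveLease.cancel(lease_key(project_id), uuid)
    end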
-# frozen_string_literal: true
-
-class StorageMigratorWorker
-  include ApplicationWorker
-
-  # @param [Integer] start initial ID of the batch
-  # @param [Integer] finish last ID of the batch
-  # @param [String] operation the operation to be performed: ['migrate', 'rollback']
-  def perform(start, finish, operation = :migrate)
-    # when scheduling a job symbols are converted to string, we need to convert back
-    operation = operation.to_sym if operation
-
-    migrator = Gitlab::HashedStorage::Migrator.new
-    migrator.bulk_migrate(start: start, finish: finish, operation: operation)
-  end
-end
@@ -68,7 +68,7 @@
   - [background_migration, 1]
   - [gcp_cluster, 1]
   - [project_migrate_hashed_storage, 1]
-  - [storage_migrator, 1]
+  - [hashed_storage, 1]
   - [pages_domain_verification, 1]
   - [object_storage_upload, 1]
   - [object_storage, 1]
@@ -13,35 +13,28 @@ module Gitlab
       #
       # @param [Integer] start first project id for the range
       # @param [Integer] finish last project id for the range
-      # @param [Symbol] operation [:migrate, :rollback]
-      def bulk_schedule(start:, finish:, operation: :migrate)
-        StorageMigratorWorker.perform_async(start, finish, operation)
+      def bulk_schedule(start:, finish:)
+        ::HashedStorage::MigratorWorker.perform_async(start, finish)
       end
 
       # Start migration of projects from specified range
       #
-      # Flagging a project to be migrated is a synchronous action,
+      # Flagging a project to be migrated is a synchronous action
       # but the migration runs through async jobs
       #
       # @param [Integer] start first project id for the range
       # @param [Integer] finish last project id for the range
-      # @param [Symbol] operation [:migrate, :rollback]
       # rubocop: disable CodeReuse/ActiveRecord
-      def bulk_migrate(start:, finish:, operation: :migrate)
+      def bulk_migrate(start:, finish:)
         projects = build_relation(start, finish)
 
         projects.with_route.find_each(batch_size: BATCH_SIZE) do |project|
-          case operation
-          when :migrate
-            migrate(project)
-          when :rollback
-            rollback(project)
-          end
+          migrate(project)
         end
       end
       # rubocop: enable CodeReuse/ActiveRecord
 
-      # Flag a project to be migrated
+      # Flag a project to be migrated to Hashed Storage
       #
       # @param [Project] project that will be migrated
       def migrate(project)
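The two entry points split the work differently: bulk_schedule enqueues a single MigratorWorker job covering the whole ID range, while bulk_migrate walks the range synchronously and flags each project, which in turn enqueues one ProjectMigrateHashedStorageWorker per project. For example:

    migrator = Gitlab::HashedStorage::Migrator.new

    # One background MigratorWorker job for the entire range:
    migrator.bulk_schedule(start: 1, finish: 10_000)

    # Iterate the range inline, enqueuing one per-project migration job:
    migrator.bulk_migrate(start: 1, finish: 100)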
@@ -37,7 +37,7 @@ namespace :gitlab do
       print "Enqueuing migration of #{legacy_projects_count} projects in batches of #{helper.batch_size}"
 
       helper.project_id_batches do |start, finish|
-        storage_migrator.bulk_schedule(start: start, finish: finish, operation: :migrate)
+        storage_migrator.bulk_schedule(start: start, finish: finish)
 
         print '.'
       end
@@ -4,7 +4,7 @@ describe Gitlab::HashedStorage::Migrator do
   describe '#bulk_schedule' do
     it 'schedules job to StorageMigratorWorker' do
       Sidekiq::Testing.fake! do
-        expect { subject.bulk_schedule(start: 1, finish: 5) }.to change(StorageMigratorWorker.jobs, :size).by(1)
+        expect { subject.bulk_schedule(start: 1, finish: 5) }.to change(HashedStorage::MigratorWorker.jobs, :size).by(1)
       end
     end
   end
@@ -46,7 +46,7 @@ describe Gitlab::HashedStorage::Migrator do
   describe '#migrate' do
     let(:project) { create(:project, :legacy_storage, :empty_repo) }
 
-    it 'enqueues job to ProjectMigrateHashedStorageWorker' do
+    it 'enqueues project migration job' do
       Sidekiq::Testing.fake! do
         expect { subject.migrate(project) }.to change(ProjectMigrateHashedStorageWorker.jobs, :size).by(1)
       end
@@ -58,7 +58,7 @@ describe Gitlab::HashedStorage::Migrator do
       expect { subject.migrate(project) }.not_to raise_error
     end
 
-    it 'migrate project' do
+    it 'migrates project storage' do
       perform_enqueued_jobs do
         subject.migrate(project)
       end
@@ -73,5 +73,19 @@ describe Gitlab::HashedStorage::Migrator do
 
       expect(project.reload.repository_read_only?).to be_falsey
     end
+
+    context 'when project is already on hashed storage' do
+      let(:project) { create(:project, :empty_repo) }
+
+      it 'doesnt enqueue any migration job' do
+        Sidekiq::Testing.fake! do
+          expect { subject.migrate(project) }.not_to change(ProjectMigrateHashedStorageWorker.jobs, :size)
+        end
+      end
+
+      it 'returns false' do
+        expect(subject.migrate(project)).to be_falsey
+      end
+    end
   end
 end
@@ -3224,7 +3224,7 @@ describe Project do
   end
 
   context 'legacy storage' do
-    let(:project) { create(:project, :repository, :legacy_storage) }
+    set(:project) { create(:project, :repository, :legacy_storage) }
     let(:gitlab_shell) { Gitlab::Shell.new }
     let(:project_storage) { project.send(:storage) }
 
@@ -3279,13 +3279,14 @@ describe Project do
     end
 
     describe '#migrate_to_hashed_storage!' do
+      let(:project) { create(:project, :empty_repo, :legacy_storage) }
+
       it 'returns true' do
         expect(project.migrate_to_hashed_storage!).to be_truthy
       end
 
-      it 'does not validate project visibility' do
-        expect(project).not_to receive(:visibility_level_allowed_as_fork)
-        expect(project).not_to receive(:visibility_level_allowed_by_group)
+      it 'does not run validation' do
+        expect(project).not_to receive(:valid?)
 
         project.migrate_to_hashed_storage!
       end
@@ -3315,7 +3316,7 @@ describe Project do
   end
 
   context 'hashed storage' do
-    let(:project) { create(:project, :repository, skip_disk_validation: true) }
+    set(:project) { create(:project, :repository, skip_disk_validation: true) }
     let(:gitlab_shell) { Gitlab::Shell.new }
     let(:hash) { Digest::SHA2.hexdigest(project.id.to_s) }
     let(:hashed_prefix) { File.join('@hashed', hash[0..1], hash[2..3]) }
@@ -3372,6 +3373,8 @@ describe Project do
     end
 
     describe '#migrate_to_hashed_storage!' do
+      let(:project) { create(:project, :repository, skip_disk_validation: true) }
+
       it 'returns nil' do
         expect(project.migrate_to_hashed_storage!).to be_nil
       end
@@ -3381,10 +3384,12 @@ describe Project do
     end
 
     context 'when partially migrated' do
-      it 'returns true' do
+      it 'enqueues a job' do
         project = create(:project, storage_version: 1, skip_disk_validation: true)
 
-        expect(project.migrate_to_hashed_storage!).to be_truthy
+        Sidekiq::Testing.fake! do
+          expect { project.migrate_to_hashed_storage! }.to change(ProjectMigrateHashedStorageWorker.jobs, :size).by(1)
+        end
       end
     end
   end
@@ -3,7 +3,7 @@ require 'spec_helper'
 describe Projects::HashedStorage::MigrateAttachmentsService do
   subject(:service) { described_class.new(project, project.full_path, logger: nil) }
 
-  let(:project) { create(:project, :legacy_storage) }
+  let(:project) { create(:project, :repository, storage_version: 1, skip_disk_validation: true) }
   let(:legacy_storage) { Storage::LegacyProject.new(project) }
   let(:hashed_storage) { Storage::HashedProject.new(project) }
 
@@ -28,6 +28,10 @@ describe Projects::HashedStorage::MigrateAttachmentsService do
       expect(File.file?(old_disk_path)).to be_falsey
       expect(File.file?(new_disk_path)).to be_truthy
     end
+
+    it 'returns true' do
+      expect(service.execute).to be_truthy
+    end
   end
 
   context 'when original folder does not exist anymore' do
@@ -43,6 +47,12 @@ describe Projects::HashedStorage::MigrateAttachmentsService do
       expect(File.exist?(base_path(hashed_storage))).to be_falsey
       expect(File.file?(new_disk_path)).to be_falsey
     end
+
+    it 'returns true' do
+      expect(FileUtils).not_to receive(:mv).with(base_path(legacy_storage), base_path(hashed_storage))
+
+      expect(service.execute).to be_truthy
+    end
   end
 
   context 'when target folder already exists' do
@@ -58,6 +68,18 @@ describe Projects::HashedStorage::MigrateAttachmentsService do
     end
   end
 
+  context '#old_disk_path' do
+    it 'returns old disk_path for project' do
+      expect(service.old_disk_path).to eq(project.full_path)
+    end
+  end
+
+  context '#new_disk_path' do
+    it 'returns new disk_path for project' do
+      expect(service.new_disk_path).to eq(project.disk_path)
+    end
+  end
+
   def base_path(storage)
     File.join(FileUploader.root, storage.disk_path)
   end
@@ -8,9 +8,12 @@ describe Projects::HashedStorage::MigrateRepositoryService do
   let(:legacy_storage) { Storage::LegacyProject.new(project) }
   let(:hashed_storage) { Storage::HashedProject.new(project) }
 
-  subject(:service) { described_class.new(project, project.full_path) }
+  subject(:service) { described_class.new(project, project.disk_path) }
 
   describe '#execute' do
+    let(:old_disk_path) { legacy_storage.disk_path }
+    let(:new_disk_path) { hashed_storage.disk_path }
+
     before do
       allow(service).to receive(:gitlab_shell) { gitlab_shell }
     end
@@ -33,8 +36,8 @@ describe Projects::HashedStorage::MigrateRepositoryService do
     it 'renames project and wiki repositories' do
       service.execute
 
-      expect(gitlab_shell.exists?(project.repository_storage, "#{hashed_storage.disk_path}.git")).to be_truthy
-      expect(gitlab_shell.exists?(project.repository_storage, "#{hashed_storage.disk_path}.wiki.git")).to be_truthy
+      expect(gitlab_shell.exists?(project.repository_storage, "#{new_disk_path}.git")).to be_truthy
+      expect(gitlab_shell.exists?(project.repository_storage, "#{new_disk_path}.wiki.git")).to be_truthy
     end
 
     it 'updates project to be hashed and not read-only' do
@@ -45,8 +48,8 @@ describe Projects::HashedStorage::MigrateRepositoryService do
     end
 
     it 'move operation is called for both repositories' do
-      expect_move_repository(project.disk_path, hashed_storage.disk_path)
-      expect_move_repository("#{project.disk_path}.wiki", "#{hashed_storage.disk_path}.wiki")
+      expect_move_repository(old_disk_path, new_disk_path)
+      expect_move_repository("#{old_disk_path}.wiki", "#{new_disk_path}.wiki")
 
       service.execute
     end
@@ -62,32 +65,27 @@ describe Projects::HashedStorage::MigrateRepositoryService do
 
     context 'when one move fails' do
       it 'rollsback repositories to original name' do
-        from_name = project.disk_path
-        to_name = hashed_storage.disk_path
         allow(service).to receive(:move_repository).and_call_original
-        allow(service).to receive(:move_repository).with(from_name, to_name).once { false } # will disable first move only
+        allow(service).to receive(:move_repository).with(old_disk_path, new_disk_path).once { false } # will disable first move only
 
         expect(service).to receive(:rollback_folder_move).and_call_original
 
         service.execute
 
-        expect(gitlab_shell.exists?(project.repository_storage, "#{hashed_storage.disk_path}.git")).to be_falsey
-        expect(gitlab_shell.exists?(project.repository_storage, "#{hashed_storage.disk_path}.wiki.git")).to be_falsey
+        expect(gitlab_shell.exists?(project.repository_storage, "#{new_disk_path}.git")).to be_falsey
+        expect(gitlab_shell.exists?(project.repository_storage, "#{new_disk_path}.wiki.git")).to be_falsey
         expect(project.repository_read_only?).to be_falsey
       end
 
       context 'when rollback fails' do
-        let(:from_name) { legacy_storage.disk_path }
-        let(:to_name) { hashed_storage.disk_path }
-
         before do
           hashed_storage.ensure_storage_path_exists
-          gitlab_shell.mv_repository(project.repository_storage, from_name, to_name)
+          gitlab_shell.mv_repository(project.repository_storage, old_disk_path, new_disk_path)
         end
 
-        it 'does not try to move nil repository over hashed' do
-          expect(gitlab_shell).not_to receive(:mv_repository).with(project.repository_storage, from_name, to_name)
-          expect_move_repository("#{project.disk_path}.wiki", "#{hashed_storage.disk_path}.wiki")
+        it 'does not try to move nil repository over existing' do
+          expect(gitlab_shell).not_to receive(:mv_repository).with(project.repository_storage, old_disk_path, new_disk_path)
+          expect_move_repository("#{old_disk_path}.wiki", "#{new_disk_path}.wiki")
 
           service.execute
         end
@@ -58,7 +58,7 @@ describe 'rake gitlab:storage:*' do
 
   context '0 legacy projects' do
     it 'does nothing' do
-      expect(StorageMigratorWorker).not_to receive(:perform_async)
+      expect(::HashedStorage::MigratorWorker).not_to receive(:perform_async)
 
       run_rake_task(task)
     end
@@ -72,9 +72,9 @@ describe 'rake gitlab:storage:*' do
       stub_env('BATCH' => 1)
     end
 
-    it 'enqueues one StorageMigratorWorker per project' do
+    it 'enqueues one HashedStorage::MigratorWorker per project' do
       projects.each do |project|
-        expect(StorageMigratorWorker).to receive(:perform_async).with(project.id, project.id, :migrate)
+        expect(::HashedStorage::MigratorWorker).to receive(:perform_async).with(project.id, project.id)
       end
 
       run_rake_task(task)
@@ -86,10 +86,10 @@ describe 'rake gitlab:storage:*' do
       stub_env('BATCH' => 2)
     end
 
-    it 'enqueues one StorageMigratorWorker per 2 projects' do
+    it 'enqueues one HashedStorage::MigratorWorker per 2 projects' do
       projects.map(&:id).sort.each_slice(2) do |first, last|
         last ||= first
-        expect(StorageMigratorWorker).to receive(:perform_async).with(first, last, :migrate)
+        expect(::HashedStorage::MigratorWorker).to receive(:perform_async).with(first, last)
       end
 
       run_rake_task(task)
 require 'spec_helper'
 
-describe StorageMigratorWorker do
+describe HashedStorage::MigratorWorker do
   subject(:worker) { described_class.new }
   let(:projects) { create_list(:project, 2, :legacy_storage, :empty_repo) }
   let(:ids) { projects.map(&:id) }
 
   describe '#perform' do
     it 'delegates to MigratorService' do
-      expect_any_instance_of(Gitlab::HashedStorage::Migrator).to receive(:bulk_migrate).with(start: 5, finish: 10, operation: :migrate)
+      expect_any_instance_of(Gitlab::HashedStorage::Migrator).to receive(:bulk_migrate).with(start: 5, finish: 10)
 
       worker.perform(5, 10)
     end
@@ -4,12 +4,13 @@ describe ProjectMigrateHashedStorageWorker, :clean_gitlab_redis_shared_state do
   include ExclusiveLeaseHelpers
 
   describe '#perform' do
-    let(:project) { create(:project, :empty_repo) }
+    let(:project) { create(:project, :empty_repo, :legacy_storage) }
     let(:lease_key) { "project_migrate_hashed_storage_worker:#{project.id}" }
-    let(:lease_timeout) { ProjectMigrateHashedStorageWorker::LEASE_TIMEOUT }
+    let(:lease_timeout) { described_class::LEASE_TIMEOUT }
+    let(:migration_service) { ::Projects::HashedStorage::MigrationService }
 
     it 'skips when project no longer exists' do
-      expect(::Projects::HashedStorage::MigrationService).not_to receive(:new)
+      expect(migration_service).not_to receive(:new)
 
       subject.perform(-1)
     end
@@ -17,29 +18,29 @@ describe ProjectMigrateHashedStorageWorker, :clean_gitlab_redis_shared_state do
     it 'skips when project is pending delete' do
       pending_delete_project = create(:project, :empty_repo, pending_delete: true)
 
-      expect(::Projects::HashedStorage::MigrationService).not_to receive(:new)
+      expect(migration_service).not_to receive(:new)
 
       subject.perform(pending_delete_project.id)
     end
 
-    it 'delegates removal to service class when have exclusive lease' do
+    it 'delegates migration to service class when we have exclusive lease' do
       stub_exclusive_lease(lease_key, 'uuid', timeout: lease_timeout)
 
-      migration_service = spy
+      service_spy = spy
 
-      allow(::Projects::HashedStorage::MigrationService)
+      allow(migration_service)
         .to receive(:new).with(project, project.full_path, logger: subject.logger)
-        .and_return(migration_service)
+        .and_return(service_spy)
 
       subject.perform(project.id)
 
-      expect(migration_service).to have_received(:execute)
+      expect(service_spy).to have_received(:execute)
     end
 
-    it 'skips when dont have lease when dont have exclusive lease' do
+    it 'skips when it cant acquire the exclusive lease' do
       stub_exclusive_lease_taken(lease_key, timeout: lease_timeout)
 
-      expect(::Projects::HashedStorage::MigrationService).not_to receive(:new)
+      expect(migration_service).not_to receive(:new)
 
       subject.perform(project.id)
     end