diff --git a/app/models/project.rb b/app/models/project.rb index 2d4cec34a118a57bfaa008649dd8b4ec29f565ee..6098e5ca1411d92bc588ca9f1471640da960bd37 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -249,6 +249,9 @@ class Project < ActiveRecord::Base scope :pending_delete, -> { where(pending_delete: true) } scope :without_deleted, -> { where(pending_delete: false) } + scope :with_hashed_storage, -> { where('storage_version >= 1') } + scope :with_legacy_storage, -> { where(storage_version: [nil, 0]) } + scope :sorted_by_activity, -> { reorder(last_activity_at: :desc) } scope :sorted_by_stars, -> { reorder('projects.star_count DESC') } @@ -1550,18 +1553,44 @@ def forks_count end def legacy_storage? - self.storage_version.nil? + [nil, 0].include?(self.storage_version) + end + + def hashed_storage? + self.storage_version && self.storage_version >= 1 end def renamed? persisted? && path_changed? end + def migrate_to_hashed_storage! + return if hashed_storage? + + update!(repository_read_only: true) + + if repo_reference_count > 0 || wiki_reference_count > 0 + ProjectMigrateHashedStorageWorker.perform_in(Gitlab::ReferenceCounter::REFERENCE_EXPIRE_TIME, id) + else + ProjectMigrateHashedStorageWorker.perform_async(id) + end + end + + def storage_version=(value) + super + + @storage = nil if storage_version_changed? + end + + def gl_repository(is_wiki:) + Gitlab::GlRepository.gl_repository(self, is_wiki) + end + private def storage @storage ||= - if self.storage_version && self.storage_version >= 1 + if hashed_storage? 
Storage::HashedProject.new(self) else Storage::LegacyProject.new(self) @@ -1574,6 +1603,14 @@ def use_hashed_storage end end + def repo_reference_count + Gitlab::ReferenceCounter.new(gl_repository(is_wiki: false)).value + end + + def wiki_reference_count + Gitlab::ReferenceCounter.new(gl_repository(is_wiki: true)).value + end + # set last_activity_at to the same as created_at def set_last_activity_at update_column(:last_activity_at, self.created_at) diff --git a/app/models/storage/hashed_project.rb b/app/models/storage/hashed_project.rb index fae1b64961af66dc324ea76c5392496cb859a6a1..f025f40994eaea3c52885c5cfbaf4ec32f705689 100644 --- a/app/models/storage/hashed_project.rb +++ b/app/models/storage/hashed_project.rb @@ -4,6 +4,7 @@ class HashedProject delegate :gitlab_shell, :repository_storage_path, to: :project ROOT_PATH_PREFIX = '@hashed'.freeze + STORAGE_VERSION = 1 def initialize(project) @project = project diff --git a/app/services/geo/repository_renamed_event_store.rb b/app/services/geo/repository_renamed_event_store.rb index 93c0737a7bbce3a7a5ecb12e6d5387bf7ee561cf..c5dc5ec78e70a6edcb7c31322ecee1bc4c857c70 100644 --- a/app/services/geo/repository_renamed_event_store.rb +++ b/app/services/geo/repository_renamed_event_store.rb @@ -10,14 +10,18 @@ def build_event repository_storage_name: project.repository.storage, repository_storage_path: project.repository_storage_path, old_path_with_namespace: old_path_with_namespace, - new_path_with_namespace: project.full_path, + new_path_with_namespace: project.disk_path, old_wiki_path_with_namespace: old_wiki_path_with_namespace, new_wiki_path_with_namespace: new_wiki_path_with_namespace, - old_path: params.fetch(:old_path), + old_path: old_path, new_path: project.path ) end + def old_path + params.fetch(:old_path) + end + def old_path_with_namespace params.fetch(:old_path_with_namespace) end @@ -27,7 +31,7 @@ def old_wiki_path_with_namespace end def new_wiki_path_with_namespace - project.wiki.full_path + 
"#{project.disk_path}.wiki" end end end diff --git a/app/services/projects/hashed_storage_migration_service.rb b/app/services/projects/hashed_storage_migration_service.rb new file mode 100644 index 0000000000000000000000000000000000000000..4445560c6b42c508caf8d0189704a51a6cb6a110 --- /dev/null +++ b/app/services/projects/hashed_storage_migration_service.rb @@ -0,0 +1,70 @@ +module Projects + class HashedStorageMigrationService < BaseService + include Gitlab::ShellAdapter + + prepend ::EE::Projects::HashedStorageMigrationService + + attr_reader :old_disk_path, :new_disk_path + + def initialize(project, logger = nil) + @project = project + @logger ||= Rails.logger + end + + def execute + return if project.hashed_storage? + + @old_disk_path = project.disk_path + has_wiki = project.wiki.repository_exists? + + project.storage_version = Storage::HashedProject::STORAGE_VERSION + project.ensure_storage_path_exists + + @new_disk_path = project.disk_path + + result = move_repository(@old_disk_path, @new_disk_path) + + if has_wiki + result &&= move_repository("#{@old_disk_path}.wiki", "#{@new_disk_path}.wiki") + end + + unless result + rollback_folder_move + return + end + + project.repository_read_only = false + project.save! + + block_given? ? yield : result + end + + private + + def move_repository(from_name, to_name) + from_exists = gitlab_shell.exists?(project.repository_storage_path, "#{from_name}.git") + to_exists = gitlab_shell.exists?(project.repository_storage_path, "#{to_name}.git") + + # If we don't find the repository on either original or target we should log that as it could be an issue if the + # project was not originally empty. + if !from_exists && !to_exists + logger.warn "Can't find a repository on either source or target paths for #{project.full_path} (ID=#{project.id}) ..." + return false + elsif !from_exists + # Repository have been moved already. 
+ return true + end + + gitlab_shell.mv_repository(project.repository_storage_path, from_name, to_name) + end + + def rollback_folder_move + move_repository(@new_disk_path, @old_disk_path) + move_repository("#{@new_disk_path}.wiki", "#{@old_disk_path}.wiki") + end + + def logger + @logger + end + end +end diff --git a/app/workers/project_migrate_hashed_storage_worker.rb b/app/workers/project_migrate_hashed_storage_worker.rb new file mode 100644 index 0000000000000000000000000000000000000000..ca276d7801cfaa28a493b3d3bdf3b7d3d5d3f5f3 --- /dev/null +++ b/app/workers/project_migrate_hashed_storage_worker.rb @@ -0,0 +1,11 @@ +class ProjectMigrateHashedStorageWorker + include Sidekiq::Worker + include DedicatedSidekiqQueue + + def perform(project_id) + project = Project.find_by(id: project_id) + return if project.nil? || project.pending_delete? + + ::Projects::HashedStorageMigrationService.new(project, logger).execute + end +end diff --git a/app/workers/storage_migrator_worker.rb b/app/workers/storage_migrator_worker.rb new file mode 100644 index 0000000000000000000000000000000000000000..b48ead799b91e78f2f3a604222bdc3042d6ab62a --- /dev/null +++ b/app/workers/storage_migrator_worker.rb @@ -0,0 +1,30 @@ +class StorageMigratorWorker + include Sidekiq::Worker + include DedicatedSidekiqQueue + + BATCH_SIZE = 100 + + def perform(start, finish) + projects = build_relation(start, finish) + + projects.with_route.find_each(batch_size: BATCH_SIZE) do |project| + Rails.logger.info "Starting storage migration of #{project.full_path} (ID=#{project.id})..." + + begin + project.migrate_to_hashed_storage! 
+ rescue => err + Rails.logger.error("#{err.message} migrating storage of #{project.full_path} (ID=#{project.id}), trace - #{err.backtrace}") + end + end + end + + def build_relation(start, finish) + relation = Project + table = Project.arel_table + + relation = relation.where(table[:id].gteq(start)) if start + relation = relation.where(table[:id].lteq(finish)) if finish + + relation + end +end diff --git a/changelogs/unreleased/hashed-storage-migration-path.yml b/changelogs/unreleased/hashed-storage-migration-path.yml new file mode 100644 index 0000000000000000000000000000000000000000..5890eb09c38398b114de81d00175b7857f622eb9 --- /dev/null +++ b/changelogs/unreleased/hashed-storage-migration-path.yml @@ -0,0 +1,5 @@ +--- +title: Script to migrate project's repositories to new Hashed Storage +merge_request: 14067 +author: +type: added diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml index b453fd9d47d3f777ae30bef532dc88b577b1cf22..3ad9a580c39ca65f32b0c73e029c014d336aaaf7 100644 --- a/config/sidekiq_queues.yml +++ b/config/sidekiq_queues.yml @@ -62,6 +62,8 @@ - [update_user_activity, 1] - [propagate_service_template, 1] - [background_migration, 1] + - [project_migrate_hashed_storage, 1] + - [storage_migrator, 1] # EE specific queues - [ldap_group_sync, 2] - [geo, 1] diff --git a/doc/administration/raketasks/storage.md b/doc/administration/raketasks/storage.md new file mode 100644 index 0000000000000000000000000000000000000000..bac8fa4bd9dd685fe6f7b12e8fc7735e02bbeee3 --- /dev/null +++ b/doc/administration/raketasks/storage.md @@ -0,0 +1,107 @@ +# Repository Storage Rake Tasks + +This is a collection of rake tasks you can use to help you list and migrate +existing projects from Legacy storage to the new Hashed storage type. + +You can read more about the storage types [here][storage-types]. 
+ +## List projects on Legacy storage + +To have a simple summary of projects using **Legacy** storage: + +**Omnibus Installation** + +```bash +gitlab-rake gitlab:storage:legacy_projects +``` + +**Source Installation** + +```bash +rake gitlab:storage:legacy_projects + +``` + +------ + +To list projects using **Legacy** storage: + +**Omnibus Installation** + +```bash +gitlab-rake gitlab:storage:list_legacy_projects +``` + +**Source Installation** + +```bash +rake gitlab:storage:list_legacy_projects + +``` + +## List projects on Hashed storage + +To have a simple summary of projects using **Hashed** storage: + +**Omnibus Installation** + +```bash +gitlab-rake gitlab:storage:hashed_projects +``` + +**Source Installation** + +```bash +rake gitlab:storage:hashed_projects + +``` + +------ + +To list projects using **Hashed** storage: + +**Omnibus Installation** + +```bash +gitlab-rake gitlab:storage:list_hashed_projects +``` + +**Source Installation** + +```bash +rake gitlab:storage:list_hashed_projects + +``` + +## Migrate existing projects to Hashed storage + +Before migrating your existing projects, you should +[enable hashed storage][storage-migration] for the new projects as well. + +This task will schedule all your existing projects to be migrated to the +**Hashed** storage type: + +**Omnibus Installation** + +```bash +gitlab-rake gitlab:storage:migrate_to_hashed +``` + +**Source Installation** + +```bash +rake gitlab:storage:migrate_to_hashed + +``` + +You can monitor the progress in the _Admin > Monitoring > Background jobs_ screen. +There is a specific Queue you can watch to see how long it will take to finish: **project_migrate_hashed_storage** + +After it reaches zero, you can confirm every project has been migrated by running the commands above. +If you find it necessary, you can run this migration script again to schedule missing projects. + +Any error or warning will be logged in the sidekiq log file. 
+ + +[storage-types]: ../repository_storage_types.md +[storage-migration]: ../repository_storage_types.md#how-to-migrate-to-hashed-storage diff --git a/doc/administration/repository_storage_types.md b/doc/administration/repository_storage_types.md new file mode 100644 index 0000000000000000000000000000000000000000..fa882bbe28a768f07dc26c1f6e2d0cbfac342a21 --- /dev/null +++ b/doc/administration/repository_storage_types.md @@ -0,0 +1,69 @@ +# Repository Storage Types + +> [Introduced][ce-28283] in GitLab 10.0. + +## Legacy Storage + +Legacy Storage is the storage behavior prior to version 10.0. For historical reasons, GitLab replicated the same +mapping structure from the project URLs: + + * Project's repository: `#{namespace}/#{project_name}.git` + * Project's wiki: `#{namespace}/#{project_name}.wiki.git` + +This structure made it simple to migrate from existing solutions to GitLab and easy for Administrators to find where the +repository is stored. + +On the other hand this has some drawbacks: + +Storage location will concentrate a huge number of top-level namespaces. The impact can be reduced by the introduction of [multiple storage paths][storage-paths]. + +Because Backups are a snapshot of the same URL mapping, if you try to recover a very old backup, you need to verify +if any project has taken the place of an old removed project sharing the same URL. This means that `mygroup/myproject` +from your backup may not be the same original project that is today in the same URL. + +Any change in the URL will need to be reflected on disk (when groups / users or projects are renamed). This can add a lot +of load in big installations, and can be even worse if they are using any type of network-based filesystem. + +Last, for GitLab Geo, this storage type means we have to synchronize the disk state, replicate renames in the correct +order or we may end up with the wrong repository or missing data temporarily. 
+ +## Hashed Storage + +Hashed Storage is the new storage behavior we are rolling out with 10.0. It's not enabled by default yet, but we +encourage everyone to try it and take the time to fix any script you may have that depends on the old behavior. + +Instead of coupling project URL and the folder structure where the repository will be stored on disk, we are coupling +a hash, based on the project's ID. + +This makes the folder structure immutable, and therefore eliminates any requirement to synchronize state from URLs to +disk structure. This means that renaming a group, user or project will cost only the database transaction, and will take +effect immediately. + +The hash also helps to spread the repositories more evenly on the disk, so the top-level directory will contain fewer +folders than the total number of top-level namespaces. + +Hash format is based on hexadecimal representation of SHA256: `SHA256(project.id)`. +Top-level folder uses first 2 characters, followed by another folder with the next 2 characters. They are both stored in +a special folder `@hashed`, to co-exist with existing Legacy projects: + +```ruby +# Project's repository: +"@hashed/#{hash[0..1]}/#{hash[2..3]}/#{hash}.git" + +# Wiki's repository: +"@hashed/#{hash[0..1]}/#{hash[2..3]}/#{hash}.wiki.git" +``` + +This new format also makes it possible to restore backups with confidence, as when restoring a repository from the backup, +you will never mistakenly restore a repository in the wrong project (considering the backup is made after the migration). + +### How to migrate to Hashed Storage + +In GitLab, go to **Admin > Settings**, find the **Repository Storage** section and select +"_Create new projects using hashed storage paths_". + +To migrate your existing projects to the new storage type, check the specific [rake tasks]. 
+ +[ce-28283]: https://gitlab.com/gitlab-org/gitlab-ce/issues/28283 +[rake tasks]: raketasks/storage.md#migrate-existing-projects-to-hashed-storage +[storage-paths]: repository_storage_types.md diff --git a/doc/gitlab-geo/README.md b/doc/gitlab-geo/README.md index eeb85d023e8e3541a2c2b641b0318446e34d435a..3c07478c78c03a787c492b0fe03f74aef7a06320 100644 --- a/doc/gitlab-geo/README.md +++ b/doc/gitlab-geo/README.md @@ -9,7 +9,7 @@ and there is significant chance of data loss. For the latest updates, check the > **Notes:** - GitLab Geo is part of [GitLab Enterprise Edition Premium][ee]. - Introduced in GitLab Enterprise Edition 8.9. - We recommend you use it with at least GitLab Enterprise Edition 8.14 for + We recommend you use it with at least GitLab Enterprise Edition 10.0 for basic Geo features, or latest version for a better experience. - You should make sure that all nodes run the same GitLab version. diff --git a/doc/gitlab-geo/configuration.md b/doc/gitlab-geo/configuration.md index 32228bcd3be06c63187e3590eb9a86720a94db78..eb72aa2012b18cc64bcae79081e257a84319f856 100644 --- a/doc/gitlab-geo/configuration.md +++ b/doc/gitlab-geo/configuration.md @@ -157,7 +157,19 @@ Regenerate the keys for `~/.ssh/authorized_keys` This will enable `git` operations to authorize against your existing users. New users and SSH keys updated after this step, will be replicated automatically. -### Step 5. Enabling the secondary GitLab node +### Step 5. Enabling hashed storage (from GitLab 10.0) + +1. Visit the **primary** node's **Admin Area ➔ Settings** + (`/admin/application_settings`) in your browser +1. In the `Repository Storages` section, check `Create new projects using hashed storage paths`: + + ![](img/hashed-storage.png) + +Using hashed storage significantly improves Geo replication - project and group +renames no longer require synchronization between nodes - so we recommend it is +used for all GitLab Geo installations. + +### Step 6. 
Enabling the secondary GitLab node 1. SSH into the **secondary** node and login as root: @@ -194,7 +206,7 @@ The two most obvious issues that replication can have here are: [Troubleshooting](#troubleshooting) section) - Instance is firewalled (check your firewall rules) -### Step 6. Replicating the repositories data +### Step 7. Replicating the repositories data Getting a new secondary Geo node up and running, will also require the repositories data to be synced. @@ -284,7 +296,7 @@ namespaces to be replicated. ## Adding another secondary Geo node To add another Geo node in an already Geo configured infrastructure, just follow -[the steps starting form step 2](#step-2-updating-the-known_hosts-file-of-the-secondary-nodes). +[the steps starting from step 2](#step-2-updating-the-known_hosts-file-of-the-secondary-nodes). Just omit the first step that sets up the primary node. ## Additional information for the SSH key pairs @@ -300,6 +312,18 @@ not create them manually. ### Upgrading Geo +#### Upgrading to GitLab 10.1 + +[Hashed storage](../administration/repository_storage_types.md) was introduced +in GitLab 10.0, and a [migration path](../administration/raketasks/storage.md) +for existing repositories was added in GitLab 10.1. + +After upgrading to GitLab 10.1, we recommend that you +[enable hashed storage for all new projects](#step-5-enabling-hashed-storage-from-gitlab-100), +then [migrate existing projects to hashed storage](../administration/raketasks/storage.md). +This will significantly reduce the amount of synchronization required between +nodes in the event of project or group renames. 
+ #### Upgrading to GitLab 10.0 Since GitLab 10.0, we require all **Geo** systems to [use SSH key lookups via diff --git a/doc/gitlab-geo/configuration_source.md b/doc/gitlab-geo/configuration_source.md index 11493a6049a079ce1782d124f78dd2f6e43c6c0f..19f83ea2338a66b619f3df3eef995cabc06542af 100644 --- a/doc/gitlab-geo/configuration_source.md +++ b/doc/gitlab-geo/configuration_source.md @@ -150,7 +150,20 @@ Regenerate the keys for `~/.ssh/authorized_keys` This will enable `git` operations to authorize against your existing users. New users and SSH keys updated after this step, will be replicated automatically. -### Step 5. Enabling the secondary GitLab node +### Step 5. Enabling hashed storage (from GitLab 10.0) + +1. Visit the **primary** node's **Admin Area ➔ Settings** + (`/admin/application_settings`) in your browser +1. In the `Repository Storages` section, check `Create new projects using hashed storage paths`: + + ![](img/hashed-storage.png) + +Using hashed storage significantly improves Geo replication - project and group +renames no longer require synchronization between nodes - so we recommend it is +used for all GitLab Geo installations. + + +### Step 6. Enabling the secondary GitLab node 1. SSH into the **secondary** node and login as root: @@ -195,7 +208,7 @@ The two most obvious issues that replication can have here are: [Troubleshooting](configuration.md#troubleshooting) section) - Instance is firewalled (check your firewall rules) -### Step 6. Replicating the repositories data +### Step 7. Replicating the repositories data Getting a new secondary Geo node up and running, will also require the repositories data to be synced. 
diff --git a/doc/gitlab-geo/img/hashed-storage.png b/doc/gitlab-geo/img/hashed-storage.png new file mode 100644 index 0000000000000000000000000000000000000000..3afc9443395efb48034bddc1ee5c9b01ff0889b7 Binary files /dev/null and b/doc/gitlab-geo/img/hashed-storage.png differ diff --git a/ee/app/services/ee/projects/hashed_storage_migration_service.rb b/ee/app/services/ee/projects/hashed_storage_migration_service.rb new file mode 100644 index 0000000000000000000000000000000000000000..a91c023f541938875c58eb10208e6d77b19e72a7 --- /dev/null +++ b/ee/app/services/ee/projects/hashed_storage_migration_service.rb @@ -0,0 +1,17 @@ +module EE + module Projects + module HashedStorageMigrationService + def execute + raise NotImplementedError.new unless defined?(super) + + super do + ::Geo::RepositoryRenamedEventStore.new( + project, + old_path: File.basename(old_disk_path), + old_path_with_namespace: old_disk_path + ).create + end + end + end + end +end diff --git a/lib/tasks/gitlab/storage.rake b/lib/tasks/gitlab/storage.rake new file mode 100644 index 0000000000000000000000000000000000000000..e05be4a34053a27ed6d78fab89993964b5530760 --- /dev/null +++ b/lib/tasks/gitlab/storage.rake @@ -0,0 +1,85 @@ +namespace :gitlab do + namespace :storage do + desc 'GitLab | Storage | Migrate existing projects to Hashed Storage' + task migrate_to_hashed: :environment do + legacy_projects_count = Project.with_legacy_storage.count + + if legacy_projects_count == 0 + puts 'There are no projects using legacy storage. Nothing to do!' + + next + end + + print "Enqueuing migration of #{legacy_projects_count} projects in batches of #{batch_size}" + + project_id_batches do |start, finish| + StorageMigratorWorker.perform_async(start, finish) + + print '.' + end + + puts ' Done!' 
+ end + + desc 'Gitlab | Storage | Summary of existing projects using Legacy Storage' + task legacy_projects: :environment do + projects_summary(Project.with_legacy_storage) + end + + desc 'Gitlab | Storage | List existing projects using Legacy Storage' + task list_legacy_projects: :environment do + projects_list(Project.with_legacy_storage) + end + + desc 'Gitlab | Storage | Summary of existing projects using Hashed Storage' + task hashed_projects: :environment do + projects_summary(Project.with_hashed_storage) + end + + desc 'Gitlab | Storage | List existing projects using Hashed Storage' + task list_hashed_projects: :environment do + projects_list(Project.with_hashed_storage) + end + + def batch_size + ENV.fetch('BATCH', 200).to_i + end + + def project_id_batches(&block) + Project.with_legacy_storage.in_batches(of: batch_size, start: ENV['ID_FROM'], finish: ENV['ID_TO']) do |relation| # rubocop: disable Cop/InBatches + ids = relation.pluck(:id) + + yield ids.min, ids.max + end + end + + def projects_summary(relation) + projects_count = relation.count + puts "* Found #{projects_count} projects".color(:green) + + projects_count + end + + def projects_list(relation) + projects_count = projects_summary(relation) + + projects = relation.with_route + limit = ENV.fetch('LIMIT', 500).to_i + + return unless projects_count > 0 + + puts " ! Displaying first #{limit} projects..." 
if projects_count > limit + + counter = 0 + projects.find_in_batches(batch_size: batch_size) do |batch| + batch.each do |project| + counter += 1 + + puts " - #{project.full_path} (id: #{project.id})".color(:red) + + return if counter >= limit # rubocop:disable Lint/NonLocalExitFromIterator + end + end + end + end +end diff --git a/spec/ee/spec/services/ee/projects/hashed_storage_migration_service_spec.rb b/spec/ee/spec/services/ee/projects/hashed_storage_migration_service_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..6b8ac6a635af54cf3080238a0ddca39e4b9f3d57 --- /dev/null +++ b/spec/ee/spec/services/ee/projects/hashed_storage_migration_service_spec.rb @@ -0,0 +1,39 @@ +require 'spec_helper' + +describe Projects::HashedStorageMigrationService do + let(:project) { create(:project, :empty_repo, :wiki_repo) } + let(:service) { described_class.new(project) } + let(:legacy_storage) { Storage::LegacyProject.new(project) } + let(:hashed_storage) { Storage::HashedProject.new(project) } + + describe '#execute' do + it 'creates a Geo::RepositoryRenamedEvent on success' do + allow(Gitlab::Geo).to receive(:primary?).and_return(true) + + expect { service.execute }.to change { Geo::EventLog.count }.by(1) + + event = Geo::EventLog.first.event + + expect(event).to be_a(Geo::RepositoryRenamedEvent) + expect(event).to have_attributes( + old_path: project.path, + new_path: project.path, + old_path_with_namespace: legacy_storage.disk_path, + new_path_with_namespace: hashed_storage.disk_path, + old_wiki_path_with_namespace: legacy_storage.disk_path + '.wiki', + new_wiki_path_with_namespace: hashed_storage.disk_path + '.wiki' + ) + end + + it 'does not create a Geo event on failure' do + from_name = project.disk_path + to_name = hashed_storage.disk_path + + allow(service).to receive(:move_repository).and_call_original + allow(service).to receive(:move_repository).with(from_name, to_name).once { false } # will disable first move only + + allow(Gitlab::Geo).to 
receive(:primary?).and_return(true) + expect { service.execute }.not_to change { Geo::EventLog.count } + end + end +end diff --git a/spec/factories/projects.rb b/spec/factories/projects.rb index e88abed0fb15db51c1240035efe5176ad18b01ce..61b1426e68151188f3da9e61222c691fa6eab27c 100644 --- a/spec/factories/projects.rb +++ b/spec/factories/projects.rb @@ -168,7 +168,13 @@ end end - trait :read_only_repository do + trait :wiki_repo do + after(:create) do |project| + raise 'Failed to create wiki repository!' unless project.create_wiki + end + end + + trait :readonly do repository_read_only true end diff --git a/spec/lib/gitlab/git_access_spec.rb b/spec/lib/gitlab/git_access_spec.rb index 5c9e1179b78bfc8592a430fd17fe84949044323e..95d0c86205989ede3e71f3b13b966d3795803e84 100644 --- a/spec/lib/gitlab/git_access_spec.rb +++ b/spec/lib/gitlab/git_access_spec.rb @@ -944,7 +944,7 @@ def self.run_group_permission_checks(permissions_matrix) end context 'when the repository is read only' do - let(:project) { create(:project, :repository, :read_only_repository) } + let(:project) { create(:project, :repository, :readonly) } it 'denies push access' do project.add_master(user) diff --git a/spec/models/project_spec.rb b/spec/models/project_spec.rb index ef6e04c4f206da51b59f0468f0d45f18863f616b..0eaeaeb4102c9ba1e5ddb1eeb585ca6666036640 100644 --- a/spec/models/project_spec.rb +++ b/spec/models/project_spec.rb @@ -2842,10 +2842,22 @@ def enable_lfs describe '#legacy_storage?' do it 'returns true when storage_version is nil' do - project = build(:project) + project = build(:project, storage_version: nil) expect(project.legacy_storage?).to be_truthy end + + it 'returns true when the storage_version is 0' do + project = build(:project, storage_version: 0) + + expect(project.legacy_storage?).to be_truthy + end + end + + describe '#hashed_storage?' 
do + it 'returns false' do + expect(project.hashed_storage?).to be_falsey + end end describe '#rename_repo' do @@ -2904,6 +2916,38 @@ def enable_lfs expect(project.pages_path).to eq(File.join(Settings.pages.path, project.namespace.full_path, project.path)) end end + + describe '#migrate_to_hashed_storage!' do + it 'returns true' do + expect(project.migrate_to_hashed_storage!).to be_truthy + end + + it 'flags as readonly' do + expect { project.migrate_to_hashed_storage! }.to change { project.repository_read_only }.to(true) + end + + it 'schedules ProjectMigrateHashedStorageWorker with delayed start when the project repo is in use' do + Gitlab::ReferenceCounter.new(project.gl_repository(is_wiki: false)).increase + + expect(ProjectMigrateHashedStorageWorker).to receive(:perform_in) + + project.migrate_to_hashed_storage! + end + + it 'schedules ProjectMigrateHashedStorageWorker with delayed start when the wiki repo is in use' do + Gitlab::ReferenceCounter.new(project.gl_repository(is_wiki: true)).increase + + expect(ProjectMigrateHashedStorageWorker).to receive(:perform_in) + + project.migrate_to_hashed_storage! + end + + it 'schedules ProjectMigrateHashedStorageWorker' do + expect(ProjectMigrateHashedStorageWorker).to receive(:perform_async).with(project.id) + + project.migrate_to_hashed_storage! + end + end end context 'hashed storage' do @@ -2917,6 +2961,18 @@ def enable_lfs allow(project).to receive(:gitlab_shell).and_return(gitlab_shell) end + describe '#legacy_storage?' do + it 'returns false' do + expect(project.legacy_storage?).to be_falsey + end + end + + describe '#hashed_storage?' 
do + it 'returns true' do + expect(project.hashed_storage?).to be_truthy + end + end + describe '#base_dir' do it 'returns base_dir based on hash of project id' do expect(project.base_dir).to eq('@hashed/6b/86') @@ -2987,6 +3043,26 @@ def enable_lfs expect(project.pages_path).to eq(File.join(Settings.pages.path, project.namespace.full_path, project.path)) end end + + describe '#migrate_to_hashed_storage!' do + it 'returns nil' do + expect(project.migrate_to_hashed_storage!).to be_nil + end + + it 'does not flag as readonly' do + expect { project.migrate_to_hashed_storage! }.not_to change { project.repository_read_only } + end + end + end + + describe '#gl_repository' do + let(:project) { create(:project) } + + it 'delegates to Gitlab::GlRepository.gl_repository' do + expect(Gitlab::GlRepository).to receive(:gl_repository).with(project, true) + + project.gl_repository(is_wiki: true) + end end describe '#has_ci?' do diff --git a/spec/services/geo/repository_renamed_event_store_spec.rb b/spec/services/geo/repository_renamed_event_store_spec.rb index 7513d26cef0bf2c5e2e412a1d1275968c4a9c506..c0f54231385980acefbbce5e9144bb2d0510eede 100644 --- a/spec/services/geo/repository_renamed_event_store_spec.rb +++ b/spec/services/geo/repository_renamed_event_store_spec.rb @@ -31,9 +31,9 @@ expect(event.repository_storage_name).to eq(project.repository_storage) expect(event.repository_storage_path).to eq(project.repository_storage_path) expect(event.old_path_with_namespace).to eq(old_path_with_namespace) - expect(event.new_path_with_namespace).to eq(project.full_path) + expect(event.new_path_with_namespace).to eq(project.disk_path) expect(event.old_wiki_path_with_namespace).to eq("#{old_path_with_namespace}.wiki") - expect(event.new_wiki_path_with_namespace).to eq("#{project.full_path}.wiki") + expect(event.new_wiki_path_with_namespace).to eq("#{project.disk_path}.wiki") expect(event.old_path).to eq(old_path) expect(event.new_path).to eq(project.path) end diff --git 
a/spec/services/projects/hashed_storage_migration_service_spec.rb b/spec/services/projects/hashed_storage_migration_service_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..1b61207b5502a343097780d4ded6a87965195b12 --- /dev/null +++ b/spec/services/projects/hashed_storage_migration_service_spec.rb @@ -0,0 +1,74 @@ +require 'spec_helper' + +describe Projects::HashedStorageMigrationService do + let(:gitlab_shell) { Gitlab::Shell.new } + let(:project) { create(:project, :empty_repo, :wiki_repo) } + let(:service) { described_class.new(project) } + let(:legacy_storage) { Storage::LegacyProject.new(project) } + let(:hashed_storage) { Storage::HashedProject.new(project) } + + describe '#execute' do + before do + allow(service).to receive(:gitlab_shell) { gitlab_shell } + end + + context 'when succeeds' do + it 'renames project and wiki repositories' do + service.execute + + expect(gitlab_shell.exists?(project.repository_storage_path, "#{hashed_storage.disk_path}.git")).to be_truthy + expect(gitlab_shell.exists?(project.repository_storage_path, "#{hashed_storage.disk_path}.wiki.git")).to be_truthy + end + + it 'updates project to be hashed and not readonly' do + service.execute + + expect(project.hashed_storage?).to be_truthy + expect(project.repository_read_only).to be_falsey + end + + it 'move operation is called for both repositories' do + expect_move_repository(project.disk_path, hashed_storage.disk_path) + expect_move_repository("#{project.disk_path}.wiki", "#{hashed_storage.disk_path}.wiki") + + service.execute + end + end + + context 'when one move fails' do + it 'rollsback repositories to original name' do + from_name = project.disk_path + to_name = hashed_storage.disk_path + allow(service).to receive(:move_repository).and_call_original + allow(service).to receive(:move_repository).with(from_name, to_name).once { false } # will disable first move only + + expect(service).to receive(:rollback_folder_move).and_call_original + + 
service.execute + + expect(gitlab_shell.exists?(project.repository_storage_path, "#{hashed_storage.disk_path}.git")).to be_falsey + expect(gitlab_shell.exists?(project.repository_storage_path, "#{hashed_storage.disk_path}.wiki.git")).to be_falsey + end + + context 'when rollback fails' do + before do + from_name = legacy_storage.disk_path + to_name = hashed_storage.disk_path + + hashed_storage.ensure_storage_path_exists + gitlab_shell.mv_repository(project.repository_storage_path, from_name, to_name) + end + + it 'does not try to move nil repository over hashed' do + expect_move_repository("#{project.disk_path}.wiki", "#{hashed_storage.disk_path}.wiki") + + service.execute + end + end + end + + def expect_move_repository(from_name, to_name) + expect(gitlab_shell).to receive(:mv_repository).with(project.repository_storage_path, from_name, to_name).and_call_original + end + end +end diff --git a/spec/tasks/gitlab/storage_rake_spec.rb b/spec/tasks/gitlab/storage_rake_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..f59792c3d367cc68075c5cefdacd462e086a90c0 --- /dev/null +++ b/spec/tasks/gitlab/storage_rake_spec.rb @@ -0,0 +1,52 @@ +require 'rake_helper' + +describe 'gitlab:storage rake tasks' do + before do + Rake.application.rake_require 'tasks/gitlab/storage' + + stub_warn_user_is_not_gitlab + end + + describe 'migrate_to_hashed rake task' do + context '0 legacy projects' do + it 'does nothing' do + expect(StorageMigratorWorker).not_to receive(:perform_async) + + run_rake_task('gitlab:storage:migrate_to_hashed') + end + end + + context '5 legacy projects' do + let(:projects) { create_list(:project, 5, storage_version: 0) } + + context 'in batches of 1' do + before do + stub_env('BATCH' => 1) + end + + it 'enqueues one StorageMigratorWorker per project' do + projects.each do |project| + expect(StorageMigratorWorker).to receive(:perform_async).with(project.id, project.id) + end + + run_rake_task('gitlab:storage:migrate_to_hashed') + end + 
end + + context 'in batches of 2' do + before do + stub_env('BATCH' => 2) + end + + it 'enqueues one StorageMigratorWorker per 2 projects' do + projects.map(&:id).sort.each_slice(2) do |first, last| + last ||= first + expect(StorageMigratorWorker).to receive(:perform_async).with(first, last) + end + + run_rake_task('gitlab:storage:migrate_to_hashed') + end + end + end + end +end diff --git a/spec/workers/project_migrate_hashed_storage_worker_spec.rb b/spec/workers/project_migrate_hashed_storage_worker_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..f5226dee0addb5f04ca045617d55f0bae5b6cfd0 --- /dev/null +++ b/spec/workers/project_migrate_hashed_storage_worker_spec.rb @@ -0,0 +1,29 @@ +require 'spec_helper' + +describe ProjectMigrateHashedStorageWorker do + describe '#perform' do + let(:project) { create(:project, :empty_repo) } + let(:pending_delete_project) { create(:project, :empty_repo, pending_delete: true) } + + it 'skips when project no longer exists' do + nonexistent_id = 999999999999 + + expect(::Projects::HashedStorageMigrationService).not_to receive(:new) + subject.perform(nonexistent_id) + end + + it 'skips when project is pending delete' do + expect(::Projects::HashedStorageMigrationService).not_to receive(:new) + + subject.perform(pending_delete_project.id) + end + + it 'delegates removal to service class' do + service = double('service') + expect(::Projects::HashedStorageMigrationService).to receive(:new).with(project, subject.logger).and_return(service) + expect(service).to receive(:execute) + + subject.perform(project.id) + end + end +end diff --git a/spec/workers/storage_migrator_worker_spec.rb b/spec/workers/storage_migrator_worker_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..8619ff2f7daee4d9764434555698f16fc2708205 --- /dev/null +++ b/spec/workers/storage_migrator_worker_spec.rb @@ -0,0 +1,30 @@ +require 'spec_helper' + +describe StorageMigratorWorker do + subject(:worker) { 
described_class.new } + let(:projects) { create_list(:project, 2) } + + describe '#perform' do + let(:ids) { projects.map(&:id) } + + it 'enqueue jobs to ProjectMigrateHashedStorageWorker' do + expect(ProjectMigrateHashedStorageWorker).to receive(:perform_async).twice + + worker.perform(ids.min, ids.max) + end + + it 'sets projects as read only' do + allow(ProjectMigrateHashedStorageWorker).to receive(:perform_async).twice + worker.perform(ids.min, ids.max) + + projects.each do |project| + expect(project.reload.repository_read_only?).to be_truthy + end + end + + it 'rescues and log exceptions' do + allow_any_instance_of(Project).to receive(:migrate_to_hashed_storage!).and_raise(StandardError) + expect { worker.perform(ids.min, ids.max) }.not_to raise_error + end + end +end