Skip to content
Snippets Groups Projects
Unverified Commit 3aedccb1 authored by Zeger-Jan van de Weg's avatar Zeger-Jan van de Weg
Browse files

Port cleanup tasks to use Gitaly

Rake tasks cleaning up the Git storage were still using direct disk
access, which won't work if the storage disks aren't attached. To mitigate
this, a migration issue was created.

To port gitlab:cleanup:dirs, and gitlab:cleanup:repos, a new RPC was
required, ListDirectories. This was implemented in Gitaly, through
https://gitlab.com/gitlab-org/gitaly/merge_requests/868.

To be able to use the new RPC the Gitaly server was bumped to v0.120.

This is an RPC that will not use feature gates, as this doesn't scale on
.com so there is no way to test it at scale. Furthermore, we _know_ it
doesn't scale, but this might be a useful task for smaller instances.

Lastly, the tests are slightly updated to also work when the disk isn't
attached. Even though this is not planned, it was very little effort and
thus I applied the boy scout rule.

Closes https://gitlab.com/gitlab-org/gitaly/issues/954
Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/40529
parent c380d3ac
No related branches found
No related tags found
1 merge request!10495Merge Requests - Assignee
0.119.0
0.120.0
---
title: Administrative cleanup rake tasks now leverage Gitaly
merge_request: 21588
author:
type: changed
Loading
Loading
@@ -5,6 +5,14 @@ module Gitlab
@storage = storage
end
 
# Lists the directories found in the git storage directory, lexically ordered.
#
# Sends a ListDirectories request for this client's storage and merges the
# `paths` of every response returned by the call into one flat list.
#
# depth - how deep to list, forwarded unchanged to the request (default: 1).
#
# Returns a flat collection of directory paths.
def list_directories(depth: 1)
directory_request = Gitaly::ListDirectoriesRequest.new(storage_name: @storage, depth: depth)
responses = GitalyClient.call(@storage, :storage_service, :list_directories, directory_request)

responses.flat_map { |response| response.paths }
end
# Delete all repositories in the storage. This is a slow and VERY DESTRUCTIVE operation.
def delete_all_repositories
request = Gitaly::DeleteAllRepositoriesRequest.new(storage_name: @storage)
Loading
Loading
# Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/954
#
# frozen_string_literal: true
require 'set'
namespace :gitlab do
namespace :cleanup do
HASHED_REPOSITORY_NAME = '@hashed'.freeze
desc "GitLab | Cleanup | Clean namespaces"
task dirs: :gitlab_environment do
warn_user_is_not_gitlab
namespaces = Set.new(Namespace.pluck(:path))
namespaces << Storage::HashedProject::ROOT_PATH_PREFIX
 
namespaces = Namespace.pluck(:path)
namespaces << HASHED_REPOSITORY_NAME # add so that it will be ignored
Gitlab.config.repositories.storages.each do |name, repository_storage|
git_base_path = Gitlab::GitalyClient::StorageSettings.allow_disk_access { repository_storage.legacy_disk_path }
all_dirs = Dir.glob(git_base_path + '/*')
Gitaly::Server.all.each do |server|
all_dirs = Gitlab::GitalyClient::StorageService
.new(server.storage)
.list_directories(depth: 0)
.reject { |dir| dir.ends_with?('.git') || namespaces.include?(File.basename(dir)) }
 
puts git_base_path.color(:yellow)
puts "Looking for directories to remove... "
all_dirs.reject! do |dir|
# skip if git repo
dir =~ /.git$/
end
all_dirs.reject! do |dir|
dir_name = File.basename dir
# skip if namespace present
namespaces.include?(dir_name)
end
all_dirs.each do |dir_path|
if remove?
if FileUtils.rm_rf dir_path
puts "Removed...#{dir_path}".color(:red)
else
puts "Cannot remove #{dir_path}".color(:red)
begin
Gitlab::GitalyClient::NamespaceService.new(server.storage)
.remove(dir_path)
puts "Removed...#{dir_path}"
rescue StandardError => e
puts "Cannot remove #{dir_path}: #{e.message}".color(:red)
end
else
puts "Can be removed: #{dir_path}".color(:red)
Loading
Loading
@@ -49,29 +38,29 @@ namespace :gitlab do
 
desc "GitLab | Cleanup | Clean repositories"
task repos: :gitlab_environment do
warn_user_is_not_gitlab
move_suffix = "+orphaned+#{Time.now.to_i}"
Gitlab.config.repositories.storages.each do |name, repository_storage|
repo_root = Gitlab::GitalyClient::StorageSettings.allow_disk_access { repository_storage.legacy_disk_path }
# Look for global repos (legacy, depth 1) and normal repos (depth 2)
IO.popen(%W(find #{repo_root} -mindepth 1 -maxdepth 2 -name *.git)) do |find|
find.each_line do |path|
path.chomp!
repo_with_namespace = path
.sub(repo_root, '')
.sub(%r{^/*}, '')
.chomp('.git')
.chomp('.wiki')
# TODO ignoring hashed repositories for now. But revisit to fully support
# possible orphaned hashed repos
next if repo_with_namespace.start_with?("#{HASHED_REPOSITORY_NAME}/") || Project.find_by_full_path(repo_with_namespace)
new_path = path + move_suffix
puts path.inspect + ' -> ' + new_path.inspect
File.rename(path, new_path)
Gitaly::Server.all.each do |server|
Gitlab::GitalyClient::StorageService
.new(server.storage)
.list_directories
.each do |path|
repo_with_namespace = path.chomp('.git').chomp('.wiki')
# TODO ignoring hashed repositories for now. But revisit to fully support
# possible orphaned hashed repos
next if repo_with_namespace.start_with?(Storage::HashedProject::ROOT_PATH_PREFIX)
next if Project.find_by_full_path(repo_with_namespace)
new_path = path + move_suffix
puts path.inspect + ' -> ' + new_path.inspect
begin
Gitlab::GitalyClient::NamespaceService
.new(server.storage)
.rename(path, new_path)
rescue StandardError => e
puts "Error occured while moving the repository: #{e.message}".color(:red)
end
end
end
Loading
Loading
Loading
Loading
@@ -6,6 +6,8 @@ describe 'gitlab:cleanup rake tasks' do
end
 
describe 'cleanup namespaces and repos' do
let(:gitlab_shell) { Gitlab::Shell.new }
let(:storage) { storages.keys.first }
let(:storages) do
{
'default' => Gitlab::GitalyClient::StorageSettings.new(@default_storage_hash.merge('path' => 'tmp/tests/default_storage'))
Loading
Loading
@@ -17,53 +19,56 @@ describe 'gitlab:cleanup rake tasks' do
end
 
before do
FileUtils.mkdir(Settings.absolute('tmp/tests/default_storage'))
allow(Gitlab.config.repositories).to receive(:storages).and_return(storages)
end
 
after do
FileUtils.rm_rf(Settings.absolute('tmp/tests/default_storage'))
Gitlab::GitalyClient::StorageService.new(storage).delete_all_repositories
end
 
describe 'cleanup:repos' do
before do
FileUtils.mkdir_p(Settings.absolute('tmp/tests/default_storage/broken/project.git'))
FileUtils.mkdir_p(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git'))
gitlab_shell.add_namespace(storage, 'broken/project.git')
gitlab_shell.add_namespace(storage, '@hashed/12/34/5678.git')
end
 
it 'moves it to an orphaned path' do
run_rake_task('gitlab:cleanup:repos')
repo_list = Dir['tmp/tests/default_storage/broken/*']
now = Time.now
Timecop.freeze(now) do
run_rake_task('gitlab:cleanup:repos')
repo_list = Gitlab::GitalyClient::StorageService.new(storage).list_directories(depth: 0)
 
expect(repo_list.first).to include('+orphaned+')
expect(repo_list.last).to include("broken+orphaned+#{now.to_i}")
end
end
 
it 'ignores @hashed repos' do
run_rake_task('gitlab:cleanup:repos')
 
expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git'))).to be_truthy
expect(gitlab_shell.exists?(storage, '@hashed/12/34/5678.git')).to be(true)
end
end
 
describe 'cleanup:dirs' do
it 'removes missing namespaces' do
FileUtils.mkdir_p(Settings.absolute("tmp/tests/default_storage/namespace_1/project.git"))
FileUtils.mkdir_p(Settings.absolute("tmp/tests/default_storage/namespace_2/project.git"))
allow(Namespace).to receive(:pluck).and_return('namespace_1')
gitlab_shell.add_namespace(storage, "namespace_1/project.git")
gitlab_shell.add_namespace(storage, "namespace_2/project.git")
allow(Namespace).to receive(:pluck).and_return(['namespace_1'])
 
stub_env('REMOVE', 'true')
run_rake_task('gitlab:cleanup:dirs')
 
expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/namespace_1'))).to be_truthy
expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/namespace_2'))).to be_falsey
expect(gitlab_shell.exists?(storage, 'namespace_1')).to be(true)
expect(gitlab_shell.exists?(storage, 'namespace_2')).to be(false)
end
 
it 'ignores @hashed directory' do
FileUtils.mkdir_p(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git'))
gitlab_shell.add_namespace(storage, '@hashed/12/34/5678.git')
 
run_rake_task('gitlab:cleanup:dirs')
 
expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git'))).to be_truthy
expect(gitlab_shell.exists?(storage, '@hashed/12/34/5678.git')).to be(true)
end
end
end
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment