Skip to content
Snippets Groups Projects
Commit 6aaeb6dc authored by Jarka Kadlecova's avatar Jarka Kadlecova
Browse files

Clean orphaned files in object storage

parent ab08f998
No related branches found
No related tags found
1 merge request!10495Merge Requests - Assignee
---
title: Clean orphaned files in object storage
merge_request: 20918
author:
type: added
Loading
Loading
@@ -52,4 +52,33 @@ D, [2018-07-27T12:08:33.293568 #89817] DEBUG -- : Processing batch of 500 projec
I, [2018-07-27T12:08:33.689869 #89817] INFO -- : Did move to lost and found /opt/gitlab/embedded/service/gitlab-rails/public/uploads/test.out -> /opt/gitlab/embedded/service/gitlab-rails/public/uploads/-/project-lost-found/test.out
I, [2018-07-27T12:08:33.755624 #89817] INFO -- : Did fix /opt/gitlab/embedded/service/gitlab-rails/public/uploads/foo/bar/89a0f7b0b97008a4a18cedccfdcd93fb/foo.txt -> /opt/gitlab/embedded/service/gitlab-rails/public/uploads/qux/foo/bar/89a0f7b0b97008a4a18cedccfdcd93fb/foo.txt
I, [2018-07-27T12:08:33.760257 #89817] INFO -- : Did move to lost and found /opt/gitlab/embedded/service/gitlab-rails/public/uploads/foo/bar/1dd6f0f7eefd2acc4c2233f89a0f7b0b/image.png -> /opt/gitlab/embedded/service/gitlab-rails/public/uploads/-/project-lost-found/foo/bar/1dd6f0f7eefd2acc4c2233f89a0f7b0b/image.png
```
\ No newline at end of file
```
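Remove orphaned object store upload files, that is, files that don't exist in the GitLab database. The files are not deleted outright: they are moved to a `lost_and_found` directory in the same bucket.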
```
# omnibus-gitlab
sudo gitlab-rake gitlab:cleanup:remote_upload_files
# installation from source
bundle exec rake gitlab:cleanup:remote_upload_files RAILS_ENV=production
```
Example output:
```
$ sudo gitlab-rake gitlab:cleanup:remote_upload_files
I, [2018-08-02T10:26:13.995978 #45011] INFO -- : Looking for orphaned remote uploads to remove. Dry run...
I, [2018-08-02T10:26:14.120400 #45011] INFO -- : Can be moved to lost and found: @hashed/6b/DSC_6152.JPG
I, [2018-08-02T10:26:14.120482 #45011] INFO -- : Can be moved to lost and found: @hashed/79/02/7902699be42c8a8e46fbbb4501726517e86b22c56a189f7625a6da49081b2451/711491b29d3eb08837798c4909e2aa4d/DSC00314.jpg
I, [2018-08-02T10:26:14.120634 #45011] INFO -- : To cleanup these files run this command with DRY_RUN=false
```
```
$ sudo gitlab-rake gitlab:cleanup:remote_upload_files DRY_RUN=false
I, [2018-08-02T10:26:47.598424 #45087] INFO -- : Looking for orphaned remote uploads to remove...
I, [2018-08-02T10:26:47.753131 #45087] INFO -- : Moved to lost and found: @hashed/6b/DSC_6152.JPG -> lost_and_found/@hashed/6b/DSC_6152.JPG
I, [2018-08-02T10:26:47.764356 #45087] INFO -- : Moved to lost and found: @hashed/79/02/7902699be42c8a8e46fbbb4501726517e86b22c56a189f7625a6da49081b2451/711491b29d3eb08837798c4909e2aa4d/DSC00314.jpg -> lost_and_found/@hashed/79/02/7902699be42c8a8e46fbbb4501726517e86b22c56a189f7625a6da49081b2451/711491b29d3eb08837798c4909e2aa4d/DSC00314.jpg
```
# frozen_string_literal: true
module Gitlab
  module Cleanup
    # Finds files in the uploads object-storage bucket that have no matching
    # remote Upload record (matched by path) and moves them under the
    # lost_and_found/ prefix of the same bucket.
    class RemoteUploads
      attr_reader :logger

      # Number of remote files checked against the uploads table per query.
      BATCH_SIZE = 100

      # @param logger [#info, #warn, nil] Rails.logger is used when nil
      def initialize(logger: nil)
        @logger = logger || Rails.logger
      end

      # Logs every orphaned remote file and, unless dry_run is set, moves it
      # to the lost and found directory. Does nothing except log a warning
      # when object storage for uploads is not enabled.
      #
      # @param dry_run [Boolean] when true, only report what would be moved
      def run!(dry_run: false)
        unless configuration.enabled
          logger.warn "Object storage not enabled. Exit".color(:yellow)

          return
        end

        logger.info "Looking for orphaned remote uploads to remove#{'. Dry run' if dry_run}..."

        each_orphan_file do |file|
          info = if dry_run
                   "Can be moved to lost and found: #{file.key}"
                 else
                   new_path = move_to_lost_and_found(file)
                   "Moved to lost and found: #{file.key} -> #{new_path}"
                 end

          logger.info(info)
        end
      end

      private

      # Yields each remote file that is outside the lost and found directory
      # and has no Upload row with store REMOTE and the same path.
      def each_orphan_file
        # Files already moved to the lost_and_found directory are skipped.
        # A plain prefix check is used rather than a `^`-anchored regex: in
        # Ruby `^` matches after any newline, so a key that merely contains
        # "\nlost_and_found/" would wrongly be treated as already moved.
        lost_dir_prefix = "#{lost_and_found_dir}/"

        remote_directory.files.each_slice(BATCH_SIZE) do |remote_files|
          remote_files.reject! { |file| file.key.start_with?(lost_dir_prefix) }
          file_paths = remote_files.map(&:key)
          tracked_paths = Upload
            .where(store: ObjectStorage::Store::REMOTE, path: file_paths)
            .pluck(:path)

          remote_files.reject! { |file| tracked_paths.include?(file.key) }
          remote_files.each do |file|
            yield file
          end
        end
      end

      # Copies the file under the lost and found prefix within the configured
      # remote directory, then destroys the original.
      #
      # @return [String] the new key of the file
      def move_to_lost_and_found(file)
        new_path = "#{lost_and_found_dir}/#{file.key}"

        file.copy(configuration['remote_directory'], new_path)
        file.destroy

        new_path
      end

      def lost_and_found_dir
        'lost_and_found'
      end

      def remote_directory
        connection.directories.get(configuration['remote_directory'])
      end

      def connection
        ::Fog::Storage.new(configuration['connection'].symbolize_keys)
      end

      def configuration
        Gitlab.config.uploads.object_store
      end
    end
  end
end
Loading
Loading
@@ -116,6 +116,16 @@ namespace :gitlab do
end
end
 
# Runs Gitlab::Cleanup::RemoteUploads with the task's logger; whether it is a
# dry run is decided by `dry_run?` (defined elsewhere in this file).
desc 'GitLab | Cleanup | Clean orphan remote upload files that do not exist in the db'
task remote_upload_files: :environment do
  Gitlab::Cleanup::RemoteUploads.new(logger: logger).run!(dry_run: dry_run?)

  # After a dry run, tell the operator how to perform the actual cleanup.
  logger.info "To cleanup these files run this command with DRY_RUN=false".color(:yellow) if dry_run?
end
# True only when the REMOVE environment variable is exactly the string 'true';
# an unset variable or any other value yields false.
def remove?
  ENV.fetch('REMOVE', nil) == 'true'
end
Loading
Loading
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::Cleanup::RemoteUploads do
  context 'when object_storage is enabled' do
    let(:connection) { double }
    let(:directory) { double }

    # Only 'dir/file1' is tracked as a remote upload; 'dir/file2' is tracked
    # with the LOCAL store, so its remote counterpart counts as orphaned.
    let!(:uploads) do
      [
        create(:upload, path: 'dir/file1', store: ObjectStorage::Store::REMOTE),
        create(:upload, path: 'dir/file2', store: ObjectStorage::Store::LOCAL)
      ]
    end

    # Keys present in the (stubbed) bucket. The last one already sits in the
    # lost_and_found directory and must never be touched.
    let(:remote_files) do
      [
        double(key: 'dir/file1'),
        double(key: 'dir/file2'),
        double(key: 'dir/file3'),
        double(key: 'lost_and_found/dir/file3')
      ]
    end

    before do
      stub_uploads_object_storage(FileUploader)

      expect(::Fog::Storage).to receive(:new).and_return(connection)
      expect(connection).to receive(:directories).and_return(double(get: directory))
      expect(directory).to receive(:files).and_return(remote_files)
    end

    context 'when dry_run is set to false' do
      subject { described_class.new.run!(dry_run: false) }

      it 'moves files that are not in uploads table' do
        expect(remote_files[0]).not_to receive(:copy)
        expect(remote_files[0]).not_to receive(:destroy)
        expect(remote_files[1]).to receive(:copy)
        expect(remote_files[1]).to receive(:destroy)
        expect(remote_files[2]).to receive(:copy)
        expect(remote_files[2]).to receive(:destroy)
        expect(remote_files[3]).not_to receive(:copy)
        expect(remote_files[3]).not_to receive(:destroy)

        subject
      end
    end

    context 'when dry_run is set to true' do
      subject { described_class.new.run!(dry_run: true) }

      it 'does not move files' do
        expect(remote_files[0]).not_to receive(:copy)
        expect(remote_files[0]).not_to receive(:destroy)
        expect(remote_files[1]).not_to receive(:copy)
        expect(remote_files[1]).not_to receive(:destroy)
        expect(remote_files[2]).not_to receive(:copy)
        expect(remote_files[2]).not_to receive(:destroy)
        expect(remote_files[3]).not_to receive(:copy)
        expect(remote_files[3]).not_to receive(:destroy)

        subject
      end
    end
  end

  context 'when object_storage is not enabled' do
    # Without an explicit subject RSpec falls back to `described_class.new`,
    # which never calls #run! and would make the expectation below vacuous.
    subject { described_class.new.run!(dry_run: false) }

    before do
      # Same settings object the class reads in #configuration.
      allow(Gitlab.config.uploads.object_store).to receive(:enabled).and_return(false)
    end

    it 'does not connect to any storage' do
      expect(::Fog::Storage).not_to receive(:new)

      subject
    end
  end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment