Skip to content
Snippets Groups Projects
Commit 239fdc78 authored by Jan Provaznik's avatar Jan Provaznik
Browse files

Use FastDestroy for deleting uploads

It gathers list of file paths to delete before destroying
the parent object. Then after the parent_object is destroyed
these paths are scheduled for deletion asynchronously.

Carrierwave needed associated model for deleting upload file.
To avoid this requirement, simple Fog/File layer is used directly
for file deletion, this allows us to use just a simple list of paths.
parent c3bbad76
No related branches found
No related tags found
No related merge requests found
Showing
with 322 additions and 22 deletions
Loading
Loading
@@ -70,6 +70,7 @@ module FastDestroyAll
 
module Helpers
extend ActiveSupport::Concern
include AfterCommitQueue
 
class_methods do
##
Loading
Loading
Loading
Loading
@@ -17,6 +17,7 @@
 
module WithUploads
extend ActiveSupport::Concern
include FastDestroyAll::Helpers
 
# Currently there is no simple way how to select only not-mounted
# uploads, it should be all FileUploaders so we select them by
Loading
Loading
@@ -25,18 +26,9 @@ module WithUploads
 
included do
has_many :uploads, as: :model
has_many :file_uploads, -> { where(uploader: FILE_UPLOADERS) }, class_name: 'Upload', as: :model, dependent: :delete_all # rubocop:disable Cop/ActiveRecordDependent
 
before_destroy :destroy_file_uploads
end
# mounted uploads are deleted in carrierwave's after_commit hook,
# but FileUploaders which are not mounted must be deleted explicitly and
# it can not be done in after_commit because FileUploader requires loads
# associated model on destroy (which is already deleted in after_commit)
def destroy_file_uploads
self.uploads.where(uploader: FILE_UPLOADERS).find_each do |upload|
upload.destroy
end
use_fast_destroy :file_uploads
end
 
def retrieve_upload(_identifier, paths)
Loading
Loading
Loading
Loading
@@ -25,6 +25,25 @@ class Upload < ActiveRecord::Base
Digest::SHA256.file(path).hexdigest
end
 
class << self
##
# FastDestroyAll concerns
def begin_fast_destroy
{
Uploads::Local => Uploads::Local.new.keys(with_files_stored_locally),
Uploads::Fog => Uploads::Fog.new.keys(with_files_stored_remotely)
}
end
##
# FastDestroyAll concerns
def finalize_fast_destroy(keys)
keys.each do |store_class, paths|
store_class.new.delete_keys_async(paths)
end
end
end
def absolute_path
raise ObjectStorage::RemoteStoreError, "Remote object has no absolute path." unless local?
return path unless relative_path?
Loading
Loading
# frozen_string_literal: true
module Uploads
class Base
BATCH_SIZE = 100
attr_reader :logger
def initialize(logger: nil)
@logger ||= Rails.logger
end
def delete_keys_async(keys_to_delete)
keys_to_delete.each_slice(BATCH_SIZE) do |batch|
DeleteStoredFilesWorker.perform_async(self.class, batch)
end
end
end
end
# frozen_string_literal: true
module Uploads
class Fog < Base
include ::Gitlab::Utils::StrongMemoize
def available?
object_store.enabled
end
def keys(relation)
return [] unless available?
relation.pluck(:path)
end
def delete_keys(keys)
keys.each do |key|
connection.delete_object(bucket_name, key)
end
end
private
def object_store
Gitlab.config.uploads.object_store
end
def bucket_name
return unless available?
object_store.remote_directory
end
def connection
return unless available?
strong_memoize(:connection) do
::Fog::Storage.new(object_store.connection.to_hash.deep_symbolize_keys)
end
end
end
end
# frozen_string_literal: true
module Uploads
class Local < Base
def keys(relation)
relation.includes(:model).find_each.map {|u| u.absolute_path }
end
def delete_keys(keys)
keys.each do |path|
delete_file(path)
end
end
private
def delete_file(path)
unless exists?(path)
logger.warn("File '#{path}' doesn't exist, skipping")
return
end
unless in_uploads?(path)
message = "Path '#{path}' is not in uploads dir, skipping"
logger.warn(message)
Gitlab::Sentry.track_exception(RuntimeError.new(message), extra: { uploads_dir: storage_dir })
return
end
FileUtils.rm(path)
delete_dir!(File.dirname(path))
end
def exists?(path)
path.present? && File.exist?(path)
end
def in_uploads?(path)
path.start_with?(storage_dir)
end
def delete_dir!(path)
Dir.rmdir(path)
rescue Errno::ENOENT
# Ignore: path does not exist
rescue Errno::ENOTDIR
# Ignore: path is not a dir
rescue Errno::ENOTEMPTY, Errno::EEXIST
# Ignore: dir is not empty
end
def storage_dir
@storage_dir ||= File.realpath(Gitlab.config.uploads.storage_path)
end
end
end
Loading
Loading
@@ -134,3 +134,4 @@
- delete_diff_files
- detect_repository_languages
- repository_cleanup
- delete_stored_files
# frozen_string_literal: true
class DeleteStoredFilesWorker
include ApplicationWorker
def perform(class_name, keys)
klass = begin
class_name.constantize
rescue NameError
nil
end
unless klass
message = "Unknown class '#{class_name}'"
logger.error(message)
Gitlab::Sentry.track_exception(RuntimeError.new(message))
return
end
klass.new(logger: logger).delete_keys(keys)
end
end
---
title: Delete file uploads asynchronously when deleting a project or other resources.
merge_request: 20977
author:
type: added
Loading
Loading
@@ -82,3 +82,4 @@
- [detect_repository_languages, 1]
- [auto_devops, 2]
- [repository_cleanup, 1]
- [delete_stored_files, 1]
Loading
Loading
@@ -287,6 +287,7 @@ project:
- statistics
- container_repositories
- uploads
- file_uploads
- import_state
- members_and_requesters
- build_trace_section_names
Loading
Loading
Loading
Loading
@@ -20,7 +20,7 @@ describe Appearance do
end
 
context 'with uploads' do
it_behaves_like 'model with mounted uploader', false do
it_behaves_like 'model with uploads', false do
let(:model_object) { create(:appearance, :with_logo) }
let(:upload_attribute) { :logo }
let(:uploader_class) { AttachmentUploader }
Loading
Loading
Loading
Loading
@@ -739,7 +739,7 @@ describe Group do
end
 
context 'with uploads' do
it_behaves_like 'model with mounted uploader', true do
it_behaves_like 'model with uploads', true do
let(:model_object) { create(:group, :with_avatar) }
let(:upload_attribute) { :avatar }
let(:uploader_class) { AttachmentUploader }
Loading
Loading
Loading
Loading
@@ -3898,7 +3898,7 @@ describe Project do
end
 
context 'with uploads' do
it_behaves_like 'model with mounted uploader', true do
it_behaves_like 'model with uploads', true do
let(:model_object) { create(:project, :with_avatar) }
let(:upload_attribute) { :avatar }
let(:uploader_class) { AttachmentUploader }
Loading
Loading
# frozen_string_literal: true
require 'spec_helper'
describe Uploads::Fog do
let(:data_store) { described_class.new }
before do
stub_uploads_object_storage(FileUploader)
end
describe '#available?' do
subject { data_store.available? }
context 'when object storage is enabled' do
it { is_expected.to be_truthy }
end
context 'when object storage is disabled' do
before do
stub_uploads_object_storage(FileUploader, enabled: false)
end
it { is_expected.to be_falsy }
end
end
context 'model with uploads' do
let(:project) { create(:project) }
let(:relation) { project.uploads }
describe '#keys' do
let!(:uploads) { create_list(:upload, 2, :object_storage, uploader: FileUploader, model: project) }
subject { data_store.keys(relation) }
it 'returns keys' do
is_expected.to match_array(relation.pluck(:path))
end
end
describe '#delete_keys' do
let(:keys) { data_store.keys(relation) }
let!(:uploads) { create_list(:upload, 2, :with_file, :issuable_upload, model: project) }
subject { data_store.delete_keys(keys) }
before do
uploads.each { |upload| upload.build_uploader.migrate!(2) }
end
it 'deletes multiple data' do
paths = relation.pluck(:path)
::Fog::Storage.new(FileUploader.object_store_credentials).tap do |connection|
paths.each do |path|
expect(connection.get_object('uploads', path)[:body]).not_to be_nil
end
end
subject
::Fog::Storage.new(FileUploader.object_store_credentials).tap do |connection|
paths.each do |path|
expect { connection.get_object('uploads', path)[:body] }.to raise_error(Excon::Error::NotFound)
end
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
describe Uploads::Local do
let(:data_store) { described_class.new }
before do
stub_uploads_object_storage(FileUploader)
end
context 'model with uploads' do
let(:project) { create(:project) }
let(:relation) { project.uploads }
describe '#keys' do
let!(:uploads) { create_list(:upload, 2, uploader: FileUploader, model: project) }
subject { data_store.keys(relation) }
it 'returns keys' do
is_expected.to match_array(relation.map(&:absolute_path))
end
end
describe '#delete_keys' do
let(:keys) { data_store.keys(relation) }
let!(:uploads) { create_list(:upload, 2, :with_file, :issuable_upload, model: project) }
subject { data_store.delete_keys(keys) }
it 'deletes multiple data' do
paths = relation.map(&:absolute_path)
paths.each do |path|
expect(File.exist?(path)).to be_truthy
end
subject
paths.each do |path|
expect(File.exist?(path)).to be_falsey
end
end
end
end
end
Loading
Loading
@@ -3231,7 +3231,7 @@ describe User do
end
 
context 'with uploads' do
it_behaves_like 'model with mounted uploader', false do
it_behaves_like 'model with uploads', false do
let(:model_object) { create(:user, :with_avatar) }
let(:upload_attribute) { :avatar }
let(:uploader_class) { AttachmentUploader }
Loading
Loading
require 'spec_helper'
 
shared_examples_for 'model with mounted uploader' do |supports_fileuploads|
shared_examples_for 'model with uploads' do |supports_fileuploads|
describe '.destroy' do
before do
stub_uploads_object_storage(uploader_class)
Loading
Loading
@@ -8,16 +8,42 @@ shared_examples_for 'model with mounted uploader' do |supports_fileuploads|
model_object.public_send(upload_attribute).migrate!(ObjectStorage::Store::REMOTE)
end
 
it 'deletes remote uploads' do
expect_any_instance_of(CarrierWave::Storage::Fog::File).to receive(:delete).and_call_original
context 'with mounted uploader' do
it 'deletes remote uploads' do
expect_any_instance_of(CarrierWave::Storage::Fog::File).to receive(:delete).and_call_original
 
expect { model_object.destroy }.to change { Upload.count }.by(-1)
expect { model_object.destroy }.to change { Upload.count }.by(-1)
end
end
 
it 'deletes any FileUploader uploads which are not mounted', skip: !supports_fileuploads do
create(:upload, uploader: FileUploader, model: model_object)
context 'with not mounted uploads', :sidekiq, skip: !supports_fileuploads do
context 'with local files' do
let!(:uploads) { create_list(:upload, 2, uploader: FileUploader, model: model_object) }
 
expect { model_object.destroy }.to change { Upload.count }.by(-2)
it 'deletes any FileUploader uploads which are not mounted' do
expect { model_object.destroy }.to change { Upload.count }.by(-3)
end
it 'deletes local files' do
expect_any_instance_of(Uploads::Local).to receive(:delete_keys).with(uploads.map(&:absolute_path))
model_object.destroy
end
end
context 'with remote files' do
let!(:uploads) { create_list(:upload, 2, :object_storage, uploader: FileUploader, model: model_object) }
it 'deletes any FileUploader uploads which are not mounted' do
expect { model_object.destroy }.to change { Upload.count }.by(-3)
end
it 'deletes remote files' do
expect_any_instance_of(Uploads::Fog).to receive(:delete_keys).with(uploads.map(&:path))
model_object.destroy
end
end
end
end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment