Skip to content
Snippets Groups Projects
Commit 908aacdd authored by Michael Kozono's avatar Michael Kozono
Browse files

Filter existing uploads with one query

parent 7549d17f
No related branches found
No related tags found
No related merge requests found
Loading
Loading
@@ -50,37 +50,25 @@ module Gitlab
}
].freeze
 
def ensure_tracked!
add_to_uploads_if_needed
delete
end
def add_to_uploads_if_needed
# Even though we are checking relative paths, path is enough to
# uniquely identify uploads. There is no ambiguity between
# FileUploader paths and other Uploader paths because we use the /-/
# separator kind of like an escape character. Project full_path will
# never conflict with an upload path starting with "uploads/-/".
Upload.
where(path: upload_path).
first_or_create!(
uploader: uploader,
model_type: model_type,
model_id: model_id,
size: file_size
)
def to_h
{
path: upload_path,
uploader: uploader,
model_type: model_type,
model_id: model_id,
size: file_size
}
end
 
def upload_path
# UntrackedFile#path is absolute, but Upload#path depends on uploader
if uploader == 'FileUploader'
# Path relative to project directory in uploads
matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH_PATTERN)
matchd[0].sub(%r{\A/}, '') # remove leading slash
else
path
end
@upload_path ||= if uploader == 'FileUploader'
# Path relative to project directory in uploads
matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH_PATTERN)
matchd[0].sub(%r{\A/}, '') # remove leading slash
else
path
end
end
 
def uploader
Loading
Loading
@@ -187,17 +175,8 @@ module Gitlab
return unless migrate?
 
files = UntrackedFile.where(id: start_id..end_id)
files.each do |untracked_file|
begin
untracked_file.ensure_tracked!
rescue StandardError => e
Rails.logger.warn "Failed to add untracked file to uploads: #{e.message}"
# The untracked rows will remain in the DB. We will be able to see
# which ones failed to become tracked, and then we can decide what
# to do.
end
end
insert_uploads_if_needed(files)
files.delete_all
 
drop_temp_table_if_finished
end
Loading
Loading
@@ -208,6 +187,31 @@ module Gitlab
UntrackedFile.table_exists? && Upload.table_exists?
end
 
def insert_uploads_if_needed(files)
filtered_files = filter_existing_uploads(files)
filtered_files = filter_deleted_models(filtered_files)
insert(filtered_files)
end
def filter_existing_uploads(files)
paths = files.map(&:upload_path)
existing_paths = Upload.where(path: paths).pluck(:path).to_set
files.reject do |file|
existing_paths.include?(file.upload_path)
end
end
def filter_deleted_models(files)
files # TODO
end
def insert(files)
files.each do |file|
Upload.create!(file.to_h)
end
end
def drop_temp_table_if_finished
UntrackedFile.connection.drop_table(:untracked_files_for_uploads) if UntrackedFile.all.empty?
end
Loading
Loading
Loading
Loading
@@ -126,50 +126,11 @@ describe Gitlab::BackgroundMigration::PopulateUntrackedUploads, :migration, :sid
end.not_to change { uploads.count }.from(0)
end
end
end
describe Gitlab::BackgroundMigration::PopulateUntrackedUploads::UntrackedFile do
include TrackUntrackedUploadsHelpers
let(:upload_class) { Gitlab::BackgroundMigration::PopulateUntrackedUploads::Upload }
before(:all) do
ensure_temporary_tracking_table_exists
end
after(:all) do
drop_temp_table_if_exists
end
 
describe '#ensure_tracked!' do
let!(:user1) { create(:user, :with_avatar) }
let!(:untracked_file) { described_class.create!(path: user1.uploads.first.path) }
context 'when the file is already in the uploads table' do
it 'does not add an upload' do
expect do
untracked_file.ensure_tracked!
end.not_to change { upload_class.count }.from(1)
end
end
context 'when the file is not already in the uploads table' do
before do
user1.uploads.delete_all
end
it 'adds an upload' do
expect do
untracked_file.ensure_tracked!
end.to change { upload_class.count }.from(0).to(1)
end
end
end
describe '#add_to_uploads_if_needed' do
shared_examples_for 'add_to_uploads_non_markdown_files' do
describe 'upload outcomes for each path pattern' do
shared_examples_for 'non_markdown_file' do
let!(:expected_upload_attrs) { model.uploads.first.attributes.slice('path', 'uploader', 'size', 'checksum') }
let!(:untracked_file) { described_class.create!(path: expected_upload_attrs['path']) }
let!(:untracked_file) { untracked_files_for_uploads.create!(path: expected_upload_attrs['path']) }
 
before do
model.uploads.delete_all
Loading
Loading
@@ -177,7 +138,7 @@ describe Gitlab::BackgroundMigration::PopulateUntrackedUploads::UntrackedFile do
 
it 'creates an Upload record' do
expect do
untracked_file.add_to_uploads_if_needed
subject.perform(1, 1000)
end.to change { model.reload.uploads.count }.from(0).to(1)
 
expect(model.uploads.first.attributes).to include(expected_upload_attrs)
Loading
Loading
@@ -187,13 +148,13 @@ describe Gitlab::BackgroundMigration::PopulateUntrackedUploads::UntrackedFile do
context 'for an appearance logo file path' do
let(:model) { create(:appearance, logo: uploaded_file) }
 
it_behaves_like 'add_to_uploads_non_markdown_files'
it_behaves_like 'non_markdown_file'
end
 
context 'for an appearance header_logo file path' do
let(:model) { create(:appearance, header_logo: uploaded_file) }
 
it_behaves_like 'add_to_uploads_non_markdown_files'
it_behaves_like 'non_markdown_file'
end
 
context 'for a pre-Markdown Note attachment file path' do
Loading
Loading
@@ -203,39 +164,36 @@ describe Gitlab::BackgroundMigration::PopulateUntrackedUploads::UntrackedFile do
 
let(:model) { create(:note, :with_attachment) }
 
it_behaves_like 'add_to_uploads_non_markdown_files'
it_behaves_like 'non_markdown_file'
end
 
context 'for a user avatar file path' do
let(:model) { create(:user, :with_avatar) }
 
it_behaves_like 'add_to_uploads_non_markdown_files'
it_behaves_like 'non_markdown_file'
end
 
context 'for a group avatar file path' do
let(:model) { create(:group, :with_avatar) }
 
it_behaves_like 'add_to_uploads_non_markdown_files'
it_behaves_like 'non_markdown_file'
end
 
context 'for a project avatar file path' do
let(:model) { create(:project, :with_avatar) }
 
it_behaves_like 'add_to_uploads_non_markdown_files'
it_behaves_like 'non_markdown_file'
end
 
context 'for a project Markdown attachment (notes, issues, MR descriptions) file path' do
let(:model) { create(:project) }
 
# UntrackedFile.path is different than Upload.path
let(:untracked_file) { create_untracked_file("/#{model.full_path}/#{model.uploads.first.path}") }
before do
# Upload the file
UploadService.new(model, uploaded_file, FileUploader).execute
 
# Create the untracked_files_for_uploads record
untracked_file
untracked_files_for_uploads.create!(path: "#{Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR}/#{model.full_path}/#{model.uploads.first.path}")
 
# Save the expected upload attributes
@expected_upload_attrs = model.reload.uploads.first.attributes.slice('path', 'uploader', 'size', 'checksum')
Loading
Loading
@@ -246,13 +204,27 @@ describe Gitlab::BackgroundMigration::PopulateUntrackedUploads::UntrackedFile do
 
it 'creates an Upload record' do
expect do
untracked_file.add_to_uploads_if_needed
subject.perform(1, 1000)
end.to change { model.reload.uploads.count }.from(0).to(1)
 
expect(model.uploads.first.attributes).to include(@expected_upload_attrs)
end
end
end
end
describe Gitlab::BackgroundMigration::PopulateUntrackedUploads::UntrackedFile do
include TrackUntrackedUploadsHelpers
let(:upload_class) { Gitlab::BackgroundMigration::PopulateUntrackedUploads::Upload }
before(:all) do
ensure_temporary_tracking_table_exists
end
after(:all) do
drop_temp_table_if_exists
end
 
describe '#upload_path' do
def assert_upload_path(file_path, expected_upload_path)
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment