Skip to content
Snippets Groups Projects
Commit b6ea41d1 authored by Michael Kozono
Browse files

Find and store unhashed upload file paths

parent ab814e4d
No related branches found
No related tags found
No related merge requests found
module Gitlab
module BackgroundMigration
class PrepareUnhashedUploads
# Number of file paths accumulated before they are handed to
# insert_file_paths and written to the unhashed_upload_files table.
FILE_PATH_BATCH_SIZE = 500
# Directory that is scanned for upload files. It is also the prefix of the
# @hashed and tmp subtrees that the find command excludes.
UPLOAD_DIR = "#{CarrierWave.root}/uploads"
# ActiveRecord model bound to the unhashed_upload_files table, declared
# inside this migration class.
# NOTE(review): presumably kept local so the migration does not depend on
# application models that may change later — confirm.
class UnhashedUploadFile < ActiveRecord::Base
  self.table_name = 'unhashed_upload_files'
end
Loading
Loading
@@ -8,8 +11,8 @@ module Gitlab
# Entry point of the background migration.
#
# Does nothing unless `migrate?` is truthy. Otherwise it empties the
# unhashed_upload_files table, records the path of every unhashed upload
# file found on disk, and finally calls
# schedule_populate_untracked_uploads_jobs.
def perform
  return unless migrate?

  # Clear first so rows left behind by an earlier run are not duplicated.
  clear_unhashed_upload_file_paths
  store_unhashed_upload_file_paths
  schedule_populate_untracked_uploads_jobs
end
 
Loading
Loading
@@ -19,12 +22,55 @@ module Gitlab
UnhashedUploadFile.table_exists?
end
 
def clear_unhashed_upload_files
# TODO
# Removes every row from the unhashed_upload_files table so that a new scan
# does not pile on top of rows stored by an earlier run.
def clear_unhashed_upload_file_paths
  UnhashedUploadFile.delete_all
end
 
def store_unhashed_upload_files
# TODO
# Walks UPLOAD_DIR and records the path of every file reported by
# each_file_path in the unhashed_upload_files table.
#
# Paths are consumed as they are yielded and written in slices of at most
# FILE_PATH_BATCH_SIZE, so the complete listing is never held in memory.
# Returns early, without inserting anything, when UPLOAD_DIR does not exist
# (e.g. an installation that has never received an upload).
#
# The return value is not meaningful to callers.
def store_unhashed_upload_file_paths
  # Dir.exist? is the supported spelling; the Dir.exists? alias was
  # deprecated and is gone as of Ruby 3.2.
  return unless Dir.exist?(UPLOAD_DIR)

  # enum_for turns the block-yielding walker into an Enumerator; each_slice
  # with a block iterates it internally, so paths are still streamed.
  enum_for(:each_file_path, UPLOAD_DIR)
    .each_slice(FILE_PATH_BATCH_SIZE) { |batch| insert_file_paths(batch) }
end
# Runs the command produced by build_find_command(search_dir) and yields
# each NUL-terminated record of its standard output, with the trailing NUL
# stripped, to the caller's block.
#
# Raises RuntimeError ("Find command failed") after the output has been
# consumed if the child process did not exit successfully.
def each_file_path(search_dir, &block)
  find_cmd = build_find_command(search_dir)

  Open3.popen2(*find_cmd) do |_child_in, child_out, waiter|
    # Records are split on NUL so paths containing newlines stay intact.
    child_out.each_line("\0") { |record| yield(record.chomp("\0")) }

    raise "Find command failed" unless waiter.value.success?
  end
end
# Builds the argv array for a `find` invocation that prints, NUL-delimited,
# every regular file under search_dir except those below UPLOAD_DIR/@hashed
# and UPLOAD_DIR/tmp.
#
# When ionice_is_available? is truthy the command is prefixed with
# `ionice -c Idle`.
#
# @param search_dir [String] directory passed to find as its starting point
# @return [Array<String>] the command followed by its arguments
def build_find_command(search_dir)
  cmd = ['find', search_dir, '-type', 'f', '!', '-path', "#{UPLOAD_DIR}/@hashed/*", '!', '-path', "#{UPLOAD_DIR}/tmp/*", '-print0']

  # Array#+ returns a new array, so the result has to be assigned back;
  # otherwise the ionice prefix is silently dropped.
  cmd = ['ionice', '-c', 'Idle'] + cmd if ionice_is_available?

  cmd
end
# Reports whether an `ionice` executable can be located.
#
# Returns whatever Gitlab::Utils.which('ionice') returns, or false when
# that lookup raises a StandardError.
def ionice_is_available?
  located = Gitlab::Utils.which('ionice')
  located
rescue StandardError
  # A failed lookup is treated as "ionice not available": falling back to
  # false is relatively safe here, even if it hides the underlying error.
  false
end
# Stores each given path as its own UnhashedUploadFile row, one create!
# call per path, in the order given.
#
# @param file_paths [Array<String>] paths to record
# @return [Array<String>] the same collection that was passed in
def insert_file_paths(file_paths)
  file_paths.each { |path| UnhashedUploadFile.create!(path: path) }
end
 
def schedule_populate_untracked_uploads_jobs
Loading
Loading
require 'spec_helper'
# Runs PrepareUnhashedUploads#perform against files created through model
# avatar/logo attributes and UploadService, and checks which paths end up in
# the unhashed_upload_files table.
describe Gitlab::BackgroundMigration::PrepareUnhashedUploads, :migration, schema: 20171103140253 do
  # Handle on the table the migration writes to; used for counting and seeding rows.
  let!(:unhashed_upload_files) { table(:unhashed_upload_files) }

  let(:user1) { create(:user) }
  let(:user2) { create(:user) }
  let(:project1) { create(:project) }
  let(:project2) { create(:project) }
  let(:appearance) { create(:appearance) }

  context 'when files were uploaded before and after hashed storage was enabled' do
    # Order matters here: five uploads happen before hashed storage is
    # stubbed on (user avatar, project avatar, appearance logo, appearance
    # header logo, one Markdown upload) and one upload happens after it.
    # The expectations below count exactly those first five.
    before do
      fixture = Rails.root.join('spec', 'fixtures', 'rails_sample.jpg')
      uploaded_file = fixture_file_upload(fixture)

      user1.update(avatar: uploaded_file)
      project1.update(avatar: uploaded_file)
      appearance.update(logo: uploaded_file, header_logo: uploaded_file)

      uploaded_file = fixture_file_upload(fixture)
      UploadService.new(project1, uploaded_file, FileUploader).execute # Markdown upload

      stub_application_setting(hashed_storage_enabled: true)

      # Hashed files
      uploaded_file = fixture_file_upload(fixture)
      UploadService.new(project2, uploaded_file, FileUploader).execute
    end

    it 'adds unhashed files to the unhashed_upload_files table' do
      expect do
        described_class.new.perform
      end.to change { unhashed_upload_files.count }.from(0).to(5)
    end

    it 'does not add hashed files to the unhashed_upload_files table' do
      described_class.new.perform

      hashed_file_path = project2.uploads.where(uploader: 'FileUploader').first.path
      # NOTE(review): the path is interpolated directly into the SQL fragment;
      # a bind parameter would avoid quoting surprises — confirm whether the
      # fixture-derived value can ever contain a quote or LIKE wildcard.
      expect(unhashed_upload_files.where("path like '%#{hashed_file_path}%'").exists?).to be_falsey
    end

    # E.g. from a previous failed run of this background migration
    context 'when there is existing data in unhashed_upload_files' do
      before do
        unhashed_upload_files.create(path: '/foo/bar.jpg')
      end

      # The seeded row must disappear: the count goes from 1 to the same 5
      # rows as a clean run, not to 6.
      it 'clears existing data before adding new data' do
        expect do
          described_class.new.perform
        end.to change { unhashed_upload_files.count }.from(1).to(5)
      end
    end

    # E.g. The installation is in use at the time of migration, and someone has
    # just uploaded a file
    context 'when there are files in /uploads/tmp' do
      before do
        # NOTE(review): assumes the tmp directory under UPLOAD_DIR already
        # exists when this runs — confirm, FileUtils.touch does not create
        # parent directories.
        FileUtils.touch(Rails.root.join(described_class::UPLOAD_DIR, 'tmp', 'some_file.jpg'))
      end

      # Still 5: the extra file under tmp must not be counted.
      it 'does not add files from /uploads/tmp' do
        expect do
          described_class.new.perform
        end.to change { unhashed_upload_files.count }.from(0).to(5)
      end
    end
  end

  # Very new or lightly-used installations that are running this migration
  # may not have an upload directory because they have no uploads.
  context 'when no files were ever uploaded' do
    it 'does not add to the unhashed_upload_files table (and does not raise error)' do
      expect do
        described_class.new.perform
      end.not_to change { unhashed_upload_files.count }.from(0)
    end
  end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment