Skip to content
Snippets Groups Projects
Commit 74b3870a authored by Michael Kozono's avatar Michael Kozono
Browse files

Address Rubocop offenses

parent dd4b35f8
No related branches found
No related tags found
No related merge requests found
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
class PopulateUntrackedUploads
class UntrackedFile < ActiveRecord::Base
# This class processes a batch of rows in `untracked_files_for_uploads` by
# adding each file to the `uploads` table if it does not exist.
class PopulateUntrackedUploads # rubocop:disable Metrics/ClassLength
# This class is responsible for producing the attributes necessary to
# track an uploaded file in the `uploads` table.
class UntrackedFile < ActiveRecord::Base # rubocop:disable Metrics/ClassLength, Metrics/LineLength
self.table_name = 'untracked_files_for_uploads'
 
# Ends with /:random_hex/:filename
FILE_UPLOADER_PATH_PATTERN = %r{/\h+/[^/]+\z}
FILE_UPLOADER_CAPTURE_FULL_PATH_PATTERN = %r{\A(.+)#{FILE_UPLOADER_PATH_PATTERN}}
FILE_UPLOADER_PATH = %r{/\h+/[^/]+\z}
FULL_PATH_CAPTURE = %r{\A(.+)#{FILE_UPLOADER_PATH}}
 
# These regex patterns are tested against a relative path, relative to
# the upload directory.
Loading
Loading
@@ -44,7 +50,7 @@ module Gitlab
model_type: 'Project'
},
{
pattern: FILE_UPLOADER_PATH_PATTERN,
pattern: FILE_UPLOADER_PATH,
uploader: 'FileUploader',
model_type: 'Project'
}
Loading
Loading
@@ -63,13 +69,14 @@ module Gitlab
 
def upload_path
# UntrackedFile#path is absolute, but Upload#path depends on uploader
@upload_path ||= if uploader == 'FileUploader'
# Path relative to project directory in uploads
matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH_PATTERN)
matchd[0].sub(%r{\A/}, '') # remove leading slash
else
path
end
@upload_path ||=
if uploader == 'FileUploader'
# Path relative to project directory in uploads
matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_PATH)
matchd[0].sub(%r{\A/}, '') # remove leading slash
else
path
end
end
 
def uploader
Loading
Loading
@@ -83,7 +90,8 @@ module Gitlab
def model_id
return @model_id if defined?(@model_id)
 
matchd = path_relative_to_upload_dir.match(matching_pattern_map[:pattern])
pattern = matching_pattern_map[:pattern]
matchd = path_relative_to_upload_dir.match(pattern)
 
# If something is captured (matchd[1] is not nil), it is a model_id
# Only the FileUploader pattern will not match an ID
Loading
Loading
@@ -105,14 +113,20 @@ module Gitlab
path_relative_to_upload_dir.match(path_pattern_map[:pattern])
end
 
raise "Unknown upload path pattern \"#{path}\"" unless @matching_pattern_map
unless @matching_pattern_map
raise "Unknown upload path pattern \"#{path}\""
end
 
@matching_pattern_map
end
 
def file_uploader_model_id
matchd = path_relative_to_upload_dir.match(FILE_UPLOADER_CAPTURE_FULL_PATH_PATTERN)
raise "Could not capture project full_path from a FileUploader path: \"#{path_relative_to_upload_dir}\"" unless matchd
matchd = path_relative_to_upload_dir.match(FULL_PATH_CAPTURE)
not_found_msg = <<~MSG
Could not capture project full_path from a FileUploader path:
"#{path_relative_to_upload_dir}"
MSG
raise not_found_msg unless matchd
 
full_path = matchd[1]
project = Project.find_by_full_path(full_path)
Loading
Loading
@@ -123,7 +137,8 @@ module Gitlab
 
# Not including a leading slash
def path_relative_to_upload_dir
base = %r{\A#{Regexp.escape(Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR)}/}
upload_dir = Gitlab::BackgroundMigration::PrepareUntrackedUploads::RELATIVE_UPLOAD_DIR # rubocop:disable Metrics/LineLength
base = %r{\A#{Regexp.escape(upload_dir)}/}
@path_relative_to_upload_dir ||= path.sub(base, '')
end
 
Loading
Loading
@@ -132,6 +147,7 @@ module Gitlab
end
end
 
# This class is used to query the `uploads` table.
class Upload < ActiveRecord::Base
self.table_name = 'uploads'
end
Loading
Loading
@@ -192,8 +208,10 @@ module Gitlab
end
 
ids.each do |model_type, model_ids|
found_ids = Object.const_get(model_type).where(id: model_ids.uniq).pluck(:id)
ids[model_type] = ids[model_type] - found_ids # replace with deleted ids
model_class = Object.const_get(model_type)
found_ids = model_class.where(id: model_ids.uniq).pluck(:id)
deleted_ids = ids[model_type] - found_ids
ids[model_type] = deleted_ids
end
 
ids
Loading
Loading
@@ -204,11 +222,15 @@ module Gitlab
file.to_h.merge(created_at: 'NOW()')
end
 
Gitlab::Database.bulk_insert('uploads', rows, disable_quote: :created_at)
Gitlab::Database.bulk_insert('uploads',
rows,
disable_quote: :created_at)
end
 
def drop_temp_table_if_finished
UntrackedFile.connection.drop_table(:untracked_files_for_uploads) if UntrackedFile.all.empty?
if UntrackedFile.all.empty?
UntrackedFile.connection.drop_table(:untracked_files_for_uploads)
end
end
end
end
Loading
Loading
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
class PrepareUntrackedUploads
# This class finds all non-hashed uploaded file paths and saves them to a
# `untracked_files_for_uploads` table.
class PrepareUntrackedUploads # rubocop:disable Metrics/ClassLength
# For bulk_queue_background_migration_jobs_by_range
include Database::MigrationHelpers
 
FILE_PATH_BATCH_SIZE = 500
FIND_BATCH_SIZE = 500
RELATIVE_UPLOAD_DIR = "uploads".freeze
ABSOLUTE_UPLOAD_DIR = "#{CarrierWave.root}/#{RELATIVE_UPLOAD_DIR}".freeze
FOLLOW_UP_MIGRATION = 'PopulateUntrackedUploads'.freeze
Loading
Loading
@@ -12,6 +16,8 @@ module Gitlab
EXCLUDED_HASHED_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/@hashed/*".freeze
EXCLUDED_TMP_UPLOADS_PATH = "#{ABSOLUTE_UPLOAD_DIR}/tmp/*".freeze
 
# This class is used to iterate over batches of
# `untracked_files_for_uploads` rows.
class UntrackedFile < ActiveRecord::Base
include EachBatch
 
Loading
Loading
@@ -39,8 +45,9 @@ module Gitlab
private
 
def ensure_temporary_tracking_table_exists
unless UntrackedFile.connection.table_exists?(:untracked_files_for_uploads)
UntrackedFile.connection.create_table :untracked_files_for_uploads do |t|
table_name = :untracked_files_for_uploads
unless UntrackedFile.connection.table_exists?(table_name)
UntrackedFile.connection.create_table table_name do |t|
t.string :path, limit: 600, null: false
t.index :path, unique: true
end
Loading
Loading
@@ -54,7 +61,7 @@ module Gitlab
def store_untracked_file_paths
return unless Dir.exist?(ABSOLUTE_UPLOAD_DIR)
 
each_file_batch(ABSOLUTE_UPLOAD_DIR, FILE_PATH_BATCH_SIZE) do |file_paths|
each_file_batch(ABSOLUTE_UPLOAD_DIR, FIND_BATCH_SIZE) do |file_paths|
insert_file_paths(file_paths)
end
end
Loading
Loading
@@ -85,12 +92,17 @@ module Gitlab
end
 
def build_find_command(search_dir)
cmd = %W[find #{search_dir} -type f ! ( -path #{EXCLUDED_HASHED_UPLOADS_PATH} -prune ) ! ( -path #{EXCLUDED_TMP_UPLOADS_PATH} -prune ) -print0]
cmd = %W[find #{search_dir}
-type f
! ( -path #{EXCLUDED_HASHED_UPLOADS_PATH} -prune )
! ( -path #{EXCLUDED_TMP_UPLOADS_PATH} -prune )
-print0]
 
ionice = which_ionice
cmd = %W[#{ionice} -c Idle] + cmd if ionice
 
Rails.logger.info "PrepareUntrackedUploads find command: \"#{cmd.join(' ')}\""
log_msg = "PrepareUntrackedUploads find command: \"#{cmd.join(' ')}\""
Rails.logger.info log_msg
 
cmd
end
Loading
Loading
@@ -98,25 +110,32 @@ module Gitlab
def which_ionice
Gitlab::Utils.which('ionice')
rescue StandardError
# In this case, returning false is relatively safe, even though it isn't very nice
# In this case, returning false is relatively safe,
# even though it isn't very nice
false
end
 
def insert_file_paths(file_paths)
sql = if postgresql_pre_9_5?
"INSERT INTO #{table_columns_and_values_for_insert(file_paths)};"
elsif postgresql?
"INSERT INTO #{table_columns_and_values_for_insert(file_paths)} ON CONFLICT DO NOTHING;"
else # MySQL
"INSERT IGNORE INTO #{table_columns_and_values_for_insert(file_paths)};"
end
sql = insert_sql(file_paths)
 
ActiveRecord::Base.connection.execute(sql)
end
 
def insert_sql(file_paths)
if postgresql_pre_9_5?
"INSERT INTO #{table_columns_and_values_for_insert(file_paths)};"
elsif postgresql?
"INSERT INTO #{table_columns_and_values_for_insert(file_paths)}"\
" ON CONFLICT DO NOTHING;"
else # MySQL
"INSERT IGNORE INTO"\
" #{table_columns_and_values_for_insert(file_paths)};"
end
end
def table_columns_and_values_for_insert(file_paths)
values = file_paths.map do |file_path|
ActiveRecord::Base.send(:sanitize_sql_array, ['(?)', file_path]) # rubocop:disable GitlabSecurity/PublicSend
ActiveRecord::Base.send(:sanitize_sql_array, ['(?)', file_path]) # rubocop:disable GitlabSecurity/PublicSend, Metrics/LineLength
end.join(', ')
 
"#{UntrackedFile.table_name} (path) VALUES #{values}"
Loading
Loading
@@ -131,11 +150,13 @@ module Gitlab
end
 
def postgresql_pre_9_5?
@postgresql_pre_9_5 ||= postgresql? && Gitlab::Database.version.to_f < 9.5
@postgresql_pre_9_5 ||= postgresql? &&
Gitlab::Database.version.to_f < 9.5
end
 
def schedule_populate_untracked_uploads_jobs
bulk_queue_background_migration_jobs_by_range(UntrackedFile, FOLLOW_UP_MIGRATION)
bulk_queue_background_migration_jobs_by_range(
UntrackedFile, FOLLOW_UP_MIGRATION)
end
end
end
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment