Skip to content
Snippets Groups Projects
Commit 594e6a0a authored by Micael Bergeron's avatar Micael Bergeron
Browse files

Refactor the uploaders

I've demoted the ObjectStoreUploader to a concern that is mixed into
the concrete uploader classes that need to store files in a remote
object store.

I've been working on making the local -> remote migration work
first, which has been trivial compared to the remote -> local one.

The current implementation is heavily based on side-effects which
makes the code brittle and hard to reason about.

The current approach is to store the `store` field in the correct
`Upload` model once a migration has been done. To retrieve the field
I use the `has_many :uploads` relationship, with all the paths that
a certain file may have `uploads.where(path: paths).last`. This has
the drawback of adding a database query for every upload lookup, but
I feel that the generalization of this behavior is worth it. We should
be able to optimize this down the road quite easily.
parent bbcaf4ae
No related branches found
No related tags found
No related merge requests found
Showing
with 320 additions and 211 deletions
Loading
Loading
@@ -2,6 +2,7 @@ module UploadsActions
include Gitlab::Utils::StrongMemoize
 
def create
# TODO why not pass a GitlabUploader instance
link_to_file = UploadService.new(model, params[:file], uploader_class).execute
 
respond_to do |format|
Loading
Loading
@@ -17,34 +18,53 @@ module UploadsActions
end
end
 
# Streams the upload from local storage, or redirects the client to the
# file's URL when it is held in a remote object store. Renders a 404
# when the uploader has no file.
def show
  return render_404 unless uploader.exists?

  # Send the client to the remote URL when the file is not stored
  # locally. Must `return` here: falling through to `send_file` would
  # raise a DoubleRenderError.
  return redirect_to uploader.url unless uploader.file_storage?

  # Otherwise stream the file from disk; images/videos render inline,
  # everything else downloads as an attachment.
  disposition = uploader.image_or_video? ? 'inline' : 'attachment'
  expires_in 0.seconds, must_revalidate: true, private: true
  send_file uploader.file.path, disposition: disposition
end
 
private
 
def uploader_class
uploader.class
end
# Returns the requested mount point (e.g. "avatar") when it is one of
# the mounts we serve uploads for, nil otherwise.
def upload_mount
  requested_mount = params[:mounted_as]
  requested_mount if %w(avatar attachment file logo header_logo).include?(requested_mount)
end
# TODO: this method is too complex
#
def uploader
strong_memoize(:uploader) do
return if show_model.nil?
@uploader ||= if upload_model_class < CarrierWave::Mount::Extension && upload_mount
model.public_send(upload_mount)
elsif upload_model_class == PersonalSnippet
find_upload(PersonalFileUploader)&.build_uploader || PersonalFileUploader.new(model)
else
find_upload(FileUploader)&.build_uploader || FileUploader.new(model)
end
end
 
file_uploader = FileUploader.new(show_model, params[:secret])
file_uploader.retrieve_from_store!(params[:filename])
# Looks up the Upload record for the secret/filename given in params,
# scoped to the given uploader class. Returns nil when either param is
# missing or no matching record exists.
def find_upload(uploader_class)
  return nil unless params[:secret] && params[:filename]

  upload_path = uploader_class.upload_path(params[:secret], params[:filename])
  # `where` always returns a relation (never nil), so plain `.last`
  # suffices; the safe-navigation operator was dead weight.
  Upload.where(uploader: uploader_class.to_s, path: upload_path).last
end
 
def image_or_video?
uploader && uploader.exists? && uploader.image_or_video?
end
def uploader_class
FileUploader
end
end
# Used for out-of-context uploads
# see #upload_model_class
#
class UploadsController < ApplicationController
include UploadsActions
 
UnknownUploadModelError = Class.new(StandardError)
rescue_from UnknownUploadModelError, with: :render_404
skip_before_action :authenticate_user!
before_action :upload_mount_satisfied?
before_action :find_model
before_action :authorize_access!, only: [:show]
before_action :authorize_create_access!, only: [:create]
 
private
def find_model
return nil unless params[:id]
return render_404 unless upload_model && upload_mount
@model = upload_model.find(params[:id])
@model = upload_model_class.find(params[:id])
end
 
def authorize_access!
Loading
Loading
@@ -53,8 +56,8 @@ class UploadsController < ApplicationController
end
end
 
def upload_model
upload_models = {
def upload_model_class
model_classes = {
"user" => User,
"project" => Project,
"note" => Note,
Loading
Loading
@@ -63,42 +66,17 @@ class UploadsController < ApplicationController
"personal_snippet" => PersonalSnippet
}
 
upload_models[params[:model]]
raise UnknownUploadModelError unless cls = model_classes[params[:model]]
cls
end
 
def upload_mount
return true unless params[:mounted_as]
upload_mounts = %w(avatar attachment file logo header_logo)
if upload_mounts.include?(params[:mounted_as])
params[:mounted_as]
end
end
def uploader
return @uploader if defined?(@uploader)
case model
when nil
@uploader = PersonalFileUploader.new(nil, params[:secret])
@uploader.retrieve_from_store!(params[:filename])
when PersonalSnippet
@uploader = PersonalFileUploader.new(model, params[:secret])
@uploader.retrieve_from_store!(params[:filename])
else
@uploader = @model.public_send(upload_mount) # rubocop:disable GitlabSecurity/PublicSend
redirect_to @uploader.url unless @uploader.file_storage?
end
@uploader
# Whether the looked-up model class mounts CarrierWave uploaders,
# i.e. it extends CarrierWave::Mount::Extension.
def upload_model_class_has_mounts?
  upload_model_class < CarrierWave::Mount::Extension
end
 
def uploader_class
PersonalFileUploader
# before_action: rejects requests whose mount point is unknown to the
# model class. Trivially satisfied for models without CarrierWave
# mounts.
def upload_mount_satisfied?
  return true unless upload_model_class_has_mounts?

  # `key?` is the idiomatic Ruby spelling of `has_key?`.
  upload_model_class.uploader_options.key?(upload_mount)
end
 
def model
Loading
Loading
Loading
Loading
@@ -46,7 +46,7 @@ module Ci
end
scope :with_artifacts_not_expired, ->() { with_artifacts.where('artifacts_expire_at IS NULL OR artifacts_expire_at > ?', Time.now) }
scope :with_expired_artifacts, ->() { with_artifacts.where('artifacts_expire_at < ?', Time.now) }
scope :with_artifacts_stored_locally, ->() { with_artifacts.where(artifacts_file_store: [nil, LegacyArtifactUploader::LOCAL_STORE]) }
scope :with_artifacts_stored_locally, ->() { with_artifacts.where(artifacts_file_store: [nil, LegacyArtifactUploader::Store::LOCAL]) }
scope :last_month, ->() { where('created_at > ?', Date.today - 1.month) }
scope :manual_actions, ->() { where(when: :manual, status: COMPLETED_STATUSES + [:manual]) }
scope :ref_protected, -> { where(protected: true) }
Loading
Loading
Loading
Loading
@@ -7,7 +7,7 @@ class LfsObject < ActiveRecord::Base
 
validates :oid, presence: true, uniqueness: true
 
scope :with_files_stored_locally, ->() { where(file_store: [nil, LfsObjectUploader::LOCAL_STORE]) }
scope :with_files_stored_locally, ->() { where(file_store: [nil, LfsObjectUploader::Store::LOCAL]) }
 
mount_uploader :file, LfsObjectUploader
 
Loading
Loading
Loading
Loading
@@ -90,7 +90,9 @@ class Note < ActiveRecord::Base
end
end
 
# @deprecated attachments are handled by the MarkdownUploader
mount_uploader :attachment, AttachmentUploader
deprecate :attachment => 'Use the Markdown uploader instead'
 
# Scopes
scope :searchable, -> { where(system: false) }
Loading
Loading
Loading
Loading
@@ -931,6 +931,14 @@ class Project < ActiveRecord::Base
end
end
 
# Wires the persisted Upload record (and its storage backend) into the
# given avatar uploader, so the uploader knows whether the file lives
# locally or in the object store. Returns the uploader in all cases so
# callers can chain on it.
def avatar_uploader(uploader)
  return uploader unless avatar_identifier

  paths = uploader.store_dirs.map { |_store, dir| File.join(dir, avatar_identifier) }
  # `where` returns a relation, never nil, so `.last` needs no `&.`.
  uploader.upload = uploads.where(uploader: 'AvatarUploader', path: paths).last
  uploader.object_store = uploader.upload&.store # TODO: move this to RecordsUploads
  uploader
end
def avatar_in_git
repository.avatar
end
Loading
Loading
Loading
Loading
@@ -17,13 +17,15 @@ class Upload < ActiveRecord::Base
end
 
# Creates or refreshes the Upload record mirroring the uploader's
# current file: its size, path, owning model, uploader class and
# storage backend.
def self.record(uploader)
  upload = uploader.upload || new

  upload.update_attributes(
    size: uploader.file.size,
    path: uploader.dynamic_path,
    model: uploader.model,
    uploader: uploader.class.to_s,
    # Fall back to local storage for uploaders without object-store
    # support (`try` tolerates uploaders lacking #object_store).
    store: uploader.try(:object_store) || ObjectStorage::Store::LOCAL
  )
end
 
Loading
Loading
@@ -49,7 +51,15 @@ class Upload < ActiveRecord::Base
File.exist?(absolute_path)
end
 
private
# Builds (or decorates) an uploader instance wired to this Upload
# record: the uploader carries the record itself and its storage
# backend. Pass `from` to decorate an existing uploader instead of
# instantiating a new one.
def build_uploader(from = nil)
  (from || uploader_class.new(model)).tap do |uploader|
    uploader.upload = self
    uploader.object_store = store
  end
end
#private
 
def foreground_checksum?
size <= CHECKSUM_THRESHOLD
Loading
Loading
Loading
Loading
@@ -25,7 +25,7 @@ module Geo
end
 
def local_store_path
Pathname.new(LfsObjectUploader.local_store_path)
Pathname.new(LfsObjectUploader.workhorse_upload_path)
end
 
def relative_file_path
Loading
Loading
Loading
Loading
@@ -16,9 +16,9 @@ module Projects
@old_path = project.full_path
@new_path = project.disk_path
 
origin = FileUploader.dynamic_path_segment(project)
origin = FileUploader.model_path_segment(project)
project.storage_version = ::Project::HASHED_STORAGE_FEATURES[:attachments]
target = FileUploader.dynamic_path_segment(project)
target = FileUploader.model_path_segment(project)
 
result = move_folder!(origin, target)
project.save!
Loading
Loading
class AttachmentUploader < GitlabUploader
include RecordsUploads
include RecordsUploads::Concern
include ObjectStorage::Concern
prepend ObjectStorage::Extension::RecordsUploads
include UploaderHelper
 
storage :file
storage_options Gitlab.config.uploads
 
def store_dir
"#{base_dir}/#{model.class.to_s.underscore}/#{mounted_as}/#{model.id}"
private
def dynamic_segment
File.join(model.class.to_s.underscore, mounted_as.to_s, model.id.to_s)
end
end
class AvatarUploader < GitlabUploader
include RecordsUploads
include UploaderHelper
include RecordsUploads::Concern
include ObjectStorage::Concern
prepend ObjectStorage::Extension::RecordsUploads
 
storage :file
def store_dir
"#{base_dir}/#{model.class.to_s.underscore}/#{mounted_as}/#{model.id}"
end
storage_options Gitlab.config.uploads
 
def exists?
model.avatar.file && model.avatar.file.present?
Loading
Loading
@@ -22,4 +20,10 @@ class AvatarUploader < GitlabUploader
def move_to_cache
false
end
private
def dynamic_segment
File.join(model.class.to_s.underscore, mounted_as.to_s, model.id.to_s)
end
end
Loading
Loading
@@ -21,13 +21,12 @@ class FileMover
end
 
# Rewrites the markdown link in the model's text so it points at the
# moved file instead of the temporary upload. Returns true on success;
# reverts the file move and returns false when anything raises.
def update_markdown
  updated_text = model.read_attribute(update_field)
    .gsub(temp_file_uploader.markdown_link, uploader.markdown_link)
  model.update_attribute(update_field, updated_text)

  true
rescue
  revert

  false
end
 
Loading
Loading
# This class breaks the actual CarrierWave concept.
# Every uploader should use a base_dir that is model agnostic so we can build
# back URLs from base_dir-relative paths saved in the `Upload` model.
#
# As the `.base_dir` is model dependent and **not** saved in the upload model (see #upload_path)
# there is no way to build back the correct file path without the model, which defies
# CarrierWave way of storing files.
#
class FileUploader < GitlabUploader
include RecordsUploads
include UploaderHelper
include RecordsUploads::Concern
include ObjectStorage::Concern
prepend ObjectStorage::Extension::RecordsUploads
 
MARKDOWN_PATTERN = %r{\!?\[.*?\]\(/uploads/(?<secret>[0-9a-f]{32})/(?<file>.*?)\)}
DYNAMIC_PATH_PATTERN = %r{(?<secret>\h{32})/(?<identifier>.*)}
 
storage :file
attr_accessor :model
attr_reader :secret
# TODO: remove this, FileUploader should not have storage_options, this class
# should be abstract, or even a Concern that simply add the secret
#
# Then create a new AdhocUploader that implement the base_dir logic of this class,
# which is wrong anyways.
storage_options Gitlab.config.uploads
 
def self.absolute_path(upload_record)
def self.root
storage_options&.storage_path
end
def self.absolute_path(upload)
File.join(
self.dynamic_path_segment(upload_record.model),
upload_record.path
root,
base_dir(upload.model),
upload.path # this already contain the dynamic_segment, see #upload_path
)
end
 
# Not using `GitlabUploader.base_dir` because all project namespaces are in
# the `public/uploads` dir.
#
def self.base_dir
root_dir
def self.base_dir(model)
model_path_segment(model)
end
 
# Returns the part of `store_dir` that can change based on the model's current
Loading
Loading
@@ -29,59 +50,102 @@ class FileUploader < GitlabUploader
# model - Object that responds to `full_path` and `disk_path`
#
# Returns a String without a trailing slash
def self.dynamic_path_segment(model)
def self.model_path_segment(model)
if model.hashed_storage?(:attachments)
dynamic_path_builder(model.disk_path)
model.disk_path
else
dynamic_path_builder(model.full_path)
model.full_path
end
end
 
# Auxiliary method to build dynamic path segment when not using a project model
#
# Prefer to use the `.dynamic_path_segment` as it includes Hashed Storage specific logic
# Prefer to use the `.model_path_segment` as it includes Hashed Storage specific logic
#
# TODO: review this path?
# TODO: remove me this makes no sense
def self.dynamic_path_builder(path)
File.join(CarrierWave.root, base_dir, path)
File.join(root, path)
end
 
attr_accessor :model
attr_reader :secret
def self.upload_path(secret, identifier)
File.join(secret, identifier)
end
 
def initialize(model, secret = nil)
  @model = model
  # Store only what the caller passed; when nil, the secret is lazily
  # generated on first access (see #secret).
  @secret = secret
end
 
def store_dir
File.join(dynamic_path_segment, @secret)
def base_dir
self.class.base_dir(@model)
end
 
def relative_path
self.file.path.sub("#{dynamic_path_segment}/", '')
# we don't need to know the actual path, an uploader instance should be
# able to yield the file content on demand, so we should build the digest
def absolute_path
self.class.absolute_path(@upload)
end
 
def to_markdown
to_h[:markdown]
def upload_path
self.class.upload_path(dynamic_segment, identifier)
end
 
def to_h
filename = image_or_video? ? self.file.basename : self.file.filename
escaped_filename = filename.gsub("]", "\\]")
def model_path_segment
self.class.model_path_segment(@model)
end
def store_dir
File.join(base_dir, dynamic_segment)
end
 
markdown = "[#{escaped_filename}](#{secure_url})"
def markdown_link
markdown = "[#{markdown_name}](#{secure_url})"
markdown.prepend("!") if image_or_video? || dangerous?
markdown
end
 
def to_h
{
alt: filename,
alt: markdown_name,
url: secure_url,
markdown: markdown
markdown: markdown_link
}
end
 
def filename
self.file.filename
end
# This is weird: the upload do not hold the secret, but holds the path
# so we need to extract the secret from the path
def upload=(value)
if matches = DYNAMIC_PATH_PATTERN.match(value.path)
@secret = matches[:secret]
@identifier = matches[:identifier]
retrieve_from_store!(@identifier)
end
super
end
def secret
@secret ||= generate_secret
end
private
 
def dynamic_path_segment
self.class.dynamic_path_segment(model)
def markdown_name
(image_or_video? ? File.basename(filename, File.extname(filename)) : filename).gsub("]", "\\]")
end
def identifier
@identifier ||= filename
end
def dynamic_segment
secret
end
 
def generate_secret
Loading
Loading
class GitlabUploader < CarrierWave::Uploader::Base
def self.absolute_path(upload_record)
File.join(CarrierWave.root, upload_record.path)
end
class << self
# DSL setter
def storage_options(options = nil)
@storage_options = options if options
@storage_options
end
 
def self.root_dir
'uploads'
end
def root
storage_options&.storage_path
end
 
# When object storage is used, keep the `root_dir` as `base_dir`.
# The files aren't really in folders there, they just have a name.
# The files that contain user input in their name, also contain a hash, so
# the names are still unique
#
# This method is overridden in the `FileUploader`
def self.base_dir
return root_dir unless file_storage?
# represent the directory namespacing at the class level
def base_dir
storage_options&.base_dir || ""
end
 
File.join(root_dir, '-', 'system')
end
def file_storage?
storage == CarrierWave::Storage::File
end
 
def self.file_storage?
self.storage == CarrierWave::Storage::File
def absolute_path(upload_record)
File.join(CarrierWave.root, upload_record.path)
end
end
 
delegate :base_dir, :file_storage?, to: :class
Loading
Loading
@@ -39,17 +40,6 @@ class GitlabUploader < CarrierWave::Uploader::Base
true
end
 
# Designed to be overridden by child uploaders that have a dynamic path
# segment -- that is, a path that changes based on mutable attributes of its
# associated model
#
# For example, `FileUploader` builds the storage path based on the associated
# project model's `path_with_namespace` value, which can change when the
# project or its containing namespace is moved or renamed.
def relative_path
self.file.path.sub("#{root}/", '')
end
def exists?
file.present?
end
Loading
Loading
@@ -67,6 +57,17 @@ class GitlabUploader < CarrierWave::Uploader::Base
 
private
 
# Designed to be overridden by child uploaders that have a dynamic path
# segment -- that is, a path that changes based on mutable attributes of its
# associated model
#
# For example, `FileUploader` builds the storage path based on the associated
# project model's `path_with_namespace` value, which can change when the
# project or its containing namespace is moved or renamed.
def dynamic_segment
raise(NotImplementedError)
end
# To prevent files from moving across filesystems, override the default
# implementation:
# http://github.com/carrierwaveuploader/carrierwave/blob/v1.0.0/lib/carrierwave/uploader/cache.rb#L181-L183
Loading
Loading
class JobArtifactUploader < ObjectStoreUploader
storage_options Gitlab.config.artifacts
def self.local_store_path
Gitlab.config.artifacts.path
end
class JobArtifactUploader < GitlabUploader
extend Workhorse::UploadPath
include ObjectStorage::Concern
 
def self.artifacts_upload_path
File.join(self.local_store_path, 'tmp/uploads/')
end
storage_options Gitlab.config.artifacts
 
def size
return super if model.size.nil?
Loading
Loading
@@ -17,7 +12,7 @@ class JobArtifactUploader < ObjectStoreUploader
 
private
 
def default_path
def dynamic_segment
creation_date = model.created_at.utc.strftime('%Y_%m_%d')
 
File.join(disk_hash[0..1], disk_hash[2..3], disk_hash,
Loading
Loading
class LegacyArtifactUploader < ObjectStoreUploader
storage_options Gitlab.config.artifacts
def self.local_store_path
Gitlab.config.artifacts.path
end
class LegacyArtifactUploader < GitlabUploader
extend Workhorse::UploadPath
include ObjectStorage::Concern
 
def self.artifacts_upload_path
File.join(self.local_store_path, 'tmp/uploads/')
end
storage_options Gitlab.config.artifacts
 
private
 
def default_path
def dynamic_segment
File.join(model.created_at.utc.strftime('%Y_%m'), model.project_id.to_s, model.id.to_s)
end
end
class LfsObjectUploader < ObjectStoreUploader
storage_options Gitlab.config.lfs
class LfsObjectUploader < GitlabUploader
extend Workhorse::UploadPath
include ObjectStorage::Concern
 
def self.local_store_path
Gitlab.config.lfs.storage_path
end
storage_options Gitlab.config.lfs
 
def filename
model.oid[4..-1]
Loading
Loading
@@ -11,7 +10,7 @@ class LfsObjectUploader < ObjectStoreUploader
 
private
 
def default_path
"#{model.oid[0, 2]}/#{model.oid[2, 2]}"
def dynamic_segment
File.join(model.oid[0, 2], model.oid[2, 2])
end
end
class NamespaceFileUploader < FileUploader
def self.base_dir
File.join(root_dir, '-', 'system', 'namespace')
storage_options Gitlab.config.uploads
def self.base_dir(model)
File.join(storage_options&.base_dir, 'namespace', model_path_segment(model))
end
 
def self.dynamic_path_segment(model)
dynamic_path_builder(model.id.to_s)
def self.model_path_segment(model)
File.join(model.id.to_s)
end
 
private
# Re-Override
def store_dir
store_dirs[object_store]
end
 
def secure_url
File.join('/uploads', @secret, file.filename)
def store_dirs
{
Store::LOCAL => File.join(base_dir, dynamic_segment),
Store::REMOTE => File.join('namespace', model_path_segment, dynamic_segment)
}
end
end
class PersonalFileUploader < FileUploader
def self.dynamic_path_segment(model)
File.join(CarrierWave.root, model_path(model))
storage_options Gitlab.config.uploads
def self.base_dir(model)
File.join(storage_options&.base_dir, model_path_segment(model))
end
 
def self.base_dir
File.join(root_dir, '-', 'system')
def self.model_path_segment(model)
return 'temp/' unless model
File.join(model.class.to_s.underscore, model.id.to_s)
end
 
private
def object_store
return Store::LOCAL unless model
 
def secure_url
File.join(self.class.model_path(model), secret, file.filename)
super
end
# Revert-Override
def store_dir
store_dirs[object_store]
end
 
def self.model_path(model)
if model
File.join("/#{base_dir}", model.class.to_s.underscore, model.id.to_s)
else
File.join("/#{base_dir}", 'temp')
end
def store_dirs
{
Store::LOCAL => File.join(base_dir, dynamic_segment),
Store::REMOTE => File.join(model_path_segment, dynamic_segment)
}
end
private
def secure_url
File.join('/', base_dir, secret, file.filename)
end
end
module RecordsUploads
extend ActiveSupport::Concern
module Concern
extend ActiveSupport::Concern
 
included do
after :store, :record_upload
before :remove, :destroy_upload
end
attr_accessor :upload
 
# After storing an attachment, create a corresponding Upload record
#
# NOTE: We're ignoring the argument passed to this callback because we want
# the `SanitizedFile` object from `CarrierWave::Uploader::Base#file`, not the
# `Tempfile` object the callback gets.
#
# Called `after :store`
def record_upload(_tempfile = nil)
return unless model
return unless file_storage?
return unless file.exists?
Upload.record(self)
end
included do
before :store, :destroy_upload
after :store, :record_upload
before :remove, :destroy_upload
end
# After storing an attachment, create a corresponding Upload record
#
# NOTE: We're ignoring the argument passed to this callback because we want
# the `SanitizedFile` object from `CarrierWave::Uploader::Base#file`, not the
# `Tempfile` object the callback gets.
#
# Called `after :store`
def record_upload(_tempfile = nil)
return unless model
return unless file && file.exists?
Upload.record(self)
end
def upload_path
File.join(store_dir, filename.to_s)
end
 
private
private
 
# Before removing an attachment, destroy any Upload records at the same path
#
# Called `before :remove`
def destroy_upload(*args)
return unless file_storage?
return unless file
# Before removing an attachment, destroy any Upload records at the same path
#
# Called `before :remove`
def destroy_upload(*args)
return unless file && file.exists?
 
Upload.remove_path(relative_path)
# that should be the old path?
Upload.remove_path(upload_path)
end
end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment