Skip to content
Snippets Groups Projects
Commit 835fe4d3 authored by Sean McGivern's avatar Sean McGivern
Browse files

Merge branch '40781-os-to-ce' into 'master'

Bring Object Storage to CE

Closes #4171, #4163, #3370, #2841, and #29203

See merge request gitlab-org/gitlab-ce!17358
parents ab8f13c3 6d63a098
No related branches found
No related tags found
No related merge requests found
Showing
with 745 additions and 28 deletions
class LfsObjectUploader < GitlabUploader
extend Workhorse::UploadPath
# LfsObjects use `tmp/upload` instead of `tmp/uploads`
def self.workhorse_upload_path
File.join(root, 'tmp/upload')
end
include ObjectStorage::Concern
 
storage_options Gitlab.config.lfs
 
Loading
Loading
Loading
Loading
@@ -4,7 +4,7 @@ class NamespaceFileUploader < FileUploader
options.storage_path
end
 
def self.base_dir(model)
def self.base_dir(model, _store = nil)
File.join(options.base_dir, 'namespace', model_path_segment(model))
end
 
Loading
Loading
@@ -14,6 +14,13 @@ class NamespaceFileUploader < FileUploader
 
# Re-Override
def store_dir
File.join(base_dir, dynamic_segment)
store_dirs[object_store]
end
def store_dirs
{
Store::LOCAL => File.join(base_dir, dynamic_segment),
Store::REMOTE => File.join('namespace', self.class.model_path_segment(model), dynamic_segment)
}
end
end
require 'fog/aws'
require 'carrierwave/storage/fog'
#
# This concern should add object storage support
# to the GitlabUploader class
#
module ObjectStorage
# Error classes used by the object storage code (all StandardError subclasses).
RemoteStoreError = Class.new(StandardError)
UnknownStoreError = Class.new(StandardError)
ObjectStorageUnavailable = Class.new(StandardError)
# Identifiers for the two stores a file can live in.
module Store
LOCAL = 1
REMOTE = 2
end
module Extension
# this extension is the glue between the ObjectStorage::Concern and RecordsUploads::Concern
module RecordsUploads
extend ActiveSupport::Concern
# Hook run when this extension is prepended to an uploader class.
#
# It is declared on the module itself (`self.prepended`) so Ruby invokes it as
# the Module#prepended callback; as a plain instance method it would never be
# triggered by `prepend`.
#
# base - the class this extension is being prepended to.
#
# Raises RuntimeError when base does not already include ObjectStorage::Concern.
def self.prepended(base)
  raise "#{base} must include ObjectStorage::Concern to use extensions." unless base < Concern

  # The leading `::` targets the top-level RecordsUploads module; the bare
  # constant would resolve to this extension, which has the same name.
  base.include(::RecordsUploads::Concern)
end
# Makes sure the uploader points at the upload record matching `identifier`
# before handing over to the regular retrieval in `super`.
#
# identifier - file name that is joined onto every known store directory.
def retrieve_from_store!(identifier)
  candidate_paths = store_dirs.values.map { |dir| File.join(dir, identifier) }
  upload_matches = current_upload_satisfies?(candidate_paths, model)

  # the upload we already have isn't right, find the correct one
  self.upload = uploads.find_by(model: model, path: candidate_paths) unless upload_matches

  super
end
# Builds the upload record via `super` and stamps it with the uploader's
# current store.
def build_upload
super.tap do |upload|
upload.store = object_store
end
end
# Assigns the upload record and aligns this uploader's store with the one
# recorded on it. A nil/false upload is ignored and nothing changes.
def upload=(upload)
return unless upload
self.object_store = upload.store
super
end
# Enqueues an ObjectStorage::BackgroundMoveWorker job for this uploader's
# upload record, passing the uploader class name, the upload's class name,
# the mount point and the upload id.
#
# Does nothing unless schedule_background_upload? holds and an upload record
# is present. *args is accepted but not used.
def schedule_background_upload(*args)
return unless schedule_background_upload?
return unless upload
ObjectStorage::BackgroundMoveWorker.perform_async(self.class.name,
upload.class.to_s,
mounted_as,
upload.id)
end
private
# Tells whether the upload record already attached to the uploader is the
# right one: its path is among `paths` and it belongs to `model`.
#
# Returns false when there is no upload or no model.
def current_upload_satisfies?(paths, model)
  return false unless upload && model

  path_known = paths.include?(upload.path)
  path_known &&
    upload.model_id == model.id &&
    upload.model_type == model.class.base_class.sti_name
end
end
end
# Add support for automatic background uploading after the file is stored.
#
module BackgroundMove
extend ActiveSupport::Concern
# Schedules a background upload for each given mount point once the
# surrounding transaction has committed.
#
# mount_points - names of the mounted uploaders to schedule (default: none).
def background_upload(mount_points = [])
  return if mount_points.none?

  run_after_commit do
    mount_points.each do |mount_point|
      uploader = send(mount_point) # rubocop:disable GitlabSecurity/PublicSend
      uploader.schedule_background_upload
    end
  end
end
# Lists the mount points whose uploader has a file and whose serialization
# column is reported as changed on this record.
def changed_mounts
  changed = self.class.uploaders.select do |mount, uploader_class|
    column = uploader_class.serialization_column(self.class, mount)
    uploader = send(:"#{column}") # rubocop:disable GitlabSecurity/PublicSend

    # rubocop:disable GitlabSecurity/PublicSend
    mount if uploader && uploader.exists? && send(:"#{column}_changed?")
    # rubocop:enable GitlabSecurity/PublicSend
  end

  changed.keys
end
included do
after_save on: [:create, :update] do
background_upload(changed_mounts)
end
end
end
module Concern
extend ActiveSupport::Concern
included do |base|
base.include(ObjectStorage)
after :migrate, :delete_migrated_file
end
# Class-level helpers exposing the uploader's `object_store` settings.
class_methods do
# Object storage settings, read from `options.object_store`.
def object_store_options
options.object_store
end
# Value of the `enabled` setting.
def object_store_enabled?
object_store_options.enabled
end
# Value of the `background_upload` setting.
def background_upload_enabled?
object_store_options.background_upload
end
# Value of the `proxy_download` setting.
def proxy_download_enabled?
object_store_options.proxy_download
end
# Direct download is simply the opposite of proxied download.
def direct_download_enabled?
!proxy_download_enabled?
end
# Connection settings as a hash with (deeply) symbolized keys.
def object_store_credentials
object_store_options.connection.to_hash.deep_symbolize_keys
end
# Value of the `remote_directory` setting (the bucket name, per the sample config).
def remote_store_path
object_store_options.remote_directory
end
# Column a mount point is serialized to: the `:mount_on` uploader option when
# set for that mount point, otherwise the mount point itself.
def serialization_column(model_class, mount_point)
model_class.uploader_options.dig(mount_point, :mount_on) || mount_point
end
end
# True when the active storage engine is CarrierWave's file storage.
def file_storage?
storage.is_a?(CarrierWave::Storage::File)
end
# True when the cache storage engine is CarrierWave's file storage.
def file_cache_storage?
cache_storage.is_a?(CarrierWave::Storage::File)
end
# Store currently associated with the file. Memoized; read from the model's
# store column when the model exposes it (via `try`), else Store::LOCAL.
def object_store
@object_store ||= model.try(store_serialization_column) || Store::LOCAL
end
# rubocop:disable Gitlab/ModuleWithInstanceVariables
# Switches the uploader to the given store (nil/false means Store::LOCAL) and
# replaces the storage engine with the one matching that store.
def object_store=(value)
@object_store = value || Store::LOCAL
@storage = storage_for(object_store)
end
# rubocop:enable Gitlab/ModuleWithInstanceVariables
# Return true if the current file is part of the model (i.e. is mounted in the model)
#
# Decided by whether the model responds to the store column's writer.
def persist_object_store?
model.respond_to?(:"#{store_serialization_column}=")
end
# Save the current @object_store to the model <mounted_as>_store column
#
# Written with `update_column`. No-op when persist_object_store? is false;
# raises RuntimeError when the update reports failure.
def persist_object_store!
return unless persist_object_store?
updated = model.update_column(store_serialization_column, object_store)
raise 'Failed to update object store' unless updated
end
# Yields a local path to the file and returns the block's value.
#
# With file storage the stored path is yielded directly. Otherwise the file
# is cached first, the cache path is yielded, and the cache directory is
# removed afterwards even if the block raises.
def use_file
  if file_storage?
    yield path
  else
    begin
      cache_stored_file!
      yield cache_path
    ensure
      cache_storage.delete_dir!(cache_path(nil))
    end
  end
end
# File name from `super`, falling back to the attached file's own filename
# (nil when there is no file either).
def filename
super || file&.filename
end
#
# Move the file to another store
#
# new_store: Enum (Store::LOCAL, Store::REMOTE)
#
# The move runs under an exclusive lease (one hour timeout) so the same model
# is not migrated concurrently.
#
# Returns whatever unsafe_migrate! returns.
# Raises RuntimeError ('Already running') when the lease cannot be obtained.
#
def migrate!(new_store)
  # Compute the key once so the ensure clause cannot raise while recomputing it.
  lease_key = exclusive_lease_key
  uuid = Gitlab::ExclusiveLease.new(lease_key, timeout: 1.hour.to_i).try_obtain
  raise 'Already running' unless uuid

  unsafe_migrate!(new_store)
ensure
  # Only release a lease this call actually obtained.
  Gitlab::ExclusiveLease.cancel(lease_key, uuid) if uuid
end
# Enqueues an ObjectStorage::BackgroundMoveWorker job identifying the file by
# uploader class name, model class name, mount point and model id.
# No-op unless schedule_background_upload? holds. *args is accepted but unused.
def schedule_background_upload(*args)
return unless schedule_background_upload?
ObjectStorage::BackgroundMoveWorker.perform_async(self.class.name,
model.class.name,
mounted_as,
model.id)
end
# The fog_* values below come from the uploader's object store settings.
# NOTE(review): presumably consumed by CarrierWave's fog storage — confirm.
def fog_directory
self.class.remote_store_path
end
def fog_credentials
self.class.object_store_credentials
end
# Always false.
def fog_public
false
end
# Registered as the `after :migrate` callback: deletes the file it is given,
# but only while this uploader still has a file of its own.
def delete_migrated_file(migrated_file)
migrated_file.delete if exists?
end
# True when a file is attached to this uploader.
def exists?
file.present?
end
# Directory for the given store, defaulting to the current object store.
def store_dir(store = nil)
store_dirs[store || object_store]
end
# Directory per store: the local one is prefixed with base_dir, the remote
# one is the dynamic segment on its own.
def store_dirs
{
Store::LOCAL => File.join(base_dir, dynamic_segment),
Store::REMOTE => File.join(dynamic_segment)
}
end
private
# Background upload applies only when object storage and background upload
# are both enabled in the settings and the file currently uses file storage.
def schedule_background_upload?
self.class.object_store_enabled? &&
self.class.background_upload_enabled? &&
self.file_storage?
end
# this is a hack around CarrierWave. The #migrate method needs to be
# able to force the current file to the migrated file upon success.
def file=(file)
@file = file # rubocop:disable Gitlab/ModuleWithInstanceVariables
end
# Column this mount point is serialized to, resolved by the class-level helper
# from the model's class and the mount point.
def serialization_column
self.class.serialization_column(model.class, mounted_as)
end
# Name (as a Symbol) of the column recording which store holds the file:
# "<serialization column>_store", or plain "store" when there is no
# serialization column.
def store_serialization_column
  column = serialization_column
  column_name = column.nil? ? 'store' : "#{column}_store"
  column_name.to_sym
end
# Storage engine for the current store, built on first use and memoized.
def storage
@storage ||= storage_for(object_store)
end
# Builds the CarrierWave storage engine for the given store.
#
# Raises RuntimeError for Store::REMOTE when object storage is disabled, and
# UnknownStoreError for any value other than the two Store constants.
def storage_for(store)
case store
when Store::REMOTE
raise 'Object Storage is not enabled' unless self.class.object_store_enabled?
CarrierWave::Storage::Fog.new(self)
when Store::LOCAL
CarrierWave::Storage::File.new(self)
else
raise UnknownStoreError
end
end
# Key of the exclusive lease taken by migrate!, built from the model's class
# and id.
def exclusive_lease_key
"object_storage_migrate:#{model.class}:#{model.id}"
end
#
# Move the file to another store
#
# new_store: Enum (Store::LOCAL, Store::REMOTE)
#
# Performs the move without taking the exclusive lease itself. Returns the
# current file after a successful move, or nil when there is nothing to do
# (already in new_store, or no file). On any StandardError the newly stored
# file is deleted, the previous store and file are restored, and the error is
# re-raised.
#
def unsafe_migrate!(new_store)
return unless object_store != new_store
return unless file
new_file = nil
# Remember the starting point so the rescue clause can roll back.
file_to_delete = file
from_object_store = object_store
self.object_store = new_store # changes the storage and file
# NOTE(review): caches the file when the storage is now file storage —
# presumably needed before storing it locally; confirm.
cache_stored_file! if file_storage?
# The :migrate callbacks receive the old file; delete_migrated_file is
# registered as `after :migrate`.
with_callbacks(:migrate, file_to_delete) do
with_callbacks(:store, file_to_delete) do # for #store_versions!
new_file = storage.store!(file)
persist_object_store!
self.file = new_file
end
end
file
rescue => e
# in case of failure delete new file
new_file.delete unless new_file.nil?
# revert back to the old file
self.object_store = from_object_store
self.file = file_to_delete
raise e
end
end
end
Loading
Loading
@@ -4,7 +4,7 @@ class PersonalFileUploader < FileUploader
options.storage_path
end
 
def self.base_dir(model)
def self.base_dir(model, _store = nil)
File.join(options.base_dir, model_path_segment(model))
end
 
Loading
Loading
@@ -14,6 +14,12 @@ class PersonalFileUploader < FileUploader
File.join(model.class.to_s.underscore, model.id.to_s)
end
 
def object_store
return Store::LOCAL unless model
super
end
# model_path_segment does not require a model to be passed, so we can always
# generate a path, even when there's no model.
def model_valid?
Loading
Loading
@@ -22,7 +28,14 @@ class PersonalFileUploader < FileUploader
 
# Revert-Override
def store_dir
File.join(base_dir, dynamic_segment)
store_dirs[object_store]
end
def store_dirs
{
Store::LOCAL => File.join(base_dir, dynamic_segment),
Store::REMOTE => File.join(self.class.model_path_segment(model), dynamic_segment)
}
end
 
private
Loading
Loading
Loading
Loading
@@ -24,8 +24,7 @@ module RecordsUploads
uploads.where(path: upload_path).delete_all
upload.destroy! if upload
 
self.upload = build_upload
upload.save!
self.upload = build_upload.tap(&:save!)
end
end
 
Loading
Loading
Loading
Loading
@@ -35,7 +35,7 @@
= link_to download_project_job_artifacts_path(@project, @build), rel: 'nofollow', download: '', class: 'btn btn-sm btn-default' do
Download
 
- if @build.artifacts_metadata?
- if @build.browsable_artifacts?
= link_to browse_project_job_artifacts_path(@project, @build), class: 'btn btn-sm btn-default' do
Browse
 
Loading
Loading
Loading
Loading
@@ -39,6 +39,10 @@
- github_importer:github_import_stage_import_pull_requests
- github_importer:github_import_stage_import_repository
 
- object_storage_upload
- object_storage:object_storage_background_move
- object_storage:object_storage_migrate_uploads
- pipeline_cache:expire_job_cache
- pipeline_cache:expire_pipeline_cache
- pipeline_creation:create_pipeline
Loading
Loading
# Concern for setting Sidekiq settings for the various GitLab ObjectStorage workers.
module ObjectStorageQueue
extend ActiveSupport::Concern
included do
# Including workers get their queue placed in the `object_storage` namespace.
queue_namespace :object_storage
end
end
module ObjectStorage
class BackgroundMoveWorker
include ApplicationWorker
include ObjectStorageQueue
sidekiq_options retry: 5
# Moves one file to remote object storage.
#
# uploader_class_name - name of the uploader class handling the file.
# subject_class_name  - name of the class of the record owning the file.
# file_field          - mount point (converted to a Symbol), may be nil.
# subject_id          - id of the owning record.
#
# Does nothing unless the uploader class includes ObjectStorage::Concern and
# has both object storage and background upload enabled.
def perform(uploader_class_name, subject_class_name, file_field, subject_id)
  uploader_class = uploader_class_name.constantize
  subject_class = subject_class_name.constantize

  eligible = uploader_class < ObjectStorage::Concern &&
    uploader_class.object_store_enabled? &&
    uploader_class.background_upload_enabled?
  return unless eligible

  record = subject_class.find(subject_id)
  build_uploader(record, file_field&.to_sym).migrate!(ObjectStorage::Store::REMOTE)
end
# Returns the uploader for `subject`: Upload records build their own
# uploader, any other record is asked for the uploader mounted at
# `mount_point`.
def build_uploader(subject, mount_point)
  return subject.build_uploader(mount_point) if subject.is_a?(Upload)

  subject.send(mount_point) # rubocop:disable GitlabSecurity/PublicSend
end
end
end
# frozen_string_literal: true
# rubocop:disable Metrics/LineLength
# rubocop:disable Style/Documentation
module ObjectStorage
class MigrateUploadsWorker
include ApplicationWorker
include ObjectStorageQueue
SanityCheckError = Class.new(StandardError)
# Upload model declared inside the worker's own namespace.
class Upload < ActiveRecord::Base
# Upper limit for foreground checksum processing
CHECKSUM_THRESHOLD = 100.megabytes
belongs_to :model, polymorphic: true # rubocop:disable Cop/PolymorphicAssociations
validates :size, presence: true
validates :path, presence: true
validates :model, presence: true
validates :uploader, presence: true
# Files up to CHECKSUM_THRESHOLD are checksummed before saving; anything
# still lacking a checksum after commit is handed to UploadChecksumWorker.
before_save :calculate_checksum!, if: :foreground_checksummable?
after_commit :schedule_checksum, if: :checksummable?
# A nil store is treated as local.
scope :stored_locally, -> { where(store: [nil, ObjectStorage::Store::LOCAL]) }
scope :stored_remotely, -> { where(store: ObjectStorage::Store::REMOTE) }
# SHA256 hex digest of the file at `path`.
def self.hexdigest(path)
Digest::SHA256.file(path).hexdigest
end
# Absolute location of a locally stored file: `path` itself when it already
# starts with '/', otherwise whatever the uploader class resolves it to.
# Raises ObjectStorage::RemoteStoreError for remotely stored uploads.
def absolute_path
raise ObjectStorage::RemoteStoreError, "Remote object has no absolute path." unless local?
return path unless relative_path?
uploader_class.absolute_path(self)
end
# Recomputes the checksum. It is cleared first because checksummable?
# requires a nil checksum; it stays nil when the file is not checksummable.
def calculate_checksum!
self.checksum = nil
return unless checksummable?
self.checksum = self.class.hexdigest(absolute_path)
end
# Instantiates this upload's uploader for its model and the given mount
# point, attaches this record to it and retrieves the file by identifier.
def build_uploader(mounted_as = nil)
uploader_class.new(model, mounted_as).tap do |uploader|
uploader.upload = self
uploader.retrieve_from_store!(identifier)
end
end
# Whether the file exists on disk. Goes through absolute_path, so it raises
# for remotely stored uploads.
def exist?
File.exist?(absolute_path)
end
# Stored locally; a nil store counts as local.
def local?
return true if store.nil?
store == ObjectStorage::Store::LOCAL
end
private
# No checksum yet, stored locally, and the file is present on disk.
def checksummable?
checksum.nil? && local? && exist?
end
# Checksummable and no larger than CHECKSUM_THRESHOLD.
def foreground_checksummable?
checksummable? && size <= CHECKSUM_THRESHOLD
end
def schedule_checksum
UploadChecksumWorker.perform_async(id)
end
# A path is relative unless it starts with '/'.
def relative_path?
!path.start_with?('/')
end
# Base name of `path`.
def identifier
File.basename(path)
end
# Uploader class looked up from the name stored in `uploader`.
def uploader_class
Object.const_get(uploader)
end
end
# Outcome of migrating a single upload: the upload itself plus the error that
# was raised, if any.
class MigrationResult
  attr_accessor :error
  attr_reader :upload

  # upload - the upload that was migrated.
  # error  - the exception raised during migration (default: nil).
  def initialize(upload, error = nil)
    @upload = upload
    @error = error
  end

  # A result without an error is a success.
  def success?
    error.nil?
  end

  # One-line description used in the migration report.
  def to_s
    return "Migration successful." if success?

    "Error while migrating #{upload.id}: #{error.message}"
  end
end
# Logging and error reporting for a batch of migration results.
module Report
  # Raised by #report! when at least one migration failed. Carries the
  # individual errors and exposes their messages, one per line.
  class MigrationFailures < StandardError
    attr_reader :errors

    def initialize(errors)
      @errors = errors
    end

    def message
      errors.map(&:message).join("\n")
    end
  end

  # Logs a summary of the results and raises when any of them failed.
  #
  # results - objects responding to success?, error and to_s.
  #
  # Raises MigrationFailures wrapping the errors of all failed results.
  def report!(results)
    succeeded, failed = results.partition(&:success?)

    Rails.logger.info header(succeeded, failed)
    Rails.logger.warn failures(failed)

    raise MigrationFailures.new(failed.map(&:error)) if failed.any?
  end

  # Summary line: how many of the processed files were migrated.
  def header(success, failures)
    "Migrated #{success.count}/#{success.count + failures.count} files."
  end

  # One tab-indented line per failed result.
  def failures(failures)
    # Double quotes are required here: '\n' would join the entries with a
    # literal backslash followed by "n" instead of a line break.
    failures.map { |f| "\t#{f}" }.join("\n")
  end
end
include Report
# Validates the uploads with sanity_check! and then enqueues a job that
# migrates them (by id) to `to_store`. Nothing is enqueued when the check
# raises.
def self.enqueue!(uploads, mounted_as, to_store)
sanity_check!(uploads, mounted_as)
perform_async(uploads.ids, mounted_as, to_store)
end
# We need to be sure all the uploads are for the same uploader and model type
# and that the mount point exists if provided.
#
# uploads    - collection of uploads responding to first and map.
# mounted_as - mount point to verify on the model class, or nil to skip that check.
#
# Raises SanityCheckError when there are no uploads, when the uploads mix
# several uploaders or model types, or when the model class does not mount
# the uploader at `mounted_as`.
#
def self.sanity_check!(uploads, mounted_as)
  upload = uploads.first
  # Without this guard an empty set fails below with NoMethodError on nil,
  # which callers rescuing SanityCheckError would not catch.
  raise(SanityCheckError, "No uploads found") unless upload

  uploader_class = upload.uploader.constantize
  model_class = upload.model_type.constantize
  uploader_types = uploads.map(&:uploader).uniq
  model_types = uploads.map(&:model_type).uniq
  model_has_mount = mounted_as.nil? || model_class.uploaders[mounted_as] == uploader_class

  raise(SanityCheckError, "Multiple uploaders found: #{uploader_types}") unless uploader_types.count == 1
  raise(SanityCheckError, "Multiple model types found: #{model_types}") unless model_types.count == 1
  raise(SanityCheckError, "Mount point #{mounted_as} not found in #{model_class}.") unless model_has_mount
end
# Migrates the uploads with the given ids to `to_store` and reports the
# outcome.
#
# ids        - ids of the Upload records to migrate.
# mounted_as - mount point name (converted to a Symbol) or nil.
# to_store   - target store handed to each uploader's migrate!.
#
# report! raises when any migration failed. A SanityCheckError is only
# logged, so it does not propagate out of the job.
def perform(ids, mounted_as, to_store)
@mounted_as = mounted_as&.to_sym
@to_store = to_store
uploads = Upload.preload(:model).where(id: ids)
sanity_check!(uploads)
results = migrate(uploads)
report!(results)
rescue SanityCheckError => e
# do not retry: the job is insane
Rails.logger.warn "#{self.class}: Sanity check error (#{e.message})"
end
# Instance-level wrapper: runs the class-level check with this job's mount point.
def sanity_check!(uploads)
self.class.sanity_check!(uploads, @mounted_as)
end
# One uploader per upload, each built for this job's mount point.
def build_uploaders(uploads)
uploads.map { |upload| upload.build_uploader(@mounted_as) }
end
# Processes every upload and returns the process_uploader results in order.
def migrate(uploads)
build_uploaders(uploads).map(&method(:process_uploader))
end
# Migrates a single uploader to the job's target store.
#
# Returns a MigrationResult for the uploader's upload; an error raised by
# migrate! is captured on the result instead of being propagated.
def process_uploader(uploader)
  outcome = MigrationResult.new(uploader.upload)

  begin
    uploader.migrate!(@to_store)
  rescue StandardError => failure
    outcome.error = failure
  end

  outcome
end
end
end
# @Deprecated - remove once the `object_storage_upload` queue is empty
# The queue has been renamed `object_storage:object_storage_background_move`
#
class ObjectStorageUploadWorker
include ApplicationWorker
sidekiq_options retry: 5
# Moves one mounted file to remote object storage.
#
# uploader_class_name - name of the uploader class handling the file.
# subject_class_name  - name of the class of the record owning the file.
# file_field          - name of the mounted uploader on that record.
# subject_id          - id of the owning record.
#
# Does nothing unless the uploader class includes ObjectStorage::Concern and
# has both object storage and background upload enabled.
def perform(uploader_class_name, subject_class_name, file_field, subject_id)
  uploader_class = uploader_class_name.constantize
  subject_class = subject_class_name.constantize

  eligible = uploader_class < ObjectStorage::Concern &&
    uploader_class.object_store_enabled? &&
    uploader_class.background_upload_enabled?
  return unless eligible

  record = subject_class.find(subject_id)
  mounted_uploader = record.public_send(file_field) # rubocop:disable GitlabSecurity/PublicSend
  mounted_uploader.migrate!(ObjectStorage::Store::REMOTE)
end
end
---
title: Update CI/CD secret variables list to be dynamic and save without reloading
the page
merge_request: 4110
author:
type: added
---
title: Fix JavaScript bundle running on Cluster update/destroy pages
merge_request:
author:
type: fixed
---
title: Authorize project access with an external service
merge_request: 4675
author:
type: added
---
title: Add object storage support for LFS objects, CI artifacts, and uploads.
merge_request: 17358
author:
type: added
---
title: File uploads in remote storage now support project renaming.
merge_request: 4597
author:
type: fixed
Loading
Loading
@@ -145,18 +145,55 @@ production: &base
enabled: true
# The location where build artifacts are stored (default: shared/artifacts).
# path: shared/artifacts
# object_store:
# enabled: false
# remote_directory: artifacts # The bucket name
# background_upload: false # Temporary option to limit automatic upload (Default: true)
# proxy_download: false # Passthrough all downloads via GitLab instead of using Redirects to Object Storage
# connection:
# provider: AWS # Only AWS supported at the moment
# aws_access_key_id: AWS_ACCESS_KEY_ID
# aws_secret_access_key: AWS_SECRET_ACCESS_KEY
# region: eu-central-1
 
## Git LFS
lfs:
enabled: true
# The location where LFS objects are stored (default: shared/lfs-objects).
# storage_path: shared/lfs-objects
object_store:
enabled: false
remote_directory: lfs-objects # Bucket name
# background_upload: false # Temporary option to limit automatic upload (Default: true)
# proxy_download: false # Passthrough all downloads via GitLab instead of using Redirects to Object Storage
connection:
provider: AWS
aws_access_key_id: AWS_ACCESS_KEY_ID
aws_secret_access_key: AWS_SECRET_ACCESS_KEY
region: eu-central-1
# Use the following options to configure an AWS compatible host
# host: 'localhost' # default: s3.amazonaws.com
# endpoint: 'http://127.0.0.1:9000' # default: nil
# path_style: true # Use 'host/bucket_name/object' instead of 'bucket_name.host/object'
 
## Uploads (attachments, avatars, etc...)
uploads:
# The location where uploads objects are stored (default: public/).
# storage_path: public/
# base_dir: uploads/-/system
object_store:
enabled: false
# remote_directory: uploads # Bucket name
# background_upload: false # Temporary option to limit automatic upload (Default: true)
# proxy_download: false # Passthrough all downloads via GitLab instead of using Redirects to Object Storage
# connection:
# provider: AWS
# aws_access_key_id: AWS_ACCESS_KEY_ID
# aws_secret_access_key: AWS_SECRET_ACCESS_KEY
# region: eu-central-1
# host: 'localhost' # default: s3.amazonaws.com
# endpoint: 'http://127.0.0.1:9000' # default: nil
# path_style: true # Use 'host/bucket_name/object' instead of 'bucket_name.host/object'
 
## GitLab Pages
pages:
Loading
Loading
@@ -655,10 +692,39 @@ test:
enabled: true
lfs:
enabled: false
# The location where LFS objects are stored (default: shared/lfs-objects).
# storage_path: shared/lfs-objects
object_store:
enabled: false
remote_directory: lfs-objects # The bucket name
connection:
provider: AWS # Only AWS supported at the moment
aws_access_key_id: AWS_ACCESS_KEY_ID
aws_secret_access_key: AWS_SECRET_ACCESS_KEY
region: eu-central-1
artifacts:
path: tmp/tests/artifacts
enabled: true
# The location where build artifacts are stored (default: shared/artifacts).
# path: shared/artifacts
object_store:
enabled: false
remote_directory: artifacts # The bucket name
background_upload: false
connection:
provider: AWS # Only AWS supported at the moment
aws_access_key_id: AWS_ACCESS_KEY_ID
aws_secret_access_key: AWS_SECRET_ACCESS_KEY
region: eu-central-1
uploads:
storage_path: tmp/tests/public
object_store:
enabled: false
connection:
provider: AWS # Only AWS supported at the moment
aws_access_key_id: AWS_ACCESS_KEY_ID
aws_secret_access_key: AWS_SECRET_ACCESS_KEY
region: eu-central-1
gitlab:
host: localhost
port: 80
Loading
Loading
Loading
Loading
@@ -305,6 +305,13 @@ Settings.artifacts['storage_path'] = Settings.absolute(Settings.artifacts.values
# Settings.artifact['path'] is deprecated, use `storage_path` instead
Settings.artifacts['path'] = Settings.artifacts['storage_path']
Settings.artifacts['max_size'] ||= 100 # in megabytes
# Object storage defaults for artifacts: disabled, no remote directory,
# background upload on, proxied download off. Explicit settings are kept.
Settings.artifacts['object_store'] ||= Settingslogic.new({})
Settings.artifacts['object_store']['enabled'] = false if Settings.artifacts['object_store']['enabled'].nil?
Settings.artifacts['object_store']['remote_directory'] ||= nil
Settings.artifacts['object_store']['background_upload'] = true if Settings.artifacts['object_store']['background_upload'].nil?
Settings.artifacts['object_store']['proxy_download'] = false if Settings.artifacts['object_store']['proxy_download'].nil?
# Convert upload connection settings to use string keys, to make Fog happy
Settings.artifacts['object_store']['connection']&.deep_stringify_keys!
 
#
# Registry
Loading
Loading
@@ -340,6 +347,13 @@ Settings.pages['artifacts_server'] ||= Settings.pages['enabled'] if Settings.pa
Settings['lfs'] ||= Settingslogic.new({})
Settings.lfs['enabled'] = true if Settings.lfs['enabled'].nil?
Settings.lfs['storage_path'] = Settings.absolute(Settings.lfs['storage_path'] || File.join(Settings.shared['path'], "lfs-objects"))
# Object storage defaults for LFS: disabled, no remote directory, background
# upload on, proxied download off. Explicit settings are kept.
Settings.lfs['object_store'] ||= Settingslogic.new({})
Settings.lfs['object_store']['enabled'] = false if Settings.lfs['object_store']['enabled'].nil?
Settings.lfs['object_store']['remote_directory'] ||= nil
Settings.lfs['object_store']['background_upload'] = true if Settings.lfs['object_store']['background_upload'].nil?
Settings.lfs['object_store']['proxy_download'] = false if Settings.lfs['object_store']['proxy_download'].nil?
# Convert upload connection settings to use string keys, to make Fog happy
Settings.lfs['object_store']['connection']&.deep_stringify_keys!
 
#
# Uploads
Loading
Loading
@@ -347,6 +361,13 @@ Settings.lfs['storage_path'] = Settings.absolute(Settings.lfs['storage_path'] ||
Settings['uploads'] ||= Settingslogic.new({})
Settings.uploads['storage_path'] = Settings.absolute(Settings.uploads['storage_path'] || 'public')
Settings.uploads['base_dir'] = Settings.uploads['base_dir'] || 'uploads/-/system'
# Object storage defaults for uploads: disabled, remote directory 'uploads',
# background upload on, proxied download off. Explicit settings are kept.
Settings.uploads['object_store'] ||= Settingslogic.new({})
Settings.uploads['object_store']['enabled'] = false if Settings.uploads['object_store']['enabled'].nil?
Settings.uploads['object_store']['remote_directory'] ||= 'uploads'
Settings.uploads['object_store']['background_upload'] = true if Settings.uploads['object_store']['background_upload'].nil?
Settings.uploads['object_store']['proxy_download'] = false if Settings.uploads['object_store']['proxy_download'].nil?
# Convert upload connection settings to use string keys, to make Fog happy
Settings.uploads['object_store']['connection']&.deep_stringify_keys!
 
#
# Mattermost
Loading
Loading
#
# Monkey patching the https support for private urls
# See https://gitlab.com/gitlab-org/gitlab-ee/issues/4879
#
module Fog
module Storage
class GoogleXML
class File < Fog::Model
# Holds the replacement `url`; prepended below so it takes precedence over
# a `url` method defined on File itself.
module MonkeyPatch
# Returns the URL produced by the collection's `get_https_url` for this
# file's key; `expires` is passed through unchanged.
# NOTE(review): `requires :key` presumably raises when key is unset —
# confirm against Fog::Model.
def url(expires)
requires :key
collection.get_https_url(key, expires)
end
end
prepend MonkeyPatch
end
end
end
end
Loading
Loading
@@ -68,5 +68,7 @@
- [project_migrate_hashed_storage, 1]
- [storage_migrator, 1]
- [pages_domain_verification, 1]
- [object_storage_upload, 1]
- [object_storage, 1]
- [plugin, 1]
- [pipeline_background, 1]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment