Commit bc760627 authored by Douwe Maan, committed by Kamil Trzciński

Merge branch 'jej/lfs-object-storage' into 'master'

Can migrate LFS objects to S3 style object storage

Closes #2841

See merge request !2760
parent 5a69b51b
Showing changed files with 274 additions and 59 deletions
module SendFileUpload
  def send_upload(file_upload, send_params: {}, redirect_params: {}, attachment: nil)
    if attachment
      redirect_params[:query] = { "response-content-disposition" => "attachment;filename=#{attachment.inspect}" }
      send_params.merge!(filename: attachment, disposition: 'attachment')
    end

    if file_upload.file_storage?
      send_file file_upload.path, send_params
    else
      redirect_to file_upload.url(**redirect_params)
    end
  end
end
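The concern gives controllers a single entry point that either streams a file from local disk via send_file or redirects the client to the object store URL. A minimal usage sketch (the controller and its sample model are hypothetical; the real call sites follow below):

class Projects::SamplesController < Projects::ApplicationController
  include SendFileUpload

  def download
    # Works with any CarrierWave upload that responds to
    # file_storage?, #path and #url, such as an ObjectStoreUploader.
    send_upload(sample.file, attachment: sample.filename)
  end
end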
 class Projects::ArtifactsController < Projects::ApplicationController
   include ExtractsPath
   include RendersBlob
+  include SendFileUpload
 
   layout 'project'
   before_action :authorize_read_build!
@@ -10,11 +11,7 @@ class Projects::ArtifactsController < Projects::ApplicationController
   before_action :entry, only: [:file]
 
   def download
-    if artifacts_file.file_storage?
-      send_file artifacts_file.path, disposition: 'attachment'
-    else
-      redirect_to artifacts_file.url
-    end
+    send_upload(artifacts_file, attachment: artifacts_file.filename)
   end
 
   def browse
 class Projects::LfsStorageController < Projects::GitHttpClientController
   include LfsRequest
   include WorkhorseRequest
+  include SendFileUpload
 
   skip_before_action :verify_workhorse_api!, only: [:download, :upload_finalize]
 
@@ -11,7 +12,7 @@ class Projects::LfsStorageController < Projects::GitHttpClientController
       return
     end
 
-    send_file lfs_object.file.path, content_type: "application/octet-stream"
+    send_upload(lfs_object.file, send_params: { content_type: "application/octet-stream" })
   end
 
   def upload_authorize
@@ -2,6 +2,7 @@
 class Projects::RawController < Projects::ApplicationController
   include ExtractsPath
   include BlobHelper
+  include SendFileUpload
 
   before_action :require_non_empty_project
   before_action :assign_ref_vars
@@ -31,7 +32,7 @@ class Projects::RawController < Projects::ApplicationController
     lfs_object = find_lfs_object
 
     if lfs_object && lfs_object.project_allowed_access?(@project)
-      send_file lfs_object.file.path, filename: @blob.name, disposition: 'attachment'
+      send_upload(lfs_object.file, attachment: @blob.name)
     else
       render_404
     end
@@ -33,6 +33,7 @@ module Ci
     scope :with_artifacts, ->() { where.not(artifacts_file: [nil, '']) }
     scope :with_artifacts_not_expired, ->() { with_artifacts.where('artifacts_expire_at IS NULL OR artifacts_expire_at > ?', Time.now) }
     scope :with_expired_artifacts, ->() { with_artifacts.where('artifacts_expire_at < ?', Time.now) }
+    scope :with_artifacts_stored_locally, ->() { with_artifacts.where(artifacts_file_store: [nil, ArtifactUploader::LOCAL_STORE]) }
     scope :last_month, ->() { where('created_at > ?', Date.today - 1.month) }
     scope :manual_actions, ->() { where(when: :manual, status: COMPLETED_STATUSES + [:manual]) }
     scope :ref_protected, -> { where(protected: true) }
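The new scope treats a nil store as local, so builds created before the file_store column existed are still included. For example, in a Rails console:

# Builds whose artifacts still live on local disk
# (artifacts_file_store is nil or ArtifactUploader::LOCAL_STORE):
Ci::Build.with_artifacts_stored_locally.count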
@@ -4,6 +4,8 @@ class LfsObject < ActiveRecord::Base
 
   validates :oid, presence: true, uniqueness: true
 
+  scope :with_files_stored_locally, ->() { where(file_store: [nil, LfsObjectUploader::LOCAL_STORE]) }
+
   mount_uploader :file, LfsObjectUploader
 
   def storage_project(project)
 class ArtifactUploader < ObjectStoreUploader
   storage_options Gitlab.config.artifacts
 
-  def self.local_artifacts_store
+  def self.local_store_path
     Gitlab.config.artifacts.path
   end
 
   def self.artifacts_upload_path
-    File.join(self.local_artifacts_store, 'tmp/uploads/')
-  end
-
-  def store_dir
-    if file_storage?
-      default_local_path
-    else
-      default_path
-    end
-  end
-
-  def cache_dir
-    File.join(self.class.local_artifacts_store, 'tmp/cache')
-  end
-
-  def work_dir
-    File.join(self.class.local_artifacts_store, 'tmp/work')
+    File.join(self.local_store_path, 'tmp/uploads/')
   end
 
   private
 
-  def default_local_path
-    File.join(self.class.local_artifacts_store, default_path)
-  end
-
   def default_path
     File.join(subject.created_at.utc.strftime('%Y_%m'), subject.project_id.to_s, subject.id.to_s)
   end
-class LfsObjectUploader < GitlabUploader
-  storage :file
+class LfsObjectUploader < ObjectStoreUploader
+  storage_options Gitlab.config.lfs
+  after :store, :schedule_migration_to_object_storage
 
-  def store_dir
-    "#{Gitlab.config.lfs.storage_path}/#{model.oid[0, 2]}/#{model.oid[2, 2]}"
-  end
-
-  def cache_dir
-    "#{Gitlab.config.lfs.storage_path}/tmp/cache"
+  def self.local_store_path
+    Gitlab.config.lfs.storage_path
   end
 
   def filename
-    model.oid[4..-1]
+    subject.oid[4..-1]
   end
 
-  def work_dir
-    File.join(Gitlab.config.lfs.storage_path, 'tmp', 'work')
+  private
+
+  def default_path
+    "#{subject.oid[0, 2]}/#{subject.oid[2, 2]}"
   end
 end
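Concretely, the uploader splits the object's OID into two directory levels and keeps the rest as the filename. With a made-up OID:

oid = '91eff75a492a3ed0dfcb544d7f31326bc4014c8551849c192fd1e48d4dd2c897'
oid[0, 2]   # => "91"          first directory level
oid[2, 2]   # => "ef"          second directory level
oid[4..-1]  # => "f75a49..."   filename, stored under <store>/91/ef/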
@@ -20,6 +20,22 @@ class ObjectStoreUploader < GitlabUploader
     def object_store_enabled?
       object_store_options&.enabled
     end
 
+    def background_upload_enabled?
+      object_store_options&.background_upload
+    end
+
+    def object_store_credentials
+      @object_store_credentials ||= object_store_options&.connection&.to_hash&.deep_symbolize_keys
+    end
+
+    def object_store_directory
+      object_store_options&.remote_directory
+    end
+
+    def local_store_path
+      raise NotImplementedError
+    end
   end
 
   attr_reader :subject, :field
@@ -38,6 +54,14 @@ class ObjectStoreUploader < GitlabUploader
     subject.public_send(:"#{field}_store=", value)
   end
 
+  def store_dir
+    if file_storage?
+      default_local_path
+    else
+      default_path
+    end
+  end
+
   def use_file
     if file_storage?
       return yield path
@@ -85,6 +109,12 @@ class ObjectStoreUploader < GitlabUploader
     end
   end
 
+  def schedule_migration_to_object_storage(new_file)
+    if self.class.object_store_enabled? && licensed? && file_storage?
+      ObjectStorageUploadWorker.perform_async(self.class.name, subject.class.name, field, subject.id)
+    end
+  end
+
   def fog_directory
     self.class.object_store_options.remote_directory
   end
@@ -109,7 +139,27 @@ class ObjectStoreUploader < GitlabUploader
   def verify_license!(new_file)
     return if file_storage?
 
-    raise 'Object Storage feature is missing' unless subject.project.feature_available?(:object_storage)
+    raise 'Object Storage feature is missing' unless licensed?
   end
 
+  def exists?
+    file.try(:exists?)
+  end
+
+  def cache_dir
+    File.join(self.class.local_store_path, 'tmp/cache')
+  end
+
+  # Override this if you don't want to save local files by default to the Rails.root directory
+  def work_dir
+    # Default path set by CarrierWave:
+    # https://github.com/carrierwaveuploader/carrierwave/blob/v1.1.0/lib/carrierwave/uploader/cache.rb#L182
+    # CarrierWave.tmp_path
+    File.join(self.class.local_store_path, 'tmp/work')
+  end
+
+  def licensed?
+    License.feature_available?(:object_storage)
+  end
 
   private
@@ -118,6 +168,14 @@ class ObjectStoreUploader < GitlabUploader
     self.object_store = LOCAL_STORE unless self.object_store
   end
 
+  def default_local_path
+    File.join(self.class.local_store_path, default_path)
+  end
+
+  def default_path
+    raise NotImplementedError
+  end
+
   def storage
     @storage ||=
       if object_store == REMOTE_STORE
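With store_dir, cache_dir, work_dir and the migration hooks now living in the base class, a concrete uploader only has to fill in the two NotImplementedError hooks. A sketch of that contract (PagesUploader and its config section are hypothetical, not part of this MR):

class PagesUploader < ObjectStoreUploader
  storage_options Gitlab.config.pages # assumes an object_store section like artifacts/lfs

  def self.local_store_path
    Gitlab.config.pages.path
  end

  private

  def default_path
    File.join(subject.project_id.to_s, subject.id.to_s)
  end
end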
class ObjectStorageUploadWorker
  include Sidekiq::Worker
  include DedicatedSidekiqQueue

  def perform(uploader_class_name, subject_class_name, file_field, subject_id)
    uploader_class = uploader_class_name.constantize
    subject_class = subject_class_name.constantize

    return unless uploader_class.object_store_enabled?
    return unless uploader_class.background_upload_enabled?

    subject = subject_class.find(subject_id)
    file = subject.public_send(file_field) # rubocop:disable GitlabSecurity/PublicSend

    return unless file.licensed?

    file.migrate!(uploader_class::REMOTE_STORE)
  end
end
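The worker receives class names and an id rather than live objects because Sidekiq arguments must be JSON-serializable. For an LFS object, the after-store hook in ObjectStoreUploader boils down to:

# Mirrors schedule_migration_to_object_storage above:
ObjectStorageUploadWorker.perform_async(
  'LfsObjectUploader', # uploader_class_name
  'LfsObject',         # subject_class_name
  :file,               # file_field (serialized to "file" by Sidekiq)
  lfs_object.id        # subject_id
)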
---
title: LFS files can be stored in remote object storage such as S3
merge_request: 2760
author:
type: added
@@ -147,7 +147,8 @@ production: &base
     # path: shared/artifacts
     # object_store:
     #   enabled: false
-    #   remote_directory: artifacts
+    #   remote_directory: artifacts # The bucket name
+    #   background_upload: false # Temporary option to limit automatic upload (Default: true)
     #   connection:
     #     provider: AWS # Only AWS supported at the moment
     #     aws_access_key_id: AWS_ACCESS_KEY_ID
@@ -159,6 +160,19 @@ production: &base
     enabled: true
     # The location where LFS objects are stored (default: shared/lfs-objects).
     # storage_path: shared/lfs-objects
+    object_store:
+      enabled: false
+      remote_directory: lfs-objects # Bucket name
+      # background_upload: false # Temporary option to limit automatic upload (Default: true)
+      connection:
+        provider: AWS
+        aws_access_key_id: AWS_ACCESS_KEY_ID
+        aws_secret_access_key: AWS_SECRET_ACCESS_KEY
+        region: eu-central-1
+        # Use the following options to configure an AWS compatible host
+        # host: 'localhost' # default: s3.amazonaws.com
+        # endpoint: 'http://127.0.0.1:9000' # default: nil
+        # path_style: true # Use 'host/bucket_name/object' instead of 'bucket_name.host/object'
 
   ## GitLab Pages
   pages:
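Roughly, the connection settings above end up in a Fog connection once CarrierWave switches to its fog storage. The sketch below is an approximation for orientation, not code from this MR:

require 'fog/aws'

connection = Fog::Storage.new(
  provider:              'AWS',
  aws_access_key_id:     'AWS_ACCESS_KEY_ID',
  aws_secret_access_key: 'AWS_SECRET_ACCESS_KEY',
  region:                'eu-central-1'
  # host/endpoint/path_style would go here for an S3-compatible server
)
bucket = connection.directories.get('lfs-objects') # remote_directory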
@@ -655,6 +669,28 @@ test:
     enabled: true
   lfs:
     enabled: false
+    # The location where LFS objects are stored (default: shared/lfs-objects).
+    # storage_path: shared/lfs-objects
+    object_store:
+      enabled: false
+      remote_directory: lfs-objects # The bucket name
+      connection:
+        provider: AWS # Only AWS supported at the moment
+        aws_access_key_id: AWS_ACCESS_KEY_ID
+        aws_secret_access_key: AWS_SECRET_ACCESS_KEY
+        region: eu-central-1
+  artifacts:
+    enabled: true
+    # The location where build artifacts are stored (default: shared/artifacts).
+    # path: shared/artifacts
+    object_store:
+      enabled: false
+      remote_directory: artifacts # The bucket name
+      connection:
+        provider: AWS # Only AWS supported at the moment
+        aws_access_key_id: AWS_ACCESS_KEY_ID
+        aws_secret_access_key: AWS_SECRET_ACCESS_KEY
+        region: eu-central-1
   gitlab:
     host: localhost
     port: 80
@@ -302,8 +302,9 @@ Settings.artifacts['max_size'] ||= 100 # in megabytes
 Settings.artifacts['object_store'] ||= Settingslogic.new({})
 Settings.artifacts['object_store']['enabled'] = false if Settings.artifacts['object_store']['enabled'].nil?
 Settings.artifacts['object_store']['remote_directory'] ||= nil
-# Convert upload connection settings to use symbol keys, to make Fog happy
-Settings.artifacts['object_store']['connection']&.deep_symbolize_keys!
+Settings.artifacts['object_store']['background_upload'] = true if Settings.artifacts['object_store']['background_upload'].nil?
+# Convert upload connection settings to use string keys, to make Fog happy
+Settings.artifacts['object_store']['connection']&.deep_stringify_keys!
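deep_stringify_keys! comes from ActiveSupport and converts the nested connection hash in place, for example:

conn = { provider: 'AWS', aws_access_key_id: 'KEY' }
conn.deep_stringify_keys!
# => { "provider" => "AWS", "aws_access_key_id" => "KEY" }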
 
#
# Registry
@@ -339,6 +340,13 @@ Settings['lfs'] ||= Settingslogic.new({})
 Settings.lfs['enabled'] = true if Settings.lfs['enabled'].nil?
 Settings.lfs['storage_path'] = Settings.absolute(Settings.lfs['storage_path'] || File.join(Settings.shared['path'], "lfs-objects"))
 
+Settings.lfs['object_store'] ||= Settingslogic.new({})
+Settings.lfs['object_store']['enabled'] = false if Settings.lfs['object_store']['enabled'].nil?
+Settings.lfs['object_store']['remote_directory'] ||= nil
+Settings.lfs['object_store']['background_upload'] = true if Settings.lfs['object_store']['background_upload'].nil?
+# Convert upload connection settings to use string keys, to make Fog happy
+Settings.lfs['object_store']['connection']&.deep_stringify_keys!
+
 #
 # Mattermost
 #
@@ -63,3 +63,4 @@
   - [update_user_activity, 1]
   - [propagate_service_template, 1]
   - [background_migration, 1]
+  - [object_storage_upload, 1]
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
class AddFileStoreToLfsObjects < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  # Set this constant to true if this migration requires downtime.
  DOWNTIME = false

  # When a migration requires downtime you **must** uncomment the following
  # constant and define a short and easy to understand explanation as to why the
  # migration requires downtime.
  # DOWNTIME_REASON = ''

  # When using the methods "add_concurrent_index", "remove_concurrent_index" or
  # "add_column_with_default" you must disable the use of transactions
  # as these methods can not run in an existing transaction.
  # When using "add_concurrent_index" or "remove_concurrent_index" methods make sure
  # that either of them is the _only_ method called in the migration,
  # any other changes should go in a separate migration.
  # This ensures that upon failure _only_ the index creation or removing fails
  # and can be retried or reverted easily.
  #
  # To disable transactions uncomment the following line and remove these
  # comments:
  disable_ddl_transaction!

  def up
    add_column(:lfs_objects, :file_store, :integer)
  end

  def down
    remove_column(:lfs_objects, :file_store)
  end
end
@@ -741,6 +741,7 @@ ActiveRecord::Schema.define(version: 20170905112933) do
     t.datetime "created_at"
     t.datetime "updated_at"
     t.string "file"
+    t.integer "file_store"
   end
 
   add_index "lfs_objects", ["oid"], name: "index_lfs_objects_on_oid", unique: true, using: :btree
@@ -3,7 +3,7 @@ require 'backup/files'
 module Backup
   class Artifacts < Files
     def initialize
-      super('artifacts', ArtifactUploader.local_artifacts_store)
+      super('artifacts', ArtifactUploader.local_store_path)
     end
 
     def create_files_dir
@@ -2,10 +2,12 @@ desc "GitLab | Migrate files for artifacts to comply with new storage format"
 namespace :gitlab do
   namespace :artifacts do
     task migrate: :environment do
-      puts 'Artifacts'.color(:yellow)
-      Ci::Build.joins(:project).with_artifacts
-        .where(artifacts_file_store: ArtifactUploader::LOCAL_STORE)
-        .find_each(batch_size: 100) do |issue|
+      logger = Logger.new(STDOUT)
+      logger.info('Starting transfer of artifacts')
+
+      Ci::Build.joins(:project)
+        .with_artifacts_stored_locally
+        .find_each(batch_size: 10) do |build|
         begin
           build.artifacts_file.migrate!(ArtifactUploader::REMOTE_STORE)
           build.artifacts_metadata.migrate!(ArtifactUploader::REMOTE_STORE)
require 'logger'

desc "GitLab | Migrate LFS objects to remote storage"
namespace :gitlab do
  namespace :lfs do
    task migrate: :environment do
      logger = Logger.new(STDOUT)
      logger.info('Starting transfer of LFS files to object storage')

      LfsObject.with_files_stored_locally
        .find_each(batch_size: 10) do |lfs_object|
        begin
          lfs_object.file.migrate!(LfsObjectUploader::REMOTE_STORE)

          logger.info("Transferred LFS object #{lfs_object.oid} of size #{lfs_object.size.to_i.bytes} to object storage")
        rescue => e
          logger.error("Failed to transfer LFS object #{lfs_object.oid} with error: #{e.message}")
        end
      end
    end
  end
end
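Both migrate tasks can be re-run safely: anything already transferred drops out of the with_files_stored_locally / with_artifacts_stored_locally scopes. They are invoked like any other rake task, e.g. bundle exec rake gitlab:lfs:migrate or bundle exec rake gitlab:artifacts:migrate.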
@@ -22,7 +22,7 @@ describe Projects::ArtifactsController do
 
   describe 'GET download' do
     it 'sends the artifacts file' do
-      expect(controller).to receive(:send_file).with(job.artifacts_file.path, disposition: 'attachment').and_call_original
+      expect(controller).to receive(:send_file).with(job.artifacts_file.path, hash_including(disposition: 'attachment')).and_call_original
 
       get :download, namespace_id: project.namespace, project_id: project, job_id: job
     end
@@ -66,19 +66,52 @@ describe Projects::ArtifactsController do
 
   describe 'GET raw' do
     context 'when the file exists' do
-      it 'serves the file using workhorse' do
-        get :raw, namespace_id: project.namespace, project_id: project, job_id: job, path: 'ci_artifacts.txt'
+      let(:path) { 'ci_artifacts.txt' }
+      let(:job) { create(:ci_build, :success, :artifacts, pipeline: pipeline, artifacts_file_store: store, artifacts_metadata_store: store) }
 
-        send_data = response.headers[Gitlab::Workhorse::SEND_DATA_HEADER]
+      shared_examples 'a valid file' do
+        it 'serves the file using workhorse' do
+          subject
 
-        expect(send_data).to start_with('artifacts-entry:')
+          expect(send_data).to start_with('artifacts-entry:')
 
-        base64_params = send_data.sub(/\Aartifacts\-entry:/, '')
-        params = JSON.parse(Base64.urlsafe_decode64(base64_params))
+          expect(params.keys).to eq(%w(Archive Entry))
+          expect(params['Archive']).to start_with(archive_path)
+          # On object storage, the URL can end with a query string
+          expect(params['Archive']).to match(/build_artifacts.zip(\?[^?]+)?$/)
+          expect(params['Entry']).to eq(Base64.encode64('ci_artifacts.txt'))
+        end
 
-        expect(params.keys).to eq(%w(Archive Entry))
-        expect(params['Archive']).to end_with('build_artifacts.zip')
-        expect(params['Entry']).to eq(Base64.encode64('ci_artifacts.txt'))
+        def send_data
+          response.headers[Gitlab::Workhorse::SEND_DATA_HEADER]
+        end
+
+        def params
+          @params ||= begin
+            base64_params = send_data.sub(/\Aartifacts\-entry:/, '')
+            JSON.parse(Base64.urlsafe_decode64(base64_params))
+          end
+        end
+      end
+
+      context 'when using local file storage' do
+        it_behaves_like 'a valid file' do
+          let(:store) { ObjectStoreUploader::LOCAL_STORE }
+          let(:archive_path) { ArtifactUploader.local_store_path }
+        end
+      end
+
+      context 'when using remote file storage' do
+        before do
+          stub_artifacts_object_storage
+        end
+
+        it_behaves_like 'a valid file' do
+          let!(:artifact) { create(:ci_job_artifact, :archive, :remote_store, job: job) }
+          let!(:job) { create(:ci_build, :success, pipeline: pipeline) }
+          let(:store) { ObjectStorage::Store::REMOTE }
+          let(:archive_path) { 'https://' }
+        end
+      end
     end
   end