Skip to content
Snippets Groups Projects
Commit e8f49b4b authored by Francisco Javier López's avatar Francisco Javier López Committed by Douwe Maan
Browse files

Support LFS objects when creating a project by import

parent 0dd7563b
No related branches found
No related tags found
No related merge requests found
Showing
with 496 additions and 8 deletions
Loading
Loading
@@ -956,6 +956,10 @@ class Repository
blob_data_at(sha, path)
end
 
# Returns whatever `blob_data_at` yields for the `.lfsconfig` file at the
# given revision.
#
# sha - The revision to read the file from (e.g. a commit SHA or 'HEAD').
def lfsconfig_for(sha)
blob_data_at(sha, '.lfsconfig')
end
# Delegates to `raw_repository.fetch_ref`, handing over the raw repository
# of `source_repository` together with the given refs.
#
# source_repository - An object responding to `raw_repository`.
# source_ref        - Forwarded unchanged as `source_ref:`.
# target_ref        - Forwarded unchanged as `target_ref:`.
def fetch_ref(source_repository, source_ref:, target_ref:)
raw_repository.fetch_ref(source_repository.raw_repository, source_ref: source_ref, target_ref: target_ref)
end
Loading
Loading
Loading
Loading
@@ -3,7 +3,7 @@ class BaseService
 
attr_accessor :project, :current_user, :params
 
def initialize(project, user, params = {})
def initialize(project, user = nil, params = {})
@project, @current_user, @params = project, user, params.dup
end
 
Loading
Loading
Loading
Loading
@@ -17,6 +17,8 @@ module Projects
def execute
add_repository_to_project
 
download_lfs_objects
import_data
 
success
Loading
Loading
@@ -37,7 +39,7 @@ module Projects
 
# We should skip the repository for a GitHub import or GitLab project import,
# because these importers fetch the project repositories for us.
return if has_importer? && importer_class.try(:imports_repository?)
return if importer_imports_repository?
 
if unknown_url?
# In this case, we only want to import issues, not a repository.
Loading
Loading
@@ -73,6 +75,27 @@ module Projects
end
end
 
# Best-effort download of the LFS objects referenced by the imported
# repository.
#
# Nothing is downloaded when:
# - the import URL is unknown (in this case we only want to import issues),
# - the importer imports the repository itself (it then has to implement
#   its own LFS download), or
# - LFS is disabled for the project.
#
# Right now, to avoid aborting the import process, any StandardError is
# logged and swallowed.
def download_lfs_objects
  return if unknown_url? || importer_imports_repository? || !project.lfs_enabled?

  lfs_links = Projects::LfsPointers::LfsImportService.new(project).execute
  downloader = Projects::LfsPointers::LfsDownloadService.new(project)

  lfs_links.each { |oid, link| downloader.execute(oid, link) }
rescue StandardError => e
  Rails.logger.error("The Lfs import process failed. #{e.message}")
end
def import_data
return unless has_importer?
 
Loading
Loading
@@ -98,5 +121,9 @@ module Projects
# Returns true when the project's import URL equals
# `Project::UNKNOWN_IMPORT_URL`.
def unknown_url?
project.import_url == Project::UNKNOWN_IMPORT_URL
end
# Truthy when the project has an importer and that importer class answers
# truthily to `imports_repository?`.
#
# `try` is used rather than a direct call, presumably so that importer
# classes which do not define `imports_repository?` simply yield nil.
def importer_imports_repository?
has_importer? && importer_class.try(:imports_repository?)
end
end
end
# This service lists the download links from a remote source based on the
# oids provided
module Projects
  module LfsPointers
    class LfsDownloadLinkListService < BaseService
      DOWNLOAD_ACTION = 'download'.freeze

      DownloadLinksError = Class.new(StandardError)
      DownloadLinkNotFound = Class.new(StandardError)

      attr_reader :remote_uri

      # project    - The project the LFS objects belong to.
      # remote_uri - The URI the download links request is POSTed to. Its
      #              user/password (if any) are reused for download links
      #              pointing to the same host.
      def initialize(project, remote_uri: nil)
        super(project)

        @remote_uri = remote_uri
      end

      # This method accepts one parameter:
      # - oids: hash of oids to query. The structure is { lfs_file_oid => lfs_file_size }
      #
      # Returns a hash with the structure { lfs_file_oid => download_link }.
      # The hash is empty when there is no project, LFS is disabled, no
      # remote URI was given or there are no oids to query.
      #
      # Raises DownloadLinksError when the remote answers with a
      # non-successful response.
      def execute(oids)
        return {} unless project&.lfs_enabled? && remote_uri && oids.present?

        get_download_links(oids)
      end

      private

      def get_download_links(oids)
        response = Gitlab::HTTP.post(remote_uri,
                                     body: request_body(oids),
                                     headers: headers)

        raise DownloadLinksError, response.message unless response.success?

        parse_response_links(response['objects'])
      end

      # Builds the { oid => link } hash out of the `objects` entries of the
      # response. Entries without a download link, or with a link that is not
      # a valid URI, are logged and skipped.
      def parse_response_links(objects_response)
        # A successful response may still come without an `objects` list;
        # treat that as "no links" instead of failing on nil.
        return {} unless objects_response.present?

        objects_response.each_with_object({}) do |entry, link_list|
          begin
            oid = entry['oid']
            link = entry.dig('actions', DOWNLOAD_ACTION, 'href')

            raise DownloadLinkNotFound unless link

            link_list[oid] = add_credentials(link)
          rescue DownloadLinkNotFound, URI::InvalidURIError
            Rails.logger.error("Link for Lfs Object with oid #{oid} not found or invalid.")
          end
        end
      end

      # JSON body of the request: the operation plus one { oid, size } entry
      # per requested object.
      def request_body(oids)
        {
          operation: DOWNLOAD_ACTION,
          objects: oids.map { |oid, size| { oid: oid, size: size } }
        }.to_json
      end

      def headers
        {
          'Accept' => LfsRequest::CONTENT_TYPE,
          'Content-Type' => LfsRequest::CONTENT_TYPE
        }.freeze
      end

      # Returns the link as a String, carrying the credentials of
      # `remote_uri` when `should_add_credentials?` allows it.
      def add_credentials(link)
        uri = URI.parse(link)

        if should_add_credentials?(uri)
          uri.user = remote_uri.user
          uri.password = remote_uri.password
        end

        uri.to_s
      end

      # The download link can be a local url or an object storage url
      # If the download link has the same host as the import url then
      # we add the same credentials because we may need them
      def should_add_credentials?(link_uri)
        url_credentials? && link_uri.host == remote_uri.host
      end

      def url_credentials?
        remote_uri.user.present? || remote_uri.password.present?
      end
    end
  end
end
# This service downloads and links lfs objects from a remote URL
module Projects
  module LfsPointers
    class LfsDownloadService < BaseService
      # Downloads the LFS object identified by `oid` from `url` and links it
      # to the project.
      #
      # oid - The oid of the LFS object.
      # url - The URL to download the object from. Credentials embedded in it
      #       are sent as HTTP basic authentication.
      #
      # Nothing is done when there is no project, LFS is disabled, `oid` or
      # `url` are blank, or an LfsObject with that oid already exists.
      # Any StandardError is logged instead of raised.
      def execute(oid, url)
        return unless project&.lfs_enabled? && oid.present? && url.present?

        return if LfsObject.exists?(oid: oid)

        sanitized_uri = Gitlab::UrlSanitizer.new(url)

        with_tmp_file(oid) do |file|
          size = download_and_save_file(file, sanitized_uri)
          lfs_object = LfsObject.new(oid: oid, size: size, file: file)

          project.all_lfs_objects << lfs_object
        end
      rescue StandardError => e
        # `sanitized_uri` is still nil when the failure happened before it was
        # assigned (e.g. in the existence check or while building the
        # sanitizer), so it has to be dereferenced safely here; otherwise the
        # handler itself would raise and the error would escape.
        Rails.logger.error("LFS file with oid #{oid} couldn't be downloaded from #{sanitized_uri&.sanitized_url}: #{e.message}")
      end

      private

      # Copies the remote content into `file` and returns the number of bytes
      # copied. The block form of `open` makes sure the remote stream is
      # closed once the copy finishes or fails.
      def download_and_save_file(file, sanitized_uri)
        open(sanitized_uri.sanitized_url, headers(sanitized_uri)) do |remote|
          IO.copy_stream(remote, file)
        end
      end

      # Options for `open`: basic authentication is only set when the URL
      # carried a user or a password.
      def headers(sanitized_uri)
        {}.tap do |headers|
          credentials = sanitized_uri.credentials

          if credentials[:user].present? || credentials[:password].present?
            # Using authentication headers in the request
            headers[:http_basic_authentication] = [credentials[:user], credentials[:password]]
          end
        end
      end

      # Yields a writable file named after the oid inside the temporary
      # download directory, creating the directory first when needed.
      def with_tmp_file(oid)
        create_tmp_storage_dir

        File.open(File.join(tmp_storage_dir, oid), 'w') { |file| yield file }
      end

      def create_tmp_storage_dir
        FileUtils.makedirs(tmp_storage_dir) unless Dir.exist?(tmp_storage_dir)
      end

      def tmp_storage_dir
        @tmp_storage_dir ||= File.join(storage_dir, 'tmp', 'download')
      end

      def storage_dir
        @storage_dir ||= Gitlab.config.lfs.storage_path
      end
    end
  end
end
# This service manages the whole workflow of discovering the LFS files in a
# repository, linking the ones that already exist to the project and listing
# the download links of the non existent ones.
module Projects
  module LfsPointers
    class LfsImportService < BaseService
      include Gitlab::Utils::StrongMemoize

      HEAD_REV = 'HEAD'.freeze
      LFS_ENDPOINT_PATTERN = /^\t?url\s*=\s*(.+)$/.freeze
      LFS_BATCH_API_ENDPOINT = '/info/lfs/objects/batch'.freeze

      LfsImportError = Class.new(StandardError)

      # Returns the result of LfsDownloadLinkListService for the LFS objects
      # that could not be linked, or an empty hash when there is no project,
      # LFS is disabled, or the repository uses an external LFS endpoint.
      #
      # Raises LfsImportError when the download links can't be retrieved or
      # when one of the involved URLs is invalid.
      def execute
        return {} unless project&.lfs_enabled?
        return get_download_links unless external_lfs_endpoint?

        # If the endpoint host is different from the import_url it means
        # that the repo is using a third party service for storing the LFS files.
        # In this case, we have to disable lfs in the project
        disable_lfs!

        {}
      rescue LfsDownloadLinkListService::DownloadLinksError => e
        raise LfsImportError, "The LFS objects download list couldn't be imported. Error: #{e.message}"
      end

      private

      # Truthy when .lfsconfig declares an endpoint whose host differs from
      # the host of the import URL.
      def external_lfs_endpoint?
        endpoint = lfsconfig_endpoint_uri

        endpoint && endpoint.host != import_uri.host
      end

      def disable_lfs!
        project.update(lfs_enabled: false)
      end

      def get_download_links
        repository_lfs = LfsListService.new(project).execute
        already_linked = LfsLinkService.new(project).execute(repository_lfs.keys)

        # Only the oids that could not be linked still need a download link
        pending_lfs = repository_lfs.except(*already_linked)

        LfsDownloadLinkListService
          .new(project, remote_uri: current_endpoint_uri)
          .execute(pending_lfs)
      end

      # Endpoint declared in the .lfsconfig file at HEAD, or nil when the
      # file is missing or has no matching `url` entry. Missing credentials
      # are taken from the import URL.
      def lfsconfig_endpoint_uri
        strong_memoize(:lfsconfig_endpoint_uri) do
          url_match = project.repository.lfsconfig_for(HEAD_REV)&.match(LFS_ENDPOINT_PATTERN)
          next unless url_match

          endpoint = URI.parse(url_match[1])
          endpoint.user ||= import_uri.user
          endpoint.password ||= import_uri.password
          endpoint
        end
      rescue URI::InvalidURIError
        raise LfsImportError, 'Invalid URL in .lfsconfig file'
      end

      def import_uri
        return @import_uri if @import_uri

        @import_uri = URI.parse(project.import_url)
      rescue URI::InvalidURIError
        raise LfsImportError, 'Invalid project import URL'
      end

      def current_endpoint_uri
        lfsconfig_endpoint_uri || default_endpoint_uri
      end

      # Batch endpoint derived from the import URL. The repository path must
      # end with '.git', so the suffix is appended when it is missing.
      def default_endpoint_uri
        @default_endpoint_uri ||= import_uri.dup.tap do |uri|
          repository_path = uri.path.gsub(%r(/$), '')
          repository_path += '.git' unless repository_path.end_with?('.git')

          uri.path = repository_path + LFS_BATCH_API_ENDPOINT
        end
      end
    end
  end
end
# Given a list of oids, this service links the existent Lfs Objects to the project
module Projects
  module LfsPointers
    class LfsLinkService < BaseService
      # Accept an array of oids to link
      #
      # Returns an array with the oids of the LfsObjects that already exist
      # (all of them end up linked to the project). The array is empty when
      # there is no project or LFS is disabled, so every code path returns
      # the same type.
      def execute(oids)
        return [] unless project&.lfs_enabled?

        # Search and link existing LFS Object
        link_existing_lfs_objects(oids)
      end

      private

      def link_existing_lfs_objects(oids)
        existent_lfs_objects = LfsObject.where(oid: oids)

        return [] unless existent_lfs_objects.any?

        # Only add the objects the project is not linked to yet
        not_linked_lfs_objects = existent_lfs_objects.where.not(id: project.all_lfs_objects)
        project.all_lfs_objects << not_linked_lfs_objects

        existent_lfs_objects.pluck(:oid)
      end
    end
  end
end
# This service lists all existent Lfs objects in a repository
module Projects
  module LfsPointers
    class LfsListService < BaseService
      REV = 'HEAD'.freeze

      # Collects every LFS pointer reported by Gitlab::Git::LfsChanges for
      # REV and returns a hash with the structure
      # { lfs_file_oid => lfs_file_size }.
      #
      # An empty hash is returned when there is no project or LFS is disabled.
      def execute
        return {} unless project&.lfs_enabled?

        pointers = Gitlab::Git::LfsChanges.new(project.repository, REV).all_pointers
        pointers.map! { |blob| [blob.lfs_oid, blob.lfs_size] }
        pointers.to_h
      end
    end
  end
end
Loading
Loading
@@ -31,12 +31,14 @@
- github_importer:github_import_import_diff_note
- github_importer:github_import_import_issue
- github_importer:github_import_import_note
- github_importer:github_import_import_lfs_object
- github_importer:github_import_import_pull_request
- github_importer:github_import_refresh_import_jid
- github_importer:github_import_stage_finish_import
- github_importer:github_import_stage_import_base_data
- github_importer:github_import_stage_import_issues_and_diff_notes
- github_importer:github_import_stage_import_notes
- github_importer:github_import_stage_import_lfs_objects
- github_importer:github_import_stage_import_pull_requests
- github_importer:github_import_stage_import_repository
 
Loading
Loading
Loading
Loading
@@ -21,6 +21,7 @@ module Gitlab
STAGES = {
issues_and_diff_notes: Stage::ImportIssuesAndDiffNotesWorker,
notes: Stage::ImportNotesWorker,
lfs_objects: Stage::ImportLfsObjectsWorker,
finish: Stage::FinishImportWorker
}.freeze
 
Loading
Loading
# frozen_string_literal: true
module Gitlab
module GithubImport
# Worker declaring which representation, importer and counter are used for
# a single LFS object.
# NOTE(review): presumably the `ObjectImporter` mixin consumes the hooks
# below to run the import — confirm against that module.
class ImportLfsObjectWorker
include ObjectImporter
# Class used to represent the LFS object handled by this worker.
def representation_class
Representation::LfsObject
end
# Class that imports the LFS object.
def importer_class
Importer::LfsObjectImporter
end
# Name of the counter associated with this worker.
def counter_name
:github_importer_imported_lfs_objects
end
# Human readable description of the counter.
def counter_description
'The number of imported GitHub Lfs Objects'
end
end
end
end
# frozen_string_literal: true
module Gitlab
  module GithubImport
    module Stage
      # Stage worker that runs the LFS objects importer for a project and
      # then schedules the :finish stage.
      class ImportLfsObjectsWorker
        include ApplicationWorker
        include GithubImport::Queue
        include StageMethods

        # project_id - The ID handed to `find_project`; nothing is done when
        #              no project is found.
        def perform(project_id)
          project = find_project(project_id)
          return unless project

          import(project)
        end

        # project - An instance of Project.
        #
        # The importer is built without a client (nil is passed as its second
        # argument).
        def import(project)
          waiter = Importer::LfsObjectsImporter.new(project, nil).execute

          AdvanceStageWorker.perform_async(project.id, { waiter.key => waiter.jobs_remaining }, :finish)
        end
      end
    end
  end
end
Loading
Loading
@@ -18,7 +18,7 @@ module Gitlab
AdvanceStageWorker.perform_async(
project.id,
{ waiter.key => waiter.jobs_remaining },
:finish
:lfs_objects
)
end
end
Loading
Loading
---
title: Added support for LFS Download in the importing process
merge_request: 18871
author:
type: fixed
Loading
Loading
@@ -1543,7 +1543,7 @@ module Gitlab
end
end
 
def rev_list(including: [], excluding: [], objects: false, &block)
def rev_list(including: [], excluding: [], options: [], objects: false, &block)
args = ['rev-list']
 
args.push(*rev_list_param(including))
Loading
Loading
@@ -1556,6 +1556,10 @@ module Gitlab
 
args.push('--objects') if objects
 
if options.any?
args.push(*options)
end
run_git!(args, lazy_block: block)
end
 
Loading
Loading
Loading
Loading
@@ -38,7 +38,10 @@ module Gitlab
end
 
def all_objects(require_path: nil, &lazy_block)
get_objects(including: :all, require_path: require_path, &lazy_block)
get_objects(including: :all,
options: ["--filter=blob:limit=#{Gitlab::Git::Blob::LFS_POINTER_MAX_SIZE}"],
require_path: require_path,
&lazy_block)
end
 
# This methods returns an array of missed references
Loading
Loading
@@ -54,8 +57,8 @@ module Gitlab
repository.rev_list(args).split("\n")
end
 
def get_objects(including: [], excluding: [], require_path: nil)
opts = { including: including, excluding: excluding, objects: true }
def get_objects(including: [], excluding: [], options: [], require_path: nil)
opts = { including: including, excluding: excluding, options: options, objects: true }
 
repository.rev_list(opts) do |lazy_output|
objects = objects_from_output(lazy_output, require_path: require_path)
Loading
Loading
# frozen_string_literal: true
module Gitlab
  module GithubImport
    module Importer
      # Imports a single LFS object by handing its oid and download link to
      # Projects::LfsPointers::LfsDownloadService.
      class LfsObjectImporter
        attr_reader :lfs_object, :project

        # lfs_object - An instance of `Gitlab::GithubImport::Representation::LfsObject`.
        # project - An instance of `Project`.
        #
        # The third argument is accepted but ignored.
        def initialize(lfs_object, project, _)
          @lfs_object = lfs_object
          @project = project
        end

        # Returns whatever the download service returns.
        def execute
          downloader = Projects::LfsPointers::LfsDownloadService.new(project)

          downloader.execute(lfs_object.oid, lfs_object.download_link)
        end
      end
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module GithubImport
    module Importer
      # Declares how LFS objects are imported: which classes are involved and
      # how the objects to import are enumerated.
      class LfsObjectsImporter
        include ParallelScheduling

        def importer_class
          LfsObjectImporter
        end

        def representation_class
          Representation::LfsObject
        end

        def sidekiq_worker_class
          ImportLfsObjectWorker
        end

        def collection_method
          :lfs_objects
        end

        # Yields every entry returned by
        # Projects::LfsPointers::LfsImportService for the project.
        #
        # Any StandardError raised while listing or yielding is logged and
        # swallowed.
        def each_object_to_import
          Projects::LfsPointers::LfsImportService
            .new(project)
            .execute
            .each { |object| yield object }
        rescue StandardError => e
          Rails.logger.error("The Lfs import process failed. #{e.message}")
        end
      end
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module GithubImport
    module Representation
      # Representation of an LFS object: its oid and its download link.
      class LfsObject
        include ToHash
        include ExposeAttribute

        attr_reader :attributes

        expose_attribute :oid, :download_link

        # Builds a lfs_object out of an indexable pair: the element at
        # position 0 is used as the oid and the one at position 1 as the
        # download link.
        def self.from_api_response(lfs_object)
          new(oid: lfs_object[0], download_link: lfs_object[1])
        end

        # Builds a new lfs_object using a Hash that was built from a JSON payload.
        def self.from_json_hash(raw_hash)
          symbolized = Representation.symbolize_hash(raw_hash)

          new(symbolized)
        end

        # attributes - A Hash containing the raw lfs_object details. The keys of this
        #              Hash must be Symbols.
        def initialize(attributes)
          @attributes = attributes
        end
      end
    end
  end
end
Loading
Loading
@@ -19,7 +19,8 @@ module Gitlab
Importer::PullRequestsImporter,
Importer::IssuesImporter,
Importer::DiffNotesImporter,
Importer::NotesImporter
Importer::NotesImporter,
Importer::LfsObjectsImporter
].freeze
 
# project - The project to import the data into.
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment