Newer
Older
attr_reader :errors, :project, :repo, :repo_url
@project = project
@repo = project.import_source
Douglas Barbosa Alexandre
committed
@repo_url = project.import_url
@errors = []
end
def client
return @client if defined?(@client)
unless credentials
raise Projects::ImportService::Error,
"Unable to find project import data credentials for project ID: #{@project.id}"
end
Douglas Barbosa Alexandre
committed
opts = {}
# Gitea plans to be GitHub compliant
uri = URI.parse(project.import_url)
host = "#{uri.scheme}://#{uri.host}:#{uri.port}#{uri.path}".sub(%r{/?[\w-]+/[\w-]+\.git\z}, '')
opts = {
host: host,
api_version: 'v1'
}
@client = Client.new(credentials[:user], opts)
# The ordering of importing is important here due to the way GitHub structures their data
# 1. Labels are required by other items while not having a dependency on anything else
# so need to be first
# 2. Pull requests must come before issues. Every pull request is also an issue but not
# all issues are pull requests. Only the issue entity has labels defined in GitHub. GitLab
# doesn't structure data like this so we need to make sure that we've created the MRs
# before we attempt to add the labels defined in the GitHub issue for the related, already
# imported, pull request
Ahmad Sherif
committed
import_labels
import_milestones
import_pull_requests
Ahmad Sherif
committed
import_comments(:issues)
import_comments(:pull_requests)
import_wiki
# Gitea doesn't have a Release API yet
# See https://github.com/go-gitea/gitea/issues/330
handle_errors
true
Douglas Barbosa Alexandre
committed
# Returns the credentials stored in the project's import data, or nil when
# the project has no import data.
#
# The result is memoized in @credentials using `defined?`, so a nil result
# is remembered as well and the lookup is not repeated.
def credentials
  return @credentials if defined?(@credentials)

  import_data = project.import_data
  @credentials = import_data ? import_data.credentials : nil
end
# Persists the errors collected during the import on the project.
#
# Does nothing (and returns nil) when no errors were recorded. Otherwise
# serializes a fixed message together with the collected errors to JSON and
# writes it to the project's `import_error` column via `update_column`.
def handle_errors
  return unless errors.any?

  payload = {
    message: 'The remote data could not be fully imported.',
    errors: errors
  }

  project.update_column(:import_error, payload.to_json)
end
Ahmad Sherif
committed
fetch_resources(:labels, repo, per_page: 100) do |labels|
Ahmad Sherif
committed
labels.each do |raw|
begin
gh_label = LabelFormatter.new(project, raw)
gh_label.create!
Ahmad Sherif
committed
rescue => e
errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(gh_label.url), errors: e.message }
Ahmad Sherif
committed
end
end
end
Ahmad Sherif
committed
fetch_resources(:milestones, repo, state: :all, per_page: 100) do |milestones|
Ahmad Sherif
committed
milestones.each do |raw|
begin
gh_milestone = MilestoneFormatter.new(project, raw)
gh_milestone.create!
Ahmad Sherif
committed
rescue => e
errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(gh_milestone.url), errors: e.message }
Ahmad Sherif
committed
end
end
end
Ahmad Sherif
committed
fetch_resources(:issues, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues|
Ahmad Sherif
committed
issues.each do |raw|
gh_issue = IssueFormatter.new(project, raw, client)
Ahmad Sherif
committed
begin
issuable =
if gh_issue.pull_request?
MergeRequest.find_by(target_project_id: project.id, iid: gh_issue.number)
else
gh_issue.create!
end
apply_labels(issuable, raw)
rescue => e
errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(gh_issue.url), errors: e.message }
end
def import_pull_requests
Ahmad Sherif
committed
fetch_resources(:pull_requests, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests|
Ahmad Sherif
committed
pull_requests.each do |raw|
gh_pull_request = PullRequestFormatter.new(project, raw, client)
Ahmad Sherif
committed
begin
restore_source_branch(gh_pull_request) unless gh_pull_request.source_branch_exists?
restore_target_branch(gh_pull_request) unless gh_pull_request.target_branch_exists?
merge_request = gh_pull_request.create!
Ahmad Sherif
committed
# Gitea doesn't return PR in the Issue API endpoint, so labels must be assigned at this stage
apply_labels(merge_request, raw)
end
Ahmad Sherif
committed
rescue => e
errors << { type: :pull_request, url: Gitlab::UrlSanitizer.sanitize(gh_pull_request.url), errors: e.message }
Ahmad Sherif
committed
ensure
clean_up_restored_branches(gh_pull_request)
Ahmad Sherif
committed
end
Douglas Barbosa Alexandre
committed
end
project.repository.after_remove_branch
Douglas Barbosa Alexandre
committed
# Recreates the source branch of a pull request locally.
#
# Asks the project repository to fetch the ref "pull/<number>/head" from
# `repo_url` into a ref named after the pull request's source branch.
#
# pull_request - object responding to `number` and `source_branch_name`.
def restore_source_branch(pull_request)
  remote_ref = "pull/#{pull_request.number}/head"

  project.repository.fetch_ref(repo_url, remote_ref, pull_request.source_branch_name)
end
Douglas Barbosa Alexandre
committed
Douglas Barbosa Alexandre
committed
# Recreates the target branch of a pull request locally.
#
# Creates a branch in the project repository named after the pull request's
# target branch, pointing at the pull request's target branch SHA.
#
# pull_request - object responding to `target_branch_name` and
#                `target_branch_sha`.
def restore_target_branch(pull_request)
  branch_name = pull_request.target_branch_name
  branch_sha  = pull_request.target_branch_sha

  project.repository.create_branch(branch_name, branch_sha)
end
Douglas Barbosa Alexandre
committed
# Deletes the named branch from the project repository.
#
# A Rugged::ReferenceError raised by the deletion is not propagated; it is
# recorded in `errors` as a `:remove_branch` entry carrying the branch name.
#
# name - name of the branch to delete.
def remove_branch(name)
  project.repository.delete_branch(name)
rescue Rugged::ReferenceError
  errors << { type: :remove_branch, name: name }
end
# Removes the branches that were restored for a pull request.
#
# The source and target branches are each removed (via `remove_branch`)
# unless the pull request reports that the branch exists.
# NOTE(review): presumably `source_branch_exists?`/`target_branch_exists?`
# reflect the state from before the branches were restored - confirm against
# the formatter.
#
# pull_request - object responding to `source_branch_name`,
#                `target_branch_name`, `source_branch_exists?` and
#                `target_branch_exists?`.
def clean_up_restored_branches(pull_request)
  remove_branch(pull_request.source_branch_name) unless pull_request.source_branch_exists?
  remove_branch(pull_request.target_branch_name) unless pull_request.target_branch_exists?
end
# Assigns to the issuable the local labels matching the remote resource's
# labels.
#
# Each remote label name is looked up in the @labels cache (title => id);
# names without a cached id are ignored. Nothing happens when the remote
# resource carries no labels.
#
# issuable - object responding to `update_attribute`.
# raw      - remote resource responding to `labels`, whose items respond to
#            `name`.
def apply_labels(issuable, raw)
  return if raw.labels.count <= 0

  known_ids = []
  raw.labels.each do |remote_label|
    local_id = @labels[remote_label.name]
    known_ids << local_id unless local_id.nil?
  end

  issuable.update_attribute(:label_ids, known_ids)
end
Ahmad Sherif
committed
def import_comments(issuable_type)
resource_type = "#{issuable_type}_comments".to_sym
Ahmad Sherif
committed
# Two notes here:
# 1. We don't have a distinctive attribute for comments (unlike an issue's iid), so we fetch the last inserted note,
# compare it against every comment in the currently imported page until we find a match, and that's where we start importing
# 2. GH returns comments for _both_ issues and PRs through issues_comments API, while pull_requests_comments returns
# only comments on diffs, so select last note not based on noteable_type but on line_code
line_code_is = issuable_type == :pull_requests ? 'NOT NULL' : 'NULL'
last_note = project.notes.where("line_code IS #{line_code_is}").last
Ahmad Sherif
committed
fetch_resources(resource_type, repo, per_page: 100) do |comments|
if last_note
discard_inserted_comments(comments, last_note)
last_note = nil
end
Ahmad Sherif
committed
end
ActiveRecord::Base.no_touching do
comments.each do |raw|
begin
comment = CommentFormatter.new(project, raw, client)
# GH does not return info about comment's parent, so we guess it by checking its URL!
*_, parent, iid = URI(raw.html_url).path.split('/')
issuable = if parent == 'issues'
Issue.find_by(project_id: project.id, iid: iid)
else
MergeRequest.find_by(target_project_id: project.id, iid: iid)
end
next unless issuable
issuable.notes.create!(comment.attributes)
rescue => e
errors << { type: :comment, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
end
end
# Drops, in place, the leading comments of a page up to and including the one
# that matches the last note already stored.
#
# Each raw comment is converted with CommentFormatter and its attributes are
# compared against the same-named attributes of `last_note` (sliced once,
# using the keys of the first formatted comment).
#
# comments  - mutable collection of raw comments for the current page.
# last_note - the last note already imported; must respond to `slice`.
#
# Returns the removed comments, or nil when no comment matched.
def discard_inserted_comments(comments, last_note)
  expected_attrs = nil

  match_position = comments.find_index do |raw|
    attrs = CommentFormatter.new(project, raw).attributes
    expected_attrs ||= last_note.slice(*attrs.keys)
    attrs.with_indifferent_access == expected_attrs
  end

  # No match means the previous run stopped right at the end of a page, so
  # every comment in this page still needs importing.
  comments.shift(match_position + 1) if match_position
end
unless project.wiki.repository_exists?
gitlab_shell.import_repository(project.repository_storage_path, wiki.path_with_namespace, wiki.import_url)
Douglas Barbosa Alexandre
committed
rescue Gitlab::Shell::Error => e
Douglas Barbosa Alexandre
committed
# GitHub returns this error message when the wiki repo has not been created;
# this means that the repo has the wiki enabled but has no pages, so
# we can skip the import.
if e.message !~ /repository not exported/
errors << { type: :wiki, errors: e.message }
Douglas Barbosa Alexandre
committed
end
Ahmad Sherif
committed
fetch_resources(:releases, repo, per_page: 100) do |releases|
Ahmad Sherif
committed
releases.each do |raw|
begin
gh_release = ReleaseFormatter.new(project, raw)
Ahmad Sherif
committed
gh_release.create! if gh_release.valid?
rescue => e
errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(gh_release.url), errors: e.message }
Ahmad Sherif
committed
end
Ahmad Sherif
committed
end
end
project.labels.select(:id, :title).find_each do |label|
@labels[label.title] = label.id
end
end
Ahmad Sherif
committed
# Pages through a remote collection, resuming from the page recorded in the
# cache, and flags the resource type as imported once the client call returns.
#
# resource_type - Symbol naming the client method to invoke (via
#                 `public_send`); also used to build the cache keys.
# opts          - arguments forwarded to the client. The last one must be a
#                 Hash: its :page entry is overwritten with the page to
#                 resume from.
#
# Yields each page of resources to the given block; the cached page counter
# is incremented after the block has processed a page. Returns immediately
# when the resource type is already flagged as imported.
def fetch_resources(resource_type, *opts)
  return if imported?(resource_type)

  opts.last[:page] = current_page(resource_type)

  client.public_send(resource_type, *opts) do |resources|
    yield resources
    increment_page(resource_type)
  end

  imported!(resource_type)
end
# Tells whether the given resource type has been flagged as imported, by
# reading the "<cache_key_prefix>:<resource_type>:imported" cache entry.
#
# Returns whatever the cache holds for that key (nil when it is absent).
def imported?(resource_type)
  flag_key = "#{cache_key_prefix}:#{resource_type}:imported"

  Rails.cache.read(flag_key)
end
# Flags the given resource type as imported by writing `true` under the
# "<cache_key_prefix>:<resource_type>:imported" cache key.
#
# NOTE(review): the expiry is passed as `ex:`; presumably the configured
# cache store honours that option (the documented ActiveSupport name is
# `expires_in:`) - confirm.
def imported!(resource_type)
  flag_key = "#{cache_key_prefix}:#{resource_type}:imported"

  Rails.cache.write(flag_key, true, ex: 1.day)
end
# Advances the cached page counter for the given resource type by one and
# returns the new page number.
#
# The counter is read, incremented and written back by hand rather than with
# Rails.cache.increment: that would issue INCRBY against the stored value,
# which is a serialized ActiveSupport::Cache::Entry, and Redis would reject it.
def increment_page(resource_type)
  page_key = "#{cache_key_prefix}:#{resource_type}:current-page"

  next_page = Rails.cache.read(page_key) + 1
  Rails.cache.write(page_key, next_page)

  next_page
end
# Returns the page to resume from for the given resource type.
#
# Uses Rails.cache.fetch on "<cache_key_prefix>:<resource_type>:current-page",
# supplying 1 as the value when the key is not cached yet.
def current_page(resource_type)
  page_key = "#{cache_key_prefix}:#{resource_type}:current-page"

  Rails.cache.fetch(page_key, ex: 1.day) do
    1
  end
end
def cache_key_prefix
@cache_key_prefix ||= "github-import:#{project.id}"