Skip to content
Snippets Groups Projects
importer.rb 11.1 KiB
Newer Older
  • Learn to ignore specific revisions
  • Valery Sizov's avatar
    Valery Sizov committed
    module Gitlab
    
      module GithubImport
    
    Valery Sizov's avatar
    Valery Sizov committed
        class Importer
    
          include Gitlab::ShellAdapter
    
    
          attr_reader :errors, :project, :repo, :repo_url
    
    Valery Sizov's avatar
    Valery Sizov committed
    
          def initialize(project)
    
            @project  = project
            @repo     = project.import_source
    
          end
    
          def client
            return @client if defined?(@client)
            unless credentials
              raise Projects::ImportService::Error,
                    "Unable to find project import data credentials for project ID: #{@project.id}"
            end
    
            opts = {}
            # Gitea plan to be GitHub compliant
    
            if project.gitea_import?
    
              uri = URI.parse(project.import_url)
              host = "#{uri.scheme}://#{uri.host}:#{uri.port}#{uri.path}".sub(%r{/?[\w-]+/[\w-]+\.git\z}, '')
              opts = {
                host: host,
                api_version: 'v1'
              }
    
    James Lopez's avatar
    James Lopez committed
            end
    
    
            @client = Client.new(credentials[:user], opts)
    
    Valery Sizov's avatar
    Valery Sizov committed
          end
    
          def execute
    
            # The ordering of importing is important here due to the way GitHub structures their data
            # 1. Labels are required by other items while not having a dependency on anything else
            # so need to be first
            # 2. Pull requests must come before issues. Every pull request is also an issue but not
            # all issues are pull requests. Only the issue entity has labels defined in GitHub. GitLab
            # doesn't structure data like this so we need to make sure that we've created the MRs
            # before we attempt to add the labels defined in the GitHub issue for the related, already
            # imported, pull request
    
            import_labels
            import_milestones
            import_pull_requests
    
            import_comments(:issues)
            import_comments(:pull_requests)
    
    
            # Gitea doesn't have a Release API yet
            # See https://github.com/go-gitea/gitea/issues/330
    
            unless project.gitea_import?
    
              import_releases
            end
    
    
            return @credentials if defined?(@credentials)
    
            @credentials = project.import_data ? project.import_data.credentials : nil
    
    James Lopez's avatar
    James Lopez committed
          end
    
    
            return unless errors.any?
    
            project.update_column(:import_error, {
              message: 'The remote data could not be fully imported.',
              errors: errors
            }.to_json)
    
          def import_labels
    
            fetch_resources(:labels, repo, per_page: 100) do |labels|
    
                  gh_label = LabelFormatter.new(project, raw)
                  gh_label.create!
    
                  errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(gh_label.url), errors: e.message }
    
            cache_labels!
    
          def import_milestones
    
            fetch_resources(:milestones, repo, state: :all, per_page: 100) do |milestones|
    
                  gh_milestone = MilestoneFormatter.new(project, raw)
                  gh_milestone.create!
    
                  errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(gh_milestone.url), errors: e.message }
    
            fetch_resources(:issues, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues|
    
                gh_issue = IssueFormatter.new(project, raw, client)
    
                begin
                  issuable =
                    if gh_issue.pull_request?
    
                      MergeRequest.find_by(target_project_id: project.id, iid: gh_issue.number)
    
                    else
                      gh_issue.create!
                    end
    
                  apply_labels(issuable, raw)
                rescue => e
    
                  errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(gh_issue.url), errors: e.message }
    
    Valery Sizov's avatar
    Valery Sizov committed
              end
            end
          end
    
    
          def import_pull_requests
    
            fetch_resources(:pull_requests, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests|
    
                gh_pull_request = PullRequestFormatter.new(project, raw, client)
    
    
                next unless gh_pull_request.valid?
    
                  restore_source_branch(gh_pull_request) unless gh_pull_request.source_branch_exists?
                  restore_target_branch(gh_pull_request) unless gh_pull_request.target_branch_exists?
    
                  merge_request = gh_pull_request.create!
    
                  # Gitea doesn't return PR in the Issue API endpoint, so labels must be assigned at this stage
    
                  if project.gitea_import?
    
                    apply_labels(merge_request, raw)
                  end
    
                  errors << { type: :pull_request, url: Gitlab::UrlSanitizer.sanitize(gh_pull_request.url), errors: e.message }
    
                  clean_up_restored_branches(gh_pull_request)
    
    
            project.repository.after_remove_branch
    
          def restore_source_branch(pull_request)
            project.repository.fetch_ref(repo_url, "pull/#{pull_request.number}/head", pull_request.source_branch_name)
    
          def restore_target_branch(pull_request)
            project.repository.create_branch(pull_request.target_branch_name, pull_request.target_branch_sha)
    
          def remove_branch(name)
            project.repository.delete_branch(name)
          rescue Rugged::ReferenceError
    
            errors << { type: :remove_branch, name: name }
    
          end
    
          def clean_up_restored_branches(pull_request)
            remove_branch(pull_request.source_branch_name) unless pull_request.source_branch_exists?
            remove_branch(pull_request.target_branch_name) unless pull_request.target_branch_exists?
    
          def apply_labels(issuable, raw)
            return unless raw.labels.count > 0
    
    Douwe Maan's avatar
    Douwe Maan committed
            label_ids = raw.labels.
              map { |attrs| @labels[attrs.name] }.
              compact
    
            issuable.update_attribute(:label_ids, label_ids)
    
          def import_comments(issuable_type)
            resource_type = "#{issuable_type}_comments".to_sym
    
            # Two notes here:
            # 1. We don't have a distinctive attribute for comments (unlike issues iid), so we fetch the last inserted note,
            # compare it against every comment in the current imported page until we find match, and that's where start importing
            # 2. GH returns comments for _both_ issues and PRs through issues_comments API, while pull_requests_comments returns
            # only comments on diffs, so select last note not based on noteable_type but on line_code
            line_code_is = issuable_type == :pull_requests ? 'NOT NULL' : 'NULL'
            last_note    = project.notes.where("line_code IS #{line_code_is}").last
    
            fetch_resources(resource_type, repo, per_page: 100) do |comments|
    
              if last_note
                discard_inserted_comments(comments, last_note)
                last_note = nil
              end
    
              create_comments(comments)
    
    Valery Sizov's avatar
    Valery Sizov committed
    
    
          def create_comments(comments)
    
            ActiveRecord::Base.no_touching do
              comments.each do |raw|
                begin
    
                  comment = CommentFormatter.new(project, raw, client)
    
    
                  # GH does not return info about comment's parent, so we guess it by checking its URL!
                  *_, parent, iid = URI(raw.html_url).path.split('/')
    
    
                  issuable = if parent == 'issues'
                               Issue.find_by(project_id: project.id, iid: iid)
                             else
                               MergeRequest.find_by(target_project_id: project.id, iid: iid)
                             end
    
                  issuable.notes.create!(comment.attributes)
                rescue => e
                  errors << { type: :comment, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
                end
    
    Valery Sizov's avatar
    Valery Sizov committed
          end
    
          def discard_inserted_comments(comments, last_note)
            last_note_attrs = nil
    
            cut_off_index = comments.find_index do |raw|
    
              comment           = CommentFormatter.new(project, raw)
    
              comment_attrs     = comment.attributes
              last_note_attrs ||= last_note.slice(*comment_attrs.keys)
    
              comment_attrs.with_indifferent_access == last_note_attrs
            end
    
            # No matching resource in the collection, which means we got halted right on the end of the last page, so all good
            return unless cut_off_index
    
    
    Ahmad Sherif's avatar
    Ahmad Sherif committed
            # Otherwise, remove the resources we've already inserted
    
            comments.shift(cut_off_index + 1)
          end
    
    
            unless project.wiki.repository_exists?
    
              wiki = WikiFormatter.new(project)
    
              gitlab_shell.import_repository(project.repository_storage_path, wiki.path_with_namespace, wiki.import_url)
    
            # GitHub error message when the wiki repo has not been created,
            # this means that repo has wiki enabled, but have no pages. So,
            # we can skip the import.
            if e.message !~ /repository not exported/
    
              errors << { type: :wiki, errors: e.message }
    
    
          def import_releases
    
            fetch_resources(:releases, repo, per_page: 100) do |releases|
    
                  gh_release = ReleaseFormatter.new(project, raw)
    
                  gh_release.create! if gh_release.valid?
                rescue => e
    
                  errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(gh_release.url), errors: e.message }
    
          def cache_labels!
    
            project.labels.select(:id, :title).find_each do |label|
              @labels[label.title] = label.id
            end
          end
    
    
          def fetch_resources(resource_type, *opts)
            return if imported?(resource_type)
    
    
            opts.last[:page] = current_page(resource_type)
    
            client.public_send(resource_type, *opts) do |resources|
              yield resources
              increment_page(resource_type)
    
          end
    
          def imported?(resource_type)
            Rails.cache.read("#{cache_key_prefix}:#{resource_type}:imported")
          end
    
          def imported!(resource_type)
            Rails.cache.write("#{cache_key_prefix}:#{resource_type}:imported", true, ex: 1.day)
          end
    
          def increment_page(resource_type)
            key = "#{cache_key_prefix}:#{resource_type}:current-page"
    
            # Rails.cache.increment calls INCRBY directly on the value stored under the key, which is
            # a serialized ActiveSupport::Cache::Entry, so it will return an error by Redis, hence this ugly work-around
            page = Rails.cache.read(key)
            page += 1
            Rails.cache.write(key, page)
    
            page
          end
    
          def current_page(resource_type)
            Rails.cache.fetch("#{cache_key_prefix}:#{resource_type}:current-page", ex: 1.day) { 1 }
          end
    
          def cache_key_prefix
            @cache_key_prefix ||= "github-import:#{project.id}"
    
    Valery Sizov's avatar
    Valery Sizov committed
        end
      end
    end