From 14fbd25d06248ee268434278e0fe31fec262d23b Mon Sep 17 00:00:00 2001
From: Ahmad Sherif <me@ahmadsherif.com>
Date: Wed, 19 Oct 2016 20:42:31 +0200
Subject: [PATCH] Modify GitHub importer to be retryable

---
 CHANGELOG.md                                  |   1 +
 lib/gitlab/github_import/base_formatter.rb    |   4 +-
 lib/gitlab/github_import/importer.rb          | 115 +++++++++++++++---
 lib/gitlab/github_import/issue_formatter.rb   |   8 +-
 lib/gitlab/github_import/label_formatter.rb   |   4 +-
 .../github_import/milestone_formatter.rb      |   8 +-
 .../github_import/pull_request_formatter.rb   |   8 +-
 lib/gitlab/github_import/release_formatter.rb |   8 +-
 .../lib/gitlab/github_import/importer_spec.rb |   7 +-
 9 files changed, 136 insertions(+), 27 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 56d9d4e2809..ba35d57afb6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ Please view this file on the master branch, on stable branches it's out of date.
   - Fix documents and comments on Build API `scope`
   - Fix applying labels for GitHub-imported MRs
   - Fix importing MR comments from GitHub
+  - Modify GitHub importer to be retryable
   - Refactor email, use setter method instead AR callbacks for email attribute (Semyon Pupkov)
   - Shortened merge request modal to let clipboard button not overlap
 
diff --git a/lib/gitlab/github_import/base_formatter.rb b/lib/gitlab/github_import/base_formatter.rb
index 8cacf4f4925..d07bb43044b 100644
--- a/lib/gitlab/github_import/base_formatter.rb
+++ b/lib/gitlab/github_import/base_formatter.rb
@@ -10,7 +10,9 @@ module Gitlab
       end
 
       def create!
-        self.klass.create!(self.attributes)
+        project.send(project_association).find_or_create_by!(find_condition) do |record|
+          record.attributes = attributes
+        end
       end
 
       private
diff --git a/lib/gitlab/github_import/importer.rb b/lib/gitlab/github_import/importer.rb
index 27946dff608..7e13f731002 100644
--- a/lib/gitlab/github_import/importer.rb
+++ b/lib/gitlab/github_import/importer.rb
@@ -20,13 +20,13 @@ module Gitlab
       end
 
       def execute
-        import_labels
-        import_milestones
-        import_issues
-        import_pull_requests
+        import_labels        unless imported?(:labels)
+        import_milestones    unless imported?(:milestones)
+        import_issues        unless imported?(:issues)
+        import_pull_requests unless imported?(:pull_requests)
         import_comments
         import_wiki
-        import_releases
+        import_releases      unless imported?(:releases)
         handle_errors
 
         true
@@ -48,7 +48,7 @@ module Gitlab
       end
 
       def import_labels
-        client.labels(repo, per_page: 100) do |labels|
+        client.labels(repo, page: current_page(:labels), per_page: 100) do |labels|
           labels.each do |raw|
             begin
               label = LabelFormatter.new(project, raw).create!
@@ -57,11 +57,15 @@ module Gitlab
               errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
             end
           end
+
+          increment_page(:labels)
         end
+
+        imported!(:labels)
       end
 
       def import_milestones
-        client.milestones(repo, state: :all, per_page: 100) do |milestones|
+        client.milestones(repo, state: :all, page: current_page(:milestones), per_page: 100) do |milestones|
           milestones.each do |raw|
             begin
               MilestoneFormatter.new(project, raw).create!
@@ -69,11 +73,15 @@ module Gitlab
               errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
             end
           end
+
+          increment_page(:milestones)
         end
+
+        imported!(:milestones)
       end
 
       def import_issues
-        client.issues(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues|
+        client.issues(repo, state: :all, sort: :created, direction: :asc, page: current_page(:issues), per_page: 100) do |issues|
           issues.each do |raw|
             gh_issue = IssueFormatter.new(project, raw)
 
@@ -86,11 +94,15 @@ module Gitlab
               end
             end
           end
+
+          increment_page(:issues)
         end
+
+        imported!(:issues)
       end
 
       def import_pull_requests
-        client.pull_requests(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests|
+        client.pull_requests(repo, state: :all, sort: :created, direction: :asc, page: current_page(:pull_requests), per_page: 100) do |pull_requests|
           pull_requests.each do |raw|
             pull_request = PullRequestFormatter.new(project, raw)
             next unless pull_request.valid?
@@ -107,9 +119,13 @@ module Gitlab
               clean_up_restored_branches(pull_request)
             end
           end
+
+          increment_page(:pull_requests)
         end
 
         project.repository.after_remove_branch
+
+        imported!(:pull_requests)
       end
 
       def restore_source_branch(pull_request)
@@ -149,13 +165,34 @@ module Gitlab
       end
 
       def import_comments
-        client.issues_comments(repo, per_page: 100) do |comments|
+        # We don't have a distinctive attribute for comments (unlike issues iid), so we fetch the last inserted note,
+        # compare it against every comment in the current imported page until we find match, and that's where start importing
+        last_note = Note.where(noteable_type: 'Issue').last
+
+        client.issues_comments(repo, page: current_page(:issue_comments), per_page: 100) do |comments|
+          if last_note
+            discard_inserted_comments(comments, last_note)
+            last_note = nil
+          end
+
           create_comments(comments)
-        end
+          increment_page(:issue_comments)
+        end unless imported?(:issue_comments)
+
+        imported!(:issue_comments)
+
+        last_note = Note.where(noteable_type: 'MergeRequest').last
+        client.pull_requests_comments(repo, page: current_page(:pull_request_comments), per_page: 100) do |comments|
+          if last_note
+            discard_inserted_comments(comments, last_note)
+            last_note = nil
+          end
 
-        client.pull_requests_comments(repo, per_page: 100) do |comments|
           create_comments(comments)
-        end
+          increment_page(:pull_request_comments)
+        end unless imported?(:pull_request_comments)
+
+        imported!(:pull_request_comments)
       end
 
       def create_comments(comments)
@@ -177,6 +214,24 @@ module Gitlab
         end
       end
 
+      def discard_inserted_comments(comments, last_note)
+        last_note_attrs = nil
+
+        cut_off_index = comments.find_index do |raw|
+          comment           = CommentFormatter.new(project, raw)
+          comment_attrs     = comment.attributes
+          last_note_attrs ||= last_note.slice(*comment_attrs.keys)
+
+          comment_attrs.with_indifferent_access == last_note_attrs
+        end
+
+        # No matching resource in the collection, which means we got halted right on the end of the last page, so all good
+        return unless cut_off_index
+
+        # Otherwise, remove the resouces we've already inserted
+        comments.shift(cut_off_index + 1)
+      end
+
       def import_wiki
         unless project.wiki.repository_exists?
           wiki = WikiFormatter.new(project)
@@ -192,7 +247,7 @@ module Gitlab
       end
 
       def import_releases
-        client.releases(repo, per_page: 100) do |releases|
+        client.releases(repo, page: current_page(:releases), per_page: 100) do |releases|
           releases.each do |raw|
             begin
               gh_release = ReleaseFormatter.new(project, raw)
@@ -201,7 +256,39 @@ module Gitlab
               errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
             end
           end
+
+          increment_page(:releases)
         end
+
+        imported!(:releases)
+      end
+
+      def imported?(resource_type)
+        Rails.cache.read("#{cache_key_prefix}:#{resource_type}:imported")
+      end
+
+      def imported!(resource_type)
+        Rails.cache.write("#{cache_key_prefix}:#{resource_type}:imported", true, ex: 1.day)
+      end
+
+      def increment_page(resource_type)
+        key = "#{cache_key_prefix}:#{resource_type}:current-page"
+
+        # Rails.cache.increment calls INCRBY directly on the value stored under the key, which is
+        # a serialized ActiveSupport::Cache::Entry, so it will return an error by Redis, hence this ugly work-around
+        page = Rails.cache.read(key)
+        page += 1
+        Rails.cache.write(key, page)
+
+        page
+      end
+
+      def current_page(resource_type)
+        Rails.cache.fetch("#{cache_key_prefix}:#{resource_type}:current-page", ex: 1.day) { 1 }
+      end
+
+      def cache_key_prefix
+        @cache_key_prefix ||= "github-import:#{project.id}"
       end
     end
   end
diff --git a/lib/gitlab/github_import/issue_formatter.rb b/lib/gitlab/github_import/issue_formatter.rb
index 77621de9f4c..8c32ac59fc5 100644
--- a/lib/gitlab/github_import/issue_formatter.rb
+++ b/lib/gitlab/github_import/issue_formatter.rb
@@ -20,8 +20,12 @@ module Gitlab
         raw_data.comments > 0
       end
 
-      def klass
-        Issue
+      def project_association
+        :issues
+      end
+
+      def find_condition
+        { iid: number }
       end
 
       def number
diff --git a/lib/gitlab/github_import/label_formatter.rb b/lib/gitlab/github_import/label_formatter.rb
index 942dfb3312b..002494739e9 100644
--- a/lib/gitlab/github_import/label_formatter.rb
+++ b/lib/gitlab/github_import/label_formatter.rb
@@ -9,8 +9,8 @@ module Gitlab
         }
       end
 
-      def klass
-        Label
+      def project_association
+        :labels
       end
 
       def create!
diff --git a/lib/gitlab/github_import/milestone_formatter.rb b/lib/gitlab/github_import/milestone_formatter.rb
index b2fa524cf5b..401dd962521 100644
--- a/lib/gitlab/github_import/milestone_formatter.rb
+++ b/lib/gitlab/github_import/milestone_formatter.rb
@@ -14,8 +14,12 @@ module Gitlab
         }
       end
 
-      def klass
-        Milestone
+      def project_association
+        :milestones
+      end
+
+      def find_condition
+        { iid: raw_data.number }
       end
 
       private
diff --git a/lib/gitlab/github_import/pull_request_formatter.rb b/lib/gitlab/github_import/pull_request_formatter.rb
index 1408683100f..b9a227fb11a 100644
--- a/lib/gitlab/github_import/pull_request_formatter.rb
+++ b/lib/gitlab/github_import/pull_request_formatter.rb
@@ -24,8 +24,12 @@ module Gitlab
         }
       end
 
-      def klass
-        MergeRequest
+      def project_association
+        :merge_requests
+      end
+
+      def find_condition
+        { iid: number }
       end
 
       def number
diff --git a/lib/gitlab/github_import/release_formatter.rb b/lib/gitlab/github_import/release_formatter.rb
index 73d643b00ad..1ad702a6058 100644
--- a/lib/gitlab/github_import/release_formatter.rb
+++ b/lib/gitlab/github_import/release_formatter.rb
@@ -11,8 +11,12 @@ module Gitlab
         }
       end
 
-      def klass
-        Release
+      def project_association
+        :releases
+      end
+
+      def find_condition
+        { tag: raw_data.tag_name }
       end
 
       def valid?
diff --git a/spec/lib/gitlab/github_import/importer_spec.rb b/spec/lib/gitlab/github_import/importer_spec.rb
index 84f280ceaae..7478f86bd28 100644
--- a/spec/lib/gitlab/github_import/importer_spec.rb
+++ b/spec/lib/gitlab/github_import/importer_spec.rb
@@ -2,6 +2,10 @@ require 'spec_helper'
 
 describe Gitlab::GithubImport::Importer, lib: true do
   describe '#execute' do
+    before do
+      allow(Rails).to receive(:cache).and_return(ActiveSupport::Cache::MemoryStore.new)
+    end
+
     context 'when an error occurs' do
       let(:project) { create(:project, import_url: 'https://github.com/octocat/Hello-World.git', wiki_access_level: ProjectFeature::DISABLED) }
       let(:octocat) { double(id: 123456, login: 'octocat') }
@@ -152,10 +156,9 @@ describe Gitlab::GithubImport::Importer, lib: true do
           message: 'The remote data could not be fully imported.',
           errors: [
             { type: :label, url: "https://api.github.com/repos/octocat/Hello-World/labels/bug", errors: "Validation failed: Title can't be blank, Title is invalid" },
-            { type: :milestone, url: "https://api.github.com/repos/octocat/Hello-World/milestones/1", errors: "Validation failed: Title has already been taken" },
             { type: :issue, url: "https://api.github.com/repos/octocat/Hello-World/issues/1348", errors: "Validation failed: Title can't be blank, Title is too short (minimum is 0 characters)" },
             { type: :pull_request, url: "https://api.github.com/repos/octocat/Hello-World/pulls/1347", errors: "Invalid Repository. Use user/repo format." },
-            { type: :pull_request, url: "https://api.github.com/repos/octocat/Hello-World/pulls/1347", errors: "Validation failed: Validate branches Cannot Create: This merge request already exists: [\"New feature\"]" },
+            { type: :pull_request, url: "https://api.github.com/repos/octocat/Hello-World/pulls/1347", errors: "Invalid Repository. Use user/repo format." },
             { type: :wiki, errors: "Gitlab::Shell::Error" },
             { type: :release, url: 'https://api.github.com/repos/octocat/Hello-World/releases/2', errors: "Validation failed: Description can't be blank" }
           ]
-- 
GitLab