From 9437b8a2e4ae4c688272d0febfbca4007232e4f5 Mon Sep 17 00:00:00 2001
From: Douglas Barbosa Alexandre <dbalexandre@gmail.com>
Date: Wed, 18 May 2016 16:14:20 -0500
Subject: [PATCH] Import GitHub repositories respecting the API rate limit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Octokit's auto pagination sets the page size to the maximum of 100 and
seeks not to overstep the rate limit. However, when the rate limit is
reached, it raises an exception and stops making new requests.

Here we use a custom pattern for traversing large lists, so we can
check whether we’ll reach the rate limit and wait for the API to reset
the rate limit before making new requests.
---
 lib/gitlab/github_import/importer.rb          | 112 ++++++++++++------
 lib/gitlab/github_import/issue_formatter.rb   |   4 +
 lib/gitlab/github_import/label_formatter.rb   |   4 +
 .../github_import/milestone_formatter.rb      |   4 +
 .../github_import/pull_request_formatter.rb   |   4 +
 5 files changed, 92 insertions(+), 36 deletions(-)

diff --git a/lib/gitlab/github_import/importer.rb b/lib/gitlab/github_import/importer.rb
index 9d077e79c39..a2ee56bee89 100644
--- a/lib/gitlab/github_import/importer.rb
+++ b/lib/gitlab/github_import/importer.rb
@@ -3,6 +3,9 @@ module Gitlab
     class Importer
       include Gitlab::ShellAdapter
 
+      GITHUB_SAFE_REMAINING_REQUESTS = 100
+      GITHUB_SAFE_SLEEP_TIME = 500
+
       attr_reader :client, :project, :repo, :repo_url
 
       def initialize(project)
@@ -25,14 +28,53 @@ module Gitlab
 
       private
 
+      def turn_auto_pagination_off!
+        client.auto_paginate = false
+      end
+
+      def turn_auto_pagination_on!
+        client.auto_paginate = true
+      end
+
+      def rate_limit
+        client.rate_limit!
+      end
+
+      def rate_limit_exceed?
+        rate_limit.remaining <= GITHUB_SAFE_REMAINING_REQUESTS
+      end
+
+      def rate_limit_sleep_time
+        rate_limit.resets_in + GITHUB_SAFE_SLEEP_TIME
+      end
+
+      def paginate
+        turn_auto_pagination_off!
+
+        sleep rate_limit_sleep_time if rate_limit_exceed?
+
+        data = yield
+
+        last_response = client.last_response
+
+        while last_response.rels[:next]
+          sleep rate_limit_sleep_time if rate_limit_exceed?
+          last_response = last_response.rels[:next].get
+          data.concat(last_response.data) if last_response.data.is_a?(Array)
+        end
+
+        turn_auto_pagination_on!
+
+        data
+      end
+
       def credentials
         @credentials ||= project.import_data.credentials if project.import_data
       end
 
       def import_labels
-        client.labels(repo).each do |raw_data|
-          Label.create!(LabelFormatter.new(project, raw_data).attributes)
-        end
+        labels = paginate { client.labels(repo, per_page: 100) }
+        labels.each { |raw| LabelFormatter.new(project, raw).create! }
 
         true
       rescue ActiveRecord::RecordInvalid => e
@@ -40,9 +82,8 @@ module Gitlab
       end
 
       def import_milestones
-        client.list_milestones(repo, state: :all).each do |raw_data|
-          Milestone.create!(MilestoneFormatter.new(project, raw_data).attributes)
-        end
+        milestones = paginate { client.milestones(repo, state: :all, per_page: 100) }
+        milestones.each { |raw| MilestoneFormatter.new(project, raw).create! }
 
         true
       rescue ActiveRecord::RecordInvalid => e
@@ -50,16 +91,15 @@ module Gitlab
       end
 
       def import_issues
-        client.list_issues(repo, state: :all, sort: :created, direction: :asc).each do |raw_data|
-          gh_issue = IssueFormatter.new(project, raw_data)
+        data = paginate { client.issues(repo, state: :all, sort: :created, direction: :asc, per_page: 100) }
 
-          if gh_issue.valid?
-            issue = Issue.create!(gh_issue.attributes)
-            apply_labels(gh_issue.number, issue)
+        data.each do |raw|
+          gh_issue = IssueFormatter.new(project, raw)
 
-            if gh_issue.has_comments?
-              import_comments(gh_issue.number, issue)
-            end
+          if gh_issue.valid?
+            issue = gh_issue.create!
+            apply_labels(issue)
+            import_comments(issue) if gh_issue.has_comments?
           end
         end
 
@@ -69,9 +109,8 @@ module Gitlab
       end
 
       def import_pull_requests
-        pull_requests = client.pull_requests(repo, state: :all, sort: :created, direction: :asc)
-                              .map { |raw| PullRequestFormatter.new(project, raw) }
-                              .select(&:valid?)
+        pull_requests = paginate { client.pull_requests(repo, state: :all, sort: :created, direction: :asc, per_page: 100) }
+        pull_requests = pull_requests.map { |raw| PullRequestFormatter.new(project, raw) }.select(&:valid?)
 
         source_branches_removed = pull_requests.reject(&:source_branch_exists?).map { |pr| [pr.source_branch_name, pr.source_branch_sha] }
         target_branches_removed = pull_requests.reject(&:target_branch_exists?).map { |pr| [pr.target_branch_name, pr.target_branch_sha] }
@@ -80,13 +119,10 @@ module Gitlab
         create_refs(branches_removed)
 
         pull_requests.each do |pull_request|
-          merge_request = MergeRequest.new(pull_request.attributes)
-
-          if merge_request.save
-            apply_labels(pull_request.number, merge_request)
-            import_comments(pull_request.number, merge_request)
-            import_comments_on_diff(pull_request.number, merge_request)
-          end
+          merge_request = pull_request.create!
+          apply_labels(merge_request)
+          import_comments(merge_request)
+          import_comments_on_diff(merge_request)
         end
 
         true
@@ -98,6 +134,7 @@ module Gitlab
 
       def create_refs(branches)
         branches.each do |name, sha|
+          sleep rate_limit_sleep_time if rate_limit_exceed?
           client.create_ref(repo, "refs/heads/#{name}", sha)
         end
 
@@ -106,13 +143,16 @@ module Gitlab
 
       def delete_refs(branches)
         branches.each do |name, _|
+          sleep rate_limit_sleep_time if rate_limit_exceed?
           client.delete_ref(repo, "heads/#{name}")
           project.repository.rm_branch(project.creator, name)
         end
       end
 
-      def apply_labels(number, issuable)
-        issue = client.issue(repo, number)
+      def apply_labels(issuable)
+        sleep rate_limit_sleep_time if rate_limit_exceed?
+
+        issue = client.issue(repo, issuable.iid)
 
         if issue.labels.count > 0
           label_ids = issue.labels.map do |raw|
@@ -123,20 +163,20 @@ module Gitlab
         end
       end
 
-      def import_comments(issue_number, noteable)
-        comments = client.issue_comments(repo, issue_number)
-        create_comments(comments, noteable)
+      def import_comments(issuable)
+        comments = paginate { client.issue_comments(repo, issuable.iid, per_page: 100) }
+        create_comments(issuable, comments)
       end
 
-      def import_comments_on_diff(pull_request_number, merge_request)
-        comments = client.pull_request_comments(repo, pull_request_number)
-        create_comments(comments, merge_request)
+      def import_comments_on_diff(merge_request)
+        comments = paginate { client.pull_request_comments(repo, merge_request.iid, per_page: 100) }
+        create_comments(merge_request, comments)
       end
 
-      def create_comments(comments, noteable)
-        comments.each do |raw_data|
-          comment = CommentFormatter.new(project, raw_data)
-          noteable.notes.create!(comment.attributes)
+      def create_comments(issuable, comments)
+        comments.each do |raw|
+          comment = CommentFormatter.new(project, raw)
+          issuable.notes.create!(comment.attributes)
         end
       end
 
diff --git a/lib/gitlab/github_import/issue_formatter.rb b/lib/gitlab/github_import/issue_formatter.rb
index c8173913b4e..47f625efb3a 100644
--- a/lib/gitlab/github_import/issue_formatter.rb
+++ b/lib/gitlab/github_import/issue_formatter.rb
@@ -16,6 +16,10 @@ module Gitlab
         }
       end
 
+      def create!
+        Issue.create!(self.attributes)
+      end
+
       def has_comments?
         raw_data.comments > 0
       end
diff --git a/lib/gitlab/github_import/label_formatter.rb b/lib/gitlab/github_import/label_formatter.rb
index c2b9d40b511..87b51a0a178 100644
--- a/lib/gitlab/github_import/label_formatter.rb
+++ b/lib/gitlab/github_import/label_formatter.rb
@@ -9,6 +9,10 @@ module Gitlab
         }
       end
 
+      def create!
+        Label.create!(self.attributes)
+      end
+
       private
 
       def color
diff --git a/lib/gitlab/github_import/milestone_formatter.rb b/lib/gitlab/github_import/milestone_formatter.rb
index e91a7e328cf..a0d2e47c412 100644
--- a/lib/gitlab/github_import/milestone_formatter.rb
+++ b/lib/gitlab/github_import/milestone_formatter.rb
@@ -14,6 +14,10 @@ module Gitlab
         }
       end
 
+      def create!
+        Milestone.create!(self.attributes)
+      end
+
       private
 
       def number
diff --git a/lib/gitlab/github_import/pull_request_formatter.rb b/lib/gitlab/github_import/pull_request_formatter.rb
index a2947b56ad9..0d21c49035e 100644
--- a/lib/gitlab/github_import/pull_request_formatter.rb
+++ b/lib/gitlab/github_import/pull_request_formatter.rb
@@ -24,6 +24,10 @@ module Gitlab
         }
       end
 
+      def create!
+        MergeRequest.create!(self.attributes)
+      end
+
       def number
         raw_data.number
       end
-- 
GitLab