From 395a9301b233da30a9abef56bb7b6fa6229f3acd Mon Sep 17 00:00:00 2001
From: Ahmad Sherif <me@ahmadsherif.com>
Date: Tue, 27 Sep 2016 19:32:47 +0200
Subject: [PATCH] Process each page of GitHub resources instead of concating
 them then processing

This should avoid having large memory growth when importing GitHub repos
with lots of resources.
---
 lib/gitlab/github_import/client.rb           |  11 +-
 lib/gitlab/github_import/importer.rb         | 101 ++++++++++---------
 spec/lib/gitlab/github_import/client_spec.rb |   2 +-
 3 files changed, 58 insertions(+), 56 deletions(-)

diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb
index 084e514492c..e33ac61f5ae 100644
--- a/lib/gitlab/github_import/client.rb
+++ b/lib/gitlab/github_import/client.rb
@@ -52,7 +52,7 @@ module Gitlab
 
       def method_missing(method, *args, &block)
         if api.respond_to?(method)
-          request { api.send(method, *args, &block) }
+          request(method, *args, &block)
         else
           super(method, *args, &block)
         end
@@ -99,20 +99,19 @@ module Gitlab
         rate_limit.resets_in + GITHUB_SAFE_SLEEP_TIME
       end
 
-      def request
+      def request(method, *args, &block)
         sleep rate_limit_sleep_time if rate_limit_exceed?
 
-        data = yield
+        data = api.send(method, *args, &block)
+        yield data
 
         last_response = api.last_response
 
         while last_response.rels[:next]
           sleep rate_limit_sleep_time if rate_limit_exceed?
           last_response = last_response.rels[:next].get
-          data.concat(last_response.data) if last_response.data.is_a?(Array)
+          yield last_response.data if last_response.data.is_a?(Array)
         end
-
-        data
       end
     end
   end
diff --git a/lib/gitlab/github_import/importer.rb b/lib/gitlab/github_import/importer.rb
index d35ee2a1c65..f1adec8212b 100644
--- a/lib/gitlab/github_import/importer.rb
+++ b/lib/gitlab/github_import/importer.rb
@@ -46,64 +46,66 @@ module Gitlab
       end
 
       def import_labels
-        labels = client.labels(repo, per_page: 100)
-
-        labels.each do |raw|
-          begin
-            LabelFormatter.new(project, raw).create!
-          rescue => e
-            errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+        client.labels(repo, per_page: 100) do |labels|
+          labels.each do |raw|
+            begin
+              LabelFormatter.new(project, raw).create!
+            rescue => e
+              errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+            end
           end
         end
       end
 
       def import_milestones
-        milestones = client.milestones(repo, state: :all, per_page: 100)
-
-        milestones.each do |raw|
-          begin
-            MilestoneFormatter.new(project, raw).create!
-          rescue => e
-            errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+        client.milestones(repo, state: :all, per_page: 100) do |milestones|
+          milestones.each do |raw|
+            begin
+              MilestoneFormatter.new(project, raw).create!
+            rescue => e
+              errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+            end
           end
         end
       end
 
       def import_issues
-        issues = client.issues(repo, state: :all, sort: :created, direction: :asc, per_page: 100)
-
-        issues.each do |raw|
-          gh_issue = IssueFormatter.new(project, raw)
-
-          if gh_issue.valid?
-            begin
-              issue = gh_issue.create!
-              apply_labels(issue)
-              import_comments(issue) if gh_issue.has_comments?
-            rescue => e
-              errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+        client.issues(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues|
+          issues.each do |raw|
+            gh_issue = IssueFormatter.new(project, raw)
+
+            if gh_issue.valid?
+              begin
+                issue = gh_issue.create!
+                apply_labels(issue)
+                import_comments(issue) if gh_issue.has_comments?
+              rescue => e
+                errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+              end
             end
           end
         end
       end
 
       def import_pull_requests
-        pull_requests = client.pull_requests(repo, state: :all, sort: :created, direction: :asc, per_page: 100)
-        pull_requests = pull_requests.map { |raw| PullRequestFormatter.new(project, raw) }.select(&:valid?)
-
-        pull_requests.each do |pull_request|
-          begin
-            restore_source_branch(pull_request) unless pull_request.source_branch_exists?
-            restore_target_branch(pull_request) unless pull_request.target_branch_exists?
-
-            merge_request = pull_request.create!
-            apply_labels(merge_request)
-            import_comments(merge_request)
-            import_comments_on_diff(merge_request)
-          rescue => e
-            errors << { type: :pull_request, url: Gitlab::UrlSanitizer.sanitize(pull_request.url), errors: e.message }
-          ensure
-            clean_up_restored_branches(pull_request)
+        client.pull_requests(repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests|
+          pull_requests.each do |raw|
+            pull_request = PullRequestFormatter.new(project, raw)
+            next unless pull_request.valid?
+
+            begin
+              restore_source_branch(pull_request) unless pull_request.source_branch_exists?
+              restore_target_branch(pull_request) unless pull_request.target_branch_exists?
+
+              merge_request = pull_request.create!
+              apply_labels(merge_request)
+              import_comments(merge_request)
+              import_comments_on_diff(merge_request)
+            rescue => e
+              errors << { type: :pull_request, url: Gitlab::UrlSanitizer.sanitize(pull_request.url), errors: e.message }
+            ensure
+              clean_up_restored_branches(pull_request)
+            end
           end
         end
       end
@@ -180,13 +182,14 @@ module Gitlab
       end
 
       def import_releases
-        releases = client.releases(repo, per_page: 100)
-        releases.each do |raw|
-          begin
-            gh_release = ReleaseFormatter.new(project, raw)
-            gh_release.create! if gh_release.valid?
-          rescue => e
-            errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+        client.releases(repo, per_page: 100) do |releases|
+          releases.each do |raw|
+            begin
+              gh_release = ReleaseFormatter.new(project, raw)
+              gh_release.create! if gh_release.valid?
+            rescue => e
+              errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
+            end
           end
         end
       end
diff --git a/spec/lib/gitlab/github_import/client_spec.rb b/spec/lib/gitlab/github_import/client_spec.rb
index 613c47d55f1..e829b936343 100644
--- a/spec/lib/gitlab/github_import/client_spec.rb
+++ b/spec/lib/gitlab/github_import/client_spec.rb
@@ -66,6 +66,6 @@ describe Gitlab::GithubImport::Client, lib: true do
     stub_request(:get, /api.github.com/)
     allow(client.api).to receive(:rate_limit!).and_raise(Octokit::NotFound)
 
-    expect { client.issues }.not_to raise_error
+    expect { client.issues {} }.not_to raise_error
   end
 end
-- 
GitLab