From 2d1e0a081110618e8f64336324c9b8e43caae4ff Mon Sep 17 00:00:00 2001
From: Douglas Barbosa Alexandre <dbalexandre@gmail.com>
Date: Fri, 3 Feb 2017 13:07:22 -0200
Subject: [PATCH] GitHub Importer - Find users based on their email address

The returned email by the GitHub API is the user's publicly visible
email address (or null if the user has not specified a public email
address in their profile)
---
 lib/gitlab/github_import/base_formatter.rb    | 18 ++-----
 lib/gitlab/github_import/comment_formatter.rb | 10 ++--
 lib/gitlab/github_import/importer.rb          | 19 ++++---
 .../github_import/issuable_formatter.rb       | 26 ++++++----
 lib/gitlab/github_import/user_formatter.rb    | 52 +++++++++++++++++++
 .../github_import/comment_formatter_spec.rb   | 18 +++++--
 .../lib/gitlab/github_import/importer_spec.rb |  5 +-
 .../github_import/issue_formatter_spec.rb     | 27 ++++++++--
 .../pull_request_formatter_spec.rb            | 27 ++++++++--
 .../github_import/user_formatter_spec.rb      | 39 ++++++++++++++
 10 files changed, 191 insertions(+), 50 deletions(-)
 create mode 100644 lib/gitlab/github_import/user_formatter.rb
 create mode 100644 spec/lib/gitlab/github_import/user_formatter_spec.rb

diff --git a/lib/gitlab/github_import/base_formatter.rb b/lib/gitlab/github_import/base_formatter.rb
index 95dba9a327b..8c80791e7c9 100644
--- a/lib/gitlab/github_import/base_formatter.rb
+++ b/lib/gitlab/github_import/base_formatter.rb
@@ -1,11 +1,12 @@
 module Gitlab
   module GithubImport
     class BaseFormatter
-      attr_reader :formatter, :project, :raw_data
+      attr_reader :client, :formatter, :project, :raw_data
 
-      def initialize(project, raw_data)
+      def initialize(project, raw_data, client = nil)
         @project = project
         @raw_data = raw_data
+        @client = client
         @formatter = Gitlab::ImportFormatter.new
       end
 
@@ -18,19 +19,6 @@ module Gitlab
       def url
         raw_data.url || ''
       end
-
-      private
-
-      def gitlab_user_id(github_id)
-        User.joins(:identities).
-          find_by("identities.extern_uid = ? AND identities.provider = 'github'", github_id.to_s).
-          try(:id)
-      end
-
-      def gitlab_author_id
-        return @gitlab_author_id if defined?(@gitlab_author_id)
-        @gitlab_author_id = gitlab_user_id(raw_data.user.id)
-      end
     end
   end
 end
diff --git a/lib/gitlab/github_import/comment_formatter.rb b/lib/gitlab/github_import/comment_formatter.rb
index 2bddcde2b7c..e21922070c1 100644
--- a/lib/gitlab/github_import/comment_formatter.rb
+++ b/lib/gitlab/github_import/comment_formatter.rb
@@ -1,6 +1,8 @@
 module Gitlab
   module GithubImport
     class CommentFormatter < BaseFormatter
+      attr_writer :author_id
+
       def attributes
         {
           project: project,
@@ -17,11 +19,11 @@ module Gitlab
       private
 
       def author
-        raw_data.user.login
+        @author ||= UserFormatter.new(client, raw_data.user)
       end
 
       def author_id
-        gitlab_author_id || project.creator_id
+        author.gitlab_id || project.creator_id
       end
 
       def body
@@ -52,10 +54,10 @@ module Gitlab
       end
 
       def note
-        if gitlab_author_id
+        if author.gitlab_id
           body
         else
-          formatter.author_line(author) + body
+          formatter.author_line(author.login) + body
         end
       end
 
diff --git a/lib/gitlab/github_import/importer.rb b/lib/gitlab/github_import/importer.rb
index 9a4ffd28438..d95ff4fd104 100644
--- a/lib/gitlab/github_import/importer.rb
+++ b/lib/gitlab/github_import/importer.rb
@@ -110,7 +110,7 @@ module Gitlab
       def import_issues
         fetch_resources(:issues, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues|
           issues.each do |raw|
-            gh_issue = IssueFormatter.new(project, raw)
+            gh_issue = IssueFormatter.new(project, raw, client)
 
             begin
               issuable =
@@ -131,7 +131,8 @@ module Gitlab
       def import_pull_requests
         fetch_resources(:pull_requests, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests|
           pull_requests.each do |raw|
-            gh_pull_request = PullRequestFormatter.new(project, raw)
+            gh_pull_request = PullRequestFormatter.new(project, raw, client)
+
             next unless gh_pull_request.valid?
 
             begin
@@ -209,14 +210,16 @@ module Gitlab
         ActiveRecord::Base.no_touching do
           comments.each do |raw|
             begin
-              comment         = CommentFormatter.new(project, raw)
+              comment = CommentFormatter.new(project, raw, client)
+
               # GH does not return info about comment's parent, so we guess it by checking its URL!
               *_, parent, iid = URI(raw.html_url).path.split('/')
-              if parent == 'issues'
-                issuable = Issue.find_by(project_id: project.id, iid: iid)
-              else
-                issuable = MergeRequest.find_by(target_project_id: project.id, iid: iid)
-              end
+
+              issuable = if parent == 'issues'
+                           Issue.find_by(project_id: project.id, iid: iid)
+                         else
+                           MergeRequest.find_by(target_project_id: project.id, iid: iid)
+                         end
 
               next unless issuable
 
diff --git a/lib/gitlab/github_import/issuable_formatter.rb b/lib/gitlab/github_import/issuable_formatter.rb
index 256f360efc7..29fb0f9d333 100644
--- a/lib/gitlab/github_import/issuable_formatter.rb
+++ b/lib/gitlab/github_import/issuable_formatter.rb
@@ -1,6 +1,8 @@
 module Gitlab
   module GithubImport
     class IssuableFormatter < BaseFormatter
+      attr_writer :assignee_id, :author_id
+
       def project_association
         raise NotImplementedError
       end
@@ -23,18 +25,24 @@ module Gitlab
         raw_data.assignee.present?
       end
 
-      def assignee_id
+      def author
+        @author ||= UserFormatter.new(client, raw_data.user)
+      end
+
+      def author_id
+        @author_id ||= author.gitlab_id || project.creator_id
+      end
+
+      def assignee
         if assigned?
-          gitlab_user_id(raw_data.assignee.id)
+          @assignee ||= UserFormatter.new(client, raw_data.assignee)
         end
       end
 
-      def author
-        raw_data.user.login
-      end
+      def assignee_id
+        return @assignee_id if defined?(@assignee_id)
 
-      def author_id
-        gitlab_author_id || project.creator_id
+        @assignee_id = assignee.try(:gitlab_id)
       end
 
       def body
@@ -42,10 +50,10 @@ module Gitlab
       end
 
       def description
-        if gitlab_author_id
+        if author.gitlab_id
           body
         else
-          formatter.author_line(author) + body
+          formatter.author_line(author.login) + body
         end
       end
 
diff --git a/lib/gitlab/github_import/user_formatter.rb b/lib/gitlab/github_import/user_formatter.rb
new file mode 100644
index 00000000000..7550326ffbe
--- /dev/null
+++ b/lib/gitlab/github_import/user_formatter.rb
@@ -0,0 +1,52 @@
+module Gitlab
+  module GithubImport
+    class UserFormatter
+      attr_reader :client, :raw
+
+      delegate :id, :login, to: :raw, allow_nil: true
+
+      def initialize(client, raw)
+        @client = client
+        @raw = raw
+      end
+
+      def gitlab_id
+        return @gitlab_id if defined?(@gitlab_id)
+
+        @gitlab_id = find_by_external_uid || find_by_email
+      end
+
+      private
+
+      def email
+        @email ||= client.user(raw.login).try(:email)
+      end
+
+      def find_by_email
+        return nil unless email
+        users  = ::User.arel_table
+        emails = ::Email.arel_table
+
+        left_join_emails = users.join(emails, Arel::Nodes::OuterJoin).on(
+          users[:id].eq(emails[:user_id])
+        ).join_sources
+
+        User.select(:id)
+            .joins(left_join_emails)
+            .where(users[:email].eq(email).or(emails[:email].eq(email)))
+            .first.try(:id)
+      end
+
+      def find_by_external_uid
+        return nil unless id
+
+        identities = ::Identity.arel_table
+
+        User.select(:id)
+            .joins(:identities).where(identities[:provider].eq(:github)
+            .and(identities[:extern_uid].eq(id))).
+            first.try(:id)
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/github_import/comment_formatter_spec.rb b/spec/lib/gitlab/github_import/comment_formatter_spec.rb
index e6e33d3686a..cc38872e426 100644
--- a/spec/lib/gitlab/github_import/comment_formatter_spec.rb
+++ b/spec/lib/gitlab/github_import/comment_formatter_spec.rb
@@ -1,8 +1,9 @@
 require 'spec_helper'
 
 describe Gitlab::GithubImport::CommentFormatter, lib: true do
+  let(:client) { double }
   let(:project) { create(:empty_project) }
-  let(:octocat) { double(id: 123456, login: 'octocat') }
+  let(:octocat) { double(id: 123456, login: 'octocat', email: 'octocat@example.com') }
   let(:created_at) { DateTime.strptime('2013-04-10T20:09:31Z') }
   let(:updated_at) { DateTime.strptime('2014-03-03T18:58:10Z') }
   let(:base) do
@@ -16,7 +17,11 @@ describe Gitlab::GithubImport::CommentFormatter, lib: true do
     }
   end
 
-  subject(:comment) { described_class.new(project, raw)}
+  subject(:comment) { described_class.new(project, raw, client) }
+
+  before do
+    allow(client).to receive(:user).and_return(octocat)
+  end
 
   describe '#attributes' do
     context 'when do not reference a portion of the diff' do
@@ -69,8 +74,15 @@ describe Gitlab::GithubImport::CommentFormatter, lib: true do
     context 'when author is a GitLab user' do
       let(:raw) { double(base.merge(user: octocat)) }
 
-      it 'returns GitLab user id as author_id' do
+      it 'returns GitLab user id associated with GitHub id as author_id' do
         gl_user = create(:omniauth_user, extern_uid: octocat.id, provider: 'github')
+
+        expect(comment.attributes.fetch(:author_id)).to eq gl_user.id
+      end
+
+      it 'returns GitLab user id associated with GitHub email as author_id' do
+        gl_user = create(:user, email: octocat.email)
+
         expect(comment.attributes.fetch(:author_id)).to eq gl_user.id
       end
 
diff --git a/spec/lib/gitlab/github_import/importer_spec.rb b/spec/lib/gitlab/github_import/importer_spec.rb
index afd78abdc9b..33d83d6d2f1 100644
--- a/spec/lib/gitlab/github_import/importer_spec.rb
+++ b/spec/lib/gitlab/github_import/importer_spec.rb
@@ -44,6 +44,7 @@ describe Gitlab::GithubImport::Importer, lib: true do
       allow_any_instance_of(Octokit::Client).to receive(:rate_limit!).and_raise(Octokit::NotFound)
       allow_any_instance_of(Gitlab::Shell).to receive(:import_repository).and_raise(Gitlab::Shell::Error)
 
+      allow_any_instance_of(Octokit::Client).to receive(:user).and_return(octocat)
       allow_any_instance_of(Octokit::Client).to receive(:labels).and_return([label1, label2])
       allow_any_instance_of(Octokit::Client).to receive(:milestones).and_return([milestone, milestone])
       allow_any_instance_of(Octokit::Client).to receive(:issues).and_return([issue1, issue2])
@@ -53,7 +54,8 @@ describe Gitlab::GithubImport::Importer, lib: true do
       allow_any_instance_of(Octokit::Client).to receive(:last_response).and_return(double(rels: { next: nil }))
       allow_any_instance_of(Octokit::Client).to receive(:releases).and_return([release1, release2])
     end
-    let(:octocat) { double(id: 123456, login: 'octocat') }
+
+    let(:octocat) { double(id: 123456, login: 'octocat', email: 'octocat@example.com') }
     let(:created_at) { DateTime.strptime('2011-01-26T19:01:12Z') }
     let(:updated_at) { DateTime.strptime('2011-01-27T19:01:12Z') }
     let(:label1) do
@@ -125,6 +127,7 @@ describe Gitlab::GithubImport::Importer, lib: true do
       )
     end
 
+    let!(:user) { create(:user, email: octocat.email) }
     let(:repository) { double(id: 1, fork: false) }
     let(:source_sha) { create(:commit, project: project).id }
     let(:source_branch) { double(ref: 'feature', repo: repository, sha: source_sha) }
diff --git a/spec/lib/gitlab/github_import/issue_formatter_spec.rb b/spec/lib/gitlab/github_import/issue_formatter_spec.rb
index eec1fabab54..f34d09f2c1d 100644
--- a/spec/lib/gitlab/github_import/issue_formatter_spec.rb
+++ b/spec/lib/gitlab/github_import/issue_formatter_spec.rb
@@ -1,8 +1,9 @@
 require 'spec_helper'
 
 describe Gitlab::GithubImport::IssueFormatter, lib: true do
+  let(:client) { double }
   let!(:project) { create(:empty_project, namespace: create(:namespace, path: 'octocat')) }
-  let(:octocat) { double(id: 123456, login: 'octocat') }
+  let(:octocat) { double(id: 123456, login: 'octocat', email: 'octocat@example.com') }
   let(:created_at) { DateTime.strptime('2011-01-26T19:01:12Z') }
   let(:updated_at) { DateTime.strptime('2011-01-27T19:01:12Z') }
 
@@ -23,7 +24,11 @@ describe Gitlab::GithubImport::IssueFormatter, lib: true do
     }
   end
 
-  subject(:issue) { described_class.new(project, raw_data) }
+  subject(:issue) { described_class.new(project, raw_data, client) }
+
+  before do
+    allow(client).to receive(:user).and_return(octocat)
+  end
 
   shared_examples 'Gitlab::GithubImport::IssueFormatter#attributes' do
     context 'when issue is open' do
@@ -75,11 +80,17 @@ describe Gitlab::GithubImport::IssueFormatter, lib: true do
         expect(issue.attributes.fetch(:assignee_id)).to be_nil
       end
 
-      it 'returns GitLab user id as assignee_id when is a GitLab user' do
+      it 'returns GitLab user id associated with GitHub id as assignee_id' do
         gl_user = create(:omniauth_user, extern_uid: octocat.id, provider: 'github')
 
         expect(issue.attributes.fetch(:assignee_id)).to eq gl_user.id
       end
+
+      it 'returns GitLab user id associated with GitHub email as assignee_id' do
+        gl_user = create(:user, email: octocat.email)
+
+        expect(issue.attributes.fetch(:assignee_id)).to eq gl_user.id
+      end
     end
 
     context 'when it has a milestone' do
@@ -100,16 +111,22 @@ describe Gitlab::GithubImport::IssueFormatter, lib: true do
     context 'when author is a GitLab user' do
       let(:raw_data) { double(base_data.merge(user: octocat)) }
 
-      it 'returns project#creator_id as author_id when is not a GitLab user' do
+      it 'returns project creator_id as author_id when is not a GitLab user' do
         expect(issue.attributes.fetch(:author_id)).to eq project.creator_id
       end
 
-      it 'returns GitLab user id as author_id when is a GitLab user' do
+      it 'returns GitLab user id associated with GitHub id as author_id' do
         gl_user = create(:omniauth_user, extern_uid: octocat.id, provider: 'github')
 
         expect(issue.attributes.fetch(:author_id)).to eq gl_user.id
       end
 
+      it 'returns GitLab user id associated with GitHub email as author_id' do
+        gl_user = create(:user, email: octocat.email)
+
+        expect(issue.attributes.fetch(:author_id)).to eq gl_user.id
+      end
+
       it 'returns description without created at tag line' do
         create(:omniauth_user, extern_uid: octocat.id, provider: 'github')
 
diff --git a/spec/lib/gitlab/github_import/pull_request_formatter_spec.rb b/spec/lib/gitlab/github_import/pull_request_formatter_spec.rb
index 90947ff4707..e46be18aa99 100644
--- a/spec/lib/gitlab/github_import/pull_request_formatter_spec.rb
+++ b/spec/lib/gitlab/github_import/pull_request_formatter_spec.rb
@@ -1,6 +1,7 @@
 require 'spec_helper'
 
 describe Gitlab::GithubImport::PullRequestFormatter, lib: true do
+  let(:client) { double }
   let(:project) { create(:project, :repository) }
   let(:source_sha) { create(:commit, project: project).id }
   let(:target_sha) { create(:commit, project: project, git_commit: RepoHelpers.another_sample_commit).id }
@@ -10,7 +11,7 @@ describe Gitlab::GithubImport::PullRequestFormatter, lib: true do
   let(:target_repo) { repository }
   let(:target_branch) { double(ref: 'master', repo: target_repo, sha: target_sha) }
   let(:removed_branch) { double(ref: 'removed-branch', repo: source_repo, sha: '2e5d3239642f9161dcbbc4b70a211a68e5e45e2b') }
-  let(:octocat) { double(id: 123456, login: 'octocat') }
+  let(:octocat) { double(id: 123456, login: 'octocat', email: 'octocat@example.com') }
   let(:created_at) { DateTime.strptime('2011-01-26T19:01:12Z') }
   let(:updated_at) { DateTime.strptime('2011-01-27T19:01:12Z') }
   let(:base_data) do
@@ -32,7 +33,11 @@ describe Gitlab::GithubImport::PullRequestFormatter, lib: true do
     }
   end
 
-  subject(:pull_request) { described_class.new(project, raw_data) }
+  subject(:pull_request) { described_class.new(project, raw_data, client) }
+
+  before do
+    allow(client).to receive(:user).and_return(octocat)
+  end
 
   shared_examples 'Gitlab::GithubImport::PullRequestFormatter#attributes' do
     context 'when pull request is open' do
@@ -121,26 +126,38 @@ describe Gitlab::GithubImport::PullRequestFormatter, lib: true do
         expect(pull_request.attributes.fetch(:assignee_id)).to be_nil
       end
 
-      it 'returns GitLab user id as assignee_id when is a GitLab user' do
+      it 'returns GitLab user id associated with GitHub id as assignee_id' do
         gl_user = create(:omniauth_user, extern_uid: octocat.id, provider: 'github')
 
         expect(pull_request.attributes.fetch(:assignee_id)).to eq gl_user.id
       end
+
+      it 'returns GitLab user id associated with GitHub email as assignee_id' do
+        gl_user = create(:user, email: octocat.email)
+
+        expect(pull_request.attributes.fetch(:assignee_id)).to eq gl_user.id
+      end
     end
 
     context 'when author is a GitLab user' do
       let(:raw_data) { double(base_data.merge(user: octocat)) }
 
-      it 'returns project#creator_id as author_id when is not a GitLab user' do
+      it 'returns project creator_id as author_id when is not a GitLab user' do
         expect(pull_request.attributes.fetch(:author_id)).to eq project.creator_id
       end
 
-      it 'returns GitLab user id as author_id when is a GitLab user' do
+      it 'returns GitLab user id associated with GitHub id as author_id' do
         gl_user = create(:omniauth_user, extern_uid: octocat.id, provider: 'github')
 
         expect(pull_request.attributes.fetch(:author_id)).to eq gl_user.id
       end
 
+      it 'returns GitLab user id associated with GitHub email as author_id' do
+        gl_user = create(:user, email: octocat.email)
+
+        expect(pull_request.attributes.fetch(:author_id)).to eq gl_user.id
+      end
+
       it 'returns description without created at tag line' do
         create(:omniauth_user, extern_uid: octocat.id, provider: 'github')
 
diff --git a/spec/lib/gitlab/github_import/user_formatter_spec.rb b/spec/lib/gitlab/github_import/user_formatter_spec.rb
new file mode 100644
index 00000000000..db792233657
--- /dev/null
+++ b/spec/lib/gitlab/github_import/user_formatter_spec.rb
@@ -0,0 +1,39 @@
+require 'spec_helper'
+
+describe Gitlab::GithubImport::UserFormatter, lib: true do
+  let(:client) { double }
+  let(:octocat) { double(id: 123456, login: 'octocat', email: 'octocat@example.com') }
+
+  subject(:user) { described_class.new(client, octocat) }
+
+  before do
+    allow(client).to receive(:user).and_return(octocat)
+  end
+
+  describe '#gitlab_id' do
+    context 'when GitHub user is a GitLab user' do
+      it 'return GitLab user id when user associated their account with GitHub' do
+        gl_user = create(:omniauth_user, extern_uid: octocat.id, provider: 'github')
+
+        expect(user.gitlab_id).to eq gl_user.id
+      end
+
+      it 'returns GitLab user id when user primary email matches GitHub email' do
+        gl_user = create(:user, email: octocat.email)
+
+        expect(user.gitlab_id).to eq gl_user.id
+      end
+
+      it 'returns GitLab user id when any of user linked emails matches GitHub email' do
+        gl_user = create(:user, email: 'johndoe@example.com')
+        create(:email, user: gl_user, email: octocat.email)
+
+        expect(user.gitlab_id).to eq gl_user.id
+      end
+    end
+
+    it 'returns nil when GitHub user is not a GitLab user' do
+      expect(user.gitlab_id).to be_nil
+    end
+  end
+end
-- 
GitLab