From 16ed9b6129daf51a296d4576580c5f232d043db6 Mon Sep 17 00:00:00 2001
From: Yorick Peterse <yorickpeterse@gmail.com>
Date: Tue, 4 Oct 2016 18:03:10 +0200
Subject: [PATCH] Refactor Gitlab::Identifier

This refactors Gitlab::Identifier so it uses fewer queries and is
actually tested. Queries are reduced by caching the output as well as
using 1 query (instead of 2) to find a user using an SSH key.
---
 CHANGELOG                          |   1 +
 app/models/user.rb                 |   5 ++
 lib/gitlab/identifier.rb           |  58 ++++++++++++--
 spec/lib/gitlab/identifier_spec.rb | 123 +++++++++++++++++++++++++++++
 spec/models/user_spec.rb           |  17 ++++
 spec/workers/post_receive_spec.rb  |   4 +-
 6 files changed, 199 insertions(+), 9 deletions(-)
 create mode 100644 spec/lib/gitlab/identifier_spec.rb

diff --git a/CHANGELOG b/CHANGELOG
index a47ec511452..800b1a84f62 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -44,6 +44,7 @@ v 8.13.0 (unreleased)
   - Optimize GitHub importing for speed and memory
   - API: expose pipeline data in builds API (!6502, Guilherme Salazar)
   - Notify the Merger about merge after successful build (Dimitris Karakasilis)
+  - Reduce queries needed to find users using their SSH keys when pushing commits
   - Fix broken repository 500 errors in project list
   - Close todos when accepting merge requests via the API !6486 (tonygambone)
   - Changed Slack service user referencing from full name to username (Sebastian Poxhofer)
diff --git a/app/models/user.rb b/app/models/user.rb
index 7f5a8562907..508efd85050 100644
--- a/app/models/user.rb
+++ b/app/models/user.rb
@@ -279,6 +279,11 @@ class User < ActiveRecord::Base
       find_by('users.username = ? OR users.id = ?', name_or_id.to_s, name_or_id.to_i)
     end
 
+    # Returns a user for the given SSH key.
+    def find_by_ssh_key_id(key_id)
+      find_by(id: Key.unscoped.select(:user_id).where(id: key_id))
+    end
+
     def build_user(attrs = {})
       User.new(attrs)
     end
diff --git a/lib/gitlab/identifier.rb b/lib/gitlab/identifier.rb
index 3e5d728f3bc..f8809db21aa 100644
--- a/lib/gitlab/identifier.rb
+++ b/lib/gitlab/identifier.rb
@@ -5,19 +5,61 @@ module Gitlab
     def identify(identifier, project, newrev)
       if identifier.blank?
         # Local push from gitlab
-        email = project.commit(newrev).author_email rescue nil
-        User.find_by(email: email) if email
-
+        identify_using_commit(project, newrev)
       elsif identifier =~ /\Auser-\d+\Z/
         # git push over http
-        user_id = identifier.gsub("user-", "")
-        User.find_by(id: user_id)
-
+        identify_using_user(identifier)
       elsif identifier =~ /\Akey-\d+\Z/
         # git push over ssh
-        key_id = identifier.gsub("key-", "")
-        Key.find_by(id: key_id).try(:user)
+        identify_using_ssh_key(identifier)
+      end
+    end
+
+    # Tries to identify a user based on a commit SHA.
+    def identify_using_commit(project, ref)
+      commit = project.commit(ref)
+
+      return if !commit || !commit.author_email
+
+      email = commit.author_email
+
+      identify_with_cache(:email, email) do
+        User.find_by(email: email)
       end
     end
+
+    # Tries to identify a user based on a user identifier (e.g. "user-123").
+    def identify_using_user(identifier)
+      user_id = identifier.gsub("user-", "")
+
+      identify_with_cache(:user, user_id) do
+        User.find_by(id: user_id)
+      end
+    end
+
+    # Tries to identify a user based on an SSH key identifier (e.g. "key-123").
+    def identify_using_ssh_key(identifier)
+      key_id = identifier.gsub("key-", "")
+
+      identify_with_cache(:ssh_key, key_id) do
+        User.find_by_ssh_key_id(key_id)
+      end
+    end
+
+    def identify_with_cache(category, key)
+      if identification_cache[category].key?(key)
+        identification_cache[category][key]
+      else
+        identification_cache[category][key] = yield
+      end
+    end
+
+    def identification_cache
+      @identification_cache ||= {
+        email: {},
+        user: {},
+        ssh_key: {}
+      }
+    end
   end
 end
diff --git a/spec/lib/gitlab/identifier_spec.rb b/spec/lib/gitlab/identifier_spec.rb
new file mode 100644
index 00000000000..47d6f1007d1
--- /dev/null
+++ b/spec/lib/gitlab/identifier_spec.rb
@@ -0,0 +1,123 @@
+require 'spec_helper'
+
+describe Gitlab::Identifier do
+  let(:identifier) do
+    Class.new { include Gitlab::Identifier }.new
+  end
+
+  let(:project) { create(:empty_project) }
+  let(:user) { create(:user) }
+  let(:key) { create(:key, user: user) }
+
+  describe '#identify' do
+    context 'without an identifier' do
+      it 'identifies the user using a commit' do
+        expect(identifier).to receive(:identify_using_commit).
+          with(project, '123')
+
+        identifier.identify('', project, '123')
+      end
+    end
+
+    context 'with a user identifier' do
+      it 'identifies the user using a user ID' do
+        expect(identifier).to receive(:identify_using_user).
+          with("user-#{user.id}")
+
+        identifier.identify("user-#{user.id}", project, '123')
+      end
+    end
+
+    context 'with an SSH key identifier' do
+      it 'identifies the user using an SSH key ID' do
+        expect(identifier).to receive(:identify_using_ssh_key).
+          with("key-#{key.id}")
+
+        identifier.identify("key-#{key.id}", project, '123')
+      end
+    end
+  end
+
+  describe '#identify_using_commit' do
+    it "returns the User for an existing commit author's Email address" do
+      commit = double(:commit, author_email: user.email)
+
+      expect(project).to receive(:commit).with('123').and_return(commit)
+
+      expect(identifier.identify_using_commit(project, '123')).to eq(user)
+    end
+
+    it 'returns nil when no user could be found' do
+      allow(project).to receive(:commit).with('123').and_return(nil)
+
+      expect(identifier.identify_using_commit(project, '123')).to be_nil
+    end
+
+    it 'returns nil when the commit does not have an author Email' do
+      commit = double(:commit, author_email: nil)
+
+      expect(project).to receive(:commit).with('123').and_return(commit)
+
+      expect(identifier.identify_using_commit(project, '123')).to be_nil
+    end
+
+    it 'caches the found users per Email' do
+      commit = double(:commit, author_email: user.email)
+
+      expect(project).to receive(:commit).with('123').twice.and_return(commit)
+      expect(User).to receive(:find_by).once.and_call_original
+
+      2.times do
+        expect(identifier.identify_using_commit(project, '123')).to eq(user)
+      end
+    end
+  end
+
+  describe '#identify_using_user' do
+    it 'returns the User for an existing ID in the identifier' do
+      found = identifier.identify_using_user("user-#{user.id}")
+
+      expect(found).to eq(user)
+    end
+
+    it 'returns nil for a non existing user ID' do
+      found = identifier.identify_using_user('user--1')
+
+      expect(found).to be_nil
+    end
+
+    it 'caches the found users per ID' do
+      expect(User).to receive(:find_by).once.and_call_original
+
+      2.times do
+        found = identifier.identify_using_user("user-#{user.id}")
+
+        expect(found).to eq(user)
+      end
+    end
+  end
+
+  describe '#identify_using_ssh_key' do
+    it 'returns the User for an existing SSH key' do
+      found = identifier.identify_using_ssh_key("key-#{key.id}")
+
+      expect(found).to eq(user)
+    end
+
+    it 'returns nil for an invalid SSH key' do
+      found = identifier.identify_using_ssh_key('key--1')
+
+      expect(found).to be_nil
+    end
+
+    it 'caches the found users per key' do
+      expect(User).to receive(:find_by_ssh_key_id).once.and_call_original
+
+      2.times do
+        found = identifier.identify_using_ssh_key("key-#{key.id}")
+
+        expect(found).to eq(user)
+      end
+    end
+  end
+end
diff --git a/spec/models/user_spec.rb b/spec/models/user_spec.rb
index a1770d96f83..65b2896930a 100644
--- a/spec/models/user_spec.rb
+++ b/spec/models/user_spec.rb
@@ -610,6 +610,23 @@ describe User, models: true do
     end
   end
 
+  describe '.find_by_ssh_key_id' do
+    context 'using an existing SSH key ID' do
+      let(:user) { create(:user) }
+      let(:key) { create(:key, user: user) }
+
+      it 'returns the corresponding User' do
+        expect(described_class.find_by_ssh_key_id(key.id)).to eq(user)
+      end
+    end
+
+    context 'using an invalid SSH key ID' do
+      it 'returns nil' do
+        expect(described_class.find_by_ssh_key_id(-1)).to be_nil
+      end
+    end
+  end
+
   describe '.by_login' do
     let(:username) { 'John' }
     let!(:user) { create(:user, username: username) }
diff --git a/spec/workers/post_receive_spec.rb b/spec/workers/post_receive_spec.rb
index 1d2cf7acddd..ffeaafe654a 100644
--- a/spec/workers/post_receive_spec.rb
+++ b/spec/workers/post_receive_spec.rb
@@ -79,7 +79,9 @@ describe PostReceive do
     end
 
     it "does not run if the author is not in the project" do
-      allow(Key).to receive(:find_by).with(hash_including(id: anything())) { nil }
+      allow_any_instance_of(Gitlab::GitPostReceive).
+        to receive(:identify_using_ssh_key).
+        and_return(nil)
 
       expect(project).not_to receive(:execute_hooks)
 
-- 
GitLab