Skip to content
Snippets Groups Projects
Unverified Commit 19428e80 authored by Yorick Peterse's avatar Yorick Peterse
Browse files

Preload pipeline data for project pipelines

When displaying the pipelines of a project we now preload the following
data:

1. Authors of the commits that belong to these pipelines
2. The number of warnings per pipeline, which is used by
   Ci::Pipeline#has_warnings?

== Commit Authors

Previously this data was queried for every Commit separately, leading to
20 SQL queries being executed in the worst case. With an average of 3 to
5 milliseconds per SQL query this could result in 100 milliseconds being
spent in _just_ getting Commit authors.

To preload this data Commit#author now uses BatchLoader (through
Commit#lazy_author), and a separate module
Gitlab::Ci::Pipeline::Preloader is used to ensure all authors are loaded
before they are used.

== Number of warnings

This changes Ci::Pipeline#has_warnings? so it supports preloading of the
number of warnings per pipeline. This removes the need for executing a
COUNT(*) query for every pipeline just to see if it has any warnings or
not.
parent 70985aa1
No related branches found
No related tags found
No related merge requests found
Loading
Loading
@@ -23,7 +23,7 @@ class Projects::PipelinesController < Projects::ApplicationController
@finished_count = limited_pipelines_count(project, 'finished')
@pipelines_count = limited_pipelines_count(project)
 
@pipelines.map(&:commit) # List commits for batch loading
Gitlab::Ci::Pipeline::Preloader.preload(@pipelines)
 
respond_to do |format|
format.html
Loading
Loading
Loading
Loading
@@ -406,7 +406,18 @@ module Ci
end
 
def has_warnings?
builds.latest.failed_but_allowed.any?
number_of_warnings.positive?
end
def number_of_warnings
BatchLoader.for(id).batch(default_value: 0) do |pipeline_ids, loader|
Build.where(commit_id: pipeline_ids)
.latest
.failed_but_allowed
.group(:commit_id)
.count
.each { |id, amount| loader.call(id, amount) }
end
end
 
def set_config_source
Loading
Loading
Loading
Loading
@@ -224,8 +224,34 @@ class Commit
Gitlab::ClosingIssueExtractor.new(project, current_user).closed_by_message(safe_message)
end
 
def lazy_author
BatchLoader.for(author_email.downcase).batch do |emails, loader|
# A Hash that maps user Emails to the corresponding User objects. The
# Emails at this point are the _primary_ Emails of the Users.
users_for_emails = User
.by_any_email(emails)
.each_with_object({}) { |user, hash| hash[user.email] = user }
users_for_ids = users_for_emails
.values
.each_with_object({}) { |user, hash| hash[user.id] = user }
# Some commits may have used an alternative Email address. In this case we
# need to query the "emails" table to map those addresses to User objects.
Email
.where(email: emails - users_for_emails.keys)
.pluck(:email, :user_id)
.each { |(email, id)| users_for_emails[email] = users_for_ids[id] }
users_for_emails.each { |email, user| loader.call(email, user) }
end
end
def author
User.find_by_any_email(author_email.downcase)
# We use __sync so that we get the actual objects back (including an actual
# nil), instead of a wrapper, as returning a wrapped nil breaks a lot of
# code.
lazy_author.__sync
end
request_cache(:author) { author_email.downcase }
 
Loading
Loading
# frozen_string_literal: true
module Gitlab
module Ci
module Pipeline
# Class for preloading data associated with pipelines such as commit
# authors.
module Preloader
def self.preload(pipelines)
# This ensures that all the pipeline commits are eager loaded before we
# start using them.
pipelines.each(&:commit)
pipelines.each do |pipeline|
# This preloads the author of every commit. We're using "lazy_author"
# here since "author" immediately loads the data on the first call.
pipeline.commit.try(:lazy_author)
# This preloads the number of warnings for every pipeline, ensuring
# that Ci::Pipeline#has_warnings? doesn't execute any additional
# queries.
pipeline.number_of_warnings
end
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::Ci::Pipeline::Preloader do
describe '.preload' do
it 'preloads the author of every pipeline commit' do
commit = double(:commit)
pipeline = double(:pipeline, commit: commit)
expect(commit)
.to receive(:lazy_author)
expect(pipeline)
.to receive(:number_of_warnings)
described_class.preload([pipeline])
end
end
end
Loading
Loading
@@ -774,6 +774,33 @@ describe Ci::Pipeline, :mailer do
end
end
 
describe '#number_of_warnings' do
it 'returns the number of warnings' do
create(:ci_build, :allowed_to_fail, :failed, pipeline: pipeline, name: 'rubocop')
expect(pipeline.number_of_warnings).to eq(1)
end
it 'supports eager loading of the number of warnings' do
pipeline2 = create(:ci_empty_pipeline, status: :created, project: project)
create(:ci_build, :allowed_to_fail, :failed, pipeline: pipeline, name: 'rubocop')
create(:ci_build, :allowed_to_fail, :failed, pipeline: pipeline2, name: 'rubocop')
pipelines = project.pipelines.to_a
pipelines.each(&:number_of_warnings)
# To run the queries we need to actually use the lazy objects, which we do
# by just sending "to_i" to them.
amount = ActiveRecord::QueryRecorder
.new { pipelines.each { |p| p.number_of_warnings.to_i } }
.count
expect(amount).to eq(1)
end
end
shared_context 'with some outdated pipelines' do
before do
create_pipeline(:canceled, 'ref', 'A', project)
Loading
Loading
Loading
Loading
@@ -52,22 +52,98 @@ describe Commit do
end
end
 
describe '#author' do
describe '#author', :request_store do
it 'looks up the author in a case-insensitive way' do
user = create(:user, email: commit.author_email.upcase)
expect(commit.author).to eq(user)
end
 
it 'caches the author', :request_store do
it 'caches the author' do
user = create(:user, email: commit.author_email)
expect(User).to receive(:find_by_any_email).and_call_original
 
expect(commit.author).to eq(user)
key = "Commit:author:#{commit.author_email.downcase}"
expect(RequestStore.store[key]).to eq(user)
 
expect(RequestStore.store[key]).to eq(user)
expect(commit.author).to eq(user)
end
context 'using eager loading' do
let!(:alice) { create(:user, email: 'alice@example.com') }
let!(:bob) { create(:user, email: 'hunter2@example.com') }
let(:alice_commit) do
described_class.new(RepoHelpers.sample_commit, project).tap do |c|
c.author_email = 'alice@example.com'
end
end
let(:bob_commit) do
# The commit for Bob uses one of his alternative Emails, instead of the
# primary one.
described_class.new(RepoHelpers.sample_commit, project).tap do |c|
c.author_email = 'bob@example.com'
end
end
let(:eve_commit) do
described_class.new(RepoHelpers.sample_commit, project).tap do |c|
c.author_email = 'eve@example.com'
end
end
let!(:commits) { [alice_commit, bob_commit, eve_commit] }
before do
create(:email, user: bob, email: 'bob@example.com')
end
it 'executes only two SQL queries' do
recorder = ActiveRecord::QueryRecorder.new do
# Running this first ensures we don't run one query for every
# commit.
commits.each(&:lazy_author)
# This forces the execution of the SQL queries necessary to load the
# data.
commits.each { |c| c.author.try(:id) }
end
expect(recorder.count).to eq(2)
end
it "preloads the authors for Commits matching a user's primary Email" do
commits.each(&:lazy_author)
expect(alice_commit.author).to eq(alice)
end
it "preloads the authors for Commits using a User's alternative Email" do
commits.each(&:lazy_author)
expect(bob_commit.author).to eq(bob)
end
it 'sets the author to Nil if an author could not be found for a Commit' do
commits.each(&:lazy_author)
expect(eve_commit.author).to be_nil
end
it 'does not execute SQL queries once the authors are preloaded' do
commits.each(&:lazy_author)
commits.each { |c| c.author.try(:id) }
recorder = ActiveRecord::QueryRecorder.new do
alice_commit.author
bob_commit.author
eve_commit.author
end
expect(recorder.count).to be_zero
end
end
end
 
describe '#to_reference' do
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment