Skip to content
Snippets Groups Projects
Unverified Commit f9565e30 authored by Zeger-Jan van de Weg's avatar Zeger-Jan van de Weg
Browse files

Batchload blobs for diff generation

After installing a new gem, batch-loader, a construct can be used to
queue data to be fetched in bulk. The gem was also introduced in both
gitlab-org/gitlab-ce!14680 and gitlab-org/gitlab-ce!14846, but those
merge requests have not been merged yet.

For the generation of diffs, both the old blob and the new blob need to
be loaded, and this has to happen for every file in the diff. Now we
collect all of these requests so that only one fetch is done. Three
`.allow_n_plus_1_calls` blocks have been removed, which I expect to be
valid, but this needs to be confirmed by a full CI run.

Possibly closes:
- https://gitlab.com/gitlab-org/gitlab-ce/issues/37445
- https://gitlab.com/gitlab-org/gitlab-ce/issues/37599
- https://gitlab.com/gitlab-org/gitlab-ce/issues/37431
parent 6dd89059
No related branches found
No related tags found
No related merge requests found
Showing
with 86 additions and 56 deletions
Loading
Loading
@@ -263,6 +263,8 @@ gem 'gettext_i18n_rails', '~> 1.8.0'
gem 'gettext_i18n_rails_js', '~> 1.2.0'
gem 'gettext', '~> 3.2.2', require: false, group: :development
 
gem 'batch-loader'
# Perf bar
gem 'peek', '~> 1.0.1'
gem 'peek-gc', '~> 0.0.2'
Loading
Loading
Loading
Loading
@@ -73,6 +73,7 @@ GEM
thread_safe (~> 0.3, >= 0.3.1)
babosa (1.0.2)
base32 (0.3.2)
batch-loader (1.1.1)
bcrypt (3.1.11)
bcrypt_pbkdf (1.0.0)
benchmark-ips (2.3.0)
Loading
Loading
@@ -982,6 +983,7 @@ DEPENDENCIES
awesome_print (~> 1.2.0)
babosa (~> 1.0.2)
base32 (~> 0.3.0)
batch-loader
bcrypt_pbkdf (~> 1.0)
benchmark-ips (~> 2.3.0)
better_errors (~> 2.1.0)
Loading
Loading
Loading
Loading
@@ -22,12 +22,7 @@ class Projects::CommitController < Projects::ApplicationController
apply_diff_view_cookie!
 
respond_to do |format|
format.html do
# n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/37599
Gitlab::GitalyClient.allow_n_plus_1_calls do
render
end
end
format.html { render }
format.diff { render text: @commit.to_diff }
format.patch { render text: @commit.to_patch }
end
Loading
Loading
@@ -112,7 +107,7 @@ class Projects::CommitController < Projects::ApplicationController
end
 
def commit
@noteable = @commit ||= @project.commit(params[:id])
@noteable = @commit ||= @project.commit_by(oid: params[:id])
end
 
def define_commit_vars
Loading
Loading
Loading
Loading
@@ -10,10 +10,7 @@ class Projects::MergeRequests::DiffsController < Projects::MergeRequests::Applic
def show
@environment = @merge_request.environments_for(current_user).last
 
# n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/37431
Gitlab::GitalyClient.allow_n_plus_1_calls do
render json: { html: view_to_html_string("projects/merge_requests/diffs/_diffs") }
end
render json: { html: view_to_html_string("projects/merge_requests/diffs/_diffs") }
end
 
def diff_for_path
Loading
Loading
Loading
Loading
@@ -76,12 +76,24 @@ class Blob < SimpleDelegator
new(blob, project)
end
 
# Returns a lazily loaded blob for +path+ at +commit_id+ in +project+.
#
# Nothing is fetched when this method is called; the request is only queued.
# When one of the returned objects is first used, the batch block runs with
# every queued item and loads them through `project.repository.blobs_at`.
#
# The project is part of the batch key. Without it, lazy blobs queued for
# different projects in the same batch would all be fetched from a single
# project's repository, and two requests for the same commit_id/path in
# different projects would resolve to the same blob.
#
# project   - object responding to `repository` (which responds to `blobs_at`)
# commit_id - commit identifier the blob is looked up at
# path      - path of the file inside that commit
def self.lazy(project, commit_id, path)
  BatchLoader.for({ project: project, commit_id: commit_id, path: path }).batch do |items, loader|
    # One bulk fetch per project instead of one fetch for whichever project
    # happened to own the executing block.
    items.group_by { |item| item[:project] }.each do |batch_project, project_items|
      # Explicit key lookup rather than relying on hash insertion order.
      pairs = project_items.map { |item| item.values_at(:commit_id, :path) }

      batch_project.repository.blobs_at(pairs).each do |blob|
        # Entries that could not be loaded are skipped, as before.
        next unless blob

        loader.call({ project: batch_project, commit_id: blob.commit_id, path: blob.path }, blob)
      end
    end
  end
end
# Builds a decorated blob.
#
# blob    - the object handed to the superclass constructor via `super`
# project - optional project the blob belongs to (defaults to nil); stored
#           in @project
def initialize(blob, project = nil)
@project = project
 
super(blob)
end
 
# Short, human-readable representation: class name, the first nine
# characters of the blob id and of the commit id, and the file path.
def inspect
  short_oid = id[0..8]
  short_commit = commit_id[0..8]

  "#<#{self.class.name} oid:#{short_oid} commit:#{short_commit} path:#{path}>"
end
# Returns the data of the blob.
#
# If the blob is a text based blob the content is converted to UTF-8 and any
Loading
Loading
@@ -95,7 +107,10 @@ class Blob < SimpleDelegator
end
 
def load_all_data!
super(project.repository) if project
# Endpoint needed: gitlab-org/gitaly#756
Gitlab::GitalyClient.allow_n_plus_1_calls do
super(project.repository) if project
end
end
 
def no_highlighting?
Loading
Loading
Loading
Loading
@@ -84,7 +84,7 @@ class Commit
end
 
def id
@raw.id
raw.id
end
 
def ==(other)
Loading
Loading
@@ -361,7 +361,7 @@ class Commit
@deltas ||= raw.deltas
end
 
def diffs(diff_options = nil)
def diffs(diff_options = {})
Gitlab::Diff::FileCollection::Commit.new(self, diff_options: diff_options)
end
 
Loading
Loading
Loading
Loading
@@ -478,6 +478,11 @@ class Repository
nil
end
 
# Loads several blobs with a single `raw_repository.batch_blobs` call and
# passes every returned entry through `Blob.decorate` together with `project`.
#
# items is an Array like: [[oid, path], [oid1, path1]]
#
# Returns an Array with one decorated entry per result of `batch_blobs`.
def blobs_at(items)
raw_repository.batch_blobs(items).map { |blob| Blob.decorate(blob, project) }
end
def root_ref
if raw_repository
raw_repository.root_ref
Loading
Loading
---
title: Fetch blobs in bulk when generating diffs
merge_request:
author:
type: performance
Rails.application.config.middleware.use(BatchLoader::Middleware)
Loading
Loading
@@ -25,6 +25,10 @@ module Gitlab
@repository = repository
@diff_refs = diff_refs
@fallback_diff_refs = fallback_diff_refs
# Ensure items are collected in the batch
new_blob
old_blob
end
 
def position(position_marker, position_type: :text)
Loading
Loading
@@ -95,21 +99,15 @@ module Gitlab
end
 
def new_blob
return @new_blob if defined?(@new_blob)
sha = new_content_sha
return @new_blob = nil unless sha
return unless new_content_sha
 
@new_blob = repository.blob_at(sha, file_path)
Blob.lazy(repository.project, new_content_sha, file_path)
end
 
def old_blob
return @old_blob if defined?(@old_blob)
sha = old_content_sha
return @old_blob = nil unless sha
return unless old_content_sha
 
@old_blob = repository.blob_at(sha, old_path)
Blob.lazy(repository.project, old_content_sha, old_path)
end
 
def content_sha
Loading
Loading
Loading
Loading
@@ -22,10 +22,7 @@ module Gitlab
end
 
def diff_files
# n+1: https://gitlab.com/gitlab-org/gitlab-ce/issues/37445
Gitlab::GitalyClient.allow_n_plus_1_calls do
@diff_files ||= @diffs.decorate! { |diff| decorate_diff!(diff) }
end
@diff_files ||= @diffs.decorate! { |diff| decorate_diff!(diff) }
end
 
def diff_file_with_old_path(old_path)
Loading
Loading
Loading
Loading
@@ -179,6 +179,8 @@ module Gitlab
)
end
end
rescue Rugged::ReferenceError
nil
end
 
def rugged_raw(repository, sha, limit:)
Loading
Loading
Loading
Loading
@@ -1161,6 +1161,11 @@ module Gitlab
Gitlab::Git::Blob.find(self, sha, path) unless Gitlab::Git.blank_ref?(sha)
end
 
# Fetches several blobs at once by delegating to `Gitlab::Git::Blob.batch`,
# passing this repository as the first argument.
#
# Items should be of format [[commit_id, path], [commit_id1, path1]]
#
# blob_size_limit - forwarded unchanged to `Gitlab::Git::Blob.batch`
#                   (defaults to nil)
def batch_blobs(items, blob_size_limit: nil)
Gitlab::Git::Blob.batch(self, items, blob_size_limit: blob_size_limit)
end
def commit_index(user, branch_name, index, options)
committer = user_to_committer(user)
 
Loading
Loading
require 'spec_helper'
 
describe Projects::CommitController do
let(:project) { create(:project, :repository) }
let(:user) { create(:user) }
set(:project) { create(:project, :repository) }
set(:user) { create(:user) }
let(:commit) { project.commit("master") }
let(:master_pickable_sha) { '7d3b0f7cff5f37573aea97cebfd5692ea1689924' }
let(:master_pickable_commit) { project.commit(master_pickable_sha) }
 
before do
sign_in(user)
project.team << [user, :master]
project.add_master(user)
end
 
describe 'GET show' do
Loading
Loading
Loading
Loading
@@ -116,12 +116,8 @@ describe Gitlab::Diff::File do
end
 
context 'when renamed' do
let(:commit) { project.commit('6907208d755b60ebeacb2e9dfea74c92c3449a1f') }
let(:diff_file) { commit.diffs.diff_file_with_new_path('files/js/commit.coffee') }
before do
allow(diff_file.new_blob).to receive(:id).and_return(diff_file.old_blob.id)
end
let(:commit) { project.commit('94bb47ca1297b7b3731ff2a36923640991e9236f') }
let(:diff_file) { commit.diffs.diff_file_with_new_path('CHANGELOG.md') }
 
it 'returns false' do
expect(diff_file.content_changed?).to be_falsey
Loading
Loading
Loading
Loading
@@ -16,6 +16,23 @@ describe Blob do
end
end
 
# Verifies that lazily requested blobs are loaded through one batch call
# rather than one lookup per blob.
describe '.lazy' do
let(:project) { create(:project, :repository) }
let(:commit) { project.commit_by(oid: 'e63f41fe459e62e1228fcef60d7189127aeba95a') }
it 'fetches all blobs when the first is accessed' do
# Queue two blobs; neither is accessed yet.
changelog = described_class.lazy(project, commit.id, 'CHANGELOG')
contributing = described_class.lazy(project, commit.id, 'CONTRIBUTING.md')
# Expectations are set after queueing and before the first access below:
# exactly one batch call, and no single-blob `find`.
expect(Gitlab::Git::Blob).to receive(:batch).once.and_call_original
expect(Gitlab::Git::Blob).not_to receive(:find)
# Access property so the values are loaded
changelog.id
contributing.id
end
end
describe '#data' do
context 'using a binary blob' do
it 'returns the data as-is' do
Loading
Loading
Loading
Loading
@@ -32,10 +32,8 @@ describe DiffViewer::Base do
end
 
context 'when the binaryness does not match' do
before do
allow(diff_file.old_blob).to receive(:binary?).and_return(false)
allow(diff_file.new_blob).to receive(:binary?).and_return(false)
end
let(:commit) { project.commit_by(oid: 'ae73cb07c9eeaf35924a10f713b364d32b2dd34f') }
let(:diff_file) { commit.diffs.diff_file_with_new_path('Gemfile.zip') }
 
it 'returns false' do
expect(viewer_class.can_render?(diff_file)).to be_falsey
Loading
Loading
@@ -60,8 +58,7 @@ describe DiffViewer::Base do
 
context 'when the binaryness does not match' do
before do
allow(diff_file.old_blob).to receive(:binary?).and_return(true)
allow(diff_file.new_blob).to receive(:binary?).and_return(true)
allow_any_instance_of(Blob).to receive(:binary?).and_return(true)
end
 
it 'returns false' do
Loading
Loading
@@ -77,12 +74,12 @@ describe DiffViewer::Base do
end
 
context 'when the file was renamed and only the old blob is supported' do
let(:commit) { project.commit('2f63565e7aac07bcdadb654e253078b727143ec4') }
let(:commit) { project.commit_by(oid: '2f63565e7aac07bcdadb654e253078b727143ec4') }
let(:diff_file) { commit.diffs.diff_file_with_new_path('files/images/6049019_460s.jpg') }
 
before do
allow(diff_file).to receive(:renamed_file?).and_return(true)
allow(diff_file.new_blob).to receive(:extension).and_return('jpeg')
viewer_class.extensions = %w(notjpg)
end
 
it 'returns false' do
Loading
Loading
@@ -94,8 +91,7 @@ describe DiffViewer::Base do
describe '#collapsed?' do
context 'when the combined blob size is larger than the collapse limit' do
before do
allow(diff_file.old_blob).to receive(:raw_size).and_return(512.kilobytes)
allow(diff_file.new_blob).to receive(:raw_size).and_return(513.kilobytes)
allow(diff_file).to receive(:raw_size).and_return(1025.kilobytes)
end
 
it 'returns true' do
Loading
Loading
@@ -113,8 +109,7 @@ describe DiffViewer::Base do
describe '#too_large?' do
context 'when the combined blob size is larger than the size limit' do
before do
allow(diff_file.old_blob).to receive(:raw_size).and_return(2.megabytes)
allow(diff_file.new_blob).to receive(:raw_size).and_return(4.megabytes)
allow(diff_file).to receive(:raw_size).and_return(6.megabytes)
end
 
it 'returns true' do
Loading
Loading
@@ -132,8 +127,7 @@ describe DiffViewer::Base do
describe '#render_error' do
context 'when the combined blob size is larger than the size limit' do
before do
allow(diff_file.old_blob).to receive(:raw_size).and_return(2.megabytes)
allow(diff_file.new_blob).to receive(:raw_size).and_return(4.megabytes)
allow(diff_file).to receive(:raw_size).and_return(6.megabytes)
end
 
it 'returns :too_large' do
Loading
Loading
require 'spec_helper'
 
describe DiffViewer::ServerSide do
let(:project) { create(:project, :repository) }
let(:commit) { project.commit('570e7b2abdd848b95f2f578043fc23bd6f6fd24d') }
let(:diff_file) { commit.diffs.diff_file_with_new_path('files/ruby/popen.rb') }
set(:project) { create(:project, :repository) }
let(:commit) { project.commit_by(oid: '570e7b2abdd848b95f2f578043fc23bd6f6fd24d') }
let!(:diff_file) { commit.diffs.diff_file_with_new_path('files/ruby/popen.rb') }
 
let(:viewer_class) do
Class.new(DiffViewer::Base) do
Loading
Loading
@@ -15,8 +15,7 @@ describe DiffViewer::ServerSide do
 
describe '#prepare!' do
it 'loads all diff file data' do
expect(diff_file.old_blob).to receive(:load_all_data!)
expect(diff_file.new_blob).to receive(:load_all_data!)
expect(Blob).to receive(:lazy).at_least(:twice)
 
subject.prepare!
end
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment