Skip to content
Snippets Groups Projects
Commit 03b020f2 authored by Dylan Griffith's avatar Dylan Griffith
Browse files

Update ProjectStatistics#build_artifacts_size synchronously without summing (#41059)

Previously we scheduled a worker to just some this but we were running
into performance issues when the build table was getting too large. So
now we've updated the code such that this column is updated immediately
and incremented/decremented by the correct amount whenever artifacts are
created or deleted.

We've also added the performance optimization that we do not update this
statistic if a project is deleted because it could result in many
updates for a project with many builds.
parent 947ed8b8
No related branches found
No related tags found
No related merge requests found
Loading
Loading
@@ -20,7 +20,7 @@ module Ci
has_one :last_deployment, -> { order('deployments.id DESC') }, as: :deployable, class_name: 'Deployment'
has_many :trace_sections, class_name: 'Ci::BuildTraceSection'
 
has_many :job_artifacts, class_name: 'Ci::JobArtifact', foreign_key: :job_id, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent
has_many :job_artifacts, class_name: 'Ci::JobArtifact', foreign_key: :job_id, dependent: :destroy, inverse_of: :job # rubocop:disable Cop/ActiveRecordDependent
has_one :job_artifacts_archive, -> { where(file_type: Ci::JobArtifact.file_types[:archive]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id
has_one :job_artifacts_metadata, -> { where(file_type: Ci::JobArtifact.file_types[:metadata]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id
has_one :job_artifacts_trace, -> { where(file_type: Ci::JobArtifact.file_types[:trace]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id
Loading
Loading
@@ -95,8 +95,8 @@ module Ci
run_after_commit { BuildHooksWorker.perform_async(build.id) }
end
 
after_commit :update_project_statistics_after_save, on: [:create, :update]
after_commit :update_project_statistics, on: :destroy
after_save :update_project_statistics_after_save, if: :artifacts_size_changed?
after_destroy :update_project_statistics_after_destroy, unless: :project_destroyed?
 
class << self
# This is needed for url_for to work,
Loading
Loading
@@ -664,16 +664,20 @@ module Ci
pipeline.config_processor.build_attributes(name)
end
 
def update_project_statistics
return unless project
def update_project_statistics_after_save
update_project_statistics(read_attribute(:artifacts_size).to_i - artifacts_size_was.to_i)
end
 
ProjectCacheWorker.perform_async(project_id, [], [:build_artifacts_size])
def update_project_statistics_after_destroy
update_project_statistics(-artifacts_size)
end
 
def update_project_statistics_after_save
if previous_changes.include?('artifacts_size')
update_project_statistics
end
def update_project_statistics(difference)
ProjectStatistics.increment_statistic(project_id, :build_artifacts_size, difference)
end
def project_destroyed?
project.pending_delete?
end
end
end
Loading
Loading
@@ -9,6 +9,8 @@ module Ci
 
before_save :update_file_store
before_save :set_size, if: :file_changed?
after_save :update_project_statistics_after_save, if: :size_changed?
after_destroy :update_project_statistics_after_destroy, unless: :project_destroyed?
 
scope :with_files_stored_locally, -> { where(file_store: [nil, ::JobArtifactUploader::Store::LOCAL]) }
 
Loading
Loading
@@ -34,10 +36,6 @@ module Ci
[nil, ::JobArtifactUploader::Store::LOCAL].include?(self.file_store)
end
 
def set_size
self.size = file.size
end
def expire_in
expire_at - Time.now if expire_at
end
Loading
Loading
@@ -48,5 +46,28 @@ module Ci
ChronicDuration.parse(value)&.seconds&.from_now
end
end
private
def set_size
self.size = file.size
end
def update_project_statistics_after_save
update_project_statistics(size.to_i - size_was.to_i)
end
def update_project_statistics_after_destroy
update_project_statistics(-self.size)
end
def update_project_statistics(difference)
ProjectStatistics.increment_statistic(project_id, :build_artifacts_size, difference)
end
def project_destroyed?
# Use job.project to avoid extra DB query for project
job.project.pending_delete?
end
end
end
Loading
Loading
@@ -4,15 +4,15 @@ class ProjectStatistics < ActiveRecord::Base
 
before_save :update_storage_size
 
STORAGE_COLUMNS = [:repository_size, :lfs_objects_size, :build_artifacts_size].freeze
STATISTICS_COLUMNS = [:commit_count] + STORAGE_COLUMNS
COLUMNS_TO_REFRESH = [:repository_size, :lfs_objects_size, :commit_count].freeze
INCREMENTABLE_COLUMNS = [:build_artifacts_size].freeze
 
def total_repository_size
repository_size + lfs_objects_size
end
 
def refresh!(only: nil)
STATISTICS_COLUMNS.each do |column, generator|
COLUMNS_TO_REFRESH.each do |column, generator|
if only.blank? || only.include?(column)
public_send("update_#{column}") # rubocop:disable GitlabSecurity/PublicSend
end
Loading
Loading
@@ -34,13 +34,15 @@ class ProjectStatistics < ActiveRecord::Base
self.lfs_objects_size = project.lfs_objects.sum(:size)
end
 
def update_build_artifacts_size
self.build_artifacts_size =
project.builds.sum(:artifacts_size) +
Ci::JobArtifact.artifacts_size_for(self.project)
def update_storage_size
self.storage_size = repository_size + lfs_objects_size + build_artifacts_size
end
 
def update_storage_size
self.storage_size = STORAGE_COLUMNS.sum(&method(:read_attribute))
def self.increment_statistic(project_id, key, amount)
raise ArgumentError, "Cannot increment attribute: #{key}" unless key.in?(INCREMENTABLE_COLUMNS)
return if amount == 0
where(project_id: project_id)
.update_all(["#{key} = COALESCE(#{key}, 0) + (?)", amount])
end
end
Loading
Loading
@@ -6,10 +6,10 @@ class JobArtifactUploader < GitlabUploader
 
storage_options Gitlab.config.artifacts
 
def size
return super if model.size.nil?
def cached_size
return model.size if model.size.present? && !model.file_changed?
 
model.size
size
end
 
def store_dir
Loading
Loading
@@ -20,7 +20,7 @@ class JobArtifactUploader < GitlabUploader
if file_storage?
File.open(path, "rb") if path
else
::Gitlab::Ci::Trace::HttpIO.new(url, size) if url
::Gitlab::Ci::Trace::HttpIO.new(url, cached_size) if url
end
end
 
Loading
Loading
---
title: Improve DB performance of calculating total artifacts size
merge_request: 17839
author:
type: performance
Loading
Loading
@@ -1384,29 +1384,51 @@ describe Ci::Build do
end
end
 
describe '#update_project_statistics' do
let!(:build) { create(:ci_build, artifacts_size: 23) }
it 'updates project statistics when the artifact size changes' do
expect(ProjectCacheWorker).to receive(:perform_async)
.with(build.project_id, [], [:build_artifacts_size])
context 'when updating the build' do
let(:build) { create(:ci_build, artifacts_size: 23) }
 
it 'updates project statistics' do
build.artifacts_size = 42
build.save!
expect(build).to receive(:update_project_statistics_after_save).and_call_original
expect { build.save! }
.to change { build.project.statistics.reload.build_artifacts_size }
.by(19)
end
 
it 'does not update project statistics when the artifact size stays the same' do
expect(ProjectCacheWorker).not_to receive(:perform_async)
context 'when the artifact size stays the same' do
it 'does not update project statistics' do
build.name = 'changed'
 
build.name = 'changed'
build.save!
expect(build).not_to receive(:update_project_statistics_after_save)
build.save!
end
end
end
 
it 'updates project statistics when the build is destroyed' do
expect(ProjectCacheWorker).to receive(:perform_async)
.with(build.project_id, [], [:build_artifacts_size])
context 'when destroying the build' do
let!(:build) { create(:ci_build, artifacts_size: 23) }
it 'updates project statistics' do
expect(ProjectStatistics)
.to receive(:increment_statistic)
.and_call_original
expect { build.destroy! }
.to change { build.project.statistics.reload.build_artifacts_size }
.by(-23)
end
context 'when the build is destroyed due to the project being destroyed' do
it 'does not update the project statistics' do
expect(ProjectStatistics)
.not_to receive(:increment_statistic)
 
build.destroy
build.project.update_attributes(pending_delete: true)
build.project.destroy!
end
end
end
 
Loading
Loading
require 'spec_helper'
 
describe Ci::JobArtifact do
set(:artifact) { create(:ci_job_artifact, :archive) }
let(:artifact) { create(:ci_job_artifact, :archive) }
 
describe "Associations" do
it { is_expected.to belong_to(:project) }
Loading
Loading
@@ -59,10 +59,32 @@ describe Ci::JobArtifact do
end
end
 
describe '#set_size' do
it 'sets the size' do
context 'creating the artifact' do
let(:project) { create(:project) }
let(:artifact) { create(:ci_job_artifact, :archive, project: project) }
it 'sets the size from the file size' do
expect(artifact.size).to eq(106365)
end
it 'updates the project statistics' do
expect { artifact }
.to change { project.statistics.reload.build_artifacts_size }
.by(106365)
end
end
context 'updating the artifact file' do
it 'updates the artifact size' do
artifact.update!(file: fixture_file_upload(File.join(Rails.root, 'spec/fixtures/dk.png')))
expect(artifact.size).to eq(1062)
end
it 'updates the project statistics' do
expect { artifact.update!(file: fixture_file_upload(File.join(Rails.root, 'spec/fixtures/dk.png'))) }
.to change { artifact.project.statistics.reload.build_artifacts_size }
.by(1062 - 106365)
end
end
 
describe '#file' do
Loading
Loading
@@ -118,4 +140,32 @@ describe Ci::JobArtifact do
is_expected.to be_nil
end
end
context 'when destroying the artifact' do
let(:project) { create(:project, :repository) }
let(:pipeline) { create(:ci_pipeline, project: project) }
let!(:build) { create(:ci_build, :artifacts, pipeline: pipeline) }
it 'updates the project statistics' do
artifact = build.job_artifacts.first
expect(ProjectStatistics)
.to receive(:increment_statistic)
.and_call_original
expect { artifact.destroy }
.to change { project.statistics.reload.build_artifacts_size }
.by(-106365)
end
context 'when it is destroyed from the project level' do
it 'does not update the project statistics' do
expect(ProjectStatistics)
.not_to receive(:increment_statistic)
project.update_attributes(pending_delete: true)
project.destroy!
end
end
end
end
Loading
Loading
@@ -4,26 +4,6 @@ describe ProjectStatistics do
let(:project) { create :project }
let(:statistics) { project.statistics }
 
describe 'constants' do
describe 'STORAGE_COLUMNS' do
it 'is an array of symbols' do
expect(described_class::STORAGE_COLUMNS).to be_kind_of Array
expect(described_class::STORAGE_COLUMNS.map(&:class).uniq).to eq [Symbol]
end
end
describe 'STATISTICS_COLUMNS' do
it 'is an array of symbols' do
expect(described_class::STATISTICS_COLUMNS).to be_kind_of Array
expect(described_class::STATISTICS_COLUMNS.map(&:class).uniq).to eq [Symbol]
end
it 'includes all storage columns' do
expect(described_class::STATISTICS_COLUMNS & described_class::STORAGE_COLUMNS).to eq described_class::STORAGE_COLUMNS
end
end
end
describe 'associations' do
it { is_expected.to belong_to(:project) }
it { is_expected.to belong_to(:namespace) }
Loading
Loading
@@ -63,7 +43,6 @@ describe ProjectStatistics do
allow(statistics).to receive(:update_commit_count)
allow(statistics).to receive(:update_repository_size)
allow(statistics).to receive(:update_lfs_objects_size)
allow(statistics).to receive(:update_build_artifacts_size)
allow(statistics).to receive(:update_storage_size)
end
 
Loading
Loading
@@ -76,7 +55,6 @@ describe ProjectStatistics do
expect(statistics).to have_received(:update_commit_count)
expect(statistics).to have_received(:update_repository_size)
expect(statistics).to have_received(:update_lfs_objects_size)
expect(statistics).to have_received(:update_build_artifacts_size)
end
end
 
Loading
Loading
@@ -89,7 +67,6 @@ describe ProjectStatistics do
expect(statistics).to have_received(:update_lfs_objects_size)
expect(statistics).not_to have_received(:update_commit_count)
expect(statistics).not_to have_received(:update_repository_size)
expect(statistics).not_to have_received(:update_build_artifacts_size)
end
end
end
Loading
Loading
@@ -131,40 +108,6 @@ describe ProjectStatistics do
end
end
 
describe '#update_build_artifacts_size' do
let!(:pipeline) { create(:ci_pipeline, project: project) }
context 'when new job artifacts are calculated' do
let(:ci_build) { create(:ci_build, pipeline: pipeline) }
before do
create(:ci_job_artifact, :archive, project: pipeline.project, job: ci_build)
end
it "stores the size of related build artifacts" do
statistics.update_build_artifacts_size
expect(statistics.build_artifacts_size).to be(106365)
end
it 'calculates related build artifacts by project' do
expect(Ci::JobArtifact).to receive(:artifacts_size_for).with(project) { 0 }
statistics.update_build_artifacts_size
end
end
context 'when legacy artifacts are used' do
let!(:ci_build) { create(:ci_build, pipeline: pipeline, artifacts_size: 10.megabytes) }
it "stores the size of related build artifacts" do
statistics.update_build_artifacts_size
expect(statistics.build_artifacts_size).to eq(10.megabytes)
end
end
end
describe '#update_storage_size' do
it "sums all storage counters" do
statistics.update!(
Loading
Loading
@@ -177,4 +120,27 @@ describe ProjectStatistics do
expect(statistics.storage_size).to eq 5
end
end
describe '.increment_statistic' do
it 'increases the statistic by that amount' do
expect { described_class.increment_statistic(project.id, :build_artifacts_size, 13) }
.to change { statistics.reload.build_artifacts_size }
.by(13)
end
context 'when the amount is 0' do
it 'does not execute a query' do
project
expect { described_class.increment_statistic(project.id, :build_artifacts_size, 0) }
.not_to exceed_query_limit(0)
end
end
context 'when using an invalid column' do
it 'raises an error' do
expect { described_class.increment_statistic(project.id, :id, 13) }
.to raise_error(ArgumentError, "Cannot increment attribute: id")
end
end
end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment