Skip to content
Snippets Groups Projects
Unverified Commit 962222e4 authored by Francisco Javier López's avatar Francisco Javier López
Browse files

Backfill project snippet statistics

In this commit we add a background migration
to create/update project snippet statistics.

It also updates the `snippets_size` in the project
and namespace statistics.
parent d32086eb
No related branches found
No related tags found
No related merge requests found
---
title: Backfill project snippet statistics
merge_request: 36444
author:
type: other
# frozen_string_literal: true
# Schedules the `PopulateProjectSnippetStatistics` background migration for
# project snippets, handing each job one batch of snippet ids.
class SchedulePopulateProjectSnippetStatistics < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false
  # Multiplied by the batch index to obtain the delay passed to `migrate_in`.
  DELAY_INTERVAL = 3.minutes
  # Number of snippet rows per scheduled job.
  BATCH_SIZE = 2_500
  MIGRATION = 'PopulateProjectSnippetStatistics'

  disable_ddl_transaction!

  # Migration-local model bound to the `snippets` table. The inheritance
  # column is pointed away from `type` so that column is treated as plain data.
  class Snippet < ActiveRecord::Base
    include EachBatch

    self.table_name = 'snippets'
    self.inheritance_column = :_type_disabled
  end

  # Walks the project snippets in batches and enqueues one background job per
  # batch, each job receiving the array of snippet ids in that batch.
  def up
    project_snippets.each_batch(of: BATCH_SIZE) do |batch, batch_number|
      delay = batch_number * DELAY_INTERVAL

      migrate_in(delay, MIGRATION, [batch.pluck(:id)])
    end
  end

  def down
    # no-op
  end

  private

  # Relation of snippets whose `type` is 'ProjectSnippet' and whose project
  # row still exists (enforced by the INNER JOIN on projects).
  #
  # NOTE(review): confirm that EachBatch honours the ORDER BY below; batching
  # helpers commonly re-order by primary key, which would make it a no-op.
  def project_snippets
    Snippet
      .select(:id, :namespace_id, :project_id)
      .joins('INNER JOIN projects ON projects.id = snippets.project_id')
      .where(type: 'ProjectSnippet')
      .order(:namespace_id, :project_id)
  end
end
Loading
Loading
@@ -23590,5 +23590,6 @@ COPY "schema_migrations" (version) FROM STDIN;
20200706005325
20200706170536
20200707071941
20200709101408
\.
 
# frozen_string_literal: true
module Gitlab
  module BackgroundMigration
    # Runs Snippets::UpdateStatisticsService for each of the given project
    # snippets, then refreshes the `snippets_size` statistic of every affected
    # project and the statistics of the related root namespace.
    #
    # Failures are logged and swallowed so that one broken project or
    # namespace does not stop the rest of the batch from being processed.
    class PopulateProjectSnippetStatistics
      # @param snippet_ids [Array<Integer>] ids of the project snippets to process
      def perform(snippet_ids)
        project_snippets(snippet_ids).group_by(&:namespace_id).each do |namespace_id, namespace_snippets|
          namespace_snippets.group_by(&:project).each do |project, snippets|
            upsert_snippet_statistics(snippets)
            project.statistics.refresh!(only: [:snippets_size])
          rescue => e
            # Include the cause, mirroring the namespace-level handler below;
            # without it the log entry gives no hint about what went wrong.
            error_message("Error updating statistics for project #{project.id}: #{e.message}")
          end

          # Every snippet in this group belongs to the same namespace, so the
          # first one is enough to reach the root namespace.
          Namespaces::StatisticsRefresherService.new.execute(namespace_snippets.first.project.root_namespace)
        rescue => e
          error_message("Error updating statistics for namespace #{namespace_id}: #{e.message}")
        end
      end

      private

      # Loads the requested project snippets together with the namespace id of
      # their project (exposed as `namespace_id` on each record) and the
      # associations used while processing them.
      def project_snippets(snippet_ids)
        ProjectSnippet
          .select('snippets.*, projects.namespace_id')
          .where(id: snippet_ids)
          .joins(:project)
          .includes(:statistics)
          .includes(snippet_repository: :shard)
          .includes(project: [:route, :statistics, :namespace])
      end

      # Updates the statistics of each snippet, logging (but not raising on)
      # any error response returned by the service.
      def upsert_snippet_statistics(snippets)
        snippets.each do |snippet|
          response = Snippets::UpdateStatisticsService.new(snippet).execute

          error_message("#{response.message} snippet: #{snippet.id}") if response.error?
        end
      end

      def logger
        @logger ||= Gitlab::BackgroundMigration::Logger.build
      end

      def error_message(message)
        logger.error(message: "Snippet Statistics Migration: #{message}")
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Exercises the background migration against real snippet repositories:
# three project snippets spread over two projects in two different groups.
RSpec.describe Gitlab::BackgroundMigration::PopulateProjectSnippetStatistics, :migration, schema: 20200626130220 do
  let(:file_name) { 'file_name.rb' }
  let(:content) { 'content' }
  let(:snippets) { table(:snippets) }
  let(:snippet_repositories) { table(:snippet_repositories) }
  let(:users) { table(:users) }
  let(:namespaces) { table(:namespaces) }
  let(:snippet_statistics) { table(:snippet_statistics) }
  let(:project_statistics) { table(:project_statistics) }
  let(:projects) { table(:projects) }

  let(:user) { users.create!(id: 1, email: 'test@example.com', projects_limit: 100, username: 'test') }
  let(:group1) { namespaces.create!(id: 10, type: 'Group', name: 'group1', path: 'group1') }
  let(:group2) { namespaces.create!(id: 20, type: 'Group', name: 'group2', path: 'group2') }
  let(:user_namespace) { namespaces.create!(id: 30, name: 'user', path: 'user', owner_id: user.id) }

  # TODO create a project with a user namespace to check that the namespace stats are updated
  let(:project1) { projects.create!(id: 1, name: 'test', path: 'test', namespace_id: group1.id) }
  let(:project2) { projects.create!(id: 2, name: 'test1', path: 'test1', namespace_id: group2.id) }

  let!(:project_stats1) { project_statistics.create!(id: 1, project_id: project1.id, namespace_id: project1.namespace_id, snippets_size: nil) }
  let!(:project_stats2) { project_statistics.create!(id: 2, project_id: project2.id, namespace_id: project2.namespace_id, snippets_size: 1) }

  let!(:snippet1) { snippets.create!(id: 1, type: 'ProjectSnippet', project_id: project1.id, author_id: user.id, file_name: file_name, content: content) }
  let!(:snippet2) { snippets.create!(id: 2, type: 'ProjectSnippet', project_id: project1.id, author_id: user.id, file_name: file_name, content: content) }
  let!(:snippet3) { snippets.create!(id: 3, type: 'ProjectSnippet', project_id: project2.id, author_id: user.id, file_name: file_name, content: content) }

  # `perform` takes a single argument: the array of snippet ids to process.
  let(:ids) { snippets.pluck(:id) }
  let(:migration) { described_class.new }

  subject { migration.perform(ids) }

  before do
    # Give every snippet an on-disk repository so statistics can be computed.
    snippets.all.each do |s|
      allow(s).to receive(:disk_path).and_return(disk_path(s))

      TestEnv.copy_repo(s,
                        bare_repo: TestEnv.factory_repo_path_bare,
                        refs: TestEnv::BRANCH_SHA)

      raw_repository(s).create_repository
    end
  end

  after do
    snippets.all.each { |s| raw_repository(s).remove }
  end

  # TODO create shared examples to check the namespace stats update
  shared_examples 'updates the associated project statistics' do
    specify do
      subject

      repo_size = snippet_statistics.where(snippet_id: snippets.where(project_id: project1.id)).sum(:repository_size)
      expect(project_stats1.reload.snippets_size).to eq repo_size

      repo_size = snippet_statistics.where(snippet_id: snippets.where(project_id: project2.id)).sum(:repository_size)
      expect(project_stats2.reload.snippets_size).to eq repo_size
    end
  end

  context 'when snippets has no snippet stastistics' do
    it 'creates and initializes snippet statistics' do
      expect(snippet_statistics.count).to be_zero

      subject

      expect(snippet_statistics.count).to eq snippets.count

      # Checking that none of the values is 0 in any statistics
      # NOTE(review): the do...end block binds to `to`, not to `satisfy_all`;
      # confirm the matcher actually receives it.
      expect(snippet_statistics.all).to satisfy_all do |stat|
        stat.attributes.except("snippet_id").values.none? { |v| v.zero? }
      end
    end

    it_behaves_like 'updates the associated project statistics'
  end

  context "when snippet stats haven't been initialized" do
    before do
      snippets.all.each do |s|
        snippet_statistics.create!(snippet_id: s.id)
      end
    end

    it 'initializes all snippet statistics' do
      expect(snippet_statistics.all).to satisfy_all do |stat|
        stat.attributes.except("snippet_id").values.all? { |v| v.zero? }
      end

      subject

      expect(snippet_statistics.count).to eq snippets.count

      # Checking that none of the values is 0 in any statistics
      expect(snippet_statistics.all).to satisfy_all do |stat|
        stat.attributes.except("snippet_id").values.none? { |v| v.zero? }
      end
    end

    it_behaves_like 'updates the associated project statistics'
  end

  # TODO Come back to these once we know how the migration is going to behave
  # context 'when some snippet stats are initialized' do
  #   before do
  #     snippet_statistics.create!(snippet_id: snippet2.id, repository_size: 1)
  #   end
  #
  #   it 'initializes only those not initialized' do
  #     expect(Snippets::UpdateStatisticsService).to receive(:new).with(Snippet.find(snippet1.id)).and_call_original
  #     expect(Snippets::UpdateStatisticsService).to receive(:new).with(Snippet.find(snippet3.id)).and_call_original
  #     expect(Snippets::UpdateStatisticsService).not_to receive(:new).with(Snippet.find(snippet2.id))
  #
  #     subject
  #   end
  #
  #   it_behaves_like 'updates the associated project statistics'
  # end
  #
  # context 'when all snippet stats are already initialized' do
  #   it 'does not refresh any statistics' do
  #     snippets.all.each do |s|
  #       snippet_statistics.create!(snippet_id: s.id, repository_size: 1)
  #     end
  #
  #     expect(Snippets::UpdateStatisticsService).not_to receive(:new)
  #     expect_any_instance_of(ProjectStatistics).not_to receive(:refresh!)
  #
  #     subject
  #   end
  # end

  context 'when a snippet repository is empty' do
    before do
      raw_repository(snippet2).remove
    end

    it 'does not create the snippet statistics record for that snippet' do
      # The service is still invoked for all three snippets; only the one
      # without a repository is expected to end up without statistics.
      expect(Snippets::UpdateStatisticsService).to receive(:new).exactly(3).times.and_call_original

      subject

      expect(snippet_statistics.count).to eq 2
      expect(snippet_statistics.find_by(snippet_id: snippet2.id)).to be_nil
    end

    it 'logs the error' do
      expect_next_instance_of(Gitlab::BackgroundMigration::Logger) do |instance|
        expect(instance).to receive(:error).once
      end

      subject
    end
  end

  # Raw Git repository handle for the given snippet row.
  def raw_repository(snippet)
    Gitlab::Git::Repository.new('default',
                                "#{disk_path(snippet)}.git",
                                Gitlab::GlRepository::SNIPPET.identifier_for_container(snippet),
                                "@snippets/#{snippet.id}")
  end

  def hashed_repository(snippet)
    Storage::Hashed.new(snippet, prefix: '@snippets')
  end

  def disk_path(snippet)
    hashed_repository(snippet).disk_path
  end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment