Skip to content
Snippets Groups Projects
Commit 008120f8 authored by Douwe Maan's avatar Douwe Maan
Browse files

Merge branch '41777-include-cycle-time-in-usage-ping' into 'master'

Resolve "Include cycle time in usage ping"

Closes #41777

See merge request gitlab-org/gitlab-ce!16793
parents ccb080d9 522f4b2c
No related branches found
No related tags found
No related merge requests found
Showing
with 407 additions and 88 deletions
Loading
Loading
@@ -6,6 +6,12 @@ class CycleAnalytics
@options = options
end
 
# Collects the median of every stage listed in STAGES.
#
# Returns a Hash keyed by stage name; each value is whatever
# `self[stage_name].median` returns for that stage.
def all_medians_per_stage
  STAGES.map { |stage| [stage, self[stage].median] }.to_h
end
def summary
@summary ||= ::Gitlab::CycleAnalytics::StageSummary.new(@project,
from: @options[:from],
Loading
Loading
Loading
Loading
@@ -7,6 +7,7 @@ class AnalyticsStageEntity < Grape::Entity
expose :description
 
expose :median, as: :value do |stage|
stage.median && !stage.median.zero? ? distance_of_time_in_words(stage.median) : nil
# median returns a BatchLoader instance which we first have to unwrap by using to_i
!stage.median.to_i.zero? ? distance_of_time_in_words(stage.median) : nil
end
end
---
title: Include cycle time in usage ping data
merge_request: 16793
author:
type: added
Loading
Loading
@@ -8,13 +8,14 @@ module Gitlab
private
 
def base_query
@base_query ||= stage_query
@base_query ||= stage_query(@project.id) # rubocop:disable Gitlab/ModuleWithInstanceVariables
end
 
def stage_query
def stage_query(project_ids)
query = mr_closing_issues_table.join(issue_table).on(issue_table[:id].eq(mr_closing_issues_table[:issue_id]))
.join(issue_metrics_table).on(issue_table[:id].eq(issue_metrics_table[:issue_id]))
.where(issue_table[:project_id].eq(@project.id)) # rubocop:disable Gitlab/ModuleWithInstanceVariables
.project(issue_table[:project_id].as("project_id"))
.where(issue_table[:project_id].in(project_ids))
.where(issue_table[:created_at].gteq(@options[:from])) # rubocop:disable Gitlab/ModuleWithInstanceVariables
 
# Load merge_requests
Loading
Loading
Loading
Loading
@@ -21,17 +21,28 @@ module Gitlab
end
 
def median
cte_table = Arel::Table.new("cte_table_for_#{name}")
BatchLoader.for(@project.id).batch(key: name) do |project_ids, loader|
cte_table = Arel::Table.new("cte_table_for_#{name}")
 
# Build a `SELECT` query. We find the first of the `end_time_attrs` that isn't `NULL` (call this end_time).
# Next, we find the first of the start_time_attrs that isn't `NULL` (call this start_time).
# We compute the (end_time - start_time) interval, and give it an alias based on the current
# cycle analytics stage.
interval_query = Arel::Nodes::As.new(
cte_table,
subtract_datetimes(base_query.dup, start_time_attrs, end_time_attrs, name.to_s))
# Build a `SELECT` query. We find the first of the `end_time_attrs` that isn't `NULL` (call this end_time).
# Next, we find the first of the start_time_attrs that isn't `NULL` (call this start_time).
# We compute the (end_time - start_time) interval, and give it an alias based on the current
# cycle analytics stage.
interval_query = Arel::Nodes::As.new(cte_table,
subtract_datetimes(stage_query(project_ids), start_time_attrs, end_time_attrs, name.to_s))
 
median_datetime(cte_table, interval_query, name)
if project_ids.one?
loader.call(@project.id, median_datetime(cte_table, interval_query, name))
else
begin
median_datetimes(cte_table, interval_query, name, :project_id)&.each do |project_id, median|
loader.call(project_id, median)
end
rescue NotSupportedError
{}
end
end
end
end
 
def name
Loading
Loading
module Gitlab
module CycleAnalytics
module ProductionHelper
def stage_query
super
def stage_query(project_ids)
super(project_ids)
.where(mr_metrics_table[:first_deployed_to_production_at]
.gteq(@options[:from])) # rubocop:disable Gitlab/ModuleWithInstanceVariables
end
Loading
Loading
Loading
Loading
@@ -25,11 +25,11 @@ module Gitlab
_("Total test time for all commits/merges")
end
 
def stage_query
def stage_query(project_ids)
if @options[:branch]
super.where(build_table[:ref].eq(@options[:branch]))
super(project_ids).where(build_table[:ref].eq(@options[:branch]))
else
super
super(project_ids)
end
end
end
Loading
Loading
module Gitlab
  module CycleAnalytics
    # Aggregates per-stage cycle analytics medians over a sample of projects
    # so the result can be merged into a usage data payload.
    class UsageData
      # Maximum number of projects sampled for the aggregate.
      PROJECTS_LIMIT = 10

      attr_reader :projects, :options

      # Samples up to PROJECTS_LIMIT projects from `Project.sorted_by_activity`
      # and sets the `from` option to 7 days ago.
      def initialize
        @projects = Project.sorted_by_activity.limit(PROJECTS_LIMIT)
        @options = { from: 7.days.ago }
      end

      # Builds the aggregated statistics.
      #
      # Any arguments are accepted and ignored so the method stays
      # call-compatible with the `to_json(*args)` convention used by JSON
      # generators; a zero-arity override would raise ArgumentError there.
      #
      # Returns a Hash (not a JSON String) shaped like:
      #
      #   { avg_cycle_analytics: { <stage> => { average:, sd:, missing: }, total: } }
      #
      # `total` is the sum of every non-nil value (average, sd and missing) of
      # every stage.
      # NOTE(review): this adds a count (`missing`) to durations - confirm that
      # this is the intended meaning of `total`.
      def to_json(*)
        total = 0

        values =
          medians_per_stage.each_with_object({}) do |(stage_name, medians), hsh|
            calculations = stage_values(medians)

            total += calculations.values.compact.sum
            hsh[stage_name] = calculations
          end

        values[:total] = total

        { avg_cycle_analytics: values }
      end

      private

      # Returns a Hash mapping each stage name to an Array holding one median
      # per sampled project (entries may be blank when a project has no data).
      def medians_per_stage
        projects.each_with_object({}) do |project, hsh|
          ::CycleAnalytics.new(project, options).all_medians_per_stage.each do |stage_name, median|
            hsh[stage_name] ||= []
            hsh[stage_name] << median
          end
        end
      end

      # Computes the statistics of one stage.
      #
      # medians - Array of per-project medians; blank entries are dropped and
      #           counted as missing.
      #
      # Returns a Hash with :average (Integer or nil), :sd (Integer) and
      # :missing (number of projects without a median).
      def stage_values(medians)
        medians = medians.map(&:presence).compact
        average = calc_average(medians)

        {
          average: average,
          sd: standard_deviation(medians, average),
          missing: projects.length - medians.length
        }
      end

      # Returns the mean of `values` truncated to an Integer, or nil when
      # `values` is empty.
      def calc_average(values)
        return if values.empty?

        (values.sum / values.length).to_i
      end

      # Returns the sample standard deviation of `values` truncated to an
      # Integer.
      def standard_deviation(values, average)
        Math.sqrt(sample_variance(values, average)).to_i
      end

      # Returns the sample variance (n - 1 denominator) of `values` around
      # `average`; 0 when fewer than two values are given, which also means a
      # nil `average` is never dereferenced.
      def sample_variance(values, average)
        return 0 if values.length <= 1

        sum = values.inject(0) do |acc, val|
          acc + (val - average)**2
        end

        sum / (values.length - 1)
      end
    end
  end
end
Loading
Loading
@@ -2,18 +2,14 @@
module Gitlab
module Database
module Median
NotSupportedError = Class.new(StandardError)
def median_datetime(arel_table, query_so_far, column_sym)
median_queries =
if Gitlab::Database.postgresql?
pg_median_datetime_sql(arel_table, query_so_far, column_sym)
elsif Gitlab::Database.mysql?
mysql_median_datetime_sql(arel_table, query_so_far, column_sym)
end
results = Array.wrap(median_queries).map do |query|
ActiveRecord::Base.connection.execute(query)
end
extract_median(results).presence
extract_median(execute_queries(arel_table, query_so_far, column_sym)).presence
end
# Runs the partitioned median query and returns the extracted medians,
# or nil when the extracted result is blank.
def median_datetimes(arel_table, query_so_far, column_sym, partition_column)
  raw_results = execute_queries(arel_table, query_so_far, column_sym, partition_column)
  medians = extract_medians(raw_results)

  medians.presence
end
 
def extract_median(results)
Loading
Loading
@@ -21,13 +17,21 @@ module Gitlab
 
if Gitlab::Database.postgresql?
result = result.first.presence
median = result['median'] if result
median.to_f if median
result['median']&.to_f if result
elsif Gitlab::Database.mysql?
result.to_a.flatten.first
end
end
 
# Converts the rows of a partitioned median query into a Hash.
#
# results - Array of query results; nil entries are ignored. The first
#           non-nil entry must respond to `values` and return rows of
#           `[partition id, median]`.
#
# Returns a Hash mapping each partition id (converted with `to_i`) to its
# median (converted with `to_f`, or nil when the median is nil). Returns an
# empty Hash when there is no non-nil result to read.
def extract_medians(results)
  result = results.compact.first
  return {} unless result

  result.values.each_with_object({}) do |(id, median), hash|
    hash[id.to_i] = median&.to_f
  end
end
def mysql_median_datetime_sql(arel_table, query_so_far, column_sym)
query = arel_table
.from(arel_table.project(Arel.sql('*')).order(arel_table[column_sym]).as(arel_table.table_name))
Loading
Loading
@@ -53,7 +57,7 @@ module Gitlab
]
end
 
def pg_median_datetime_sql(arel_table, query_so_far, column_sym)
def pg_median_datetime_sql(arel_table, query_so_far, column_sym, partition_column = nil)
# Create a CTE with the column we're operating on, row number (after sorting by the column
# we're operating on), and count of the table we're operating on (duplicated across) all rows
# of the CTE. For example, if we're looking to find the median of the `projects.star_count`
Loading
Loading
@@ -64,41 +68,107 @@ module Gitlab
# 5 | 1 | 3
# 9 | 2 | 3
# 15 | 3 | 3
#
# If a partition column is used we will do the same operation but for separate partitions,
# when that happens the CTE might look like this:
#
# project_id | star_count | row_id | ct
# ------------+------------+--------+----
# 1 | 5 | 1 | 2
# 1 | 9 | 2 | 2
# 2 | 10 | 1 | 3
# 2 | 15 | 2 | 3
# 2 | 20 | 3 | 3
cte_table = Arel::Table.new("ordered_records")
cte = Arel::Nodes::As.new(
cte_table,
arel_table
.project(
arel_table[column_sym].as(column_sym.to_s),
Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("row_number", []),
Arel::Nodes::Window.new.order(arel_table[column_sym])).as('row_id'),
arel_table.project("COUNT(1)").as('ct')).
arel_table.project(*rank_rows(arel_table, column_sym, partition_column)).
# Disallow negative values
where(arel_table[column_sym].gteq(zero_interval)))
 
# From the CTE, select either the middle row or the middle two rows (this is accomplished
# by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the
# selected rows, and this is the median value.
cte_table.project(average([extract_epoch(cte_table[column_sym])], "median"))
.where(
Arel::Nodes::Between.new(
cte_table[:row_id],
Arel::Nodes::And.new(
[(cte_table[:ct] / Arel.sql('2.0')),
(cte_table[:ct] / Arel.sql('2.0') + 1)]
result =
cte_table
.project(*median_projections(cte_table, column_sym, partition_column))
.where(
Arel::Nodes::Between.new(
cte_table[:row_id],
Arel::Nodes::And.new(
[(cte_table[:ct] / Arel.sql('2.0')),
(cte_table[:ct] / Arel.sql('2.0') + 1)]
)
)
)
)
.with(query_so_far, cte)
.to_sql
.with(query_so_far, cte)
result.group(cte_table[partition_column]).order(cte_table[partition_column]) if partition_column
result.to_sql
end
 
private
 
# Builds the median SQL for the database currently in use.
#
# Returns the PostgreSQL query when `Gitlab::Database.postgresql?`, the MySQL
# queries when `Gitlab::Database.mysql?`, and nil otherwise.
#
# Raises NotSupportedError when a partition column is requested on MySQL.
def median_queries(arel_table, query_so_far, column_sym, partition_column = nil)
  if Gitlab::Database.postgresql?
    return pg_median_datetime_sql(arel_table, query_so_far, column_sym, partition_column)
  end

  return unless Gitlab::Database.mysql?

  raise NotSupportedError, "partition_column is not supported for MySQL" if partition_column

  mysql_median_datetime_sql(arel_table, query_so_far, column_sym)
end
# Builds the median queries and executes each one on the ActiveRecord
# connection.
#
# Returns an Array with one raw result per executed query (empty when no
# query was built).
def execute_queries(arel_table, query_so_far, column_sym, partition_column = nil)
  sql_statements = Array.wrap(median_queries(arel_table, query_so_far, column_sym, partition_column))

  sql_statements.map do |sql|
    ActiveRecord::Base.connection.execute(sql)
  end
end
# Wraps `args` in an Arel named function node for SQL `AVG`, passing `as`
# through as the node's third constructor argument.
def average(args, as)
  function_name = "AVG"

  Arel::Nodes::NamedFunction.new(function_name, args, as)
end
 
# Builds the projections that number the rows to take a median from.
#
# arel_table       - table the values are read from.
# column_sym       - column whose median is being computed.
# partition_column - optional column; when given, rows are numbered and
#                    counted separately for each value of this column.
#
# Returns [column, row_id, ct], or [partition, column, row_id, ct] when a
# partition column is given. `row_id` is the position of the row after
# ordering by `column_sym`; `ct` is a row-count subquery.
def rank_rows(arel_table, column_sym, partition_column)
  column_row = arel_table[column_sym].as(column_sym.to_s)

  window = Arel::Nodes::Window.new
  window = window.partition(arel_table[partition_column]) if partition_column
  window = window.order(arel_table[column_sym])

  # row_number is used for both variants: the caller picks the middle row(s)
  # by comparing row_id with ct / 2, which requires consecutive, unique ids.
  # rank() would give tied values the same id and leave gaps, so the middle
  # position could match the wrong rows or none at all.
  row_id =
    Arel::Nodes::Over.new(
      Arel::Nodes::NamedFunction.new('row_number', []),
      window
    ).as('row_id')

  if partition_column
    partition_row = arel_table[partition_column]

    # Correlated subquery: number of rows sharing this row's partition value.
    count = arel_table.from(arel_table.alias)
      .project('COUNT(*)')
      .where(arel_table[partition_column].eq(arel_table.alias[partition_column]))
      .as('ct')

    [partition_row, column_row, row_id, count]
  else
    count = arel_table.project("COUNT(1)").as('ct')

    [column_row, row_id, count]
  end
end
# Lists the columns selected by the final median query: the averaged epoch
# of `column_sym` aliased as "median", preceded by the partition column
# when one is given.
def median_projections(table, column_sym, partition_column)
  median = average([extract_epoch(table[column_sym])], "median")

  partition_column ? [table[partition_column], median] : [median]
end
# Builds an `EXTRACT(EPOCH FROM "<table>"."<column>")` SQL literal for the
# given Arel attribute, using the name of its relation and its own name.
def extract_epoch(arel_attribute)
  table_name = arel_attribute.relation.name
  column_name = arel_attribute.name

  Arel.sql(%(EXTRACT(EPOCH FROM "#{table_name}"."#{column_name}")))
end
Loading
Loading
Loading
Loading
@@ -9,6 +9,7 @@ module Gitlab
license_usage_data.merge(system_usage_data)
.merge(features_usage_data)
.merge(components_usage_data)
.merge(cycle_analytics_usage_data)
end
 
def to_json(force_refresh: false)
Loading
Loading
@@ -71,6 +72,10 @@ module Gitlab
}
end
 
# Returns whatever `Gitlab::CycleAnalytics::UsageData#to_json` produces for a
# freshly built instance.
def cycle_analytics_usage_data
  usage_data = Gitlab::CycleAnalytics::UsageData.new

  usage_data.to_json
end
def features_usage_data
features_usage_data_ce
end
Loading
Loading
Loading
Loading
@@ -27,7 +27,7 @@ describe Projects::CycleAnalyticsController do
milestone = create(:milestone, project: project, created_at: 5.days.ago)
issue.update(milestone: milestone)
 
create_merge_request_closing_issue(issue)
create_merge_request_closing_issue(user, project, issue)
end
 
it 'is false' do
Loading
Loading
Loading
Loading
@@ -6,7 +6,7 @@ feature 'Cycle Analytics', :js do
let(:project) { create(:project, :repository) }
let(:issue) { create(:issue, project: project, created_at: 2.days.ago) }
let(:milestone) { create(:milestone, project: project) }
let(:mr) { create_merge_request_closing_issue(issue, commit_message: "References #{issue.to_reference}") }
let(:mr) { create_merge_request_closing_issue(user, project, issue, commit_message: "References #{issue.to_reference}") }
let(:pipeline) { create(:ci_empty_pipeline, status: 'created', project: project, ref: mr.source_branch, sha: mr.source_branch_sha, head_pipeline_of: mr) }
 
context 'as an allowed user' do
Loading
Loading
@@ -41,8 +41,8 @@ feature 'Cycle Analytics', :js do
allow_any_instance_of(Gitlab::ReferenceExtractor).to receive(:issues).and_return([issue])
project.add_master(user)
 
create_cycle
deploy_master
@build = create_cycle(user, project, issue, mr, milestone, pipeline)
deploy_master(user, project)
 
sign_in(user)
visit project_cycle_analytics_path(project)
Loading
Loading
@@ -117,8 +117,8 @@ feature 'Cycle Analytics', :js do
project.add_guest(guest)
 
allow_any_instance_of(Gitlab::ReferenceExtractor).to receive(:issues).and_return([issue])
create_cycle
deploy_master
create_cycle(user, project, issue, mr, milestone, pipeline)
deploy_master(user, project)
 
sign_in(guest)
visit project_cycle_analytics_path(project)
Loading
Loading
@@ -166,16 +166,6 @@ feature 'Cycle Analytics', :js do
expect(find('.stage-events')).to have_content("!#{mr.iid}")
end
 
def create_cycle
issue.update(milestone: milestone)
pipeline.run
@build = create(:ci_build, pipeline: pipeline, status: :success, author: user)
merge_merge_requests_closing_issue(issue)
ProcessCommitWorker.new.perform(project.id, user.id, mr.commits.last.to_hash)
end
def click_stage(stage_name)
find('.stage-nav li', text: stage_name).click
wait_for_requests
Loading
Loading
Loading
Loading
@@ -41,7 +41,7 @@ describe Gitlab::CycleAnalytics::BaseEventFetcher do
milestone = create(:milestone, project: project)
 
issue.update(milestone: milestone)
create_merge_request_closing_issue(issue)
create_merge_request_closing_issue(user, project, issue)
end
end
end
Loading
Loading
@@ -236,8 +236,8 @@ describe 'cycle analytics events' do
pipeline.run!
pipeline.succeed!
 
merge_merge_requests_closing_issue(context)
deploy_master
merge_merge_requests_closing_issue(user, project, context)
deploy_master(user, project)
end
 
it 'has the name' do
Loading
Loading
@@ -294,8 +294,8 @@ describe 'cycle analytics events' do
let!(:context) { create(:issue, project: project, created_at: 2.days.ago) }
 
before do
merge_merge_requests_closing_issue(context)
deploy_master
merge_merge_requests_closing_issue(user, project, context)
deploy_master(user, project)
end
 
it 'has the total time' do
Loading
Loading
@@ -334,7 +334,7 @@ describe 'cycle analytics events' do
def setup(context)
milestone = create(:milestone, project: project)
context.update(milestone: milestone)
mr = create_merge_request_closing_issue(context, commit_message: "References #{context.to_reference}")
mr = create_merge_request_closing_issue(user, project, context, commit_message: "References #{context.to_reference}")
 
ProcessCommitWorker.new.perform(project.id, user.id, mr.commits.last.to_hash)
end
Loading
Loading
require 'spec_helper'
# Verifies that UsageData#to_json reports average/sd/missing for every cycle
# analytics stage, with separate expectations per database adapter.
describe Gitlab::CycleAnalytics::UsageData do
describe '#to_json' do
# Fixture: two projects, each with one issue (created 1 and 2 hours ago
# respectively), a merge request closing it, a pipeline, a full cycle and two
# deploy_master calls (one explicitly to 'staging').
before do
Timecop.freeze do
user = create(:user, :admin)
projects = create_list(:project, 2, :repository)
projects.each_with_index do |project, time|
issue = create(:issue, project: project, created_at: (time + 1).hour.ago)
allow_any_instance_of(Gitlab::ReferenceExtractor).to receive(:issues).and_return([issue])
milestone = create(:milestone, project: project)
mr = create_merge_request_closing_issue(user, project, issue, commit_message: "References #{issue.to_reference}")
pipeline = create(:ci_empty_pipeline, status: 'created', project: project, ref: mr.source_branch, sha: mr.source_branch_sha, head_pipeline_of: mr)
create_cycle(user, project, issue, mr, milestone, pipeline)
deploy_master(user, project, environment: 'staging')
deploy_master(user, project)
end
end
end
# Shared assertion; the including context must define `expect_values_per_stage`
# as a Hash of stage => { average:, sd:, missing: }.
shared_examples 'a valid usage data result' do
it 'returns the aggregated usage data of every selected project' do
result = subject.to_json
expect(result).to have_key(:avg_cycle_analytics)
CycleAnalytics::STAGES.each do |stage|
expect(result[:avg_cycle_analytics]).to have_key(stage)
stage_values = result[:avg_cycle_analytics][stage]
expected_values = expect_values_per_stage[stage]
expected_values.each_pair do |op, value|
expect(stage_values).to have_key(op)
# :missing is a count and must match exactly; the other values are compared
# with a tolerance.
if op == :missing
expect(stage_values[op]).to eq(value)
else
# delta is used because of git timings that Timecop does not stub
expect(stage_values[op].to_i).to be_within(5).of(value.to_i)
end
end
end
end
end
context 'when using postgresql', :postgresql do
let(:expect_values_per_stage) do
{
issue: {
average: 5400,
sd: 2545,
missing: 0
},
plan: {
average: 2,
sd: 2,
missing: 0
},
code: {
average: nil,
sd: 0,
missing: 2
},
test: {
average: nil,
sd: 0,
missing: 2
},
review: {
average: 0,
sd: 0,
missing: 0
},
staging: {
average: 0,
sd: 0,
missing: 0
},
production: {
average: 5400,
sd: 2545,
missing: 0
}
}
end
it_behaves_like 'a valid usage data result'
end
# On MySQL every stage is expected to be reported as missing for both projects.
context 'when using mysql', :mysql do
let(:expect_values_per_stage) do
{
issue: {
average: nil,
sd: 0,
missing: 2
},
plan: {
average: nil,
sd: 0,
missing: 2
},
code: {
average: nil,
sd: 0,
missing: 2
},
test: {
average: nil,
sd: 0,
missing: 2
},
review: {
average: nil,
sd: 0,
missing: 2
},
staging: {
average: nil,
sd: 0,
missing: 2
},
production: {
average: nil,
sd: 0,
missing: 2
}
}
end
it_behaves_like 'a valid usage data result'
end
end
end
require 'spec_helper'
describe Gitlab::Database::Median do
  # Anonymous host class so the mixin can be exercised on its own.
  let(:dummy_class) { Class.new { include Gitlab::Database::Median } }

  subject(:median) { dummy_class.new }

  describe '#median_datetimes' do
    # Tagged :mysql: requesting a partition column is expected to be rejected.
    it 'raises NotSupportedError', :mysql do
      expected_message = "partition_column is not supported for MySQL"

      expect { median.median_datetimes(nil, nil, nil, :project_id) }
        .to raise_error(dummy_class::NotSupportedError, expected_message)
    end
  end
end
Loading
Loading
@@ -36,6 +36,7 @@ describe Gitlab::UsageData do
gitlab_shared_runners
git
database
avg_cycle_analytics
))
end
 
Loading
Loading
Loading
Loading
@@ -18,11 +18,11 @@ describe 'CycleAnalytics#code' do
end]],
end_time_conditions: [["merge request that closes issue is created",
-> (context, data) do
context.create_merge_request_closing_issue(data[:issue])
context.create_merge_request_closing_issue(context.user, context.project, data[:issue])
end]],
post_fn: -> (context, data) do
context.merge_merge_requests_closing_issue(data[:issue])
context.deploy_master
context.merge_merge_requests_closing_issue(context.user, context.project, data[:issue])
context.deploy_master(context.user, context.project)
end)
 
context "when a regular merge request (that doesn't close the issue) is created" do
Loading
Loading
@@ -30,10 +30,10 @@ describe 'CycleAnalytics#code' do
issue = create(:issue, project: project)
 
create_commit_referencing_issue(issue)
create_merge_request_closing_issue(issue, message: "Closes nothing")
create_merge_request_closing_issue(user, project, issue, message: "Closes nothing")
 
merge_merge_requests_closing_issue(issue)
deploy_master
merge_merge_requests_closing_issue(user, project, issue)
deploy_master(user, project)
 
expect(subject[:code].median).to be_nil
end
Loading
Loading
@@ -50,10 +50,10 @@ describe 'CycleAnalytics#code' do
end]],
end_time_conditions: [["merge request that closes issue is created",
-> (context, data) do
context.create_merge_request_closing_issue(data[:issue])
context.create_merge_request_closing_issue(context.user, context.project, data[:issue])
end]],
post_fn: -> (context, data) do
context.merge_merge_requests_closing_issue(data[:issue])
context.merge_merge_requests_closing_issue(context.user, context.project, data[:issue])
end)
 
context "when a regular merge request (that doesn't close the issue) is created" do
Loading
Loading
@@ -61,9 +61,9 @@ describe 'CycleAnalytics#code' do
issue = create(:issue, project: project)
 
create_commit_referencing_issue(issue)
create_merge_request_closing_issue(issue, message: "Closes nothing")
create_merge_request_closing_issue(user, project, issue, message: "Closes nothing")
 
merge_merge_requests_closing_issue(issue)
merge_merge_requests_closing_issue(user, project, issue)
 
expect(subject[:code].median).to be_nil
end
Loading
Loading
Loading
Loading
@@ -26,8 +26,8 @@ describe 'CycleAnalytics#issue' do
end]],
post_fn: -> (context, data) do
if data[:issue].persisted?
context.create_merge_request_closing_issue(data[:issue].reload)
context.merge_merge_requests_closing_issue(data[:issue])
context.create_merge_request_closing_issue(context.user, context.project, data[:issue].reload)
context.merge_merge_requests_closing_issue(context.user, context.project, data[:issue])
end
end)
 
Loading
Loading
@@ -37,8 +37,8 @@ describe 'CycleAnalytics#issue' do
issue = create(:issue, project: project)
issue.update(label_ids: [regular_label.id])
 
create_merge_request_closing_issue(issue)
merge_merge_requests_closing_issue(issue)
create_merge_request_closing_issue(user, project, issue)
merge_merge_requests_closing_issue(user, project, issue)
 
expect(subject[:issue].median).to be_nil
end
Loading
Loading
Loading
Loading
@@ -29,8 +29,8 @@ describe 'CycleAnalytics#plan' do
context.create_commit_referencing_issue(data[:issue], branch_name: data[:branch_name])
end]],
post_fn: -> (context, data) do
context.create_merge_request_closing_issue(data[:issue], source_branch: data[:branch_name])
context.merge_merge_requests_closing_issue(data[:issue])
context.create_merge_request_closing_issue(context.user, context.project, data[:issue], source_branch: data[:branch_name])
context.merge_merge_requests_closing_issue(context.user, context.project, data[:issue])
end)
 
context "when a regular label (instead of a list label) is added to the issue" do
Loading
Loading
@@ -41,8 +41,8 @@ describe 'CycleAnalytics#plan' do
issue.update(label_ids: [label.id])
create_commit_referencing_issue(issue, branch_name: branch_name)
 
create_merge_request_closing_issue(issue, source_branch: branch_name)
merge_merge_requests_closing_issue(issue)
create_merge_request_closing_issue(user, project, issue, source_branch: branch_name)
merge_merge_requests_closing_issue(user, project, issue)
 
expect(subject[:issue].median).to be_nil
end
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment