Skip to content
Snippets Groups Projects
Commit 335ee79a authored by Tiago Botelho's avatar Tiago Botelho
Browse files

Refactors median code to work with both single and multiple projects

parent 3f31da9c
No related branches found
No related tags found
No related merge requests found
Showing with 236 additions and 88 deletions
Loading
Loading
@@ -7,8 +7,8 @@ class CycleAnalytics
end
 
def all_medians_per_stage
STAGES.each_with_object({}) do |stage_name, hsh|
hsh[stage_name] = self[stage_name].median
STAGES.each_with_object({}) do |stage_name, medians_per_stage|
medians_per_stage[stage_name] = self[stage_name].median
end
end
 
Loading
Loading
Loading
Loading
@@ -7,6 +7,8 @@ class AnalyticsStageEntity < Grape::Entity
expose :description
 
expose :median, as: :value do |stage|
distance_of_time_in_words(stage.median) if stage.median && !(stage.median.blank? || stage.median.zero?)
if stage.median && !(stage.median.nil? || stage.median.zero?)
distance_of_time_in_words(stage.median)
end
end
end
Loading
Loading
@@ -8,14 +8,14 @@ module Gitlab
private
 
def base_query
@base_query ||= stage_query([@project.id]) # rubocop:disable Gitlab/ModuleWithInstanceVariables
@base_query ||= stage_query(@project.id) # rubocop:disable Gitlab/ModuleWithInstanceVariables
end
 
def stage_query(project_ids)
query = mr_closing_issues_table.join(issue_table).on(issue_table[:id].eq(mr_closing_issues_table[:issue_id]))
.join(issue_metrics_table).on(issue_table[:id].eq(issue_metrics_table[:issue_id]))
.project(issue_table[:project_id].as("project_id"))
.where(issue_table[:project_id].in(project_ids))
.where(issue_table[:project_id].in(Array(project_ids)))
.where(issue_table[:created_at].gteq(@options[:from])) # rubocop:disable Gitlab/ModuleWithInstanceVariables
 
# Load merge_requests
Loading
Loading
Loading
Loading
@@ -31,8 +31,12 @@ module Gitlab
interval_query = Arel::Nodes::As.new(cte_table,
subtract_datetimes(stage_query(project_ids), start_time_attrs, end_time_attrs, name.to_s))
 
median_datetimes(cte_table, interval_query, name, :project_id)&.each do |project_id, median|
loader.call(project_id, median)
if project_ids.size == 1
loader.call(@project.id, median_datetime(cte_table, interval_query, name))
else
median_datetimes(cte_table, interval_query, name, :project_id)&.each do |project_id, median|
loader.call(project_id, median)
end
end
end
end
Loading
Loading
Loading
Loading
@@ -5,27 +5,21 @@ module Gitlab
 
attr_reader :projects, :options
 
def initialize(projects, options)
@projects = projects
@options = options
def initialize
@projects = Project.sorted_by_activity.limit(PROJECTS_LIMIT)
@options = { from: 7.days.ago }
end
 
def to_json
total = 0
values = {}
 
medians_per_stage.each do |stage_name, medians|
medians = medians.map(&:presence).compact
values =
medians_per_stage.each_with_object({}) do |(stage_name, medians), hsh|
calculations = stage_values(medians)
 
stage_values = {
average: calc_average(medians),
sd: standard_deviation(medians),
missing: projects.length - medians.length
}
total += stage_values.values.compact.sum
values[stage_name] = stage_values
end
total += calculations.values.compact.sum
hsh[stage_name] = calculations
end
 
values[:total] = total
 
Loading
Loading
@@ -43,26 +37,36 @@ module Gitlab
end
end
 
# Builds the summary for a single stage from its per-project medians.
#
# Blank medians are discarded first. The returned hash holds:
#   :average - result of calc_average over the remaining medians
#   :sd      - result of standard_deviation over the remaining medians
#   :missing - how many of `projects` have no usable median for this stage
def stage_values(medians)
  present_medians = medians.map(&:presence).compact
  mean = calc_average(present_medians)
  missing_count = projects.length - present_medians.length

  {
    average: mean,
    sd: standard_deviation(present_medians, mean),
    missing: missing_count
  }
end
# Returns the mean of `values` truncated to an integer,
# or nil when there is nothing to average.
def calc_average(values)
  count = values.length
  return if count.zero?

  mean = values.sum / count
  mean.to_i
end
 
def sample_variance(values)
def standard_deviation(values, average)
Math.sqrt(sample_variance(values, average)).to_i
end
def sample_variance(values, average)
return 0 if values.length <= 1
 
avg = calc_average(values)
sum = values.inject(0) do |acc, val|
acc + (val - avg)**2
acc + (val - average)**2
end
 
sum / (values.length - 1)
end
def standard_deviation(values)
Math.sqrt(sample_variance(values)).to_i
end
end
end
end
Loading
Loading
@@ -2,32 +2,34 @@
module Gitlab
module Database
module Median
# Runs the median queries for `column_sym` (no partition column) and returns
# the single extracted median, or nil when the extracted value is blank.
def median_datetime(arel_table, query_so_far, column_sym)
  query_results = execute_queries(arel_table, query_so_far, column_sym)
  median = extract_median(query_results)

  median.presence
end
def median_datetimes(arel_table, query_so_far, column_sym, partition_column)
median_queries =
if Gitlab::Database.postgresql?
pg_median_datetime_sql(arel_table, query_so_far, column_sym, partition_column)
elsif Gitlab::Database.mysql?
mysql_median_datetime_sql(arel_table, query_so_far, column_sym)
end
results = Array.wrap(median_queries).map do |query|
ActiveRecord::Base.connection.execute(query)
end
extract_medians(results).presence
extract_medians(execute_queries(arel_table, query_so_far, column_sym, partition_column)).presence
end
 
def extract_medians(results)
def extract_median(results)
result = results.compact.first
 
if Gitlab::Database.postgresql?
result.values.map do |id, median|
[id.to_i, median&.to_f]
end.to_h
result = result.first.presence
result['median']&.to_f if result
elsif Gitlab::Database.mysql?
result.to_a.flatten.first
end
end
 
# Converts the raw result of a partitioned median query into a hash of
# `partition id (Integer) => median (Float or nil)`.
#
# Returns an empty hash on MySQL, and also when `results` holds no non-nil
# query result (previously this case raised NoMethodError on nil).
def extract_medians(results)
  return {} if Gitlab::Database.mysql?

  result = results.compact.first
  return {} unless result

  result.values.map do |id, median|
    [id.to_i, median&.to_f]
  end.to_h
end
def mysql_median_datetime_sql(arel_table, query_so_far, column_sym)
query = arel_table
.from(arel_table.project(Arel.sql('*')).order(arel_table[column_sym]).as(arel_table.table_name))
Loading
Loading
@@ -53,7 +55,7 @@ module Gitlab
]
end
 
def pg_median_datetime_sql(arel_table, query_so_far, column_sym, partition_column)
def pg_median_datetime_sql(arel_table, query_so_far, column_sym, partition_column = nil)
# Create a CTE with the column we're operating on, row number (after sorting by the column
# we're operating on), and count of the table we're operating on (duplicated across all rows
# of the CTE). For example, if we're looking to find the median of the `projects.star_count`
Loading
Loading
@@ -64,49 +66,107 @@ module Gitlab
# 5 | 1 | 3
# 9 | 2 | 3
# 15 | 3 | 3
#
# If a partition column is used, we do the same operation separately for each partition.
# In that case the CTE might look like this:
#
# project_id | star_count | row_id | ct
# ------------+------------+--------+----
# 1 | 5 | 1 | 2
# 1 | 9 | 2 | 2
# 2 | 10 | 1 | 3
# 2 | 15 | 2 | 3
# 2 | 20 | 3 | 3
cte_table = Arel::Table.new("ordered_records")
cte = Arel::Nodes::As.new(
cte_table,
arel_table
.project(
arel_table[partition_column],
arel_table[column_sym].as(column_sym.to_s),
Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("rank", []),
Arel::Nodes::Window.new.partition(arel_table[partition_column])
.order(arel_table[column_sym])).as('row_id'),
arel_table.from(arel_table.alias)
.project("COUNT(*)")
.where(arel_table[partition_column].eq(arel_table.alias[partition_column])).as('ct')).
arel_table.project(*rank_rows(arel_table, column_sym, partition_column)).
# Disallow negative values
where(arel_table[column_sym].gteq(zero_interval)))
 
# From the CTE, select either the middle row or the middle two rows (this is accomplished
# by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the
# selected rows, and this is the median value.
cte_table
.project(cte_table[partition_column])
.project(average([extract_epoch(cte_table[column_sym])], "median"))
.where(
Arel::Nodes::Between.new(
cte_table[:row_id],
Arel::Nodes::And.new(
[(cte_table[:ct] / Arel.sql('2.0')),
(cte_table[:ct] / Arel.sql('2.0') + 1)]
result =
cte_table
.project(*median_projections(cte_table, column_sym, partition_column))
.where(
Arel::Nodes::Between.new(
cte_table[:row_id],
Arel::Nodes::And.new(
[(cte_table[:ct] / Arel.sql('2.0')),
(cte_table[:ct] / Arel.sql('2.0') + 1)]
)
)
)
)
.with(query_so_far, cte)
.group(cte_table[partition_column])
.order(cte_table[partition_column])
.to_sql
.with(query_so_far, cte)
result.group(cte_table[partition_column]).order(cte_table[partition_column]) if partition_column
result.to_sql
end
 
private
 
# Picks the median SQL builder for the current database.
#
# PostgreSQL receives the optional `partition_column`; the MySQL builder is
# called without it. Returns nil when neither database is detected.
def median_queries(arel_table, query_so_far, column_sym, partition_column = nil)
  return pg_median_datetime_sql(arel_table, query_so_far, column_sym, partition_column) if Gitlab::Database.postgresql?
  return mysql_median_datetime_sql(arel_table, query_so_far, column_sym) if Gitlab::Database.mysql?

  nil
end
# Builds the median queries and executes each one on the ActiveRecord
# connection, returning the raw results in query order.
def execute_queries(arel_table, query_so_far, column_sym, partition_column = nil)
  statements = Array.wrap(median_queries(arel_table, query_so_far, column_sym, partition_column))

  statements.map do |statement|
    ActiveRecord::Base.connection.execute(statement)
  end
end
# Wraps `args` in an SQL AVG(...) function node aliased as `as`.
def average(args, as)
  function_name = "AVG"

  Arel::Nodes::NamedFunction.new(function_name, args, as)
end
 
# Builds the projections for the "ordered_records" CTE: the value column, a
# `row_id` giving each row's position when ordered by that column, and `ct`,
# the number of rows being ranked.
#
# Without a partition column the result is [value, row_id, ct], where row_id
# comes from row_number() and ct from COUNT(1).
#
# With a partition column the result is [partition, value, row_id, ct], where
# row_id comes from rank() within each partition and ct counts the rows that
# share the partition value.
#
# NOTE(review): rank() gives tied values the same row_id whereas row_number()
# does not - confirm the partitioned median is still correct with ties.
def rank_rows(arel_table, column_sym, partition_column)
  value_column = arel_table[column_sym].as(column_sym.to_s)

  unless partition_column
    numbering = Arel::Nodes::NamedFunction.new('row_number', [])
    ordering = Arel::Nodes::Window.new.order(arel_table[column_sym])
    row_position = Arel::Nodes::Over.new(numbering, ordering).as('row_id')
    total_rows = arel_table.project("COUNT(1)").as('ct')

    return [value_column, row_position, total_rows]
  end

  partition_key = arel_table[partition_column]

  ranking = Arel::Nodes::NamedFunction.new('rank', [])
  partition_window = Arel::Nodes::Window.new
    .partition(arel_table[partition_column])
    .order(arel_table[column_sym])
  row_position = Arel::Nodes::Over.new(ranking, partition_window).as('row_id')

  partition_size = arel_table.from(arel_table.alias)
    .project('COUNT(*)')
    .where(arel_table[partition_column].eq(arel_table.alias[partition_column]))
    .as('ct')

  [partition_key, value_column, row_position, partition_size]
end
# Projections for the final median SELECT: the averaged epoch of the value
# column aliased "median", preceded by the partition column when one is given.
def median_projections(table, column_sym, partition_column)
  median_column = average([extract_epoch(table[column_sym])], "median")
  return [median_column] unless partition_column

  [table[partition_column], median_column]
end
# Returns a raw SQL fragment extracting the epoch from the given attribute,
# referenced as "<relation name>"."<attribute name>".
def extract_epoch(arel_attribute)
  relation_name = arel_attribute.relation.name
  attribute_name = arel_attribute.name

  Arel.sql(%Q{EXTRACT(EPOCH FROM "#{relation_name}"."#{attribute_name}")})
end
Loading
Loading
Loading
Loading
@@ -73,12 +73,7 @@ module Gitlab
end
 
def cycle_analytics_usage_data
# We only want to generate this data for instances that use PostgreSQL
return {} if Gitlab::Database.mysql?
projects = Project.sorted_by_activity.limit(Gitlab::CycleAnalytics::UsageData::PROJECTS_LIMIT)
Gitlab::CycleAnalytics::UsageData.new(projects, { from: 7.days.ago }).to_json
Gitlab::CycleAnalytics::UsageData.new.to_json
end
 
def features_usage_data
Loading
Loading
Loading
Loading
@@ -41,7 +41,7 @@ feature 'Cycle Analytics', :js do
allow_any_instance_of(Gitlab::ReferenceExtractor).to receive(:issues).and_return([issue])
project.add_master(user)
 
create_cycle
@build = create_cycle(user, project, issue, mr, milestone, pipeline)
deploy_master
 
sign_in(user)
Loading
Loading
@@ -117,7 +117,7 @@ feature 'Cycle Analytics', :js do
project.add_guest(guest)
 
allow_any_instance_of(Gitlab::ReferenceExtractor).to receive(:issues).and_return([issue])
create_cycle
create_cycle(user, project, issue, mr, milestone, pipeline)
deploy_master
 
sign_in(guest)
Loading
Loading
@@ -166,16 +166,6 @@ feature 'Cycle Analytics', :js do
expect(find('.stage-events')).to have_content("!#{mr.iid}")
end
 
def create_cycle
issue.update(milestone: milestone)
pipeline.run
@build = create(:ci_build, pipeline: pipeline, status: :success, author: user)
merge_merge_requests_closing_issue(issue)
ProcessCommitWorker.new.perform(project.id, user.id, mr.commits.last.to_hash)
end
def click_stage(stage_name)
find('.stage-nav li', text: stage_name).click
wait_for_requests
Loading
Loading
require 'spec_helper'
describe Gitlab::CycleAnalytics::UsageData do
  let(:project) { create(:project, :repository) }
  let(:user) { create(:user, :admin) }
  let(:issue) { create(:issue, project: project, created_at: 2.days.ago) }
  let(:milestone) { create(:milestone, project: project) }
  let(:mr) { create_merge_request_closing_issue(issue, commit_message: "References #{issue.to_reference}") }
  let(:pipeline) { create(:ci_empty_pipeline, status: 'created', project: project, ref: mr.source_branch, sha: mr.source_branch_sha, head_pipeline_of: mr) }

  # UsageData#initialize takes no arguments, so passing a project list here
  # would raise ArgumentError.
  subject { described_class.new }

  describe '#to_json' do
    before do
      allow_any_instance_of(Gitlab::ReferenceExtractor).to receive(:issues).and_return([issue])

      # Creating the cycle also persists `project` before the subject is used.
      create_cycle(user, project, issue, mr, milestone, pipeline)
      deploy_master
    end

    it 'returns the aggregated usage data of every selected project' do
      result = subject.to_json

      # Check the key before indexing so a missing key fails with a clear message.
      expect(result).to have_key(:avg_cycle_analytics)

      avg_cycle_analytics = result[:avg_cycle_analytics]

      CycleAnalytics::STAGES.each do |stage_name|
        expect(avg_cycle_analytics).to have_key(stage_name)

        stage_values = avg_cycle_analytics[stage_name]

        expect(stage_values).to have_key(:average)
        expect(stage_values).to have_key(:sd)
        expect(stage_values).to have_key(:missing)
      end
    end
  end
end
require 'spec_helper'
describe Gitlab::Database::Median do
  # Median is a module, so it is mixed into an anonymous class to get an
  # object on which its methods can be called.
  let(:median_host) do
    Class.new do
      include Gitlab::Database::Median
    end.new
  end

  describe '#extract_medians' do
    context 'when using MySQL' do
      it 'returns an empty hash' do
        values = [["1", "1000"]]

        allow(Gitlab::Database).to receive(:mysql?).and_return(true)

        expect(median_host.extract_medians(values)).to eq({})
      end
    end
  end
end
require 'spec_helper'
describe CycleAnalytics do
  let(:project) { create(:project, :repository) }
  let(:from_date) { 10.days.ago }
  let(:user) { create(:user, :admin) }
  let(:issue) { create(:issue, project: project, created_at: 2.days.ago) }
  let(:milestone) { create(:milestone, project: project) }
  let(:mr) { create_merge_request_closing_issue(issue, commit_message: "References #{issue.to_reference}") }
  let(:pipeline) { create(:ci_empty_pipeline, status: 'created', project: project, ref: mr.source_branch, sha: mr.source_branch_sha, head_pipeline_of: mr) }

  subject { described_class.new(project, from: from_date) }

  describe '#all_medians_per_stage' do
    before do
      allow_any_instance_of(Gitlab::ReferenceExtractor).to receive(:issues).and_return([issue])

      create_cycle(user, project, issue, mr, milestone, pipeline)
      deploy_master
    end

    it 'returns every median for each stage for a specific project' do
      # Build the expectation stage by stage through the single-stage accessor.
      expected_medians = described_class::STAGES.each_with_object({}) do |stage_name, medians_per_stage|
        stage_median = subject[stage_name].median
        medians_per_stage[stage_name] = stage_median.presence
      end

      expect(subject.all_medians_per_stage).to eq(expected_medians)
    end
  end
end
Loading
Loading
@@ -26,6 +26,18 @@ module CycleAnalyticsHelpers
ref: 'refs/heads/master').execute
end
 
# Drives an issue through one cycle for the specs: assigns the milestone, runs
# the pipeline, creates a successful CI build, merges the merge requests
# closing the issue and processes the last commit of `mr`.
#
# Returns the created CI build.
def create_cycle(user, project, issue, mr, milestone, pipeline)
  issue.update(milestone: milestone)
  pipeline.run

  create(:ci_build, pipeline: pipeline, status: :success, author: user).tap do
    merge_merge_requests_closing_issue(issue)

    last_commit = mr.commits.last.to_hash
    ProcessCommitWorker.new.perform(project.id, user.id, last_commit)
  end
end
def create_merge_request_closing_issue(issue, message: nil, source_branch: nil, commit_message: 'commit message')
if !source_branch || project.repository.commit(source_branch).blank?
source_branch = generate(:branch)
Loading
Loading
Loading
Loading
@@ -50,7 +50,7 @@ module CycleAnalyticsHelpers
end
 
median_time_difference = time_differences.sort[2]
expect(subject[phase].median.presence).to be_within(5).of(median_time_difference)
expect(subject[phase].median).to be_within(5).of(median_time_difference)
end
 
context "when the data belongs to another project" do
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment