From 7d69ff3ddf0fb83c6a1ec02f85b01b454080b647 Mon Sep 17 00:00:00 2001
From: Timothy Andrew <mail@timothyandrew.net>
Date: Fri, 16 Sep 2016 11:34:49 +0530
Subject: [PATCH] Move cycle analytics calculations to SQL.

1. Use Arel for composable queries.
2. For a project with ~10k issues, the page loads in around 600ms.
   Previously, a project with ~5k issues would have a ~20s page load
   time.
---
 app/models/concerns/database_median.rb        |  54 ++++++++++
 app/models/cycle_analytics.rb                 | 101 +++++++++++-------
 .../cycle_analytics/table_references.rb       |  25 +++++
 3 files changed, 144 insertions(+), 36 deletions(-)
 create mode 100644 app/models/concerns/database_median.rb
 create mode 100644 app/models/cycle_analytics/table_references.rb

diff --git a/app/models/concerns/database_median.rb b/app/models/concerns/database_median.rb
new file mode 100644
index 00000000000..5af1ca772ae
--- /dev/null
+++ b/app/models/concerns/database_median.rb
@@ -0,0 +1,54 @@
+module DatabaseMedian
+  extend ActiveSupport::Concern
+
+  def median_datetime(arel_table, query_so_far, column_sym)
+    # TODO: MySQL
+    pg_median_datetime(arel_table, query_so_far, column_sym)
+  end
+
+
+  # https://www.periscopedata.com/blog/medians-in-sql.html
+  def pg_median_datetime(arel_table, query_so_far, column_sym)
+    # Create a CTE with the column we're operating on, row number (after sorting by the column
+    # we're operating on), and count of the table we're operating on (duplicated across) all rows
+    # of the CTE. For example, if we're looking to find the median of the `projects.star_count`
+    # column, the CTE might look like this:
+    #
+    #  star_count | row_id | ct
+    # ------------+--------+----
+    #           5 |      1 |  3
+    #           9 |      2 |  3
+    #          15 |      3 |  3
+    cte_table = Arel::Table.new(("ordered_records"))
+    cte = Arel::Nodes::As.new(cte_table,
+                              arel_table.
+                                project(arel_table[column_sym].as(column_sym.to_s),
+                                                 Arel::Nodes::Over.new(Arel::Nodes::NamedFunction.new("row_number", []),
+                                                                       Arel::Nodes::Window.new.order(arel_table[column_sym])).as('row_id'),
+                                                 arel_table.project("COUNT(1)").as('ct')).
+                                # Disallow negative values
+                                where(arel_table[column_sym].gteq(zero_interval)))
+
+    # From the CTE, select either the middle row or the middle two rows (this is accomplished
+    # by 'where cte.row_id between cte.ct / 2.0 AND cte.ct / 2.0 + 1'). Find the average of the
+    # selected rows, and this is the median value.
+    cte_table.project(Arel::Nodes::NamedFunction.new("AVG",
+                                                     [extract_epoch(cte_table[column_sym])],
+                                                     "median")).
+      where(Arel::Nodes::Between.new(cte_table[:row_id],
+                                     Arel::Nodes::And.new([(cte_table[:ct] / Arel::Nodes::SqlLiteral.new('2.0')),
+                                                           (cte_table[:ct] / Arel::Nodes::SqlLiteral.new('2.0') + 1)]))).
+      with(query_so_far, cte)
+  end
+
+  private
+
+  def extract_epoch(arel_attribute)
+    Arel::Nodes::SqlLiteral.new("EXTRACT(EPOCH FROM \"#{arel_attribute.relation.name}\".\"#{arel_attribute.name}\")")
+  end
+
+  # Need to cast '0' to an INTERVAL before we can check if the interval is positive
+  def zero_interval
+    Arel::Nodes::NamedFunction.new("CAST", [Arel::Nodes::SqlLiteral.new("'0' AS INTERVAL")])
+  end
+end
diff --git a/app/models/cycle_analytics.rb b/app/models/cycle_analytics.rb
index 17a80115fe5..c26d50d693c 100644
--- a/app/models/cycle_analytics.rb
+++ b/app/models/cycle_analytics.rb
@@ -1,9 +1,9 @@
 class CycleAnalytics
-  attr_reader :from
+  include DatabaseMedian
 
   def initialize(project, from:)
+    @project = project
     @from = from
-    @queries = Queries.new(project)
   end
 
   def as_json(options = {})
@@ -14,65 +14,94 @@ class CycleAnalytics
   end
 
   def issue
-    calculate_metric(@queries.issues(created_after: @from),
-                     -> (data_point) { data_point[:issue].created_at },
-                     [@queries.issue_first_associated_with_milestone_at, @queries.issue_first_added_to_list_label_at])
+    calculate_metric!(:issue,
+                      TableReferences.issues[:created_at],
+                      [TableReferences.issue_metrics[:first_associated_with_milestone_at],
+                       TableReferences.issue_metrics[:first_added_to_board_at]],
+                      load_merge_requests: false)
   end
 
   def plan
-    calculate_metric(@queries.issues(created_after: @from),
-                     [@queries.issue_first_associated_with_milestone_at, @queries.issue_first_added_to_list_label_at],
-                     @queries.issue_first_mentioned_in_commit_at)
+    calculate_metric!(:plan,
+                      [TableReferences.issue_metrics[:first_associated_with_milestone_at],
+                       TableReferences.issue_metrics[:first_added_to_board_at]],
+                      TableReferences.issue_metrics[:first_mentioned_in_commit_at],
+                      load_merge_requests: false)
   end
 
   def code
-    calculate_metric(@queries.merge_requests_closing_issues(created_after: @from),
-                     @queries.issue_first_mentioned_in_commit_at,
-                     -> (data_point) { data_point[:merge_request].created_at })
+    calculate_metric!(:code,
+                      TableReferences.issue_metrics[:first_mentioned_in_commit_at],
+                      TableReferences.merge_requests[:created_at],
+                      load_merge_requests: true)
   end
 
   def test
-    calculate_metric(@queries.merge_requests_closing_issues(created_after: @from),
-                     @queries.merge_request_build_started_at,
-                     @queries.merge_request_build_finished_at)
+    calculate_metric!(:test,
+                      TableReferences.merge_request_metrics[:latest_build_started_at],
+                      TableReferences.merge_request_metrics[:latest_build_finished_at],
+                      load_merge_requests: true)
   end
 
   def review
-    calculate_metric(@queries.merge_requests_closing_issues(created_after: @from),
-                     -> (data_point) { data_point[:merge_request].created_at },
-                     @queries.merge_request_merged_at)
+    calculate_metric!(:review,
+                      TableReferences.merge_requests[:created_at],
+                      TableReferences.merge_request_metrics[:merged_at],
+                      load_merge_requests: true)
   end
 
   def staging
-    calculate_metric(@queries.merge_requests_closing_issues(created_after: @from),
-                     @queries.merge_request_merged_at,
-                     @queries.merge_request_deployed_to_production_at)
+    calculate_metric!(:staging,
+                      TableReferences.merge_request_metrics[:merged_at],
+                      TableReferences.merge_request_metrics[:first_deployed_to_production_at],
+                      load_merge_requests: true)
   end
 
   def production
-    calculate_metric(@queries.merge_requests_closing_issues(created_after: @from),
-                     -> (data_point) { data_point[:issue].created_at },
-                     @queries.merge_request_deployed_to_production_at)
+    calculate_metric!(:production,
+                      TableReferences.issues[:created_at],
+                      TableReferences.merge_request_metrics[:first_deployed_to_production_at],
+                      load_merge_requests: true)
   end
 
   private
 
-  def calculate_metric(data, start_time_fns, end_time_fns)
-    times = data.map do |data_point|
-      start_time = Array.wrap(start_time_fns).map { |fn| fn[data_point] }.compact.first
-      end_time = Array.wrap(end_time_fns).map { |fn| fn[data_point] }.compact.first
+  def calculate_metric!(name, start_time_attrs, end_time_attrs, load_merge_requests: false)
+    cte_table = Arel::Table.new("cte_table_for_#{name}")
 
-      if start_time.present? && end_time.present? && end_time >= start_time
-        end_time - start_time
-      end
-    end
+    # Add a `SELECT` for (end_time - start-time), and add an alias for it.
+    # Note: We use COALESCE to pick up the first non-null column for end_time / start_time.
+    query = Arel::Nodes::As.new(
+      cte_table,
+      base_query(load_merge_requests: load_merge_requests).project(
+        Arel::Nodes::Subtraction.new(
+        Arel::Nodes::NamedFunction.new("COALESCE", Array.wrap(end_time_attrs)),
+        Arel::Nodes::NamedFunction.new("COALESCE", Array.wrap(start_time_attrs))
+      ).as(name.to_s)))
 
-    median(times.compact.sort)
+    query = median_datetime(cte_table, query, name)
+    median = ActiveRecord::Base.connection.execute(query.to_sql).first['median']
+    median.to_f if median.present?
   end
 
-  def median(coll)
-    return if coll.empty?
-    size = coll.length
-    (coll[size / 2] + coll[(size - 1) / 2]) / 2.0
+  # Join table with a row for every <issue,merge_request> pair (where the merge request
+  # closes the given issue) with issue and merge request metrics included. The metrics
+  # are loaded with an inner join, so issues / merge requests without metrics are
+  # automatically excluded.
+  def base_query(load_merge_requests:)
+    arel_table = TableReferences.merge_requests_closing_issues
+
+    query = arel_table.join(TableReferences.issues).on(TableReferences.issues[:id].eq(arel_table[:issue_id])).
+            join(TableReferences.issue_metrics).on(TableReferences.issues[:id].eq(TableReferences.issue_metrics[:issue_id])).
+            where(TableReferences.issues[:project_id].eq(@project.id)).
+            where(TableReferences.issues[:deleted_at].eq(nil)).
+            where(TableReferences.issues[:created_at].gteq(@from))
+
+    if load_merge_requests
+      query.join(TableReferences.merge_requests, Arel::Nodes::OuterJoin).on(TableReferences.merge_requests[:id].eq(arel_table[:merge_request_id])).
+        join(TableReferences.merge_request_metrics).on(TableReferences.merge_requests[:id].eq(TableReferences.merge_request_metrics[:merge_request_id]))
+    else
+      query
+    end
   end
 end
diff --git a/app/models/cycle_analytics/table_references.rb b/app/models/cycle_analytics/table_references.rb
new file mode 100644
index 00000000000..f276723ee0e
--- /dev/null
+++ b/app/models/cycle_analytics/table_references.rb
@@ -0,0 +1,25 @@
+class CycleAnalytics
+  module TableReferences
+    class << self
+      def issues
+        Issue.arel_table
+      end
+
+      def issue_metrics
+        Issue::Metrics.arel_table
+      end
+
+      def merge_requests
+        MergeRequest.arel_table
+      end
+
+      def merge_request_metrics
+        MergeRequest::Metrics.arel_table
+      end
+
+      def merge_requests_closing_issues
+        MergeRequestsClosingIssues.arel_table
+      end
+    end
+  end
+end
-- 
GitLab