Skip to content
Snippets Groups Projects
Verified Commit 8a72f5c4 authored by Yorick Peterse's avatar Yorick Peterse
Browse files

Added FromUnion to easily select from a UNION

This commit adds the module `FromUnion`, which provides the class method
`from_union`. This simplifies the process of selecting data from the
result of a UNION, and reduces the likelihood of making mistakes. As a
result, instead of this:

    union = Gitlab::SQL::Union.new([foo, bar])

    Foo.from("(#{union.to_sql}) #{Foo.table_name}")

We can now write this instead:

    Foo.from_union([foo, bar])

This commit also includes some changes to make this new setup work
properly. For example, a bug in Rails 4
(https://github.com/rails/rails/issues/24193) would break the use of
`from("sub-query-here").includes(:relation)` in certain cases. There was
also a CI query which appeared to repeat a lot of conditions from an
outer query on an inner query, which isn't necessary.

Finally, we include a RuboCop cop to ensure developers use this new
module, instead of using Gitlab::SQL::Union directly.

Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/51307
parent 78b3eea7
No related branches found
No related tags found
1 merge request!10495Merge Requests - Assignee
Showing
with 135 additions and 79 deletions
Loading
Loading
@@ -18,7 +18,7 @@ class MembersFinder
group_members = GroupMembersFinder.new(group).execute(include_descendants: include_descendants) # rubocop: disable CodeReuse/Finder
group_members = group_members.non_invite
 
union = Gitlab::SQL::Union.new([project_members, group_members], remove_duplicates: false)
union = Gitlab::SQL::Union.new([project_members, group_members], remove_duplicates: false) # rubocop: disable Gitlab/Union
 
sql = distinct_on(union)
 
Loading
Loading
Loading
Loading
@@ -89,9 +89,7 @@ class SnippetsFinder < UnionFinder
 
# We use a UNION here instead of OR clauses since this results in better
# performance.
union = Gitlab::SQL::Union.new([authorized_projects.select('projects.id'), visible_projects.select('projects.id')])
Project.from("(#{union.to_sql}) AS #{Project.table_name}")
Project.from_union([authorized_projects, visible_projects])
end
# rubocop: enable CodeReuse/ActiveRecord
 
Loading
Loading
Loading
Loading
@@ -164,16 +164,13 @@ class TodosFinder
 
# rubocop: disable CodeReuse/ActiveRecord
def by_group(items)
if group?
groups = group.self_and_descendants
project_todos = items.where(project_id: Project.where(group: groups).select(:id))
group_todos = items.where(group_id: groups.select(:id))
return items unless group?
 
union = Gitlab::SQL::Union.new([project_todos, group_todos])
items = Todo.from("(#{union.to_sql}) #{Todo.table_name}")
end
groups = group.self_and_descendants
project_todos = items.where(project_id: Project.where(group: groups).select(:id))
group_todos = items.where(group_id: groups.select(:id))
 
items
Todo.from_union([project_todos, group_todos])
end
# rubocop: enable CodeReuse/ActiveRecord
 
Loading
Loading
# frozen_string_literal: true
 
class UnionFinder
# rubocop: disable CodeReuse/ActiveRecord
def find_union(segments, klass)
if segments.length > 1
union = Gitlab::SQL::Union.new(segments.map { |s| s.select(:id) })
unless klass < FromUnion
raise TypeError, "#{klass.inspect} must include the FromUnion module"
end
 
klass.where("#{klass.table_name}.id IN (#{union.to_sql})")
if segments.length > 1
klass.from_union(segments)
else
segments.first
end
end
# rubocop: enable CodeReuse/ActiveRecord
end
# frozen_string_literal: true
 
class Badge < ActiveRecord::Base
include FromUnion
# This structure sets the placeholders that the urls
# can have. This hash also sets which action to ask when
# the placeholder is found.
Loading
Loading
Loading
Loading
@@ -7,6 +7,7 @@ module Ci
include IgnorableColumn
include RedisCacheable
include ChronicDurationAttribute
include FromUnion
 
RUNNER_QUEUE_EXPIRY_TIME = 60.minutes
ONLINE_CONTACT_TIMEOUT = 1.hour
Loading
Loading
@@ -57,18 +58,26 @@ module Ci
}
 
scope :owned_or_instance_wide, -> (project_id) do
union = Gitlab::SQL::Union.new(
[belonging_to_project(project_id), belonging_to_parent_group_of_project(project_id), instance_type],
from_union(
[
belonging_to_project(project_id),
belonging_to_parent_group_of_project(project_id),
instance_type
],
remove_duplicates: false
)
from("(#{union.to_sql}) ci_runners")
end
 
scope :assignable_for, ->(project) do
# FIXME: That `to_sql` is needed to workaround a weird Rails bug.
# Without that, placeholders would miss one and couldn't match.
#
# We use "unscoped" here so that any current Ci::Runner filters don't
# apply to the inner query, which is not necessary.
exclude_runners = unscoped { project.runners.select(:id) }.to_sql
where(locked: false)
.where.not("ci_runners.id IN (#{project.runners.select(:id).to_sql})")
.where.not("ci_runners.id IN (#{exclude_runners})")
.project_type
end
 
Loading
Loading
# frozen_string_literal: true
module FromUnion
extend ActiveSupport::Concern
class_methods do
# Produces a query that uses a FROM to select data using a UNION.
#
# Using a FROM for a UNION has in the past lead to better query plans. As
# such, we generally recommend this pattern instead of using a WHERE IN.
#
# Example:
# users = User.from_union([User.where(id: 1), User.where(id: 2)])
#
# This would produce the following SQL query:
#
# SELECT *
# FROM (
# SELECT *
# FROM users
# WHERE id = 1
#
# UNION
#
# SELECT *
# FROM users
# WHERE id = 2
# ) users;
#
# members - An Array of ActiveRecord::Relation objects to use in the UNION.
#
# remove_duplicates - A boolean indicating if duplicate entries should be
# removed. Defaults to true.
#
# alias_as - The alias to use for the sub query. Defaults to the name of the
# table of the current model.
# rubocop: disable Gitlab/Union
def from_union(members, remove_duplicates: true, alias_as: table_name)
union = Gitlab::SQL::Union
.new(members, remove_duplicates: remove_duplicates)
.to_sql
# This pattern is necessary as a bug in Rails 4 can cause the use of
# `from("string here").includes(:foo)` to break ActiveRecord. This is
# fixed in https://github.com/rails/rails/pull/25374, which is released as
# part of Rails 5.
from([Arel.sql("(#{union}) #{alias_as}")])
end
# rubocop: enable Gitlab/Union
end
end
Loading
Loading
@@ -3,6 +3,7 @@
class Event < ActiveRecord::Base
include Sortable
include IgnorableColumn
include FromUnion
default_scope { reorder(nil) }
 
CREATED = 1
Loading
Loading
Loading
Loading
@@ -304,14 +304,12 @@ class Group < Namespace
# 3. They belong to a sub-group or project in such sub-group
# 4. They belong to an ancestor group
def direct_and_indirect_users
union = Gitlab::SQL::Union.new([
User.from_union([
User
.where(id: direct_and_indirect_members.select(:user_id))
.reorder(nil),
project_users_with_descendants
])
User.from("(#{union.to_sql}) #{User.table_name}")
end
 
# Returns all users that are members of projects
Loading
Loading
Loading
Loading
@@ -7,6 +7,7 @@ class Label < ActiveRecord::Base
include Gitlab::SQL::Pattern
include OptionallySearch
include Sortable
include FromUnion
 
# Represents a "No Label" state used for filtering Issues and Merge
# Requests that have no label assigned.
Loading
Loading
Loading
Loading
@@ -14,6 +14,7 @@ class MergeRequest < ActiveRecord::Base
include Gitlab::Utils::StrongMemoize
include LabelEventable
include ReactiveCaching
include FromUnion
 
self.reactive_cache_key = ->(model) { [model.project.id, model.iid] }
self.reactive_cache_refresh_interval = 10.minutes
Loading
Loading
@@ -237,11 +238,10 @@ class MergeRequest < ActiveRecord::Base
def self.in_projects(relation)
# unscoping unnecessary conditions that'll be applied
# when executing `where("merge_requests.id IN (#{union.to_sql})")`
source = unscoped.where(source_project_id: relation).select(:id)
target = unscoped.where(target_project_id: relation).select(:id)
union = Gitlab::SQL::Union.new([source, target])
source = unscoped.where(source_project_id: relation)
target = unscoped.where(target_project_id: relation)
 
where("merge_requests.id IN (#{union.to_sql})") # rubocop:disable GitlabSecurity/SqlInjection
from_union([source, target])
end
 
# This is used after project import, to reset the IDs to the correct
Loading
Loading
@@ -740,11 +740,8 @@ class MergeRequest < ActiveRecord::Base
# compared to using OR statements. We're using UNION ALL since the queries
# used won't produce any duplicates (e.g. a note for a commit can't also be
# a note for an MR).
union = Gitlab::SQL::Union
.new([notes, commit_notes], remove_duplicates: false)
.to_sql
Note.from("(#{union}) #{Note.table_name}")
Note
.from_union([notes, commit_notes], remove_duplicates: false)
.includes(:noteable)
end
 
Loading
Loading
Loading
Loading
@@ -11,6 +11,7 @@ class Namespace < ActiveRecord::Base
include Gitlab::SQL::Pattern
include IgnorableColumn
include FeatureGate
include FromUnion
 
ignore_column :deleted_at
 
Loading
Loading
Loading
Loading
@@ -17,6 +17,7 @@ class Note < ActiveRecord::Base
include Editable
include Gitlab::SQL::Pattern
include ThrottledTouch
include FromUnion
 
module SpecialRole
FIRST_TIME_CONTRIBUTOR = :first_time_contributor
Loading
Loading
Loading
Loading
@@ -29,6 +29,7 @@ class Project < ActiveRecord::Base
include BatchDestroyDependentAssociations
include FeatureGate
include OptionallySearch
include FromUnion
extend Gitlab::Cache::RequestCache
 
extend Gitlab::ConfigHelper
Loading
Loading
@@ -1493,8 +1494,7 @@ class Project < ActiveRecord::Base
end
 
def all_runners
union = Gitlab::SQL::Union.new([runners, group_runners, shared_runners])
Ci::Runner.from("(#{union.to_sql}) ci_runners")
Ci::Runner.from_union([runners, group_runners, shared_runners])
end
 
def active_runners
Loading
Loading
@@ -2022,12 +2022,10 @@ class Project < ActiveRecord::Base
def badges
return project_badges unless group
 
group_badges_rel = GroupBadge.where(group: group.self_and_ancestors)
union = Gitlab::SQL::Union.new([project_badges.select(:id),
group_badges_rel.select(:id)])
Badge.where("id IN (#{union.to_sql})") # rubocop:disable GitlabSecurity/SqlInjection
Badge.from_union([
project_badges,
GroupBadge.where(group: group.self_and_ancestors)
])
end
 
def merge_requests_allowing_push_to_user(user)
Loading
Loading
# frozen_string_literal: true
 
class ProjectAuthorization < ActiveRecord::Base
include FromUnion
belongs_to :user
belongs_to :project
 
Loading
Loading
@@ -8,9 +10,9 @@ class ProjectAuthorization < ActiveRecord::Base
validates :access_level, inclusion: { in: Gitlab::Access.all_values }, presence: true
validates :user, uniqueness: { scope: [:project, :access_level] }, presence: true
 
def self.select_from_union(union)
select(['project_id', 'MAX(access_level) AS access_level'])
.from("(#{union.to_sql}) #{ProjectAuthorization.table_name}")
def self.select_from_union(relations)
from_union(relations)
.select(['project_id', 'MAX(access_level) AS access_level'])
.group(:project_id)
end
 
Loading
Loading
Loading
Loading
@@ -12,6 +12,7 @@ class Snippet < ActiveRecord::Base
include Spammable
include Editable
include Gitlab::SQL::Pattern
include FromUnion
 
cache_markdown_field :title, pipeline: :single_line
cache_markdown_field :description
Loading
Loading
Loading
Loading
@@ -2,6 +2,7 @@
 
class Todo < ActiveRecord::Base
include Sortable
include FromUnion
 
ASSIGNED = 1
MENTIONED = 2
Loading
Loading
Loading
Loading
@@ -20,6 +20,7 @@ class User < ActiveRecord::Base
include BlocksJsonSerialization
include WithUploads
include OptionallySearch
include FromUnion
 
DEFAULT_NOTIFICATION_LEVEL = :participating
 
Loading
Loading
@@ -286,11 +287,9 @@ class User < ActiveRecord::Base
# user_id - The ID of the user to include.
def self.union_with_user(user_id = nil)
if user_id.present?
union = Gitlab::SQL::Union.new([all, User.unscoped.where(id: user_id)])
# We use "unscoped" here so that any inner conditions are not repeated for
# the outer query, which would be redundant.
User.unscoped.from("(#{union.to_sql}) #{User.table_name}")
User.unscoped.from_union([all, User.unscoped.where(id: user_id)])
else
all
end
Loading
Loading
@@ -354,9 +353,8 @@ class User < ActiveRecord::Base
 
emails = joins(:emails).where(emails: { email: email })
emails = emails.confirmed if confirmed
union = Gitlab::SQL::Union.new([users, emails])
 
from("(#{union.to_sql}) #{table_name}")
from_union([users, emails])
end
 
def filter(filter_name)
Loading
Loading
@@ -676,10 +674,10 @@ class User < ActiveRecord::Base
 
# Returns the groups a user has access to, either through a membership or a project authorization
def authorized_groups
union = Gitlab::SQL::Union
.new([groups.select(:id), authorized_projects.select(:namespace_id)])
Group.where("namespaces.id IN (#{union.to_sql})") # rubocop:disable GitlabSecurity/SqlInjection
Group.from_union([
groups,
authorized_projects.joins(:namespace).select('namespaces.*')
])
end
 
# Returns the groups a user is a member of, either directly or through a parent group
Loading
Loading
@@ -744,7 +742,15 @@ class User < ActiveRecord::Base
end
 
def owned_projects
@owned_projects ||= Project.from("(#{owned_projects_union.to_sql}) AS projects")
@owned_projects ||= Project.from_union(
[
Project.where(namespace: namespace),
Project.joins(:project_authorizations)
.where("projects.namespace_id <> ?", namespace.id)
.where(project_authorizations: { user_id: id, access_level: Gitlab::Access::OWNER })
],
remove_duplicates: false
)
end
 
# Returns projects which user can admin issues on (for example to move an issue to that project).
Loading
Loading
@@ -1138,17 +1144,17 @@ class User < ActiveRecord::Base
 
def ci_owned_runners
@ci_owned_runners ||= begin
project_runner_ids = Ci::RunnerProject
project_runners = Ci::RunnerProject
.where(project: authorized_projects(Gitlab::Access::MAINTAINER))
.select(:runner_id)
.joins(:runner)
.select('ci_runners.*')
 
group_runner_ids = Ci::RunnerNamespace
group_runners = Ci::RunnerNamespace
.where(namespace_id: owned_or_maintainers_groups.select(:id))
.select(:runner_id)
.joins(:runner)
.select('ci_runners.*')
 
union = Gitlab::SQL::Union.new([project_runner_ids, group_runner_ids])
Ci::Runner.where("ci_runners.id IN (#{union.to_sql})") # rubocop:disable GitlabSecurity/SqlInjection
Ci::Runner.from_union([project_runners, group_runners])
end
end
 
Loading
Loading
@@ -1380,15 +1386,6 @@ class User < ActiveRecord::Base
Gitlab::CurrentSettings.usage_stats_set_by_user_id == self.id
end
 
def owned_projects_union
Gitlab::SQL::Union.new([
Project.where(namespace: namespace),
Project.joins(:project_authorizations)
.where("projects.namespace_id <> ?", namespace.id)
.where(project_authorizations: { user_id: id, access_level: Gitlab::Access::OWNER })
], remove_duplicates: false)
end
# Added according to https://github.com/plataformatec/devise/blob/7df57d5081f9884849ca15e4fde179ef164a575f/README.md#activejob-integration
def send_devise_notification(notification, *args)
return true unless can?(:receive_notifications)
Loading
Loading
Loading
Loading
@@ -34,13 +34,13 @@ module Labels
 
# rubocop: disable CodeReuse/ActiveRecord
def labels_to_transfer
label_ids = []
label_ids << group_labels_applied_to_issues.select(:id)
label_ids << group_labels_applied_to_merge_requests.select(:id)
union = Gitlab::SQL::Union.new(label_ids)
Label.where("labels.id IN (#{union.to_sql})").reorder(nil).uniq # rubocop:disable GitlabSecurity/SqlInjection
Label
.from_union([
group_labels_applied_to_issues,
group_labels_applied_to_merge_requests
])
.reorder(nil)
.uniq
end
# rubocop: enable CodeReuse/ActiveRecord
 
Loading
Loading
Loading
Loading
@@ -30,8 +30,9 @@ module Gitlab
note_events = event_counts(date_from, :merge_requests)
.having(action: [Event::COMMENTED])
 
union = Gitlab::SQL::Union.new([repo_events, issue_events, mr_events, note_events])
events = Event.find_by_sql(union.to_sql).map(&:attributes)
events = Event
.from_union([repo_events, issue_events, mr_events, note_events])
.map(&:attributes)
 
@activity_dates = events.each_with_object(Hash.new {|h, k| h[k] = 0 }) do |event, activities|
activities[event["date"]] += event["total_amount"]
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment