Skip to content
Snippets Groups Projects
Verified Commit 682a7020 authored by Douwe Maan's avatar Douwe Maan Committed by Yorick Peterse
Browse files

Merge branch 'rework-authorizations-performance' into 'master'

Rework project authorizations and nested groups for better performance

See merge request !10885
parent 6f2e590e
No related branches found
No related tags found
1 merge request!12258Update Prometheus Merge Request Metrics page
Pipeline #
Showing
with 414 additions and 161 deletions
Loading
Loading
@@ -64,6 +64,8 @@ class GroupsController < Groups::ApplicationController
end
 
def subgroups
return not_found unless Group.supports_nested_groups?
@nested_groups = GroupsFinder.new(current_user, parent: group).execute
@nested_groups = @nested_groups.search(params[:filter_groups]) if params[:filter_groups].present?
end
Loading
Loading
Loading
Loading
@@ -84,89 +84,6 @@ module Routable
joins(:route).where(wheres.join(' OR '))
end
end
# Builds a relation to find multiple objects that are nested under user membership
#
# Usage:
#
# Klass.member_descendants(1)
#
# Returns an ActiveRecord::Relation.
def member_descendants(user_id)
joins(:route).
joins("INNER JOIN routes r2 ON routes.path LIKE CONCAT(r2.path, '/%')
INNER JOIN members ON members.source_id = r2.source_id
AND members.source_type = r2.source_type").
where('members.user_id = ?', user_id)
end
# Builds a relation to find multiple objects that are nested under user
# membership. Includes the parent, as opposed to `#member_descendants`
# which only includes the descendants.
#
# Usage:
#
# Klass.member_self_and_descendants(1)
#
# Returns an ActiveRecord::Relation.
def member_self_and_descendants(user_id)
joins(:route).
joins("INNER JOIN routes r2 ON routes.path LIKE CONCAT(r2.path, '/%')
OR routes.path = r2.path
INNER JOIN members ON members.source_id = r2.source_id
AND members.source_type = r2.source_type").
where('members.user_id = ?', user_id)
end
# Returns all objects in a hierarchy, where any node in the hierarchy is
# under the user membership.
#
# Usage:
#
# Klass.member_hierarchy(1)
#
# Examples:
#
# Given the following group tree...
#
# _______group_1_______
# | |
# | |
# nested_group_1 nested_group_2
# | |
# | |
# nested_group_1_1 nested_group_2_1
#
#
# ... the following results are returned:
#
# * the user is a member of group 1
# => 'group_1',
# 'nested_group_1', nested_group_1_1',
# 'nested_group_2', 'nested_group_2_1'
#
# * the user is a member of nested_group_2
# => 'group1',
# 'nested_group_2', 'nested_group_2_1'
#
# * the user is a member of nested_group_2_1
# => 'group1',
# 'nested_group_2', 'nested_group_2_1'
#
# Returns an ActiveRecord::Relation.
def member_hierarchy(user_id)
paths = member_self_and_descendants(user_id).pluck('routes.path')
return none if paths.empty?
wheres = paths.map do |path|
"#{connection.quote(path)} = routes.path
OR
#{connection.quote(path)} LIKE CONCAT(routes.path, '/%')"
end
joins(:route).where(wheres.join(' OR '))
end
end
 
def full_name
Loading
Loading
Loading
Loading
@@ -3,7 +3,11 @@ module SelectForProjectAuthorization
 
module ClassMethods
def select_for_project_authorization
select("members.user_id, projects.id AS project_id, members.access_level")
select("projects.id AS project_id, members.access_level")
end
def select_as_master_for_project_authorization
select(["projects.id AS project_id", "#{Gitlab::Access::MASTER} AS access_level"])
end
end
end
Loading
Loading
@@ -37,6 +37,10 @@ class Group < Namespace
after_save :update_two_factor_requirement
 
class << self
def supports_nested_groups?
Gitlab::Database.postgresql?
end
# Searches for groups matching the given query.
#
# This method uses ILIKE on PostgreSQL and LIKE on MySQL.
Loading
Loading
@@ -77,7 +81,7 @@ class Group < Namespace
if current_scope.joins_values.include?(:shared_projects)
joins('INNER JOIN namespaces project_namespace ON project_namespace.id = projects.namespace_id')
.where('project_namespace.share_with_group_lock = ?', false)
.select("members.user_id, projects.id AS project_id, LEAST(project_group_links.group_access, members.access_level) AS access_level")
.select("projects.id AS project_id, LEAST(project_group_links.group_access, members.access_level) AS access_level")
else
super
end
Loading
Loading
Loading
Loading
@@ -176,26 +176,20 @@ class Namespace < ActiveRecord::Base
projects.with_shared_runners.any?
end
 
# Scopes the model on ancestors of the record
# Returns all the ancestors of the current namespaces.
def ancestors
if parent_id
path = route ? route.path : full_path
paths = []
return self.class.none unless parent_id
 
until path.blank?
path = path.rpartition('/').first
paths << path
end
self.class.joins(:route).where('routes.path IN (?)', paths).reorder('routes.path ASC')
else
self.class.none
end
Gitlab::GroupHierarchy.
new(self.class.where(id: parent_id)).
base_and_ancestors
end
 
# Scopes the model on direct and indirect children of the record
# Returns all the descendants of the current namespace.
def descendants
self.class.joins(:route).merge(Route.inside_path(route.path)).reorder('routes.path ASC')
Gitlab::GroupHierarchy.
new(self.class.where(parent_id: id)).
base_and_descendants
end
 
def user_ids_for_project_authorizations
Loading
Loading
Loading
Loading
@@ -6,6 +6,12 @@ class ProjectAuthorization < ActiveRecord::Base
validates :access_level, inclusion: { in: Gitlab::Access.all_values }, presence: true
validates :user, uniqueness: { scope: [:project, :access_level] }, presence: true
 
def self.select_from_union(union)
select(['project_id', 'MAX(access_level) AS access_level']).
from("(#{union.to_sql}) #{ProjectAuthorization.table_name}").
group(:project_id)
end
def self.insert_authorizations(rows, per_batch = 1000)
rows.each_slice(per_batch) do |slice|
tuples = slice.map do |tuple|
Loading
Loading
Loading
Loading
@@ -9,9 +9,12 @@ class User < ActiveRecord::Base
include Sortable
include CaseSensitivity
include TokenAuthenticatable
include IgnorableColumn
 
DEFAULT_NOTIFICATION_LEVEL = :participating
 
ignore_column :authorized_projects_populated
add_authentication_token_field :authentication_token
add_authentication_token_field :incoming_email_token
 
Loading
Loading
@@ -200,7 +203,6 @@ class User < ActiveRecord::Base
scope :blocked, -> { with_states(:blocked, :ldap_blocked) }
scope :external, -> { where(external: true) }
scope :active, -> { with_state(:active).non_internal }
scope :not_in_project, ->(project) { project.users.present? ? where("id not in (:ids)", ids: project.users.map(&:id) ) : all }
scope :without_projects, -> { where('id NOT IN (SELECT DISTINCT(user_id) FROM members WHERE user_id IS NOT NULL AND requested_at IS NULL)') }
scope :todo_authors, ->(user_id, state) { where(id: Todo.where(user_id: user_id, state: state).select(:author_id)) }
scope :order_recent_sign_in, -> { reorder(Gitlab::Database.nulls_last_order('last_sign_in_at', 'DESC')) }
Loading
Loading
@@ -492,23 +494,16 @@ class User < ActiveRecord::Base
Group.where("namespaces.id IN (#{union.to_sql})")
end
 
def nested_groups
Group.member_descendants(id)
end
# Returns a relation of groups the user has access to, including their parent
# and child groups (recursively).
def all_expanded_groups
Group.member_hierarchy(id)
Gitlab::GroupHierarchy.new(groups).all_groups
end
 
def expanded_groups_requiring_two_factor_authentication
all_expanded_groups.where(require_two_factor_authentication: true)
end
 
def nested_groups_projects
Project.joins(:namespace).where('namespaces.parent_id IS NOT NULL').
member_descendants(id)
end
def refresh_authorized_projects
Users::RefreshAuthorizedProjectsService.new(self).execute
end
Loading
Loading
@@ -517,18 +512,15 @@ class User < ActiveRecord::Base
project_authorizations.where(project_id: project_ids).delete_all
end
 
def set_authorized_projects_column
unless authorized_projects_populated
update_column(:authorized_projects_populated, true)
end
end
def authorized_projects(min_access_level = nil)
refresh_authorized_projects unless authorized_projects_populated
# We're overriding an association, so explicitly call super with no arguments or it would be passed as `force_reload` to the association
# We're overriding an association, so explicitly call super with no
# arguments or it would be passed as `force_reload` to the association
projects = super()
projects = projects.where('project_authorizations.access_level >= ?', min_access_level) if min_access_level
if min_access_level
projects = projects.
where('project_authorizations.access_level >= ?', min_access_level)
end
 
projects
end
Loading
Loading
Loading
Loading
@@ -73,12 +73,11 @@ module Users
# remove - The IDs of the authorization rows to remove.
# add - Rows to insert in the form `[user id, project id, access level]`
def update_authorizations(remove = [], add = [])
return if remove.empty? && add.empty? && user.authorized_projects_populated
return if remove.empty? && add.empty?
 
User.transaction do
user.remove_project_authorizations(remove) unless remove.empty?
ProjectAuthorization.insert_authorizations(add) unless add.empty?
user.set_authorized_projects_column
end
 
# Since we batch insert authorization rows, Rails' associations may get
Loading
Loading
@@ -101,38 +100,13 @@ module Users
end
 
def fresh_authorizations
ProjectAuthorization.
unscoped.
select('project_id, MAX(access_level) AS access_level').
from("(#{project_authorizations_union.to_sql}) #{ProjectAuthorization.table_name}").
group(:project_id)
end
private
# Returns a union query of projects that the user is authorized to access
def project_authorizations_union
relations = [
# Personal projects
user.personal_projects.select("#{user.id} AS user_id, projects.id AS project_id, #{Gitlab::Access::MASTER} AS access_level"),
# Projects the user is a member of
user.projects.select_for_project_authorization,
# Projects of groups the user is a member of
user.groups_projects.select_for_project_authorization,
# Projects of subgroups of groups the user is a member of
user.nested_groups_projects.select_for_project_authorization,
# Projects shared with groups the user is a member of
user.groups.joins(:shared_projects).select_for_project_authorization,
# Projects shared with subgroups of groups the user is a member of
user.nested_groups.joins(:shared_projects).select_for_project_authorization
]
klass = if Group.supports_nested_groups?
Gitlab::ProjectAuthorizations::WithNestedGroups
else
Gitlab::ProjectAuthorizations::WithoutNestedGroups
end
 
Gitlab::SQL::Union.new(relations)
klass.new(user).calculate
end
end
end
Loading
Loading
@@ -2,6 +2,7 @@
= nav_link(page: group_path(@group)) do
= link_to group_path(@group) do
Projects
= nav_link(page: subgroups_group_path(@group)) do
= link_to subgroups_group_path(@group) do
Subgroups
- if Group.supports_nested_groups?
= nav_link(page: subgroups_group_path(@group)) do
= link_to subgroups_group_path(@group) do
Subgroups
---
title: >
Project authorizations are calculated much faster when using PostgreSQL, and
nested groups support for MySQL has been removed
merge_request: 10885
author:
# Adds support for WITH statements when using PostgreSQL. The code here is taken
# from https://github.com/shmay/ctes_in_my_pg which at the time of writing has
# not been pushed to RubyGems. The license of this repository is as follows:
#
# The MIT License (MIT)
#
# Copyright (c) 2012 Dan McClain
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
module ActiveRecord
class Relation
class Merger # :nodoc:
def normal_values
NORMAL_VALUES + [:with]
end
end
end
end
module ActiveRecord::Querying
delegate :with, to: :all
end
module ActiveRecord
class Relation
# WithChain objects act as placeholder for queries in which #with does not have any parameter.
# In this case, #with must be chained with #recursive to return a new relation.
class WithChain
def initialize(scope)
@scope = scope
end
# Returns a new relation expressing WITH RECURSIVE
def recursive(*args)
@scope.with_values += args
@scope.recursive_value = true
@scope
end
end
def with_values
@values[:with] || []
end
def with_values=(values)
raise ImmutableRelation if @loaded
@values[:with] = values
end
def recursive_value=(value)
raise ImmutableRelation if @loaded
@values[:recursive] = value
end
def recursive_value
@values[:recursive]
end
def with(opts = :chain, *rest)
if opts == :chain
WithChain.new(spawn)
elsif opts.blank?
self
else
spawn.with!(opts, *rest)
end
end
def with!(opts = :chain, *rest) # :nodoc:
if opts == :chain
WithChain.new(self)
else
self.with_values += [opts] + rest
self
end
end
def build_arel
arel = super()
build_with(arel) if @values[:with]
arel
end
def build_with(arel)
with_statements = with_values.flat_map do |with_value|
case with_value
when String
with_value
when Hash
with_value.map do |name, expression|
case expression
when String
select = Arel::Nodes::SqlLiteral.new "(#{expression})"
when ActiveRecord::Relation, Arel::SelectManager
select = Arel::Nodes::SqlLiteral.new "(#{expression.to_sql})"
end
Arel::Nodes::As.new Arel::Nodes::SqlLiteral.new("\"#{name}\""), select
end
when Arel::Nodes::As
with_value
end
end
unless with_statements.empty?
if recursive_value
arel.with :recursive, with_statements
else
arel.with with_statements
end
end
end
end
end
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
class RescheduleProjectAuthorizations < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
class User < ActiveRecord::Base
self.table_name = 'users'
end
def up
offset = 0
batch = 5000
start = Time.now
loop do
relation = User.where('id > ?', offset)
user_ids = relation.limit(batch).reorder(id: :asc).pluck(:id)
break if user_ids.empty?
offset = user_ids.last
# This will schedule each batch 5 minutes after the previous batch was
# scheduled. This smears out the load over time, instead of immediately
# scheduling a million jobs.
Sidekiq::Client.push_bulk(
'queue' => 'authorized_projects',
'args' => user_ids.zip,
'class' => 'AuthorizedProjectsWorker',
'at' => start.to_i
)
start += 5.minutes
end
end
def down
end
end
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
# This migration depends on code external to it. For example, it relies on
# updating a namespace to also rename directories (uploads, GitLab pages, etc).
# The alternative is to copy hundreds of lines of code into this migration,
# adjust them where needed, etc; something which doesn't work well at all.
class TurnNestedGroupsIntoRegularGroupsForMysql < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
# Set this constant to true if this migration requires downtime.
DOWNTIME = false
def run_migration?
Gitlab::Database.mysql?
end
def up
return unless run_migration?
# For all sub-groups we need to give the right people access. We do this as
# follows:
#
# 1. Get all the ancestors for the current namespace
# 2. Get all the members of these namespaces, along with their higher access
# level
# 3. Give these members access to the current namespace
Namespace.unscoped.where('parent_id IS NOT NULL').find_each do |namespace|
rows = []
existing = namespace.members.pluck(:user_id)
all_members_for(namespace).each do |member|
next if existing.include?(member[:user_id])
rows << {
access_level: member[:access_level],
source_id: namespace.id,
source_type: 'Namespace',
user_id: member[:user_id],
notification_level: 3, # global
type: 'GroupMember',
created_at: Time.current,
updated_at: Time.current
}
end
bulk_insert_members(rows)
# This method relies on the parent to determine the proper path.
# Because we reset "parent_id" this method will not return the right path
# when moving namespaces.
full_path_was = namespace.send(:full_path_was)
namespace.define_singleton_method(:full_path_was) { full_path_was }
namespace.update!(parent_id: nil, path: new_path_for(namespace))
end
end
def down
# There is no way to go back from regular groups to nested groups.
end
# Generates a new (unique) path for a namespace.
def new_path_for(namespace)
counter = 1
base = namespace.full_path.tr('/', '-')
new_path = base
while Namespace.unscoped.where(path: new_path).exists?
new_path = base + "-#{counter}"
counter += 1
end
new_path
end
# Returns an Array containing all the ancestors of the current namespace.
#
# This method is not particularly efficient, but it's probably still faster
# than using the "routes" table. Most importantly of all, it _only_ depends
# on the namespaces table and the "parent_id" column.
def ancestors_for(namespace)
ancestors = []
current = namespace
while current&.parent_id
# We're using find_by(id: ...) here to deal with cases where the
# parent_id may point to a missing row.
current = Namespace.unscoped.select([:id, :parent_id]).
find_by(id: current.parent_id)
ancestors << current.id if current
end
ancestors
end
# Returns a relation containing all the members that have access to any of
# the current namespace's parent namespaces.
def all_members_for(namespace)
Member.
unscoped.
select(['user_id', 'MAX(access_level) AS access_level']).
where(source_type: 'Namespace', source_id: ancestors_for(namespace)).
group(:user_id)
end
def bulk_insert_members(rows)
return if rows.empty?
keys = rows.first.keys
tuples = rows.map do |row|
row.map { |(_, value)| connection.quote(value) }
end
execute <<-EOF.strip_heredoc
INSERT INTO members (#{keys.join(', ')})
VALUES #{tuples.map { |tuple| "(#{tuple.join(', ')})" }.join(', ')}
EOF
end
end
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
class AddIndexProjectGroupLinksGroupId < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
# Set this constant to true if this migration requires downtime.
DOWNTIME = false
disable_ddl_transaction!
def up
add_concurrent_index :project_group_links, :group_id
end
def down
remove_concurrent_index :project_group_links, :group_id
end
end
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
class RemoveUsersAuthorizedProjectsPopulated < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
# Set this constant to true if this migration requires downtime.
DOWNTIME = false
disable_ddl_transaction!
def change
remove_column :users, :authorized_projects_populated, :boolean
end
end
Loading
Loading
@@ -923,6 +923,8 @@ ActiveRecord::Schema.define(version: 20170606202615) do
t.date "expires_at"
end
 
add_index "project_group_links", ["group_id"], name: "index_project_group_links_on_group_id", using: :btree
create_table "project_import_data", force: :cascade do |t|
t.integer "project_id"
t.text "data"
Loading
Loading
@@ -1350,7 +1352,6 @@ ActiveRecord::Schema.define(version: 20170606202615) do
t.boolean "external", default: false
t.string "incoming_email_token"
t.string "organization"
t.boolean "authorized_projects_populated"
t.boolean "require_two_factor_authentication_from_group", default: false, null: false
t.integer "two_factor_grace_period", default: 48, null: false
t.boolean "ghost"
Loading
Loading
Loading
Loading
@@ -13,6 +13,15 @@ up to 20 levels of nested groups, which among other things can help you to:
- **Make it easier to manage people and control visibility.** Give people
different [permissions][] depending on their group [membership](#membership).
 
## Database Requirements
Nested groups are only supported when you use PostgreSQL. Supporting nested
groups on MySQL in an efficient way is not possible due to MySQL's limitations.
See the following links for more information:
* <https://gitlab.com/gitlab-org/gitlab-ce/issues/30472>
* <https://gitlab.com/gitlab-org/gitlab-ce/merge_requests/10885>
## Overview
 
A group can have many subgroups inside it, and at the same time a group can have
Loading
Loading
Loading
Loading
@@ -145,7 +145,10 @@ module API
expose :web_url
expose :request_access_enabled
expose :full_name, :full_path
expose :parent_id
if ::Group.supports_nested_groups?
expose :parent_id
end
 
expose :statistics, if: :statistics do
with_options format_with: -> (value) { value.to_i } do
Loading
Loading
Loading
Loading
@@ -70,7 +70,11 @@ module API
params do
requires :name, type: String, desc: 'The name of the group'
requires :path, type: String, desc: 'The path of the group'
optional :parent_id, type: Integer, desc: 'The parent group id for creating nested group'
if ::Group.supports_nested_groups?
optional :parent_id, type: Integer, desc: 'The parent group id for creating nested group'
end
use :optional_params
end
post do
Loading
Loading
Loading
Loading
@@ -133,7 +133,10 @@ module API
expose :web_url
expose :request_access_enabled
expose :full_name, :full_path
expose :parent_id
if ::Group.supports_nested_groups?
expose :parent_id
end
 
expose :statistics, if: :statistics do
with_options format_with: -> (value) { value.to_i } do
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment