Skip to content
Snippets Groups Projects
Verified Commit b9a57a93 authored by Yorick Peterse's avatar Yorick Peterse
Browse files

Smarter refreshing of authorized projects

Prior to this commit the refreshing of authorized projects was done in
two steps:

1. Remove existing authorizations
2. Insert a new list of all authorizations

This can lead to a high amount of dead tuples as every time all rows are
being replaced. For example, if a user with 100 authorizations is given
access to a new project this would lead to:

* 100 rows being removed
* 101 new rows being inserted

This commit changes the way this system works so it only removes/inserts
what is necessary. Using the above example this would lead to only 1 new
row being inserted, with the initial 100 being left untouched.

Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/25257
parent a50cd9eb
No related branches found
No related tags found
No related merge requests found
Pipeline #
Loading
Loading
@@ -5,4 +5,17 @@ class ProjectAuthorization < ActiveRecord::Base
validates :project, presence: true
validates :access_level, inclusion: { in: Gitlab::Access.all_values }, presence: true
validates :user, uniqueness: { scope: [:project, :access_level] }, presence: true
def self.insert_authorizations(rows, per_batch = 1000)
rows.each_slice(per_batch) do |slice|
tuples = slice.map do |tuple|
tuple.map { |value| connection.quote(value) }
end
connection.execute <<-EOF.strip_heredoc
INSERT INTO project_authorizations (user_id, project_id, access_level)
VALUES #{tuples.map { |tuple| "(#{tuple.join(', ')})" }.join(', ')}
EOF
end
end
end
Loading
Loading
@@ -443,22 +443,16 @@ class User < ActiveRecord::Base
end
 
def refresh_authorized_projects
transaction do
project_authorizations.delete_all
# project_authorizations_union can return multiple records for the same
# project/user with different access_level so we take row with the maximum
# access_level
project_authorizations.connection.execute <<-SQL
INSERT INTO project_authorizations (user_id, project_id, access_level)
SELECT user_id, project_id, MAX(access_level) AS access_level
FROM (#{project_authorizations_union.to_sql}) sub
GROUP BY user_id, project_id
SQL
unless authorized_projects_populated
update_column(:authorized_projects_populated, true)
end
Users::RefreshAuthorizedProjectsService.new(self).execute
end
def remove_project_authorizations(project_ids)
project_authorizations.where(id: project_ids).delete_all
end
def set_authorized_projects_column
unless authorized_projects_populated
update_column(:authorized_projects_populated, true)
end
end
 
Loading
Loading
@@ -905,18 +899,6 @@ class User < ActiveRecord::Base
 
private
 
# Returns a union query of projects that the user is authorized to access
def project_authorizations_union
relations = [
personal_projects.select("#{id} AS user_id, projects.id AS project_id, #{Gitlab::Access::MASTER} AS access_level"),
groups_projects.select_for_project_authorization,
projects.select_for_project_authorization,
groups.joins(:shared_projects).select_for_project_authorization
]
Gitlab::SQL::Union.new(relations)
end
def ci_projects_union
scope = { access_level: [Gitlab::Access::MASTER, Gitlab::Access::OWNER] }
groups = groups_projects.where(members: scope)
Loading
Loading
module Users
# Service for refreshing the authorized projects of a user.
#
# This particular service class can not be used to update data for the same
# user concurrently. Doing so could lead to an incorrect state. To ensure this
# doesn't happen a caller must synchronize access (e.g. using
# `Gitlab::ExclusiveLease`).
#
# Usage:
#
# user = User.find_by(username: 'alice')
# service = Users::RefreshAuthorizedProjectsService.new(some_user)
# service.execute
class RefreshAuthorizedProjectsService
attr_reader :user
LEASE_TIMEOUT = 1.minute.to_i
# user - The User for which to refresh the authorized projects.
def initialize(user)
@user = user
# We need an up to date User object that has access to all relations that
# may have been created earlier. The only way to ensure this is to reload
# the User object.
user.reload
end
def execute
current = current_authorizations_per_project
fresh = fresh_access_levels_per_project
remove = current.each_with_object([]) do |(project_id, row), array|
# rows not in the new list or with a different access level should be
# removed.
if !fresh[project_id] || fresh[project_id] != row.access_level
array << row.id
end
end
add = fresh.each_with_object([]) do |(project_id, level), array|
# rows not in the old list or with a different access level should be
# added.
if !current[project_id] || current[project_id].access_level != level
array << [user.id, project_id, level]
end
end
update_with_lease(remove, add)
end
# Updates thel ist of authorizations using an exclusive lease.
def update_with_lease(remove = [], add = [])
lease_key = "refresh_authorized_projects:#{user.id}"
lease = Gitlab::ExclusiveLease.new(lease_key, timeout: LEASE_TIMEOUT)
until uuid = lease.try_obtain
# Keep trying until we obtain the lease. If we don't do so we may end up
# not updating the list of authorized projects properly. To prevent
# hammering Redis too much we'll wait for a bit between retries.
sleep(1)
end
begin
update_authorizations(remove, add)
ensure
Gitlab::ExclusiveLease.cancel(lease_key, uuid)
end
end
# Updates the list of authorizations for the current user.
#
# remove - The IDs of the authorization rows to remove.
# add - Rows to insert in the form `[user id, project id, access level]`
def update_authorizations(remove = [], add = [])
return if remove.empty? && add.empty?
User.transaction do
user.remove_project_authorizations(remove) unless remove.empty?
ProjectAuthorization.insert_authorizations(add) unless add.empty?
user.set_authorized_projects_column
end
# Since we batch insert authorization rows, Rails' associations may get
# out of sync. As such we force a reload of the User object.
user.reload
end
def fresh_access_levels_per_project
fresh_authorizations.each_with_object({}) do |row, hash|
hash[row.project_id] = row.access_level
end
end
def current_authorizations_per_project
current_authorizations.each_with_object({}) do |row, hash|
hash[row.project_id] = row
end
end
def current_authorizations
user.project_authorizations.select(:id, :project_id, :access_level)
end
def fresh_authorizations
ProjectAuthorization.
unscoped.
select('project_id, MAX(access_level) AS access_level').
from("(#{project_authorizations_union.to_sql}) #{ProjectAuthorization.table_name}").
group(:project_id)
end
private
# Returns a union query of projects that the user is authorized to access
def project_authorizations_union
relations = [
user.personal_projects.select("#{user.id} AS user_id, projects.id AS project_id, #{Gitlab::Access::MASTER} AS access_level"),
user.groups_projects.select_for_project_authorization,
user.projects.select_for_project_authorization,
user.groups.joins(:shared_projects).select_for_project_authorization
]
Gitlab::SQL::Union.new(relations)
end
end
end
Loading
Loading
@@ -2,8 +2,6 @@ class AuthorizedProjectsWorker
include Sidekiq::Worker
include DedicatedSidekiqQueue
 
LEASE_TIMEOUT = 1.minute.to_i
def self.bulk_perform_async(args_list)
Sidekiq::Client.push_bulk('class' => self, 'args' => args_list)
end
Loading
Loading
@@ -11,24 +9,6 @@ class AuthorizedProjectsWorker
def perform(user_id)
user = User.find_by(id: user_id)
 
refresh(user) if user
end
def refresh(user)
lease_key = "refresh_authorized_projects:#{user.id}"
lease = Gitlab::ExclusiveLease.new(lease_key, timeout: LEASE_TIMEOUT)
until uuid = lease.try_obtain
# Keep trying until we obtain the lease. If we don't do so we may end up
# not updating the list of authorized projects properly. To prevent
# hammering Redis too much we'll wait for a bit between retries.
sleep(1)
end
begin
user.refresh_authorized_projects
ensure
Gitlab::ExclusiveLease.cancel(lease_key, uuid)
end
user.refresh_authorized_projects if user
end
end
require 'spec_helper'
describe ProjectAuthorization do
let(:user) { create(:user) }
let(:project1) { create(:empty_project) }
let(:project2) { create(:empty_project) }
describe '.insert_authorizations' do
it 'inserts the authorizations' do
described_class.
insert_authorizations([[user.id, project1.id, Gitlab::Access::MASTER]])
expect(user.project_authorizations.count).to eq(1)
end
it 'inserts rows in batches' do
described_class.insert_authorizations([
[user.id, project1.id, Gitlab::Access::MASTER],
[user.id, project2.id, Gitlab::Access::MASTER],
], 1)
expect(user.project_authorizations.count).to eq(2)
end
end
end
require 'spec_helper'
describe Users::RefreshAuthorizedProjectsService do
let(:project) { create(:empty_project) }
let(:user) { project.namespace.owner }
let(:service) { described_class.new(user) }
def create_authorization(project, user, access_level = Gitlab::Access::MASTER)
ProjectAuthorization.
create!(project: project, user: user, access_level: access_level)
end
describe '#execute' do
before do
user.project_authorizations.delete_all
end
it 'updates the authorized projects of the user' do
project2 = create(:empty_project)
to_remove = create_authorization(project2, user)
expect(service).to receive(:update_with_lease).
with([to_remove.id], [[user.id, project.id, Gitlab::Access::MASTER]])
service.execute
end
it 'sets the access level of a project to the highest available level' do
to_remove = create_authorization(project, user, Gitlab::Access::DEVELOPER)
expect(service).to receive(:update_with_lease).
with([to_remove.id], [[user.id, project.id, Gitlab::Access::MASTER]])
service.execute
end
end
describe '#update_with_lease', :redis do
it 'refreshes the authorizations using a lease' do
expect_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain).
and_return('foo')
expect(Gitlab::ExclusiveLease).to receive(:cancel).
with(an_instance_of(String), 'foo')
expect(service).to receive(:update_authorizations).with([1], [])
service.update_with_lease([1])
end
end
describe '#update_authorizations' do
it 'does nothing when there are no rows to add and remove' do
expect(user).not_to receive(:remove_project_authorizations)
expect(ProjectAuthorization).not_to receive(:insert_authorizations)
expect(user).not_to receive(:set_authorized_projects_column)
service.update_authorizations([], [])
end
it 'removes authorizations that should be removed' do
authorization = create_authorization(project, user)
service.update_authorizations([authorization.id])
expect(user.project_authorizations).to be_empty
end
it 'inserts authorizations that should be added' do
service.update_authorizations([], [[user.id, project.id, Gitlab::Access::MASTER]])
authorizations = user.project_authorizations
expect(authorizations.length).to eq(1)
expect(authorizations[0].user_id).to eq(user.id)
expect(authorizations[0].project_id).to eq(project.id)
expect(authorizations[0].access_level).to eq(Gitlab::Access::MASTER)
end
it 'populates the authorized projects column' do
# make sure we start with a nil value no matter what the default in the
# factory may be.
user.update(authorized_projects_populated: nil)
service.update_authorizations([], [[user.id, project.id, Gitlab::Access::MASTER]])
expect(user.authorized_projects_populated).to eq(true)
end
end
describe '#fresh_access_levels_per_project' do
let(:hash) { service.fresh_access_levels_per_project }
it 'returns a Hash' do
expect(hash).to be_an_instance_of(Hash)
end
it 'sets the keys to the project IDs' do
expect(hash.keys).to eq([project.id])
end
it 'sets the values to the access levels' do
expect(hash.values).to eq([Gitlab::Access::MASTER])
end
end
describe '#current_authorizations_per_project' do
before { create_authorization(project, user) }
let(:hash) { service.current_authorizations_per_project }
it 'returns a Hash' do
expect(hash).to be_an_instance_of(Hash)
end
it 'sets the keys to the project IDs' do
expect(hash.keys).to eq([project.id])
end
it 'sets the values to the project authorization rows' do
expect(hash.values).to eq([ProjectAuthorization.first])
end
end
describe '#current_authorizations' do
context 'without authorizations' do
it 'returns an empty list' do
expect(service.current_authorizations.empty?).to eq(true)
end
end
context 'with an authorization' do
before { create_authorization(project, user) }
let(:row) { service.current_authorizations.take }
it 'returns the currently authorized projects' do
expect(service.current_authorizations.length).to eq(1)
end
it 'includes the row ID for every row' do
expect(row.id).to be_a_kind_of(Numeric)
end
it 'includes the project ID for every row' do
expect(row.project_id).to eq(project.id)
end
it 'includes the access level for every row' do
expect(row.access_level).to eq(Gitlab::Access::MASTER)
end
end
end
describe '#fresh_authorizations' do
it 'returns the new authorized projects' do
expect(service.fresh_authorizations.length).to eq(1)
end
it 'returns the highest access level' do
project.team.add_guest(user)
rows = service.fresh_authorizations.to_a
expect(rows.length).to eq(1)
expect(rows.first.access_level).to eq(Gitlab::Access::MASTER)
end
context 'every returned row' do
let(:row) { service.fresh_authorizations.take }
it 'includes the project ID' do
expect(row.project_id).to eq(project.id)
end
it 'includes the access level' do
expect(row.access_level).to eq(Gitlab::Access::MASTER)
end
end
end
end
Loading
Loading
@@ -7,27 +7,17 @@ describe AuthorizedProjectsWorker do
it "refreshes user's authorized projects" do
user = create(:user)
 
expect(worker).to receive(:refresh).with(an_instance_of(User))
expect_any_instance_of(User).to receive(:refresh_authorized_projects)
 
worker.perform(user.id)
end
 
context "when the user is not found" do
it "does nothing" do
expect(worker).not_to receive(:refresh)
expect_any_instance_of(User).not_to receive(:refresh_authorized_projects)
 
described_class.new.perform(-1)
end
end
end
describe '#refresh', redis: true do
it 'refreshes the authorized projects of the user' do
user = create(:user)
expect(user).to receive(:refresh_authorized_projects)
worker.refresh(user)
end
end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment