Skip to content
Snippets Groups Projects
Unverified Commit 5b5a8ca0 authored by Yorick Peterse's avatar Yorick Peterse Committed by Yorick Peterse
Browse files

WIP: Support multiple databases in the DB LB

Changelog: added
parent 4edc6582
No related branches found
No related tags found
No related merge requests found
Showing
with 201 additions and 364 deletions
# frozen_string_literal: true
 
ActiveRecord::Base.singleton_class.attr_accessor :load_balancing_proxy
Gitlab::Database.main.disable_prepared_statements
Gitlab::Application.configure do |config|
config.middleware.use(Gitlab::Database::LoadBalancing::RackMiddleware)
end
 
# This hijacks the "connection" method to ensure both
# `ActiveRecord::Base.connection` and all models use the same load
# balancing proxy.
ActiveRecord::Base.singleton_class.prepend(Gitlab::Database::LoadBalancing::ActiveRecordProxy)
# The load balancer needs to be configured immediately, and re-configured after
# forking. This ensures queries that run before forking use the load balancer,
# and queries running after a fork don't run into any errors when using dead
# database connections.
#
# See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/63485 for more
# information.
setup = proc do
lb = Gitlab::Database::LoadBalancing::LoadBalancer.new(
Gitlab::Database::LoadBalancing.configuration,
primary_only: !Gitlab::Database::LoadBalancing.enable_replicas?
)
ActiveRecord::Base.load_balancing_proxy =
Gitlab::Database::LoadBalancing::ConnectionProxy.new(lb)
# Populate service discovery immediately if it is configured
Gitlab::Database::LoadBalancing.perform_service_discovery
end
setup.call
# Database queries may be run before we fork, so we must set up the load
# balancer as early as possible. When we do fork, we need to make sure all the
# hosts are disconnected.
Gitlab::Cluster::LifecycleEvents.on_before_fork do
Gitlab::Database::LoadBalancing.proxy.load_balancer.disconnect!
end
# Service discovery only needs to run in the worker processes, as the main one
# won't be running many (if any) database queries.
Gitlab::Cluster::LifecycleEvents.on_worker_start do
setup.call
Gitlab::Database::LoadBalancing.start_service_discovery
Gitlab::Database::LoadBalancing::MODELS.each do |model|
# The load balancer needs to be configured immediately, and re-configured
# after forking. This ensures queries that run before forking use the load
# balancer, and queries running after a fork don't run into any errors when
# using dead database connections.
#
# See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/63485 for more
# information.
Gitlab::Database::LoadBalancing::Setup.new(model).setup
# Database queries may be run before we fork, so we must set up the load
# balancer as early as possible. When we do fork, we need to make sure all the
# hosts are disconnected.
Gitlab::Cluster::LifecycleEvents.on_before_fork do
model.connection.disconnect!
end
# Service discovery only needs to run in the worker processes, as the main one
# won't be running many (if any) database queries.
Gitlab::Cluster::LifecycleEvents.on_worker_start do
Gitlab::Database::LoadBalancing::Setup
.new(model, start_service_discovery: true)
.setup
end
end
Loading
Loading
@@ -18,44 +18,30 @@ module LoadBalancing
ActiveRecord::ConnectionNotEstablished
].freeze
 
def self.proxy
ActiveRecord::Base.load_balancing_proxy
end
# Returns a Hash containing the load balancing configuration.
def self.configuration
@configuration ||= Configuration.for_model(ActiveRecord::Base)
end
# The models to enable the load balancer for.
MODELS = [
# We use ActiveRecord::Base instead of ApplicationRecord to ensure
# that code that doesn't use ApplicationRecord also goes through the
# load balancer.
ActiveRecord::Base,
::Ci::CiDatabaseRecord
].freeze
 
# Returns `true` if the use of load balancing replicas should be enabled.
#
# This is disabled for Rake tasks to ensure e.g. database migrations
# always produce consistent results.
def self.enable_replicas?
return false if Gitlab::Runtime.rake?
def self.each_load_balancer
return to_enum(__method__) unless block_given?
 
configured?
end
def self.configured?
configuration.load_balancing_enabled? ||
configuration.service_discovery_enabled?
MODELS.each do |model|
yield model.connection.load_balancer
end
end
 
def self.start_service_discovery
return unless configuration.service_discovery_enabled?
ServiceDiscovery
.new(proxy.load_balancer, **configuration.service_discovery)
.start
# Calls `release_host` on every load balancer yielded by
# `each_load_balancer`.
def self.release_hosts
each_load_balancer(&:release_host)
end
 
def self.perform_service_discovery
return unless configuration.service_discovery_enabled?
ServiceDiscovery
.new(proxy.load_balancer, **configuration.service_discovery)
.perform_service_discovery
# TODO: remove
#
# Returns `ActiveRecord::Base.connection`.
#
# NOTE(review): presumably kept only for callers that still go through
# `LoadBalancing.proxy` -- confirm none remain before removing.
def self.proxy
ActiveRecord::Base.connection
end
 
DB_ROLES = [
Loading
Loading
Loading
Loading
@@ -16,7 +16,7 @@ def self.wrapper
 
inner.call
ensure
::Gitlab::Database::LoadBalancing.proxy.load_balancer.release_host
::Gitlab::Database::LoadBalancing.release_hosts
::Gitlab::Database::LoadBalancing::Session.clear_session
end
end
Loading
Loading
# frozen_string_literal: true
module Gitlab
  module Database
    module LoadBalancing
      # Mixin that overrides `connection` so the object it is mixed into hands
      # out the load balancing proxy instead of its own connection.
      module ActiveRecordProxy
        # Returns whatever `Gitlab::Database::LoadBalancing.proxy` returns at
        # the time of the call.
        def connection
          ::Gitlab::Database::LoadBalancing.proxy
        end
      end
    end
  end
end
Loading
Loading
@@ -66,7 +66,14 @@ def initialize(model, hosts = [])
}
end
 
# Reports whether load balancing replicas should be used for this
# configuration.
#
# Always `false` inside Rake tasks, so that e.g. database migrations always
# produce consistent results. Otherwise the result is truthy when hosts are
# configured or service discovery is enabled.
def load_balancing_enabled?
  !Gitlab::Runtime.rake? && (hosts.any? || service_discovery_enabled?)
end
 
Loading
Loading
Loading
Loading
@@ -12,22 +12,26 @@ class LoadBalancer
 
REPLICA_SUFFIX = '_replica'
 
attr_reader :host_list, :configuration
attr_reader :name, :host_list, :configuration
 
# configuration - An instance of `LoadBalancing::Configuration` that
# contains the configuration details (such as the hosts)
# for this load balancer.
# primary_only - If set, the replicas are ignored and the primary is
# always used.
def initialize(configuration, primary_only: false)
def initialize(configuration)
@configuration = configuration
@primary_only = primary_only
@host_list =
if primary_only
HostList.new([PrimaryHost.new(self)])
else
if configuration.load_balancing_enabled?
HostList.new(configuration.hosts.map { |addr| Host.new(addr, self) })
else
HostList.new([PrimaryHost.new(self)])
end
name = @configuration.model.connection_db_config.name.to_sym
# If no explicit connection names are specified, "primary" is used. We
# normalize this to "main" so callers of this method don't have to
# worry about both.
@name = name == :primary ? :main : name
end
 
def disconnect!(timeout: 120)
Loading
Loading
Loading
Loading
@@ -68,14 +68,10 @@ def stick_if_necessary(env)
end
 
def clear
load_balancer.release_host
::Gitlab::Database::LoadBalancing.release_hosts
::Gitlab::Database::LoadBalancing::Session.clear_session
end
 
def load_balancer
::Gitlab::Database::LoadBalancing.proxy.load_balancer
end
# Determines the sticking namespace and identifier based on the Rack
# environment.
#
Loading
Loading
# frozen_string_literal: true
module Gitlab
  module Database
    module LoadBalancing
      # Configures load balancing for a single model: re-establishes its
      # connection without prepared statements, installs a `ConnectionProxy`
      # as the model's `connection`, and runs service discovery when the
      # model's configuration enables it.
      class Setup
        # model - The model class to set up.
        # start_service_discovery - When true, `ServiceDiscovery#start` is
        #                           called in addition to the one-off
        #                           `perform_service_discovery` call.
        def initialize(model, start_service_discovery: false)
          @model = model
          @start_service_discovery = start_service_discovery
          @configuration = Configuration.for_model(model)
        end

        # Runs all setup steps, in order.
        def setup
          disable_prepared_statements
          setup_load_balancer
          setup_service_discovery
        end

        # Re-establishes the model's connection using its current database
        # configuration with `prepared_statements` forced to `false`.
        def disable_prepared_statements
          current = @model.connection_db_config
          settings = current.configuration_hash.merge(prepared_statements: false)

          @model.establish_connection(
            ActiveRecord::DatabaseConfigurations::HashConfig
              .new(current.env_name, current.name, settings)
          )
        end

        # Builds a load balancer from the model's configuration and exposes it
        # through a `ConnectionProxy` stored in the model's `connection` class
        # attribute.
        def setup_load_balancer
          balancer = LoadBalancer.new(@configuration)

          # A plain `class_attribute` is enough here; it avoids injecting
          # modules and/or exposing unnecessary methods on the model.
          @model.class_attribute(:connection)
          @model.connection = ConnectionProxy.new(balancer)
        end

        # Performs service discovery once for the model's load balancer and,
        # if requested at construction time, starts it. Does nothing unless the
        # configuration reports service discovery as enabled.
        def setup_service_discovery
          return unless @configuration.service_discovery_enabled?

          discovery = ServiceDiscovery.new(
            @model.connection.load_balancer,
            **@configuration.service_discovery
          )

          discovery.perform_service_discovery
          discovery.start if @start_service_discovery
        end
      end
    end
  end
end
Loading
Loading
@@ -30,26 +30,23 @@ def load_balancing_enabled?(worker_class)
end
 
def set_data_consistency_locations!(job)
# Once we add support for multiple databases to our load balancer, we would use something like this:
# job['wal_locations'] = Gitlab::Database::DATABASES.transform_values do |connection|
# connection.load_balancer.primary_write_location
# end
#
job['wal_locations'] = { Gitlab::Database::MAIN_DATABASE_NAME.to_sym => wal_location } if wal_location
end
locations = {}
 
def wal_location
strong_memoize(:wal_location) do
if Session.current.use_primary?
load_balancer.primary_write_location
else
load_balancer.host.database_replica_location
Gitlab::Database::LoadBalancing.each_load_balancer do |lb|
if (location = wal_location_for(lb))
locations[lb.name] = location
end
end
job['wal_locations'] = locations
end
 
def load_balancer
LoadBalancing.proxy.load_balancer
# Returns the WAL location to record for the given load balancer: the
# primary's write location when the current session uses the primary,
# otherwise the replica location of the load balancer's current host.
def wal_location_for(load_balancer)
  return load_balancer.primary_write_location if Session.current.use_primary?

  load_balancer.host.database_replica_location
end
end
end
Loading
Loading
Loading
Loading
@@ -29,7 +29,7 @@ def call(worker, job, _queue)
private
 
def clear
release_hosts
LoadBalancing.release_hosts
Session.clear_session
end
 
Loading
Loading
@@ -90,26 +90,14 @@ def not_yet_retried?(job)
end
 
def all_databases_has_replica_caught_up?(wal_locations)
wal_locations.all? do |_config_name, location|
# Once we add support for multiple databases to our load balancer, we would use something like this:
# Gitlab::Database::DATABASES[config_name].load_balancer.select_up_to_date_host(location)
load_balancer.select_up_to_date_host(location)
LoadBalancing.each_load_balancer.all? do |lb|
if (location = wal_locations[lb.name])
lb.select_up_to_date_host(location)
else
false
end
end
end
def release_hosts
# Once we add support for multiple databases to our load balancer, we would use something like this:
# connection.load_balancer.primary_write_location
#
# Gitlab::Database::DATABASES.values.each do |connection|
# connection.load_balancer.release_host
# end
load_balancer.release_host
end
def load_balancer
LoadBalancing.proxy.load_balancer
end
end
end
end
Loading
Loading
Loading
Loading
@@ -5,7 +5,7 @@
RSpec.describe Gitlab::Database::LoadBalancing::ActionCableCallbacks, :request_store do
describe '.wrapper' do
it 'uses primary and then releases the connection and clears the session' do
expect(Gitlab::Database::LoadBalancing).to receive_message_chain(:proxy, :load_balancer, :release_host)
expect(Gitlab::Database::LoadBalancing).to receive(:release_hosts)
expect(Gitlab::Database::LoadBalancing::Session).to receive(:clear_session)
 
described_class.wrapper.call(
Loading
Loading
@@ -18,7 +18,7 @@
 
context 'with an exception' do
it 'releases the connection and clears the session' do
expect(Gitlab::Database::LoadBalancing).to receive_message_chain(:proxy, :load_balancer, :release_host)
expect(Gitlab::Database::LoadBalancing).to receive(:release_hosts)
expect(Gitlab::Database::LoadBalancing::Session).to receive(:clear_session)
 
expect do
Loading
Loading
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Database::LoadBalancing::ActiveRecordProxy do
  describe '#connection' do
    it 'returns a connection proxy' do
      # Anonymous host object: the mixin only needs something to be mixed
      # into, it does not depend on any ActiveRecord behaviour.
      host = Class.new { include Gitlab::Database::LoadBalancing::ActiveRecordProxy }.new
      proxy = double(:proxy)

      expect(Gitlab::Database::LoadBalancing).to receive(:proxy).and_return(proxy)

      expect(host.connection).to eq(proxy)
    end
  end
end
Loading
Loading
@@ -108,6 +108,14 @@
end
 
describe '#load_balancing_enabled?' do
it 'returns false when running inside a Rake task' do
config = described_class.new(ActiveRecord::Base, %w[foo bar])
allow(Gitlab::Runtime).to receive(:rake?).and_return(true)
expect(config.load_balancing_enabled?).to eq(false)
end
it 'returns true when hosts are configured' do
config = described_class.new(ActiveRecord::Base, %w[foo bar])
 
Loading
Loading
Loading
Loading
@@ -47,10 +47,13 @@ def twice_wrapped_exception(top, middle, original)
end
 
describe '#initialize' do
it 'ignores the hosts when the primary_only option is enabled' do
it 'ignores the hosts when load balancing is disabled' do
config = Gitlab::Database::LoadBalancing::Configuration
.new(ActiveRecord::Base, [db_host])
lb = described_class.new(config, primary_only: true)
allow(config).to receive(:load_balancing_enabled?).and_return(false)
lb = described_class.new(config)
hosts = lb.host_list.hosts
 
expect(hosts.length).to eq(1)
Loading
Loading
@@ -140,10 +143,13 @@ def twice_wrapped_exception(top, middle, original)
.to yield_with_args(ActiveRecord::Base.retrieve_connection)
end
 
it 'uses the primary when the primary_only option is enabled' do
it 'uses the primary when load balancing is disabled' do
config = Gitlab::Database::LoadBalancing::Configuration
.new(ActiveRecord::Base)
lb = described_class.new(config, primary_only: true)
allow(config).to receive(:load_balancing_enabled?).and_return(false)
lb = described_class.new(config)
 
# When no hosts are configured, we don't want to produce any warnings, as
# they aren't useful/too noisy.
Loading
Loading
Loading
Loading
@@ -177,34 +177,18 @@
 
describe '#clear' do
it 'clears the currently used host and session' do
lb = double(:lb)
session = spy(:session)
 
allow(middleware).to receive(:load_balancer).and_return(lb)
expect(lb).to receive(:release_host)
stub_const('Gitlab::Database::LoadBalancing::Session', session)
 
expect(Gitlab::Database::LoadBalancing).to receive(:release_hosts)
middleware.clear
 
expect(session).to have_received(:clear_session)
end
end
 
describe '.load_balancer' do
it 'returns a the load balancer' do
proxy = double(:proxy)
expect(Gitlab::Database::LoadBalancing).to receive(:proxy)
.and_return(proxy)
expect(proxy).to receive(:load_balancer)
middleware.load_balancer
end
end
describe '#sticking_namespaces_and_ids' do
context 'using a Warden request' do
it 'returns the warden user if present' do
Loading
Loading
Loading
Loading
@@ -5,7 +5,6 @@
RSpec.describe Gitlab::Database::LoadBalancing::SidekiqClientMiddleware do
let(:middleware) { described_class.new }
 
let(:load_balancer) { Gitlab::Database::LoadBalancing.proxy.load_balancer }
let(:worker_class) { 'TestDataConsistencyWorker' }
let(:job) { { "job_id" => "a180b47c-3fd6-41b8-81e9-34da61c3400e" } }
 
Loading
Loading
@@ -84,9 +83,15 @@ def perform(*args)
end
 
it 'passes database_replica_location' do
expected_location = { Gitlab::Database::MAIN_DATABASE_NAME.to_sym => location }
expected_location = {}
 
expect(load_balancer).to receive_message_chain(:host, "database_replica_location").and_return(location)
Gitlab::Database::LoadBalancing.each_load_balancer do |lb|
expect(lb.host)
.to receive(:database_replica_location)
.and_return(location)
expected_location[lb.name] = location
end
 
run_middleware
 
Loading
Loading
@@ -102,9 +107,15 @@ def perform(*args)
end
 
it 'passes primary write location', :aggregate_failures do
expected_location = { Gitlab::Database::MAIN_DATABASE_NAME.to_sym => location }
expected_location = {}
 
expect(load_balancer).to receive(:primary_write_location).and_return(location)
Gitlab::Database::LoadBalancing.each_load_balancer do |lb|
expect(lb)
.to receive(:primary_write_location)
.and_return(location)
expected_location[lb.name] = location
end
 
run_middleware
 
Loading
Loading
@@ -136,8 +147,10 @@ def perform(*args)
let(:job) { { "job_id" => "a180b47c-3fd6-41b8-81e9-34da61c3400e", 'wal_locations' => wal_locations } }
 
before do
allow(load_balancer).to receive(:primary_write_location).and_return(new_location)
allow(load_balancer).to receive(:database_replica_location).and_return(new_location)
Gitlab::Database::LoadBalancing.each_load_balancer do |lb|
allow(lb).to receive(:primary_write_location).and_return(new_location)
allow(lb).to receive(:database_replica_location).and_return(new_location)
end
end
 
shared_examples_for 'does not set database location again' do |use_primary|
Loading
Loading
Loading
Loading
@@ -34,7 +34,7 @@
let(:last_write_location) { 'foo' }
 
before do
allow(described_class).to receive(:load_balancer).and_return(lb)
#TODO:allow(described_class).to receive(:load_balancer).and_return(lb)
 
allow(described_class).to receive(:last_write_location_for)
.with(:user, 42)
Loading
Loading
@@ -100,7 +100,7 @@
let(:lb) { double(:lb) }
 
before do
allow(described_class).to receive(:load_balancer).and_return(lb)
#TODO:allow(described_class).to receive(:load_balancer).and_return(lb)
end
 
it 'simply returns if no write location could be found' do
Loading
Loading
@@ -143,7 +143,7 @@
before do
lb = double(:lb, primary_write_location: 'foo')
 
allow(described_class).to receive(:load_balancer).and_return(lb)
#TODO:allow(described_class).to receive(:load_balancer).and_return(lb)
end
 
it 'sticks an entity to the primary', :aggregate_failures do
Loading
Loading
@@ -177,7 +177,7 @@
it 'updates the write location with the load balancer' do
lb = double(:lb, primary_write_location: 'foo')
 
allow(described_class).to receive(:load_balancer).and_return(lb)
#TODO:allow(described_class).to receive(:load_balancer).and_return(lb)
 
expect(described_class).to receive(:set_write_location_for)
.with(:user, 42, 'foo')
Loading
Loading
@@ -210,24 +210,11 @@
end
end
 
describe '.load_balancer' do
it 'returns a the load balancer' do
proxy = double(:proxy)
expect(Gitlab::Database::LoadBalancing).to receive(:proxy)
.and_return(proxy)
expect(proxy).to receive(:load_balancer)
described_class.load_balancer
end
end
describe '.select_caught_up_replicas' do
let(:lb) { double(:lb) }
 
before do
allow(described_class).to receive(:load_balancer).and_return(lb)
#TODO:allow(described_class).to receive(:load_balancer).and_return(lb)
end
 
context 'with no write location' do
Loading
Loading
Loading
Loading
@@ -3,173 +3,32 @@
require 'spec_helper'
 
RSpec.describe Gitlab::Database::LoadBalancing do
describe '.proxy' do
it 'returns the connection proxy' do
proxy = double(:connection_proxy)
describe '.each_load_balancer' do
it 'yields every load balancer to the supplied block' do
lbs = []
 
allow(ActiveRecord::Base)
.to receive(:load_balancing_proxy)
.and_return(proxy)
expect(described_class.proxy).to eq(proxy)
end
end
describe '.configuration' do
it 'returns the configuration for the load balancer' do
raw = ActiveRecord::Base.connection_db_config.configuration_hash
cfg = described_class.configuration
# There isn't much to test here as the load balancing settings might not
# (and likely aren't) set when running tests.
expect(cfg.pool_size).to eq(raw[:pool])
end
end
describe '.enable_replicas?' do
context 'when hosts are specified' do
before do
allow(described_class.configuration)
.to receive(:hosts)
.and_return(%w(foo))
end
it 'returns true' do
expect(described_class.enable_replicas?).to eq(true)
end
it 'returns true when Sidekiq is being used' do
allow(Gitlab::Runtime).to receive(:sidekiq?).and_return(true)
expect(described_class.enable_replicas?).to eq(true)
end
it 'returns false when running inside a Rake task' do
allow(Gitlab::Runtime).to receive(:rake?).and_return(true)
expect(described_class.enable_replicas?).to eq(false)
end
end
context 'when no hosts are specified but service discovery is enabled' do
it 'returns true' do
allow(described_class.configuration).to receive(:hosts).and_return([])
allow(Gitlab::Runtime).to receive(:sidekiq?).and_return(false)
allow(described_class.configuration)
.to receive(:service_discovery_enabled?)
.and_return(true)
expect(described_class.enable_replicas?).to eq(true)
end
end
context 'when no hosts are specified and service discovery is disabled' do
it 'returns false' do
allow(described_class.configuration).to receive(:hosts).and_return([])
allow(described_class.configuration)
.to receive(:service_discovery_enabled?)
.and_return(false)
expect(described_class.enable_replicas?).to eq(false)
described_class.each_load_balancer do |lb|
lbs << lb
end
end
end
 
describe '.configured?' do
it 'returns true when hosts are configured' do
allow(described_class.configuration)
.to receive(:hosts)
.and_return(%w[foo])
expect(described_class.configured?).to eq(true)
expect(lbs.length).to eq(described_class::MODELS.length)
end
 
it 'returns true when service discovery is enabled' do
allow(described_class.configuration).to receive(:hosts).and_return([])
allow(described_class.configuration)
.to receive(:service_discovery_enabled?)
.and_return(true)
it 'returns an Enumerator when no block is given' do
res = described_class.each_load_balancer
 
expect(described_class.configured?).to eq(true)
end
it 'returns false when neither service discovery nor hosts are configured' do
allow(described_class.configuration).to receive(:hosts).and_return([])
allow(described_class.configuration)
.to receive(:service_discovery_enabled?)
.and_return(false)
expect(described_class.configured?).to eq(false)
expect(res.next)
.to be_an_instance_of(Gitlab::Database::LoadBalancing::LoadBalancer)
end
end
 
describe '.start_service_discovery' do
it 'does not start if service discovery is disabled' do
expect(Gitlab::Database::LoadBalancing::ServiceDiscovery)
.not_to receive(:new)
described_class.start_service_discovery
end
it 'starts service discovery if enabled' do
allow(described_class.configuration)
.to receive(:service_discovery_enabled?)
.and_return(true)
instance = double(:instance)
config = Gitlab::Database::LoadBalancing::Configuration
.new(ActiveRecord::Base)
lb = Gitlab::Database::LoadBalancing::LoadBalancer.new(config)
proxy = Gitlab::Database::LoadBalancing::ConnectionProxy.new(lb)
allow(described_class)
.to receive(:proxy)
.and_return(proxy)
expect(Gitlab::Database::LoadBalancing::ServiceDiscovery)
.to receive(:new)
.with(lb, an_instance_of(Hash))
.and_return(instance)
expect(instance)
.to receive(:start)
described_class.start_service_discovery
end
end
describe '.perform_service_discovery' do
it 'does nothing if service discovery is disabled' do
expect(Gitlab::Database::LoadBalancing::ServiceDiscovery)
.not_to receive(:new)
described_class.perform_service_discovery
end
it 'performs service discovery when enabled' do
allow(described_class.configuration)
.to receive(:service_discovery_enabled?)
.and_return(true)
sv = instance_spy(Gitlab::Database::LoadBalancing::ServiceDiscovery)
cfg = Gitlab::Database::LoadBalancing::Configuration
.new(ActiveRecord::Base)
lb = Gitlab::Database::LoadBalancing::LoadBalancer.new(cfg)
proxy = Gitlab::Database::LoadBalancing::ConnectionProxy.new(lb)
allow(described_class)
.to receive(:proxy)
.and_return(proxy)
expect(Gitlab::Database::LoadBalancing::ServiceDiscovery)
.to receive(:new)
.with(lb, cfg.service_discovery)
.and_return(sv)
expect(sv).to receive(:perform_service_discovery)
describe '.release_hosts' do
it 'releases the host of every load balancer' do
described_class.each_load_balancer do |lb|
expect(lb).to receive(:release_host)
end
 
described_class.perform_service_discovery
described_class.release_hosts
end
end
 
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment