Skip to content
Snippets Groups Projects
Commit 691a1fa5 authored by Valery Sizov
Browse files

Geo: Implement PITR recovery before promotion of secondary node

If replication was paused on the secondary, the database must be recovered to
the point in time at which replication was paused before the node is promoted.
parent 0fd1c7a6
No related branches found
No related tags found
No related merge requests found
---
title: 'Geo: Implement PITR recovery before promotion of secondary node'
merge_request: 4636
author:
type: fixed
Loading
Loading
@@ -3,8 +3,14 @@ require 'rainbow/ext/string'
 
module Geo
class PromoteToPrimaryNode
def initialize(base_path, options)
@base_path = base_path
PITR_FILE_NAME = 'geo-pitr-file'.freeze
attr_accessor :base_path, :data_path, :ctl
# Captures the gitlab-ctl context and the command options for this run.
#
# @param instance [Object] gitlab-ctl context; its base_path and data_path
#   are read here and the object itself is kept as ctl
# @param options [Hash] command options (:skip_preflight_checks is consulted
#   by run_preflight_checks)
def initialize(instance, options)
  @ctl = instance
  @options = options
  @base_path = @ctl.base_path
  @data_path = @ctl.data_path
end
 
Loading
Loading
@@ -24,6 +30,10 @@ module Geo
 
private
 
# PostgreSQL version as reported by GitlabCtl::PostgreSQL.postgresql_version
# for data_path. The lookup is done once and the result is cached in
# @postgresql_version for later calls.
#
# @return the cached version value (compared against 12 by write_recovery_settings)
def postgresql_version
  return @postgresql_version if @postgresql_version

  @postgresql_version = GitlabCtl::PostgreSQL.postgresql_version(data_path)
end
def run_preflight_checks
return true if @options[:skip_preflight_checks]
 
Loading
Loading
@@ -61,6 +71,8 @@ module Geo
end
 
def promote_postgresql_to_primary
return if recovery_to_point_in_time
puts
puts 'Promoting the PostgreSQL to primary...'.color(:yellow)
puts
Loading
Loading
@@ -68,6 +80,61 @@ module Geo
run_command('/opt/gitlab/embedded/bin/gitlab-pg-ctl promote', live: true).error!
end
 
# Recovers the database to the LSN recorded in the Geo PITR file, if any.
#
# When an LSN is available, writes the recovery-target settings for it and
# restarts PostgreSQL with `gitlab-ctl restart postgresql`.
#
# @return [true, nil] true when point-in-time recovery was started; nil when
#   no usable LSN was recorded, so the caller should promote normally
# @raise whatever `error!` raises when the restart command fails
def recovery_to_point_in_time
  # Normalise first: a missing file (nil) and a blank or whitespace-only
  # value both mean there is no point to recover to.
  lsn = lsn_from_pitr_file.to_s.strip
  return if lsn.empty?

  puts
  puts "Recovery to point #{lsn}...".color(:yellow)
  puts

  write_recovery_settings(lsn)
  run_command('gitlab-ctl restart postgresql', live: true).error!

  # The caller skips the plain promote only on a truthy result, and `error!`
  # yields nil on success, so report success explicitly.
  true
end
# Reads the LSN stored in the Geo PITR file under data_path.
#
# @return [String, nil] the stored LSN with surrounding whitespace removed,
#   or nil when the file does not exist or holds no value
def lsn_from_pitr_file
  geo_pitr_file = "#{data_path}/postgresql/data/#{PITR_FILE_NAME}"
  return unless File.exist?(geo_pitr_file)

  # Strip so a trailing newline never ends up inside the quoted
  # recovery_target_lsn value, and treat an empty file as "no LSN" instead of
  # returning a truthy empty string.
  lsn = File.read(geo_pitr_file).strip
  lsn.empty? ? nil : lsn
end
# Builds the recovery-target settings text used for point-in-time recovery.
#
# The returned text sets recovery_target_lsn to the given LSN and
# recovery_target_action to 'promote', one setting per line.
#
# @param lsn [String] LSN to recover to; interpolated verbatim between single quotes
# @return [String] the settings text
def built_recovery_setting_for_pitr(lsn)
<<-EOF
recovery_target_lsn = '#{lsn}'
recovery_target_action = 'promote'
EOF
end
# Persists the PITR recovery settings for the given LSN in the location that
# matches the PostgreSQL version: write_geo_config_file for version 12 and
# newer, write_recovery_conf otherwise.
#
# NOTE(review): the version-12 message mentions postgresql.conf while
# write_geo_config_file targets gitlab-geo.conf; presumably that file is
# included from postgresql.conf. Confirm.
#
# @param lsn [String] LSN to recover to
# @return the result of the writer that was invoked
def write_recovery_settings(lsn)
  pitr_settings = built_recovery_setting_for_pitr(lsn)

  if postgresql_version >= 12
    puts "* PostgreSQL 12 or newer. Writing settings to postgresql.conf".color(:green)
    return write_geo_config_file(pitr_settings)
  end

  puts "* Writing recovery.conf".color(:green)
  write_recovery_conf(pitr_settings)
end
# Writes the given settings to gitlab-geo.conf in the PostgreSQL data
# directory under data_path, replacing any previous content.
#
# @param settings [String] settings text to store
# @return [Integer] number of bytes written
def write_geo_config_file(settings)
  File.write(format('%s/postgresql/data/gitlab-geo.conf', data_path), settings)
end
# Appends the given settings to recovery.conf in the PostgreSQL data
# directory under data_path, creating the file when it does not exist.
#
# @param settings [String] settings text to append
# @return [Integer] number of bytes written
def write_recovery_conf(settings)
  recovery_conf = "#{data_path}/postgresql/data/recovery.conf"

  # Appending straight after an unterminated last line would merge that line
  # with the first new setting, so start on a fresh line in that case.
  # File.size? is nil for a missing or empty file, which needs no separator.
  unterminated = File.size?(recovery_conf) && !File.read(recovery_conf).end_with?("\n")
  settings = "\n#{settings}" if unterminated

  File.write(recovery_conf, settings, mode: "a")
end
def reconfigure
puts
puts 'Reconfiguring...'.color(:yellow)
Loading
Loading
Loading
Loading
@@ -19,6 +19,8 @@ module Geo
 
def execute!
@replication_process.send(@action.to_sym)
process_pitr_file
rescue Geo::PsqlError => e
puts "Postgres encountered an error: #{e.message}"
exit 1
Loading
Loading
@@ -33,6 +35,30 @@ module Geo
 
private
 
attr_reader :action, :ctl
# Keeps the Geo PITR file in step with the requested action: on 'pause' the
# current LSN is written to the file, on 'resume' the file is removed if it
# exists. Any other action leaves the file untouched.
#
# @return the result of the file operation performed, or nil when none was
def process_pitr_file
  pitr_path = "#{ctl.data_path}/postgresql/data/#{Geo::PromoteToPrimaryNode::PITR_FILE_NAME}"

  case action
  when 'pause'
    puts "* Create Geo PITR file".color(:green)
    File.write(pitr_path, current_lsn)
  when 'resume'
    puts "* Remove Geo PITR file".color(:green)
    File.delete(pitr_path) if File.exist?(pitr_path)
  end
end
# Runs `SELECT pg_last_wal_replay_lsn()` through run_query.
#
# @return the value produced by run_query for that statement
def current_lsn
  replay_lsn_query = 'SELECT pg_last_wal_replay_lsn()'
  run_query(replay_lsn_query)
end
# Executes a SQL statement with gitlab-psql against the `postgres` database
# and returns the command output with surrounding whitespace removed.
#
# NOTE(review): the query is interpolated between single quotes in the shell
# command, so a statement containing a single quote would break it. Only a
# fixed statement is passed in the visible code.
#
# @param query [String] SQL statement to run
# @return [String] stripped command output
def run_query(query)
  psql_command = "gitlab-psql -d postgres -c '#{query}' -q -t"
  raw_output = GitlabCtl::Util.get_command_output(psql_command)
  raw_output.strip
end
def parse_options!
opts_parser = OptionParser.new do |opts|
opts.banner = "Usage: gitlab-ctl replication-process-#{@action} [options]"
Loading
Loading
Loading
Loading
@@ -18,7 +18,7 @@ require "#{base_path}/embedded/service/omnibus-ctl-ee/lib/geo/promote_to_primary
#
 
# Registers the 'promote-to-primary-node' command under the 'gitlab-geo'
# category. The block builds a Geo::PromoteToPrimaryNode with the ctl options
# and runs it.
# NOTE(review): two constructor calls appear below, one passing base_path and
# one passing self. This looks like the removed and added lines of a rendered
# diff; confirm that only the `self` form is meant to remain.
add_command_under_category('promote-to-primary-node', 'gitlab-geo', 'Promote to primary node', 2) do |cmd_name, *args|
Geo::PromoteToPrimaryNode.new(base_path, get_ctl_options).execute
Geo::PromoteToPrimaryNode.new(self, get_ctl_options).execute
end
 
def get_ctl_options
Loading
Loading
Loading
Loading
@@ -7,7 +7,9 @@ require 'gitlab_ctl/util'
RSpec.describe Geo::PromoteToPrimaryNode, '#execute' do
let(:options) { { skip_preflight_checks: true } }
 
subject(:command) { described_class.new(nil, options) }
let(:instance) { double(base_path: '/opt/gitlab/embedded', data_path: '/var/opt/gitlab/postgresql/data') }
subject(:command) { described_class.new(instance, options) }
 
let(:temp_directory) { Dir.mktmpdir }
let(:gitlab_config_path) { File.join(temp_directory, 'gitlab.rb') }
Loading
Loading
@@ -23,6 +25,62 @@ RSpec.describe Geo::PromoteToPrimaryNode, '#execute' do
FileUtils.rm_rf(temp_directory)
end
 
# Exercises the point-in-time-recovery branch taken during promotion, driven
# through command.execute.
describe '#promote_postgresql_to_primary' do
before do
# Answer any STDIN prompt with 'y' and stub the listed command steps so
# they do no real work.
allow(STDIN).to receive(:gets).and_return('y')
allow(command).to receive(:run_preflight_checks).and_return(true)
allow(command).to receive(:reconfigure).and_return(true)
allow(command).to receive(:promote_to_primary).and_return(true)
allow(command).to receive(:success_message).and_return(true)
# run_command yields an object whose error! returns nil, so no shell
# command is actually executed.
allow(command).to receive(:run_command).and_return(double('error!' => nil))
end
context 'when PITR file does not exist' do
# lsn_from_pitr_file is not stubbed here, so the real file lookup runs.
it 'does not run PITR recovery' do
expect(command).not_to receive(:write_recovery_settings)
command.execute
end
end
context 'when PITR file exists' do
let(:lsn) { '16/B374D848' }
before do
# Simulate a recorded LSN without touching the filesystem.
allow(command).to receive(:lsn_from_pitr_file).and_return(lsn)
end
it 'runs PITR recovery' do
expect(command).to receive(:write_recovery_settings).with(lsn)
expect { command.execute }.to output(
/Recovery to point #{lsn}/).to_stdout
end
context 'PG version 11' do
# Below version 12 the settings are expected to go through write_recovery_conf.
it 'runs PITR recovery' do
allow(command).to receive(:postgresql_version).and_return(11)
expect(command).to receive(:write_recovery_conf)
expect { command.execute }.to output(
/\* Writing recovery.conf/).to_stdout
end
end
context 'PG version 12' do
# From version 12 on the settings are expected to go through write_geo_config_file.
it 'runs PITR recovery' do
allow(command).to receive(:postgresql_version).and_return(12)
expect(command).to receive(:write_geo_config_file)
expect { command.execute }.to output(
/\* PostgreSQL 12 or newer. Writing settings to postgresql.conf/).to_stdout
end
end
end
end
describe '#run_preflight_checks' do
before do
allow(STDIN).to receive(:gets).and_return('y')
Loading
Loading
@@ -57,7 +115,7 @@ RSpec.describe Geo::PromoteToPrimaryNode, '#execute' do
 
it 'passes given options to preflight checks command' do
expect(Geo::PromotionPreflightChecks).to receive(:new).with(
nil, options).and_call_original
'/opt/gitlab/embedded', options).and_call_original
 
command.execute
end
Loading
Loading
Loading
Loading
@@ -4,19 +4,26 @@ $LOAD_PATH << './files/gitlab-ctl-commands-ee/lib'
$LOAD_PATH << './files/gitlab-ctl-commands/lib'
 
require 'geo/replication_toggle_command'
require 'geo/promote_to_primary_node'
require 'gitlab_ctl/util'
 
RSpec.describe Geo::ReplicationToggleCommand do
let(:status) { double('Command status', error?: false) }
let(:arguments) { [] }
let(:ctl_instance) { double('gitlab-ctl instance', base_path: '') }
let(:ctl_instance) { double('gitlab-ctl instance', base_path: '', data_path: 'data_path') }
before do
allow_any_instance_of(Geo::ReplicationToggleCommand).to receive(:current_lsn).and_return('16/B374D848')
end
 
describe 'pause' do
subject { described_class.new(ctl_instance, 'pause', arguments) }
 
it 'calls pause' do
expect_any_instance_of(Geo::ReplicationProcess).to receive(:pause)
expect(File).to receive(:write).with('data_path/postgresql/data/geo-pitr-file', '16/B374D848')
 
subject.execute!
expect { subject.execute! }.to output(/Create Geo PITR file/).to_stdout
end
 
it 'rescues and exits if postgres has an error' do
Loading
Loading
@@ -33,8 +40,9 @@ RSpec.describe Geo::ReplicationToggleCommand do
it 'uses the specified database' do
expect(Geo::ReplicationProcess).to receive(:new).with(any_args, { db_name: 'database_i_want' }).and_call_original
expect_any_instance_of(Geo::ReplicationProcess).to receive(:pause)
expect(File).to receive(:write).with('data_path/postgresql/data/geo-pitr-file', '16/B374D848')
 
subject.execute!
expect { subject.execute! }.to output(/Create Geo PITR file/).to_stdout
end
end
end
Loading
Loading
@@ -45,7 +53,7 @@ RSpec.describe Geo::ReplicationToggleCommand do
it 'calls resume' do
expect_any_instance_of(Geo::ReplicationProcess).to receive(:resume)
 
subject.execute!
expect { subject.execute! }.to output(/Remove Geo PITR file/).to_stdout
end
 
it 'rescues and exits if postgres has an error' do
Loading
Loading
@@ -71,7 +79,7 @@ RSpec.describe Geo::ReplicationToggleCommand do
expect(Geo::ReplicationProcess).to receive(:new).with(any_args, { db_name: 'database_i_want' }).and_call_original
expect_any_instance_of(Geo::ReplicationProcess).to receive(:resume)
 
subject.execute!
expect { subject.execute! }.to output(/Remove Geo PITR file/).to_stdout
end
end
end
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment