Skip to content
Snippets Groups Projects
Commit 26ac691a authored by Paweł Chojnacki's avatar Paweł Chojnacki Committed by Rémy Coutable
Browse files

Instrument Unicorn with Ruby exporter

parent 53c626bc
No related branches found
No related tags found
No related merge requests found
Showing with 425 additions and 47 deletions
Loading
@@ -285,6 +285,7 @@ group :metrics do
Loading
@@ -285,6 +285,7 @@ group :metrics do
   
# Prometheus # Prometheus
gem 'prometheus-client-mmap', '~>0.7.0.beta5' gem 'prometheus-client-mmap', '~>0.7.0.beta5'
gem 'raindrops', '~> 0.18'
end end
   
group :development do group :development do
Loading
Loading
Loading
@@ -599,7 +599,7 @@ GEM
Loading
@@ -599,7 +599,7 @@ GEM
premailer-rails (1.9.7) premailer-rails (1.9.7)
actionmailer (>= 3, < 6) actionmailer (>= 3, < 6)
premailer (~> 1.7, >= 1.7.9) premailer (~> 1.7, >= 1.7.9)
prometheus-client-mmap (0.7.0.beta5) prometheus-client-mmap (0.7.0.beta7)
mmap2 (~> 2.2.6) mmap2 (~> 2.2.6)
pry (0.10.4) pry (0.10.4)
coderay (~> 1.1.0) coderay (~> 1.1.0)
Loading
@@ -658,7 +658,7 @@ GEM
Loading
@@ -658,7 +658,7 @@ GEM
thor (>= 0.18.1, < 2.0) thor (>= 0.18.1, < 2.0)
rainbow (2.2.2) rainbow (2.2.2)
rake rake
raindrops (0.17.0) raindrops (0.18.0)
rake (10.5.0) rake (10.5.0)
rblineprof (0.3.6) rblineprof (0.3.6)
debugger-ruby_core_source (~> 1.3) debugger-ruby_core_source (~> 1.3)
Loading
@@ -1062,6 +1062,7 @@ DEPENDENCIES
Loading
@@ -1062,6 +1062,7 @@ DEPENDENCIES
rails-deprecated_sanitizer (~> 1.0.3) rails-deprecated_sanitizer (~> 1.0.3)
rails-i18n (~> 4.0.9) rails-i18n (~> 4.0.9)
rainbow (~> 2.2) rainbow (~> 2.2)
raindrops (~> 0.18)
rblineprof (~> 0.3.6) rblineprof (~> 0.3.6)
rdoc (~> 4.2) rdoc (~> 4.2)
recaptcha (~> 3.0) recaptcha (~> 3.0)
Loading
Loading
Loading
@@ -543,6 +543,10 @@ production: &base
Loading
@@ -543,6 +543,10 @@ production: &base
# enabled: true # enabled: true
# host: localhost # host: localhost
# port: 3808 # port: 3808
prometheus:
# Time between sampling of unicorn socket metrics, in seconds
# unicorn_sampler_interval: 10
   
# #
# 5. Extra customization # 5. Extra customization
Loading
Loading
Loading
@@ -494,6 +494,12 @@ Settings.webpack.dev_server['enabled'] ||= false
Loading
@@ -494,6 +494,12 @@ Settings.webpack.dev_server['enabled'] ||= false
Settings.webpack.dev_server['host'] ||= 'localhost' Settings.webpack.dev_server['host'] ||= 'localhost'
Settings.webpack.dev_server['port'] ||= 3808 Settings.webpack.dev_server['port'] ||= 3808
   
#
# Prometheus metrics settings
#
Settings['prometheus'] ||= Settingslogic.new({})
Settings.prometheus['unicorn_sampler_interval'] ||= 10
# #
# Testing settings # Testing settings
# #
Loading
Loading
Loading
@@ -119,6 +119,13 @@ def instrument_classes(instrumentation)
Loading
@@ -119,6 +119,13 @@ def instrument_classes(instrumentation)
end end
# rubocop:enable Metrics/AbcSize # rubocop:enable Metrics/AbcSize
   
# Create the single UnicornSampler instance with the configured sampling
# interval (in seconds) and start it.
Gitlab::Metrics::UnicornSampler.initialize_instance(Settings.prometheus.unicorn_sampler_interval).start
# Register the Rack middleware that records request metrics at position 1
# of the middleware stack.
Gitlab::Application.configure do |config|
# 0 should be Sentry to catch errors in this middleware
config.middleware.insert(1, Gitlab::Metrics::ConnectionRackMiddleware)
end
if Gitlab::Metrics.enabled? if Gitlab::Metrics.enabled?
require 'pathname' require 'pathname'
require 'influxdb' require 'influxdb'
Loading
@@ -175,7 +182,7 @@ if Gitlab::Metrics.enabled?
Loading
@@ -175,7 +182,7 @@ if Gitlab::Metrics.enabled?
   
GC::Profiler.enable GC::Profiler.enable
   
Gitlab::Metrics::Sampler.new.start Gitlab::Metrics::InfluxSampler.initialize_instance.start
   
module TrackNewRedisConnections module TrackNewRedisConnections
def connect(*args) def connect(*args)
Loading
Loading
require 'logger'
module Gitlab
  module Metrics
    # Base class for samplers that periodically collect data on a background
    # thread.
    #
    # Subclasses implement #sample and may override #enabled?. A single
    # instance per class can be registered with .initialize_instance; that
    # instance is stopped automatically when the process exits.
    class BaseSampler
      # Creates and registers the singleton instance of this class.
      #
      # args - Arguments forwarded to .new.
      #
      # Returns the newly created instance.
      # Raises RuntimeError when an instance has already been initialized.
      def self.initialize_instance(*args)
        raise "#{name} singleton instance already initialized" if @instance

        @instance = new(*args)
        at_exit(&@instance.method(:stop))
        @instance
      end

      # Returns the instance registered by .initialize_instance, or nil.
      def self.instance
        @instance
      end

      attr_reader :running

      # interval - The sampling interval in seconds.
      def initialize(interval)
        interval_half = interval.to_f / 2

        @interval = interval
        @interval_steps = (-interval_half..interval_half).step(0.1).to_a
        @last_step = nil
        @mutex = Mutex.new
      end

      # Whether this sampler should run at all. Subclasses override this to
      # disable sampling when their prerequisites are missing.
      def enabled?
        true
      end

      # Starts the sampling thread unless the sampler is disabled or already
      # running.
      #
      # Returns the sampling Thread when one was started, nil otherwise.
      def start
        return unless enabled?

        @mutex.synchronize do
          return if running

          @running = true
          @thread = Thread.new do
            sleep(sleep_interval)

            while running
              safe_sample
              sleep(sleep_interval)
            end
          end
        end
      end

      # Stops the sampling thread and waits for it to finish. Does nothing
      # when the sampler is not running.
      def stop
        @mutex.synchronize do
          return unless running

          @running = false

          if @thread
            # A thread cannot join itself (Thread#join raises ThreadError), so
            # only wake up and wait for the sampler when called from elsewhere.
            unless @thread == Thread.current
              begin
                @thread.wakeup if @thread.alive?
              rescue ThreadError
                # The thread finished between the alive? check and the wakeup.
              end

              @thread.join
            end

            @thread = nil
          end
        end
      end

      # Runs #sample, logging any StandardError and ending the sampling loop
      # instead of letting the error escape.
      def safe_sample
        sample
      rescue => e
        Rails.logger.warn("#{self.class}: #{e}, stopping")

        # This runs on the sampler thread itself, so #stop must not be called
        # here: it would try to join the current thread, and it would deadlock
        # if another thread is already inside #stop holding the mutex while
        # joining this one. Clearing the flag is enough to end the loop.
        @running = false
      end

      # Collects one sample. Must be implemented by subclasses.
      def sample
        raise NotImplementedError
      end

      # Returns the sleep interval with a random adjustment.
      #
      # The random adjustment is put in place to ensure we:
      #
      # 1. Don't generate samples at the exact same interval every time (thus
      #    potentially missing anything that happens in between samples).
      # 2. Don't sample data at the same interval two times in a row.
      def sleep_interval
        # Very short intervals produce a single candidate step; there is
        # nothing to alternate with, and searching for a "different" step
        # would never terminate.
        return @interval + @interval_steps.first if @interval_steps.size == 1

        while step = @interval_steps.sample
          if step != @last_step
            @last_step = step

            return @interval + @last_step
          end
        end
      end
    end
  end
end
module Gitlab
  module Metrics
    # Rack middleware that counts requests, responses and uncaught errors, and
    # observes how long the rest of the stack takes to handle each request.
    class ConnectionRackMiddleware
      class << self
        # Counter incremented once per incoming request.
        def rack_request_count
          @rack_request_count ||= Gitlab::Metrics.counter(:rack_request, 'Rack request count')
        end

        # Counter incremented once per response returned by the wrapped app.
        def rack_response_count
          @rack_response_count ||= Gitlab::Metrics.counter(:rack_response, 'Rack response count')
        end

        # Counter incremented when the wrapped app raises.
        def rack_uncaught_errors_count
          @rack_uncaught_errors_count ||= Gitlab::Metrics.counter(
            :rack_uncaught_errors,
            'Rack connections handling uncaught errors count'
          )
        end

        # Histogram of request handling times.
        def rack_execution_time
          @rack_execution_time ||= Gitlab::Metrics.histogram(
            :rack_execution_time,
            'Rack connection handling execution time',
            {},
            [0.05, 0.1, 0.25, 0.5, 0.7, 1, 1.5, 2, 2.5, 3, 5, 7, 10]
          )
        end
      end

      def initialize(app)
        @app = app
      end

      # Handles a request, recording metrics around the wrapped application.
      #
      # env - The Rack environment hash.
      #
      # Returns the [status, headers, body] triple produced by the app.
      def call(env)
        verb = env['REQUEST_METHOD'].downcase
        began_at = Time.now.to_f

        instrument(verb, began_at) { @app.call(env) }
      end

      private

      # Yields to the wrapped application while updating the counters and the
      # execution time histogram; errors are counted and re-raised.
      def instrument(verb, began_at)
        metrics = ConnectionRackMiddleware

        metrics.rack_request_count.increment(method: verb)
        status, headers, body = yield
        metrics.rack_response_count.increment(method: verb, status: status)

        [status, headers, body]
      rescue
        ConnectionRackMiddleware.rack_uncaught_errors_count.increment
        raise
      ensure
        duration = Time.now.to_f - began_at
        ConnectionRackMiddleware.rack_execution_time.observe({}, duration)
      end
    end
  end
end
Loading
@@ -5,14 +5,11 @@ module Gitlab
Loading
@@ -5,14 +5,11 @@ module Gitlab
# This class is used to gather statistics that can't be directly associated # This class is used to gather statistics that can't be directly associated
# with a transaction such as system memory usage, garbage collection # with a transaction such as system memory usage, garbage collection
# statistics, etc. # statistics, etc.
class Sampler class InfluxSampler < BaseSampler
# interval - The sampling interval in seconds. # interval - The sampling interval in seconds.
def initialize(interval = Metrics.settings[:sample_interval]) def initialize(interval = Metrics.settings[:sample_interval])
interval_half = interval.to_f / 2 super(interval)
@last_step = nil
@interval = interval
@interval_steps = (-interval_half..interval_half).step(0.1).to_a
@last_step = nil
   
@metrics = [] @metrics = []
   
Loading
@@ -26,18 +23,6 @@ module Gitlab
Loading
@@ -26,18 +23,6 @@ module Gitlab
end end
end end
   
def start
Thread.new do
Thread.current.abort_on_exception = true
loop do
sleep(sleep_interval)
sample
end
end
end
def sample def sample
sample_memory_usage sample_memory_usage
sample_file_descriptors sample_file_descriptors
Loading
@@ -86,7 +71,7 @@ module Gitlab
Loading
@@ -86,7 +71,7 @@ module Gitlab
end end
   
def sample_gc def sample_gc
time = GC::Profiler.total_time * 1000.0 time = GC::Profiler.total_time * 1000.0
stats = GC.stat.merge(total_time: time) stats = GC.stat.merge(total_time: time)
   
# We want the difference of GC runs compared to the last sample, not the # We want the difference of GC runs compared to the last sample, not the
Loading
@@ -111,23 +96,6 @@ module Gitlab
Loading
@@ -111,23 +96,6 @@ module Gitlab
def sidekiq? def sidekiq?
Sidekiq.server? Sidekiq.server?
end end
# Returns the sleep interval with a random adjustment.
#
# The random adjustment is put in place to ensure we:
#
# 1. Don't generate samples at the exact same interval every time (thus
# potentially missing anything that happens in between samples).
# 2. Don't sample data at the same interval two times in a row.
def sleep_interval
while step = @interval_steps.sample
if step != @last_step
@last_step = step
return @interval + @last_step
end
end
end
end end
end end
end end
Loading
@@ -29,8 +29,8 @@ module Gitlab
Loading
@@ -29,8 +29,8 @@ module Gitlab
provide_metric(name) || registry.summary(name, docstring, base_labels) provide_metric(name) || registry.summary(name, docstring, base_labels)
end end
   
def gauge(name, docstring, base_labels = {}) def gauge(name, docstring, base_labels = {}, multiprocess_mode = :all)
provide_metric(name) || registry.gauge(name, docstring, base_labels) provide_metric(name) || registry.gauge(name, docstring, base_labels, multiprocess_mode)
end end
   
def histogram(name, docstring, base_labels = {}, buckets = ::Prometheus::Client::Histogram::DEFAULT_BUCKETS) def histogram(name, docstring, base_labels = {}, buckets = ::Prometheus::Client::Histogram::DEFAULT_BUCKETS)
Loading
Loading
module Gitlab
  module Metrics
    # Sampler that records the active and queued connection counts of the
    # sockets Unicorn listens on, as reported by Raindrops.
    class UnicornSampler < BaseSampler
      # Gauge holding the number of active connections per listener.
      def unicorn_active_connections
        @unicorn_active_connections ||=
          Gitlab::Metrics.gauge(:unicorn_active_connections, 'Unicorn active connections', {}, :max)
      end

      # Gauge holding the number of queued connections per listener.
      def unicorn_queued_connections
        @unicorn_queued_connections ||=
          Gitlab::Metrics.gauge(:unicorn_queued_connections, 'Unicorn queued connections', {}, :max)
      end

      def enabled?
        # Raindrops::Linux.tcp_listener_stats is only present on Linux
        unicorn_with_listeners? && Raindrops::Linux.respond_to?(:tcp_listener_stats)
      end

      # Reads the listener statistics for TCP sockets first, then for UNIX
      # sockets, and stores them in the gauges.
      def sample
        record_stats('tcp', Raindrops::Linux.tcp_listener_stats(tcp_listeners))
        record_stats('unix', Raindrops::Linux.unix_listener_stats(unix_listeners))
      end

      private

      # Sets both gauges for every address in the given stats collection.
      def record_stats(socket_type, stats_by_address)
        stats_by_address.each do |address, listener_stats|
          labels = { type: socket_type, address: address }

          unicorn_active_connections.set(labels, listener_stats.active)
          unicorn_queued_connections.set({ type: socket_type, address: address }, listener_stats.queued)
        end
      end

      # Listener names that look like "host:port".
      def tcp_listeners
        @tcp_listeners ||= Unicorn.listener_names.grep(%r{\A[^/]+:\d+\z})
      end

      # Every listener name that is not a TCP listener.
      def unix_listeners
        @unix_listeners ||= Unicorn.listener_names - tcp_listeners
      end

      def unicorn_with_listeners?
        return unless defined?(Unicorn)

        Unicorn.listener_names.any?
      end
    end
  end
end
require 'spec_helper' require 'spec_helper'
require_relative '../../config/initializers/8_metrics'
   
describe 'instrument_classes', lib: true do describe 'instrument_classes', lib: true do
let(:config) { double(:config) } let(:config) { double(:config) }
   
let(:unicorn_sampler) { double(:unicorn_sampler) }
let(:influx_sampler) { double(:influx_sampler) }
before do before do
allow(config).to receive(:instrument_method) allow(config).to receive(:instrument_method)
allow(config).to receive(:instrument_methods) allow(config).to receive(:instrument_methods)
allow(config).to receive(:instrument_instance_method) allow(config).to receive(:instrument_instance_method)
allow(config).to receive(:instrument_instance_methods) allow(config).to receive(:instrument_instance_methods)
allow(Gitlab::Metrics::UnicornSampler).to receive(:initialize_instance).and_return(unicorn_sampler)
allow(Gitlab::Metrics::InfluxSampler).to receive(:initialize_instance).and_return(influx_sampler)
allow(unicorn_sampler).to receive(:start)
allow(influx_sampler).to receive(:start)
allow(Gitlab::Application).to receive(:configure)
end end
   
it 'can autoload and instrument all files' do it 'can autoload and instrument all files' do
require_relative '../../config/initializers/8_metrics'
expect { instrument_classes(config) }.not_to raise_error expect { instrument_classes(config) }.not_to raise_error
end end
end end
require 'spec_helper'
# Specs for the Rack middleware that records request, response, uncaught
# error and execution time metrics.
describe Gitlab::Metrics::ConnectionRackMiddleware do
  let(:app) { double('app') }

  subject { described_class.new(app) }

  # Freeze time so the elapsed time observed by the middleware is exactly the
  # amount the examples travel forward.
  around do |example|
    Timecop.freeze { example.run }
  end

  describe '#call' do
    let(:status) { 100 }
    let(:env) { { 'REQUEST_METHOD' => 'GET' } }
    let(:stack_result) { [status, {}, 'body'] }

    before do
      allow(app).to receive(:call).and_return(stack_result)
    end

    context '@app.call succeeds with 200' do
      before do
        allow(app).to receive(:call).and_return([200, nil, nil])
      end

      it 'increments response count with status label' do
        expect(described_class).to receive_message_chain(:rack_response_count, :increment).with(include(status: 200, method: 'get'))

        subject.call(env)
      end

      it 'increments requests count' do
        expect(described_class).to receive_message_chain(:rack_request_count, :increment).with(method: 'get')

        subject.call(env)
      end

      it 'measures execution time' do
        execution_time = 10

        # Move the frozen clock forward while the app is "handling" the request.
        allow(app).to receive(:call) do |*args|
          Timecop.freeze(execution_time.seconds)
        end

        expect(described_class).to receive_message_chain(:rack_execution_time, :observe).with({}, execution_time)

        subject.call(env)
      end
    end

    context '@app.call throws exception' do
      let(:rack_response_count) { double('rack_response_count') }

      before do
        allow(app).to receive(:call).and_raise(StandardError)
        allow(described_class).to receive(:rack_response_count).and_return(rack_response_count)
      end

      it 'increments exceptions count' do
        expect(described_class).to receive_message_chain(:rack_uncaught_errors_count, :increment)

        expect { subject.call(env) }.to raise_error(StandardError)
      end

      it 'increments requests count' do
        expect(described_class).to receive_message_chain(:rack_request_count, :increment).with(method: 'get')

        expect { subject.call(env) }.to raise_error(StandardError)
      end

      it "doesn't increment response count" do
        expect(described_class.rack_response_count).not_to receive(:increment)

        expect { subject.call(env) }.to raise_error(StandardError)
      end

      it 'measures execution time' do
        execution_time = 10

        # Move the frozen clock forward, then fail like a crashing app would.
        allow(app).to receive(:call) do |*args|
          Timecop.freeze(execution_time.seconds)
          raise StandardError
        end

        expect(described_class).to receive_message_chain(:rack_execution_time, :observe).with({}, execution_time)

        expect { subject.call(env) }.to raise_error(StandardError)
      end
    end
  end
end
require 'spec_helper' require 'spec_helper'
   
describe Gitlab::Metrics::Sampler do describe Gitlab::Metrics::InfluxSampler do
let(:sampler) { described_class.new(5) } let(:sampler) { described_class.new(5) }
   
after do after do
Loading
@@ -8,10 +8,10 @@ describe Gitlab::Metrics::Sampler do
Loading
@@ -8,10 +8,10 @@ describe Gitlab::Metrics::Sampler do
end end
   
describe '#start' do describe '#start' do
it 'gathers a sample at a given interval' do it 'runs once and gathers a sample at a given interval' do
expect(sampler).to receive(:sleep).with(a_kind_of(Numeric)) expect(sampler).to receive(:sleep).with(a_kind_of(Numeric)).twice
expect(sampler).to receive(:sample) expect(sampler).to receive(:sample).once
expect(sampler).to receive(:loop).and_yield expect(sampler).to receive(:running).and_return(false, true, false)
   
sampler.start.join sampler.start.join
end end
Loading
Loading
require 'spec_helper'
# Specs for the sampler that records connection statistics of Unicorn's
# listener sockets.
describe Gitlab::Metrics::UnicornSampler do
  subject { described_class.new(1.second) }

  describe '#sample' do
    let(:unicorn) { double('unicorn') }
    let(:raindrops) { double('raindrops') }
    let(:stats) { double('stats') }

    # Replace Unicorn and Raindrops::Linux with doubles that report no
    # statistics unless an example overrides them.
    before do
      stub_const('Unicorn', unicorn)
      stub_const('Raindrops::Linux', raindrops)
      allow(raindrops).to receive(:unix_listener_stats).and_return({})
      allow(raindrops).to receive(:tcp_listener_stats).and_return({})
    end

    context 'unicorn listens on unix sockets' do
      let(:socket_address) { '/some/sock' }
      let(:sockets) { [socket_address] }

      before do
        allow(unicorn).to receive(:listener_names).and_return(sockets)
      end

      it 'samples socket data' do
        expect(raindrops).to receive(:unix_listener_stats).with(sockets)

        subject.sample
      end

      context 'stats collected' do
        before do
          allow(stats).to receive(:active).and_return('active')
          allow(stats).to receive(:queued).and_return('queued')
          allow(raindrops).to receive(:unix_listener_stats).and_return({ socket_address => stats })
        end

        it 'updates metrics type unix and with addr' do
          labels = { type: 'unix', address: socket_address }

          expect(subject).to receive_message_chain(:unicorn_active_connections, :set).with(labels, 'active')
          expect(subject).to receive_message_chain(:unicorn_queued_connections, :set).with(labels, 'queued')

          subject.sample
        end
      end
    end

    context 'unicorn listens on tcp sockets' do
      let(:tcp_socket_address) { '0.0.0.0:8080' }
      let(:tcp_sockets) { [tcp_socket_address] }

      before do
        allow(unicorn).to receive(:listener_names).and_return(tcp_sockets)
      end

      it 'samples socket data' do
        expect(raindrops).to receive(:tcp_listener_stats).with(tcp_sockets)

        subject.sample
      end

      context 'stats collected' do
        before do
          allow(stats).to receive(:active).and_return('active')
          allow(stats).to receive(:queued).and_return('queued')
          allow(raindrops).to receive(:tcp_listener_stats).and_return({ tcp_socket_address => stats })
        end

        it 'updates metrics type tcp and with addr' do
          labels = { type: 'tcp', address: tcp_socket_address }

          expect(subject).to receive_message_chain(:unicorn_active_connections, :set).with(labels, 'active')
          expect(subject).to receive_message_chain(:unicorn_queued_connections, :set).with(labels, 'queued')

          subject.sample
        end
      end
    end
  end

  describe '#start' do
    context 'when enabled' do
      before do
        allow(subject).to receive(:enabled?).and_return(true)
      end

      it 'creates new thread' do
        expect(Thread).to receive(:new)

        subject.start
      end
    end

    context 'when disabled' do
      before do
        allow(subject).to receive(:enabled?).and_return(false)
      end

      it "doesn't create new thread" do
        expect(Thread).not_to receive(:new)

        subject.start
      end
    end
  end
end
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment