diff --git a/app/controllers/sessions_controller.rb b/app/controllers/sessions_controller.rb index e0e72170d1e72753538a51cfac648a0c954eade5..0e8a57f8e0316d1b6a9f1fb92ba03c09b73ade32 100644 --- a/app/controllers/sessions_controller.rb +++ b/app/controllers/sessions_controller.rb @@ -48,7 +48,7 @@ class SessionsController < Devise::SessionsController private def login_counter - @login_counter ||= Gitlab::Metrics.counter(:user_session_logins, 'User sign in count') + @login_counter ||= Gitlab::Metrics.counter(:user_session_logins_total, 'User sign in count') end # Handle an "initial setup" state, where there's only one user, it's an admin, diff --git a/config/initializers/8_metrics.rb b/config/initializers/8_metrics.rb index c80d28746d62c5bdb4e0b62c6a4603046752c0c0..25630b298ce32857620b7a9cdce8406422c20f55 100644 --- a/config/initializers/8_metrics.rb +++ b/config/initializers/8_metrics.rb @@ -123,7 +123,7 @@ Gitlab::Metrics::UnicornSampler.initialize_instance(Settings.monitoring.unicorn_ Gitlab::Application.configure do |config| # 0 should be Sentry to catch errors in this middleware - config.middleware.insert(1, Gitlab::Metrics::ConnectionRackMiddleware) + config.middleware.insert(1, Gitlab::Metrics::RequestsRackMiddleware) end if Gitlab::Metrics.enabled? diff --git a/doc/administration/monitoring/prometheus/gitlab_metrics.md b/doc/administration/monitoring/prometheus/gitlab_metrics.md index 07c05b5a6fb229c0e26262a3264eed426f8537b7..edb2dff3e480bd868a0e22bd17c5582294c31f74 100644 --- a/doc/administration/monitoring/prometheus/gitlab_metrics.md +++ b/doc/administration/monitoring/prometheus/gitlab_metrics.md @@ -23,21 +23,24 @@ Currently the embedded Prometheus server is not automatically configured to coll In this experimental phase, only a few metrics are available: -| Metric | Type | Description | -| ------ | ---- | ----------- | -| db_ping_timeout | Gauge | Whether or not the last database ping timed out | -| db_ping_success | Gauge | Whether or not the last database ping succeeded | -| db_ping_latency | Gauge | Round trip time of the database ping | -| redis_ping_timeout | Gauge | Whether or not the last redis ping timed out | -| redis_ping_success | Gauge | Whether or not the last redis ping succeeded | -| redis_ping_latency | Gauge | Round trip time of the redis ping | -| filesystem_access_latency | gauge | Latency in accessing a specific filesystem | -| filesystem_accessible | gauge | Whether or not a specific filesystem is accessible | -| filesystem_write_latency | gauge | Write latency of a specific filesystem | -| filesystem_writable | gauge | Whether or not the filesystem is writable | -| filesystem_read_latency | gauge | Read latency of a specific filesystem | -| filesystem_readable | gauge | Whether or not the filesystem is readable | -| user_sessions_logins | Counter | Counter of how many users have logged in | +| Metric | Type | Description | +| --------------------------------- | --------- | ----------- | +| db_ping_timeout | Gauge | Whether or not the last database ping timed out | +| db_ping_success | Gauge | Whether or not the last database ping succeeded | +| db_ping_latency_seconds | Gauge | Round trip time of the database ping | +| filesystem_access_latency_seconds | Gauge | Latency in accessing a specific filesystem | +| filesystem_accessible | Gauge | Whether or not a specific filesystem is accessible | +| filesystem_write_latency_seconds | Gauge | Write latency of a specific filesystem | +| filesystem_writable | Gauge | Whether or not the filesystem is writable | +| filesystem_read_latency_seconds | Gauge | Read latency of a specific filesystem | +| filesystem_readable | Gauge | Whether or not the filesystem is readable | +| http_requests_total | Counter | Rack request count | +| http_request_duration_seconds | Histogram | HTTP response time from rack middleware | +| rack_uncaught_errors_total | Counter | Rack connections handling uncaught errors count | +| redis_ping_timeout | Gauge | Whether or not the last redis ping timed out | +| redis_ping_success | Gauge | Whether or not the last redis ping succeeded | +| redis_ping_latency_seconds | Gauge | Round trip time of the redis ping | +| user_session_logins_total | Counter | Counter of how many users have logged in | [↠Back to the main Prometheus page](index.md) diff --git a/lib/gitlab/health_checks/fs_shards_check.rb b/lib/gitlab/health_checks/fs_shards_check.rb index 70da4080cae67bce6d3cb0bb4c299cb7edf45007..bebde857b1685c61e898fdab40d929c12c2bfa77 100644 --- a/lib/gitlab/health_checks/fs_shards_check.rb +++ b/lib/gitlab/health_checks/fs_shards_check.rb @@ -35,9 +35,9 @@ module Gitlab repository_storages.flat_map do |storage_name| tmp_file_path = tmp_file_path(storage_name) [ - operation_metrics(:filesystem_accessible, :filesystem_access_latency, -> { storage_stat_test(storage_name) }, shard: storage_name), - operation_metrics(:filesystem_writable, :filesystem_write_latency, -> { storage_write_test(tmp_file_path) }, shard: storage_name), - operation_metrics(:filesystem_readable, :filesystem_read_latency, -> { storage_read_test(tmp_file_path) }, shard: storage_name) + operation_metrics(:filesystem_accessible, :filesystem_access_latency_seconds, -> { storage_stat_test(storage_name) }, shard: storage_name), + operation_metrics(:filesystem_writable, :filesystem_write_latency_seconds, -> { storage_write_test(tmp_file_path) }, shard: storage_name), + operation_metrics(:filesystem_readable, :filesystem_read_latency_seconds, -> { storage_read_test(tmp_file_path) }, shard: storage_name) ].flatten end end diff --git a/lib/gitlab/health_checks/simple_abstract_check.rb b/lib/gitlab/health_checks/simple_abstract_check.rb index fbe1645c1b1050dea4926befa01e12dd0830809f..3dcb28a193c97881393951a8502a3b177e98d077 100644 --- a/lib/gitlab/health_checks/simple_abstract_check.rb +++ b/lib/gitlab/health_checks/simple_abstract_check.rb @@ -20,7 +20,7 @@ module Gitlab [ metric("#{metric_prefix}_timeout", result.is_a?(Timeout::Error) ? 1 : 0), metric("#{metric_prefix}_success", is_successful?(result) ? 1 : 0), - metric("#{metric_prefix}_latency", elapsed) + metric("#{metric_prefix}_latency_seconds", elapsed) ] end end diff --git a/lib/gitlab/metrics/connection_rack_middleware.rb b/lib/gitlab/metrics/connection_rack_middleware.rb deleted file mode 100644 index b3da360be8f3500aaa3fb28bf6fff5e98ba92baa..0000000000000000000000000000000000000000 --- a/lib/gitlab/metrics/connection_rack_middleware.rb +++ /dev/null @@ -1,45 +0,0 @@ -module Gitlab - module Metrics - class ConnectionRackMiddleware - def initialize(app) - @app = app - end - - def self.rack_request_count - @rack_request_count ||= Gitlab::Metrics.counter(:rack_request, 'Rack request count') - end - - def self.rack_response_count - @rack_response_count ||= Gitlab::Metrics.counter(:rack_response, 'Rack response count') - end - - def self.rack_uncaught_errors_count - @rack_uncaught_errors_count ||= Gitlab::Metrics.counter(:rack_uncaught_errors, 'Rack connections handling uncaught errors count') - end - - def self.rack_execution_time - @rack_execution_time ||= Gitlab::Metrics.histogram(:rack_execution_time, 'Rack connection handling execution time', - {}, [0.05, 0.1, 0.25, 0.5, 0.7, 1, 1.5, 2, 2.5, 3, 5, 7, 10]) - end - - def call(env) - method = env['REQUEST_METHOD'].downcase - started = Time.now.to_f - begin - ConnectionRackMiddleware.rack_request_count.increment(method: method) - - status, headers, body = @app.call(env) - - ConnectionRackMiddleware.rack_response_count.increment(method: method, status: status) - [status, headers, body] - rescue - ConnectionRackMiddleware.rack_uncaught_errors_count.increment - raise - ensure - elapsed = Time.now.to_f - started - ConnectionRackMiddleware.rack_execution_time.observe({}, elapsed) - end - end - end - end -end diff --git a/lib/gitlab/metrics/requests_rack_middleware.rb b/lib/gitlab/metrics/requests_rack_middleware.rb new file mode 100644 index 0000000000000000000000000000000000000000..0dc19f31d03dc9cf30bf2c68b971e971f8569aea --- /dev/null +++ b/lib/gitlab/metrics/requests_rack_middleware.rb @@ -0,0 +1,40 @@ +module Gitlab + module Metrics + class RequestsRackMiddleware + def initialize(app) + @app = app + end + + def self.http_request_total + @http_request_total ||= Gitlab::Metrics.counter(:http_requests_total, 'Request count') + end + + def self.rack_uncaught_errors_count + @rack_uncaught_errors_count ||= Gitlab::Metrics.counter(:rack_uncaught_errors_total, 'Request handling uncaught errors count') + end + + def self.http_request_duration_seconds + @http_request_duration_seconds ||= Gitlab::Metrics.histogram(:http_request_duration_seconds, 'Request handling execution time', + {}, [0.05, 0.1, 0.25, 0.5, 0.7, 1, 2.5, 5, 10, 25]) + end + + def call(env) + method = env['REQUEST_METHOD'].downcase + started = Time.now.to_f + begin + RequestsRackMiddleware.http_request_total.increment(method: method) + + status, headers, body = @app.call(env) + + elapsed = Time.now.to_f - started + RequestsRackMiddleware.http_request_duration_seconds.observe({ method: method, status: status }, elapsed) + + [status, headers, body] + rescue + RequestsRackMiddleware.rack_uncaught_errors_count.increment + raise + end + end + end + end +end diff --git a/spec/controllers/metrics_controller_spec.rb b/spec/controllers/metrics_controller_spec.rb index 86847c07c09511d0a4577688e82f1dd9f9e23b14..8964d89b438f9df8fc4797312f47c9a557f36038 100644 --- a/spec/controllers/metrics_controller_spec.rb +++ b/spec/controllers/metrics_controller_spec.rb @@ -24,7 +24,7 @@ describe MetricsController do expect(response.body).to match(/^db_ping_timeout 0$/) expect(response.body).to match(/^db_ping_success 1$/) - expect(response.body).to match(/^db_ping_latency [0-9\.]+$/) + expect(response.body).to match(/^db_ping_latency_seconds [0-9\.]+$/) end it 'returns Redis ping metrics' do @@ -32,7 +32,7 @@ describe MetricsController do expect(response.body).to match(/^redis_ping_timeout 0$/) expect(response.body).to match(/^redis_ping_success 1$/) - expect(response.body).to match(/^redis_ping_latency [0-9\.]+$/) + expect(response.body).to match(/^redis_ping_latency_seconds [0-9\.]+$/) end it 'returns Caching ping metrics' do @@ -40,7 +40,7 @@ describe MetricsController do expect(response.body).to match(/^redis_cache_ping_timeout 0$/) expect(response.body).to match(/^redis_cache_ping_success 1$/) - expect(response.body).to match(/^redis_cache_ping_latency [0-9\.]+$/) + expect(response.body).to match(/^redis_cache_ping_latency_seconds [0-9\.]+$/) end it 'returns Queues ping metrics' do @@ -48,7 +48,7 @@ describe MetricsController do expect(response.body).to match(/^redis_queues_ping_timeout 0$/) expect(response.body).to match(/^redis_queues_ping_success 1$/) - expect(response.body).to match(/^redis_queues_ping_latency [0-9\.]+$/) + expect(response.body).to match(/^redis_queues_ping_latency_seconds [0-9\.]+$/) end it 'returns SharedState ping metrics' do @@ -56,17 +56,17 @@ describe MetricsController do expect(response.body).to match(/^redis_shared_state_ping_timeout 0$/) expect(response.body).to match(/^redis_shared_state_ping_success 1$/) - expect(response.body).to match(/^redis_shared_state_ping_latency [0-9\.]+$/) + expect(response.body).to match(/^redis_shared_state_ping_latency_seconds [0-9\.]+$/) end it 'returns file system check metrics' do get :index - expect(response.body).to match(/^filesystem_access_latency{shard="default"} [0-9\.]+$/) + expect(response.body).to match(/^filesystem_access_latency_seconds{shard="default"} [0-9\.]+$/) expect(response.body).to match(/^filesystem_accessible{shard="default"} 1$/) - expect(response.body).to match(/^filesystem_write_latency{shard="default"} [0-9\.]+$/) + expect(response.body).to match(/^filesystem_write_latency_seconds{shard="default"} [0-9\.]+$/) expect(response.body).to match(/^filesystem_writable{shard="default"} 1$/) - expect(response.body).to match(/^filesystem_read_latency{shard="default"} [0-9\.]+$/) + expect(response.body).to match(/^filesystem_read_latency_seconds{shard="default"} [0-9\.]+$/) expect(response.body).to match(/^filesystem_readable{shard="default"} 1$/) end diff --git a/spec/lib/gitlab/health_checks/fs_shards_check_spec.rb b/spec/lib/gitlab/health_checks/fs_shards_check_spec.rb index b333e162909ccab1fd045aef75aa408510b64139..3de73a9ff650a4b73c89a4bed9c375363935f518 100644 --- a/spec/lib/gitlab/health_checks/fs_shards_check_spec.rb +++ b/spec/lib/gitlab/health_checks/fs_shards_check_spec.rb @@ -109,9 +109,9 @@ describe Gitlab::HealthChecks::FsShardsCheck do expect(subject).to include(an_object_having_attributes(name: :filesystem_readable, value: 0)) expect(subject).to include(an_object_having_attributes(name: :filesystem_writable, value: 0)) - expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency, value: be >= 0)) - expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency, value: be >= 0)) - expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency, value: be >= 0)) + expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency_seconds, value: be >= 0)) + expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency_seconds, value: be >= 0)) + expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency_seconds, value: be >= 0)) end end @@ -127,9 +127,9 @@ describe Gitlab::HealthChecks::FsShardsCheck do expect(subject).to include(an_object_having_attributes(name: :filesystem_readable, value: 1)) expect(subject).to include(an_object_having_attributes(name: :filesystem_writable, value: 1)) - expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency, value: be >= 0)) - expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency, value: be >= 0)) - expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency, value: be >= 0)) + expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency_seconds, value: be >= 0)) + expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency_seconds, value: be >= 0)) + expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency_seconds, value: be >= 0)) end end end @@ -159,9 +159,9 @@ describe Gitlab::HealthChecks::FsShardsCheck do expect(subject).to include(an_object_having_attributes(name: :filesystem_readable, value: 0)) expect(subject).to include(an_object_having_attributes(name: :filesystem_writable, value: 0)) - expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency, value: be >= 0)) - expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency, value: be >= 0)) - expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency, value: be >= 0)) + expect(subject).to include(an_object_having_attributes(name: :filesystem_access_latency_seconds, value: be >= 0)) + expect(subject).to include(an_object_having_attributes(name: :filesystem_read_latency_seconds, value: be >= 0)) + expect(subject).to include(an_object_having_attributes(name: :filesystem_write_latency_seconds, value: be >= 0)) end end end diff --git a/spec/lib/gitlab/health_checks/simple_check_shared.rb b/spec/lib/gitlab/health_checks/simple_check_shared.rb index 1abebeac4dddd7e190e013ba5762fd049ddac428..e2643458acabb01947adaf30cf07a8c84cfc16fc 100644 --- a/spec/lib/gitlab/health_checks/simple_check_shared.rb +++ b/spec/lib/gitlab/health_checks/simple_check_shared.rb @@ -8,7 +8,7 @@ shared_context 'simple_check' do |metrics_prefix, check_name, success_result| it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_success", value: 1)) } it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_timeout", value: 0)) } - it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency", value: be >= 0)) } + it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency_seconds", value: be >= 0)) } end context 'Check is misbehaving' do @@ -18,7 +18,7 @@ shared_context 'simple_check' do |metrics_prefix, check_name, success_result| it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_success", value: 0)) } it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_timeout", value: 0)) } - it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency", value: be >= 0)) } + it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency_seconds", value: be >= 0)) } end context 'Check is timeouting' do @@ -28,7 +28,7 @@ shared_context 'simple_check' do |metrics_prefix, check_name, success_result| it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_success", value: 0)) } it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_timeout", value: 1)) } - it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency", value: be >= 0)) } + it { is_expected.to include(have_attributes(name: "#{metrics_prefix}_latency_seconds", value: be >= 0)) } end end diff --git a/spec/lib/gitlab/metrics/connection_rack_middleware_spec.rb b/spec/lib/gitlab/metrics/requests_rack_middleware_spec.rb similarity index 51% rename from spec/lib/gitlab/metrics/connection_rack_middleware_spec.rb rename to spec/lib/gitlab/metrics/requests_rack_middleware_spec.rb index 94251af305f6f940893ffbd97d9f393847cb0b0e..461b1e4182a798127b710cbf4d3ce181bcaf559d 100644 --- a/spec/lib/gitlab/metrics/connection_rack_middleware_spec.rb +++ b/spec/lib/gitlab/metrics/requests_rack_middleware_spec.rb @@ -1,6 +1,6 @@ require 'spec_helper' -describe Gitlab::Metrics::ConnectionRackMiddleware do +describe Gitlab::Metrics::RequestsRackMiddleware do let(:app) { double('app') } subject { described_class.new(app) } @@ -22,14 +22,8 @@ describe Gitlab::Metrics::ConnectionRackMiddleware do allow(app).to receive(:call).and_return([200, nil, nil]) end - it 'increments response count with status label' do - expect(described_class).to receive_message_chain(:rack_response_count, :increment).with(include(status: 200, method: 'get')) - - subject.call(env) - end - it 'increments requests count' do - expect(described_class).to receive_message_chain(:rack_request_count, :increment).with(method: 'get') + expect(described_class).to receive_message_chain(:http_request_total, :increment).with(method: 'get') subject.call(env) end @@ -38,20 +32,21 @@ describe Gitlab::Metrics::ConnectionRackMiddleware do execution_time = 10 allow(app).to receive(:call) do |*args| Timecop.freeze(execution_time.seconds) + [200, nil, nil] end - expect(described_class).to receive_message_chain(:rack_execution_time, :observe).with({}, execution_time) + expect(described_class).to receive_message_chain(:http_request_duration_seconds, :observe).with({ status: 200, method: 'get' }, execution_time) subject.call(env) end end context '@app.call throws exception' do - let(:rack_response_count) { double('rack_response_count') } + let(:http_request_duration_seconds) { double('http_request_duration_seconds') } before do allow(app).to receive(:call).and_raise(StandardError) - allow(described_class).to receive(:rack_response_count).and_return(rack_response_count) + allow(described_class).to receive(:http_request_duration_seconds).and_return(http_request_duration_seconds) end it 'increments exceptions count' do @@ -61,25 +56,13 @@ describe Gitlab::Metrics::ConnectionRackMiddleware do end it 'increments requests count' do - expect(described_class).to receive_message_chain(:rack_request_count, :increment).with(method: 'get') - - expect { subject.call(env) }.to raise_error(StandardError) - end - - it "does't increment response count" do - expect(described_class.rack_response_count).not_to receive(:increment) + expect(described_class).to receive_message_chain(:http_request_total, :increment).with(method: 'get') expect { subject.call(env) }.to raise_error(StandardError) end - it 'measures execution time' do - execution_time = 10 - allow(app).to receive(:call) do |*args| - Timecop.freeze(execution_time.seconds) - raise StandardError - end - - expect(described_class).to receive_message_chain(:rack_execution_time, :observe).with({}, execution_time) + it "does't measure request execution time" do + expect(described_class.http_request_duration_seconds).not_to receive(:increment) expect { subject.call(env) }.to raise_error(StandardError) end