Skip to content
Snippets Groups Projects
Unverified Commit 6872be87 authored by David Fernandez's avatar David Fernandez
Browse files

Cache created_at values for tags that are kept

This is to avoid network requests to the container registry api. This
cache will only work if the `older_than` parameter is set.
Tags that are destroyed are guaranteed to have their `created_at` freshly
fetched from the container registry API.

By reducing the number of pings to the container registry API, the
cleanup tags service is more efficient and takes less time to go through
a set of tags.
parent 814ba9c1
No related branches found
No related tags found
No related merge requests found
Showing
with 701 additions and 202 deletions
Loading
Loading
@@ -4,7 +4,7 @@ module ContainerExpirationPolicies
class CleanupService
attr_reader :repository
 
SERVICE_RESULT_FIELDS = %i[original_size before_truncate_size after_truncate_size before_delete_size deleted_size].freeze
SERVICE_RESULT_FIELDS = %i[original_size before_truncate_size after_truncate_size before_delete_size deleted_size cached_tags_count].freeze
 
def initialize(repository)
@repository = repository
Loading
Loading
# frozen_string_literal: true

module Projects
  module ContainerRepository
    # Caches container-registry tag `created_at` timestamps in Redis so
    # that subsequent cleanup runs can skip the network round trip to the
    # registry API for tags that are kept between runs.
    class CacheTagsCreatedAtService
      def initialize(container_repository)
        @container_repository = container_repository
        # Names of tags whose created_at came from the cache; consulted by
        # #insert so already-cached entries are not written again.
        @cached_tag_names = Set.new
      end

      # Fills in tag.created_at from Redis for every tag with a cached value.
      #
      # @param tags [Array] tags responding to #name and #created_at=
      # @return [Integer, nil] number of cache hits, or nil when tags is empty
      def populate(tags)
        return if tags.empty?

        hits = 0

        ::Gitlab::Redis::Cache.with do |redis|
          cached_values = redis.mget(tags.map { |tag| cache_key(tag) })

          tags.each_with_index do |tag, index|
            raw_timestamp = cached_values[index]
            next if raw_timestamp.nil?

            tag.created_at = DateTime.rfc3339(raw_timestamp)
            @cached_tag_names.add(tag.name)
            hits += 1
          end
        end

        hits
      end

      # Writes created_at values to Redis for the cacheable tags.
      #
      # @param tags [Array] candidate tags
      # @param max_ttl_in_seconds [Numeric, nil] upper bound for the entry
      #   TTL; nil disables caching entirely
      def insert(tags, max_ttl_in_seconds)
        return unless max_ttl_in_seconds
        return if tags.empty?

        # Tags with a nil created_at cannot be cached; tags that were read
        # from the cache do not need to be written back.
        cacheable_tags = tags.reject do |tag|
          tag.created_at.blank? || tag.name.in?(@cached_tag_names)
        end

        return if cacheable_tags.empty?

        reference_time = Time.zone.now

        ::Gitlab::Redis::Cache.with do |redis|
          # A pipeline is used instead of a single MSET because every tag
          # carries its own ttl.
          redis.pipelined do
            cacheable_tags.each do |tag|
              created_at = tag.created_at
              # The ttl is max_ttl_in_seconds reduced by the tag's current
              # age, so an entry never outlives the policy window.
              remaining_ttl = (max_ttl_in_seconds - (reference_time - created_at).seconds).to_i
              next unless remaining_ttl > 0

              redis.set(cache_key(tag), created_at.rfc3339, ex: remaining_ttl)
            end
          end
        end
      end

      private

      def cache_key(tag)
        "container_repository:{#{@container_repository.id}}:tag:#{tag.name}:created_at"
      end
    end
  end
end
Loading
Loading
@@ -3,6 +3,8 @@
module Projects
module ContainerRepository
class CleanupTagsService < BaseService
include ::Gitlab::Utils::StrongMemoize
def execute(container_repository)
return error('access denied') unless can_destroy?
return error('invalid regex') unless valid_regex?
Loading
Loading
@@ -17,13 +19,16 @@ def execute(container_repository)
tags = truncate(tags)
after_truncate_size = tags.size
 
tags = filter_keep_n(tags)
tags = filter_by_older_than(tags)
cached_tags_count = populate_tags_from_cache(container_repository, tags) || 0
tags = filter_keep_n(container_repository, tags)
tags = filter_by_older_than(container_repository, tags)
 
delete_tags(container_repository, tags).tap do |result|
result[:original_size] = original_size
result[:before_truncate_size] = before_truncate_size
result[:after_truncate_size] = after_truncate_size
result[:cached_tags_count] = cached_tags_count
result[:before_delete_size] = tags.size
result[:deleted_size] = result[:deleted]&.size
 
Loading
Loading
@@ -67,21 +72,26 @@ def filter_by_name(tags)
end
end
 
def filter_keep_n(tags)
def filter_keep_n(container_repository, tags)
return tags unless params['keep_n']
 
tags = order_by_date(tags)
cache_tags(container_repository, tags.first(keep_n))
tags.drop(keep_n)
end
 
def filter_by_older_than(tags)
return tags unless params['older_than']
def filter_by_older_than(container_repository, tags)
return tags unless older_than
 
older_than = ChronicDuration.parse(params['older_than']).seconds.ago
older_than_timestamp = older_than_in_seconds.ago
 
tags.select do |tag|
tag.created_at && tag.created_at < older_than
tags, tags_to_keep = tags.partition do |tag|
tag.created_at && tag.created_at < older_than_timestamp
end
cache_tags(container_repository, tags_to_keep)
tags
end
 
def can_destroy?
Loading
Loading
@@ -114,6 +124,28 @@ def truncate(tags)
tags.sample(truncated_size)
end
 
# Hydrates tag created_at values from the Redis cache when caching is
# enabled for this execution.
#
# @return [Integer, nil] number of cache hits, or nil when caching is
#   disabled (the caller defaults this to 0)
def populate_tags_from_cache(container_repository, tags)
  return unless caching_enabled?(container_repository)

  cache(container_repository).populate(tags)
end
# Stores created_at values in the Redis cache for the given kept tags,
# when caching is enabled for this execution. The TTL upper bound is the
# policy's `older_than` window.
def cache_tags(container_repository, tags)
  return unless caching_enabled?(container_repository)

  cache(container_repository).insert(tags, older_than_in_seconds)
end
# Returns the created_at cache service, memoized so one instance (and its
# record of already-cached tag names) is shared across the whole run.
def cache(container_repository)
  # TODO Implement https://gitlab.com/gitlab-org/gitlab/-/issues/340277 to avoid passing
  # the container repository parameter which is bad for a memoized function
  strong_memoize(:cache) do
    ::Projects::ContainerRepository::CacheTagsCreatedAtService.new(container_repository)
  end
end
# Caching only applies to container-expiration-policy executions that set
# `older_than`, and is gated behind a per-project feature flag.
# Note: returns the last truthy value rather than a strict boolean.
def caching_enabled?(container_repository)
  params['container_expiration_policy'] &&
    older_than.present? &&
    Feature.enabled?(:container_registry_expiration_policies_caching, container_repository.project)
end
# Whether throttled cleanup execution is enabled via feature flag.
def throttling_enabled?
  Feature.enabled?(:container_registry_expiration_policies_throttling)
end
Loading
Loading
@@ -125,6 +157,16 @@ def max_list_size
def keep_n
params['keep_n'].to_i
end
# Parses the `older_than` param (a human-readable duration, e.g. "90 days")
# into a duration in seconds. Memoized because it is needed both to filter
# tags by age and to compute cache entry TTLs.
def older_than_in_seconds
  strong_memoize(:older_than_in_seconds) do
    ChronicDuration.parse(older_than).seconds
  end
end
# Raw `older_than` parameter (duration string or nil when unset).
def older_than
  params['older_than']
end
end
end
end
Loading
Loading
@@ -22,6 +22,7 @@ class CleanupContainerRepositoryWorker
cleanup_tags_service_original_size
cleanup_tags_service_before_truncate_size
cleanup_tags_service_after_truncate_size
cleanup_tags_service_cached_tags_count
cleanup_tags_service_before_delete_size
cleanup_tags_service_deleted_size
].freeze
Loading
Loading
@@ -148,13 +149,27 @@ def log_on_done(result)
log_extra_metadata_on_done(field, value)
end
 
log_truncate(result)
log_cache_ratio(result)
log_extra_metadata_on_done(:running_jobs_count, running_jobs_count)
end
# Logs the cache hit ratio for this cleanup run: cached tags over tags
# remaining after truncation. Skipped when either counter is missing from
# the payload or when no tags remained (avoids dividing by zero).
def log_cache_ratio(result)
  considered_count = result.payload[:cleanup_tags_service_after_truncate_size]
  hit_count = result.payload[:cleanup_tags_service_cached_tags_count]

  return unless considered_count && hit_count && considered_count.nonzero?

  log_extra_metadata_on_done(:cleanup_tags_service_cache_hit_ratio, hit_count / considered_count.to_f)
end
# Logs whether the tag list was truncated before processing: true only
# when both sizes are present in the payload and differ.
def log_truncate(result)
  before_truncate_size = result.payload[:cleanup_tags_service_before_truncate_size]
  after_truncate_size = result.payload[:cleanup_tags_service_after_truncate_size]

  truncated = before_truncate_size &&
    after_truncate_size &&
    before_truncate_size != after_truncate_size

  # !! normalizes the truthy chain above into a strict boolean for logging.
  log_extra_metadata_on_done(:cleanup_tags_service_truncated, !!truncated)
  # NOTE(review): running_jobs_count is also logged by the caller
  # (log_on_done); this line may be a leftover removed line from the diff
  # rendering — verify against the actual file.
  log_extra_metadata_on_done(:running_jobs_count, running_jobs_count)
end
 
def policy
Loading
Loading
---
name: container_registry_expiration_policies_caching
introduced_by_url:
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/340606
milestone: '14.3'
type: development
group: group::package
default_enabled: false
Loading
Loading
@@ -5,6 +5,7 @@ class Tag
include Gitlab::Utils::StrongMemoize
 
attr_reader :repository, :name
attr_writer :created_at
 
delegate :registry, :client, to: :repository
delegate :revision, :short_revision, to: :config_blob, allow_nil: true
Loading
Loading
@@ -73,9 +74,10 @@ def config
end
 
def created_at
return @created_at if @created_at
return unless config
 
strong_memoize(:created_at) do
strong_memoize(:memoized_created_at) do
DateTime.rfc3339(config['created'])
rescue ArgumentError
nil
Loading
Loading
Loading
Loading
@@ -60,6 +60,20 @@
end
 
context 'manifest processing' do
shared_examples 'using the value manually set on created_at' do
let(:value) { 5.seconds.ago }
before do
tag.created_at = value
end
it 'does not use the config' do
expect(tag).not_to receive(:config)
expect(subject).to eq(value)
end
end
context 'schema v1' do
before do
stub_request(:get, 'http://registry.gitlab/v2/group/test/manifests/tag')
Loading
Loading
@@ -93,6 +107,8 @@
subject { tag.created_at }
 
it { is_expected.to be_nil }
it_behaves_like 'using the value manually set on created_at'
end
end
end
Loading
Loading
@@ -117,6 +133,8 @@
subject { tag.created_at }
 
it { is_expected.to be_nil }
it_behaves_like 'using the value manually set on created_at'
end
end
 
Loading
Loading
@@ -154,6 +172,8 @@
subject { tag.created_at }
 
it { is_expected.not_to be_nil }
it_behaves_like 'using the value manually set on created_at'
end
end
 
Loading
Loading
Loading
Loading
@@ -69,6 +69,7 @@
before_truncate_size: 800,
after_truncate_size: 200,
before_delete_size: 100,
cached_tags_count: 0,
deleted_size: 100
}
end
Loading
Loading
@@ -86,6 +87,7 @@
cleanup_tags_service_before_truncate_size: 800,
cleanup_tags_service_after_truncate_size: 200,
cleanup_tags_service_before_delete_size: 100,
cleanup_tags_service_cached_tags_count: 0,
cleanup_tags_service_deleted_size: 100
)
expect(ContainerRepository.waiting_for_cleanup.count).to eq(1)
Loading
Loading
# frozen_string_literal: true

require 'spec_helper'

RSpec.describe ::Projects::ContainerRepository::CacheTagsCreatedAtService, :clean_gitlab_redis_cache do
  # Lightweight stand-in for a registry tag: the service only needs
  # #name and #created_at accessors.
  let_it_be(:dummy_tag_class) { Struct.new(:name, :created_at) }
  let_it_be(:repository) { create(:container_repository) }

  let(:tags) { create_tags(5) }
  let(:service) { described_class.new(repository) }

  shared_examples 'not interacting with redis' do
    it 'does not interact with redis' do
      expect(::Gitlab::Redis::Cache).not_to receive(:with)

      subject
    end
  end

  describe '#populate' do
    subject { service.populate(tags) }

    context 'with tags' do
      it 'gets values from redis' do
        expect(::Gitlab::Redis::Cache).to receive(:with).and_call_original

        # Nothing cached yet: zero hits and created_at stays nil.
        expect(subject).to eq(0)
        tags.each { |t| expect(t.created_at).to eq(nil) }
      end

      context 'with cached values' do
        let(:cached_tags) { tags.first(2) }

        before do
          ::Gitlab::Redis::Cache.with do |redis|
            cached_tags.each do |tag|
              redis.set(cache_key(tag), rfc3339(10.days.ago))
            end
          end
        end

        it 'gets values from redis' do
          expect(::Gitlab::Redis::Cache).to receive(:with).and_call_original

          # Only the pre-cached tags count as hits and get a created_at.
          expect(subject).to eq(2)
          cached_tags.each { |t| expect(t.created_at).not_to eq(nil) }
          (tags - cached_tags).each { |t| expect(t.created_at).to eq(nil) }
        end
      end
    end

    context 'with no tags' do
      let(:tags) { [] }

      it_behaves_like 'not interacting with redis'
    end
  end

  describe '#insert' do
    let(:max_ttl) { 90.days }

    subject { service.insert(tags, max_ttl) }

    context 'with tags' do
      let(:tag) { tags.first }
      # Expected TTL: the max TTL reduced by the tag's age (3 days).
      let(:ttl) { 90.days - 3.days }

      before do
        travel_to(Time.zone.local(2021, 9, 2, 12, 0, 0))

        tag.created_at = DateTime.rfc3339(3.days.ago.rfc3339)
      end

      after do
        travel_back
      end

      it 'inserts values in redis' do
        ::Gitlab::Redis::Cache.with do |redis|
          expect(redis)
            .to receive(:set)
            .with(cache_key(tag), rfc3339(tag.created_at), ex: ttl.to_i)
            .and_call_original
        end

        subject
      end

      context 'with some of them already cached' do
        let(:tag) { tags.first }

        before do
          ::Gitlab::Redis::Cache.with do |redis|
            redis.set(cache_key(tag), rfc3339(10.days.ago))
          end
          # populate records the tag as cache-sourced, so insert skips it
          # and (with all other tags lacking created_at) never hits redis.
          service.populate(tags)
        end

        it_behaves_like 'not interacting with redis'
      end
    end

    context 'with no tags' do
      let(:tags) { [] }

      it_behaves_like 'not interacting with redis'
    end

    context 'with no expires_in' do
      let(:max_ttl) { nil }

      it_behaves_like 'not interacting with redis'
    end
  end

  def create_tags(size)
    Array.new(size) do |i|
      dummy_tag_class.new("Tag #{i}", nil)
    end
  end

  # Must mirror CacheTagsCreatedAtService#cache_key.
  def cache_key(tag)
    "container_repository:{#{repository.id}}:tag:#{tag.name}:created_at"
  end

  def rfc3339(date_time)
    # DateTime#rfc3339 output differs from ActiveSupport::TimeWithZone#rfc3339.
    # The caching code uses the DateTime format, so round-trip through DateTime.
    DateTime.rfc3339(date_time.rfc3339).rfc3339
  end
end
Loading
Loading
@@ -74,6 +74,30 @@
end
end
end
# Exercises the worker's log_cache_ratio: the ratio is only logged when
# both counters are present in the payload and the tag count is non-zero.
context 'the cache hit ratio field' do
  where(:after_truncate_size, :cached_tags_count, :ratio) do
    nil | nil | nil
    10  | nil | nil
    nil | 10  | nil
    0   | 5   | nil
    10  | 0   | 0
    10  | 5   | 0.5
    3   | 10  | (10 / 3.to_f)
  end

  with_them do
    it 'is logged properly' do
      service_response = cleanup_service_response(status: :unfinished, repository: repository, cleanup_tags_service_before_truncate_size: after_truncate_size, cleanup_tags_service_after_truncate_size: after_truncate_size, cleanup_tags_service_cached_tags_count: cached_tags_count)
      expect(ContainerExpirationPolicies::CleanupService)
        .to receive(:new).with(repository).and_return(double(execute: service_response))
      expect_log_extra_metadata(service_response: service_response, cleanup_status: :unfinished, truncated: false, cache_hit_ratio: ratio)
      expect_log_info(project_id: project.id, container_repository_id: repository.id)

      subject
    end
  end
end
end
 
context 'with an erroneous cleanup' do
Loading
Loading
@@ -372,7 +396,16 @@ def update_container_repository(container_repository, cleanup_status, policy_sta
end
end
 
def cleanup_service_response(status: :finished, repository:, cleanup_tags_service_original_size: 100, cleanup_tags_service_before_truncate_size: 80, cleanup_tags_service_after_truncate_size: 80, cleanup_tags_service_before_delete_size: 50, cleanup_tags_service_deleted_size: 50)
def cleanup_service_response(
status: :finished,
repository:,
cleanup_tags_service_original_size: 100,
cleanup_tags_service_before_truncate_size: 80,
cleanup_tags_service_after_truncate_size: 80,
cleanup_tags_service_before_delete_size: 50,
cleanup_tags_service_deleted_size: 50,
cleanup_tags_service_cached_tags_count: 0
)
ServiceResponse.success(
message: "cleanup #{status}",
payload: {
Loading
Loading
@@ -381,21 +414,35 @@ def cleanup_service_response(status: :finished, repository:, cleanup_tags_servic
cleanup_tags_service_original_size: cleanup_tags_service_original_size,
cleanup_tags_service_before_truncate_size: cleanup_tags_service_before_truncate_size,
cleanup_tags_service_after_truncate_size: cleanup_tags_service_after_truncate_size,
cleanup_tags_service_before_delete_size: cleanup_tags_service_before_delete_size
cleanup_tags_service_before_delete_size: cleanup_tags_service_before_delete_size,
cleanup_tags_service_cached_tags_count: cleanup_tags_service_cached_tags_count
}.compact
)
end
 
def expect_log_extra_metadata(service_response:, cleanup_status: :finished, truncated: false)
def expect_log_extra_metadata(service_response:, cleanup_status: :finished, truncated: false, cache_hit_ratio: 0)
expect(worker).to receive(:log_extra_metadata_on_done).with(:container_repository_id, repository.id)
expect(worker).to receive(:log_extra_metadata_on_done).with(:project_id, repository.project.id)
expect(worker).to receive(:log_extra_metadata_on_done).with(:cleanup_status, cleanup_status)
 
%i[cleanup_tags_service_original_size cleanup_tags_service_before_truncate_size cleanup_tags_service_after_truncate_size cleanup_tags_service_before_delete_size cleanup_tags_service_deleted_size].each do |field|
%i[
cleanup_tags_service_original_size
cleanup_tags_service_before_truncate_size
cleanup_tags_service_after_truncate_size
cleanup_tags_service_before_delete_size
cleanup_tags_service_deleted_size
cleanup_tags_service_cached_tags_count
].each do |field|
value = service_response.payload[field]
expect(worker).to receive(:log_extra_metadata_on_done).with(field, value) unless value.nil?
end
expect(worker).to receive(:log_extra_metadata_on_done).with(:cleanup_tags_service_truncated, truncated)
after_truncate_size = service_response.payload[:cleanup_tags_service_after_truncate_size]
if cache_hit_ratio && after_truncate_size && after_truncate_size != 0
expect(worker).to receive(:log_extra_metadata_on_done).with(:cleanup_tags_service_cache_hit_ratio, cache_hit_ratio)
end
expect(worker).to receive(:log_extra_metadata_on_done).with(:running_jobs_count, 0)
 
if service_response.error?
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment