Unverified Commit 207b308c authored by Kamil Trzcinski

Improve flag options

parent fa06ef9c
@@ -10,8 +10,8 @@ It uses optimised file accesses and API calls to create walk DAG.
 
 ### Installation
 
-```
-go get -u gitlab.com/gitlab-org/docker-distribution-pruner
+```bash
+$ go get -u gitlab.com/gitlab-org/docker-distribution-pruner
 ```
 
 ### Preface
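(With the pre-modules Go toolchain this project targets, `go get -u` fetches the latest sources and installs the `docker-distribution-pruner` binary into `$GOPATH/bin`.)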
@@ -29,13 +29,13 @@ If you run `-delete -soft-delete=false` you will remove data forever.
 Dry run:
 
 ```bash
-docker-distribution-pruner -config=/path/to/registry/configuration
+$ docker-distribution-pruner -config=/path/to/registry/configuration
 ```
 
 Reclaim disk space:
 
 ```bash
-docker-distribution-pruner -config=/path/to/registry/configuration -delete
+$ docker-distribution-pruner -config=/path/to/registry/configuration -delete
 ```
 
 ### GitLab Omnibus
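(As the hunk header above notes, running `docker-distribution-pruner -config=/path/to/registry/configuration -delete -soft-delete=false` removes data forever; do a dry run first.)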
@@ -43,7 +43,7 @@ docker-distribution-pruner -config=/path/to/registry/configuration -delete
 Run:
 
 ```bash
-docker-distribution-pruner -config=/var/opt/gitlab/registry/config.yml
+$ docker-distribution-pruner -config=/var/opt/gitlab/registry/config.yml
 ```
 
 ### S3 effectiveness
@@ -62,7 +62,14 @@ For large repositories it allows to save hundreds of thousands requests and also
 This tool can effectively run, in reasonable time, on registries that consist of millions of objects and terabytes of data.
 For a smooth run, have at least 4 GB of memory per 5 million objects stored in the registry.
 
+To speed up the processing of large repositories, enable parallel blob and repository processing:
+```bash
+$ docker-distribution-pruner -config=/path/to/registry/configuration -parallel-repository-walk -parallel-blob-walk
+```
+You can also tune the concurrency settings up or down:
+```bash
+-jobs=100 -parallel-walk-jobs=100
+```
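As a rough worked example of that sizing rule: at 4 GB per 5 million objects, a registry holding 20 million objects would need on the order of 16 GB of memory for the pruner's walk data.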
 package main
 
 import (
-	"flag"
 	"fmt"
 	"path/filepath"
 	"strings"
@@ -11,8 +10,6 @@ import (
 	"github.com/dustin/go-humanize"
 )
 
-var parallelBlobWalk = flag.Bool("parallel-blob-walk", true, "Allow to use parallel blob walker")
-
 type blobsData map[digest]*blobData
 
 var blobsLock sync.Mutex
@@ -116,10 +113,10 @@ func (b blobsData) walkPath(walkPath string) error {
 	})
 }
 
-func (b blobsData) walk() error {
+func (b blobsData) walk(parallel bool) error {
 	logrus.Infoln("Walking BLOBS...")
 
-	if *parallelBlobWalk {
+	if parallel {
 		listRootPath := filepath.Join("blobs", "sha256")
 		return parallelWalk(listRootPath, b.walkPath)
 	} else {
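The refactor in this and the following files follows one pattern: flag definitions move into main.go, and functions such as `blobsData.walk` receive plain values (`parallel bool`) instead of dereferencing package-level flag pointers. A minimal, self-contained sketch of the pattern (hypothetical names, not the project's actual code):

```go
package main

import (
	"flag"
	"fmt"
)

// With this commit's approach, every option is declared once, next to main.
var parallelFlag = flag.Bool("parallel", false, "walk in parallel")

// walk receives a plain value instead of reading a package-level flag,
// so it carries no hidden dependency on global flag state and is easy to test.
func walk(parallel bool) error {
	if parallel {
		fmt.Println("parallel walk")
	} else {
		fmt.Println("sequential walk")
	}
	return nil
}

func main() {
	flag.Parse()
	// The flag pointer is dereferenced exactly once, at the call site.
	if err := walk(*parallelFlag); err != nil {
		fmt.Println("error:", err)
	}
}
```

Keeping the single dereference at the call site in `main` is what lets the commit delete the scattered `"flag"` imports from the other files.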
 package main
 
 import (
-	"flag"
 	"path/filepath"
 	"sync/atomic"
 
@@ -16,11 +15,6 @@ var (
 	deletedBlobSize int64
 )
 
-var (
-	delete     = flag.Bool("delete", false, "Delete data, instead of dry run")
-	softDelete = flag.Bool("soft-delete", true, "When deleting, do not remove, but move to backup/ folder")
-)
-
 func deleteFile(path string, size int64) error {
 	logrus.Infoln("DELETE", path, size)
 	name := filepath.Base(path)
@@ -10,13 +10,25 @@ import (
 )
 
 var (
-	config = flag.String("config", "", "Path to registry config file")
-	debug = flag.Bool("debug", false, "Print debug messages")
-	verbose = flag.Bool("verbose", true, "Print verbose messages")
+	config = flag.String("config", "", "Path to registry config file")
+	ignoreBlobs = flag.Bool("ignore-blobs", false, "Ignore blobs processing and recycling")
 	jobs = flag.Int("jobs", 10, "Number of concurrent jobs to execute")
 	parallelWalkJobs = flag.Int("parallel-walk-jobs", 10, "Number of concurrent parallel walk jobs to execute")
-	ignoreBlobs = flag.Bool("ignore-blobs", false, "Ignore blobs processing and recycling")
-	softErrors = flag.Bool("soft-errors", false, "Print errors, but do not fail")
+	debug = flag.Bool("debug", false, "Print debug messages")
+	verbose = flag.Bool("verbose", true, "Print verbose messages")
+	softErrors = flag.Bool("soft-errors", false, "Print errors, but do not fail")
+	parallelRepositoryWalk = flag.Bool("parallel-repository-walk", false, "Allow to use parallel repository walker")
+	parallelBlobWalk = flag.Bool("parallel-blob-walk", false, "Allow to use parallel blob walker")
+	repositoryCsvOutput = flag.String("repository-csv-output", "repositories.csv", "File to which CSV will be written with all metrics")
+	deleteOldTagVersions = flag.Bool("delete-old-tag-versions", true, "Delete old tag versions")
+	delete = flag.Bool("delete", false, "Delete data, instead of dry run")
+	softDelete = flag.Bool("soft-delete", true, "When deleting, do not remove, but move to backup/ folder")
 )
 
 var (
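Two details are worth noting in the consolidated block above: every option now lives in a single `var` block next to `main`, and the parallel walkers changed their defaults. `parallel-repository-walk` and `parallel-blob-walk` previously defaulted to `true` (see the removed declarations in the other files) and now default to `false`, so parallel walking becomes opt-in via the flags documented in the README.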
@@ -78,7 +90,7 @@ func main() {
 	go func() {
 		defer wg.Done()
 
-		err = repositories.walk()
+		err = repositories.walk(*parallelRepositoryWalk)
 		if err != nil {
 			logErrorln(err)
 		}
@@ -91,7 +103,7 @@ func main() {
 		return
 	}
 
-	err = blobs.walk()
+	err = blobs.walk(*parallelBlobWalk)
 	if err != nil {
 		logErrorln(err)
 	}
@@ -118,7 +130,7 @@ func main() {
 	}
 
 	logrus.Infoln("Summary...")
-	repositories.info(blobs)
+	repositories.info(blobs, *repositoryCsvOutput)
 	blobs.info()
 	deletesInfo()
 	currentStorage.Info()
 package main
 
 import (
-	"flag"
 	"fmt"
 	"io"
 	"os"
@@ -11,9 +10,6 @@ import (
 	"github.com/Sirupsen/logrus"
 )
 
-var parallelRepositoryWalk = flag.Bool("parallel-repository-walk", true, "Allow to use parallel repository walker")
-var repositoryCsvOutput = flag.String("repository-csv-output", "repositories.csv", "File to which CSV will be written with all metrics")
-
 type repositoriesData map[string]*repositoryData
 
 var repositoriesLock sync.Mutex
@@ -75,12 +71,12 @@ func (r repositoriesData) walkPath(walkPath string, jg *jobGroup) error {
 	})
 }
 
-func (r repositoriesData) walk() error {
+func (r repositoriesData) walk(parallel bool) error {
 	logrus.Infoln("Walking REPOSITORIES...")
 
 	jg := jobsRunner.group()
 
-	if *parallelRepositoryWalk {
+	if parallel {
 		err := parallelWalk("repositories", func(listPath string) error {
 			return r.walkPath(listPath, jg)
 		})
@@ -131,12 +127,12 @@ func (r repositoriesData) sweep() error {
 	return nil
 }
 
-func (r repositoriesData) info(blobs blobsData) {
+func (r repositoriesData) info(blobs blobsData, csvOutput string) {
 	var stream io.WriteCloser
 
-	if *repositoryCsvOutput != "" {
+	if csvOutput != "" {
 		var err error
-		stream, err = os.Create(*repositoryCsvOutput)
+		stream, err = os.Create(csvOutput)
 		if err == nil {
 			defer stream.Close()
 
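The `info` change is the same dependency-injection move: the CSV path arrives as a parameter, and the empty string disables the report. A self-contained sketch of that guard (hypothetical `writeReport`, not the project's code):

```go
package main

import (
	"encoding/csv"
	"fmt"
	"os"
)

// writeReport writes rows as CSV to the given path.
// An empty path disables the report entirely, mirroring the
// `csvOutput != ""` check in repositoriesData.info.
func writeReport(path string, rows [][]string) error {
	if path == "" {
		return nil
	}

	f, err := os.Create(path)
	if err != nil {
		return err
	}
	defer f.Close()

	// WriteAll flushes the writer and reports any write error.
	return csv.NewWriter(f).WriteAll(rows)
}

func main() {
	rows := [][]string{{"repository", "tags"}, {"group/app", "12"}}
	if err := writeReport("repositories.csv", rows); err != nil {
		fmt.Println("error:", err)
	}
}
```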
@@ -22,6 +22,7 @@ type s3Storage struct {
 	S3 *s3.S3
 	apiCalls int64
 	expensiveApiCalls int64
+	freeApiCalls int64
 	cacheHits int64
 	cacheError int64
 	cacheMiss int64
@@ -233,7 +234,7 @@ func (f *s3Storage) Read(path string, etag string) ([]byte, error) {
 }
 
 func (f *s3Storage) Delete(path string) error {
-	atomic.AddInt64(&f.expensiveApiCalls, 1)
+	atomic.AddInt64(&f.freeApiCalls, 1)
 	_, err := f.S3.DeleteObject(&s3.DeleteObjectInput{
 		Bucket: aws.String(f.Bucket),
 		Key:    aws.String(f.fullPath(path)),
@@ -255,7 +256,7 @@ func (f *s3Storage) Move(path, newPath string) error {
 }
 
 func (f *s3Storage) Info() {
-	logrus.Infoln("S3 INFO: API calls/expensive:", f.apiCalls, f.expensiveApiCalls,
+	logrus.Infoln("S3 INFO: API calls/expensive/free:", f.apiCalls, f.expensiveApiCalls, f.freeApiCalls,
 		"Cache (hit/miss/error):", f.cacheHits, f.cacheMiss, f.cacheError)
 }
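The new `freeApiCalls` counter splits deletes out of the expensive bucket; Amazon S3 does not charge for DELETE requests, so counting them as expensive overstated the cost of a pruning run. A minimal sketch of this style of lock-free accounting with `sync/atomic` (hypothetical names):

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// apiStats tallies calls by pricing class; atomic increments make it
// safe to update from many walker goroutines without a mutex.
type apiStats struct {
	expensive int64 // e.g. LIST requests
	free      int64 // e.g. DELETE requests
}

func (s *apiStats) countExpensive() { atomic.AddInt64(&s.expensive, 1) }
func (s *apiStats) countFree()      { atomic.AddInt64(&s.free, 1) }

func main() {
	var stats apiStats
	var wg sync.WaitGroup

	// Simulate concurrent storage operations.
	for i := 0; i < 100; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			stats.countFree()
			stats.countExpensive()
		}()
	}
	wg.Wait()

	fmt.Println("expensive:", atomic.LoadInt64(&stats.expensive),
		"free:", atomic.LoadInt64(&stats.free))
}
```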
 package main
 
 import (
-	"flag"
 	"path/filepath"
 	"sync"
 
 	"github.com/Sirupsen/logrus"
 )
 
-var deleteOldTagVersions = flag.Bool("delete-old-tag-versions", true, "Delete old tag versions")
-
 type tagData struct {
 	repository *repositoryData
 	name string