Skip to content
Snippets Groups Projects
Commit 3a2127a4 authored by Nick Thomas's avatar Nick Thomas
Browse files

Skip too-large blobs

parent 2f35c7f8
No related branches found
No related tags found
1 merge request: !1 Initial implementation of an elasticsearch indexer in Go
Loading
Loading
@@ -13,12 +13,15 @@ import (
)
 
var (
	// timeoutError is the sentinel error returned when a bulk
	// operation exceeds its deadline.
	timeoutError = fmt.Errorf("Timeout")
)

const (
	// MaxBulkSize is the maximum payload size, in bytes, for a single
	// bulk submission to Elasticsearch.
	//
	// TODO: make this configurable / detectable.
	// Limiting to 10MiB lets us work on small AWS clusters, but unnecessarily
	// increases round trips in larger or non-AWS clusters
	MaxBulkSize = 10 * 1024 * 1024

	// BulkWorkers is the number of concurrent workers draining the
	// bulk submission queue.
	BulkWorkers = 2
)
 
type Client struct {
Loading
Loading
@@ -101,6 +104,11 @@ func (c *Client) ParentID() string {
return c.ProjectID
}
 
// SubmissionLimit reports the largest blob size, in bytes, that the
// client will accept for a single bulk submission.
//
// FIXME(nick): this should reserve some space for encoding
func (c *Client) SubmissionLimit() int64 {
	const limit int64 = MaxBulkSize
	return limit
}
// Flush forces any buffered bulk operations to be submitted
// immediately, delegating to the underlying bulk processor and
// returning any error it reports.
func (c *Client) Flush() error {
	return c.bulk.Flush()
}
Loading
Loading
Loading
Loading
@@ -10,6 +10,7 @@ import (
 
type Submitter interface {
ParentID() string
SubmissionLimit() int64
 
Index(id string, thing interface{}) error
Remove(id string) error
Loading
Loading
@@ -30,9 +31,13 @@ func (i *Indexer) SubmitCommit(c *object.Commit) error {
}
 
func (i *Indexer) SubmitBlob(f *object.File, _, toCommit *object.Commit) error {
// TODO(nick): Existing code doesn't index blobs > 10MiB in size
// FIXME(nick): Not sure commitSHA is right, or how it works at all
 
if f.Blob.Size > i.Submitter.SubmissionLimit() {
log.Printf("Skipping %s: too large: %dMiB", f.Name, f.Blob.Size/(1024*1024))
return nil
}
blob, err := BuildBlob(f, toCommit.Hash.String(), i.Submitter.ParentID())
if err != nil {
return err
Loading
Loading
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment