Skip to content
Snippets Groups Projects
Commit 4b7a937a authored by Nick Thomas's avatar Nick Thomas
Browse files

Reorganise code into two subpackages

parent 1b218c84
No related branches found
No related tags found
1 merge request!1Initial implementation of an elasticsearch indexer in Go
package elastic
import (
"encoding/json"
"io"
"io/ioutil"
)
type Config struct {
URL []string `json:"url"`
AWS bool `json:"aws"`
Region string `json:"aws_region"`
AccessKey string `json:"aws_access_key"`
SecretKey string `json:"aws_secret_access_key"`
}
func ReadConfig(r io.Reader) (*Config, error) {
var out Config
if err := json.NewDecoder(r).Decode(&out); err != nil {
return nil, err
}
return &out, nil
}
type ReaderAsJSONString struct {
io.Reader
}
func (r *ReaderAsJSONString) MarshalJSON() ([]byte, error) {
/* TODO: fewer copies
out := bytes.NewBuffer(nil)
err := out.WriteByte('"')
_, err = io.Copy(out, r.Reader) // FIXME: convert to valid JSON string data, escape quote marks
err = out.WriteByte('"')
return out.Bytes(), err
*/
data, err := ioutil.ReadAll(r.Reader)
if err != nil {
return nil, err
}
return json.Marshal(string(data))
}
type Req struct {
Blob *ReaderAsJSONString `json:"blob"`
}
package git
import (
"fmt"
"log"
"sync"
"srcd.works/go-git.v4"
"srcd.works/go-git.v4/plumbing"
"srcd.works/go-git.v4/plumbing/difftree"
"srcd.works/go-git.v4/plumbing/object"
)
var (
EndError = fmt.Errorf("Finished") // not really an error
)
type Repo struct {
Repo *git.Repository
FromHash plumbing.Hash
ToHash plumbing.Hash
FromCommit *object.Commit
ToCommit *object.Commit
}
func NewRepo(projectPath string, fromSHA string, toSHA string) (*Repo, error) {
out := &Repo{}
repo, err := git.PlainOpen(projectPath)
if err != nil {
return nil, err
}
out.Repo = repo
if fromSHA == "" {
out.FromHash = plumbing.ZeroHash
} else {
out.FromHash = plumbing.NewHash(fromSHA)
commit, err := repo.Commit(out.FromHash)
if err != nil {
return nil, fmt.Errorf("Bad from SHA (%s): %s", out.FromHash, err)
}
out.FromCommit = commit
}
if toSHA == "" {
ref, err := out.Repo.Head()
if err != nil {
return nil, err
}
out.ToHash = ref.Hash()
} else {
out.ToHash = plumbing.NewHash(toSHA)
}
commit, err := repo.Commit(out.ToHash)
if err != nil {
return nil, fmt.Errorf("Bad to SHA (%s): %s", out.ToHash, err)
}
out.ToCommit = commit
return out, nil
}
func (r *Repo) Diff() (difftree.Changes, error) {
var fromTree, toTree *object.Tree
if r.FromCommit != nil {
tree, err := r.FromCommit.Tree()
if err != nil {
return nil, err
}
fromTree = tree
}
toTree, err := r.ToCommit.Tree()
if err != nil {
return nil, err
}
return difftree.DiffTree(fromTree, toTree)
}
type FileFunc func(file *object.File)
func (r *Repo) EachFileChange(ins, mod, del FileFunc) error {
changes, err := r.Diff()
if err != nil {
return err
}
for _, change := range changes {
fromF, toF, err := change.Files()
if err != nil {
return err
}
switch change.Action {
case difftree.Insert:
toF.Name = change.To.Name
ins(toF)
case difftree.Modify:
toF.Name = change.To.Name
mod(toF)
case difftree.Delete:
fromF.Name = change.From.Name
del(fromF)
}
}
return nil
}
// EachCommit runs `f` for each commit within `fromSHA`...`toSHA` (i.e., the
// inclusive set).
func (r *Repo) EachCommit(f func(*object.Commit)) error {
err := object.WalkCommitHistory(r.ToCommit, func(c *object.Commit) error {
f(c)
if r.FromCommit != nil && c.ID() == r.FromCommit.ID() {
return EndError
}
return nil
})
if err != nil && err != EndError {
return err
}
return nil
}
func (r *Repo) IndexCommits() error {
commit := func(c *object.Commit) {
log.Print("Commit: ", c.ID())
// TODO: Read & json-encode the commit so we do the same disc work as the ruby version
//reader, err := c.Blob.Reader()
//if err != nil {
// os.Exit(1)
//}
//json.NewEncoder(ioutil.Discard).Encode(&Req{&ReaderAsJSONString{reader}})
//reader.Close()
}
return r.EachCommit(commit)
}
func (r *Repo) IndexBlobs() error {
insOrModFile := func(file *object.File) {
log.Print("Write: ", file.Name)
// Read & json-encode the file so we do the same work as the Ruby version
//reader, err := file.Blob.Reader()
//if err != nil {
// os.Exit(1)
//}
//json.NewEncoder(ioutil.Discard).Encode(&Req{&ReaderAsJSONString{reader}})
//reader.Close()
}
delFile := func(file *object.File) {
log.Print("Delete: ", file.Name)
}
return r.EachFileChange(insOrModFile, insOrModFile, delFile)
}
func (r *Repo) Index() error {
wg := &sync.WaitGroup{}
wg.Add(2)
go func() {
if err := r.IndexBlobs(); err != nil {
log.Print("Error while indexing blobs:", err)
}
wg.Done()
}()
go func() {
if err := r.IndexCommits(); err != nil {
log.Print("Error while indexing commits:", err)
}
wg.Done()
}()
wg.Wait()
return nil
}
package main
 
import (
"encoding/json"
"fmt"
"io/ioutil"
"io"
"log"
"os"
"strings"
 
"srcd.works/go-git.v4"
"srcd.works/go-git.v4/plumbing/difftree"
"srcd.works/go-git.v4/plumbing/object"
"srcd.works/go-git.v4/plumbing"
"gitlab.com/gitlab-org/es-git-go/elastic"
"gitlab.com/gitlab-org/es-git-go/git"
)
 
type ESConfig struct {
URL []string `json:"url"`
AWS bool `json:"aws"`
Region string `json:"aws_region"`
AccessKey string `json:"aws_access_key"`
SecretKey string `json:"aws_secret_access_key"`
}
type Repo struct {
Repo *git.Repository
FromHash plumbing.Hash
ToHash plumbing.Hash
FromCommit *object.Commit
ToCommit *object.Commit
}
type ReaderAsJSONString struct {
io.Reader
}
func (r *ReaderAsJSONString) MarshalJSON() ([]byte, error) {
/* TODO: fewer copies
out := bytes.NewBuffer(nil)
err := out.WriteByte('"')
_, err = io.Copy(out, r.Reader) // FIXME: convert to valid JSON string data, escape quote marks
err = out.WriteByte('"')
return out.Bytes(), err
*/
data, err := ioutil.ReadAll(r.Reader)
if err != nil {
return nil, err
}
return json.Marshal(string(data))
}
type Req struct {
Blob *ReaderAsJSONString `json:"blob"`
}
func NewRepo(projectPath string, fromSHA string, toSHA string) (*Repo, error) {
out := &Repo{}
repo, err := git.PlainOpen(projectPath)
if err != nil {
return nil, err
}
out.Repo = repo
if fromSHA == "" {
out.FromHash = plumbing.ZeroHash
} else {
out.FromHash = plumbing.NewHash(fromSHA)
commit, err := repo.Commit(out.FromHash)
if err != nil {
return nil, fmt.Errorf("Bad from SHA (%s): %s", out.FromHash, err)
}
out.FromCommit = commit
}
if toSHA == "" {
ref, err := out.Repo.Head()
if err != nil {
return nil, err
}
out.ToHash = ref.Hash()
} else {
out.ToHash = plumbing.NewHash(toSHA)
}
commit, err := repo.Commit(out.ToHash)
if err != nil {
return nil, fmt.Errorf("Bad to SHA (%s): %s", out.ToHash, err)
}
out.ToCommit = commit
return out, nil
}
func (r *Repo) Diff() (difftree.Changes, error) {
var fromTree, toTree *object.Tree
if r.FromCommit != nil {
tree, err := r.FromCommit.Tree()
if err != nil {
return nil, err
}
fromTree = tree
}
toTree, err := r.ToCommit.Tree()
if err != nil {
return nil, err
}
return difftree.DiffTree(fromTree, toTree)
}
type FileFunc func(file *object.File)
func (r *Repo) EachFileChange(ins, mod, del FileFunc) error {
changes, err := r.Diff()
if err != nil {
fmt.Printf("Couldn't calculate diff: %s\n", err)
os.Exit(1)
}
for _, change := range changes {
fromF, toF, err := change.Files()
if err != nil {
return err
}
switch change.Action {
case difftree.Insert:
toF.Name = change.To.Name
ins(toF)
case difftree.Modify:
toF.Name = change.To.Name
mod(toF)
case difftree.Delete:
fromF.Name = change.From.Name
del(fromF)
}
}
return nil
}
var EndError = fmt.Errorf("Finished") // not really an error
func (r *Repo) EachCommit(f func(*object.Commit)) {
object.WalkCommitHistory(r.ToCommit, func(c *object.Commit) error {
if r.FromCommit != nil && c.ID() == r.FromCommit.ID() {
return EndError
}
f(c)
return nil
})
}
func main() {
var esConfig ESConfig
if len(os.Args) != 3 {
fmt.Printf("Usage: %s <project-id> <project-path>\n", os.Args[0])
os.Exit(1)
log.Fatalf("Usage: %s <project-id> <project-path>", os.Args[0])
}
 
if err := json.Unmarshal([]byte(os.Getenv("ELASTIC_CONNECTION_INFO")), &esConfig); err != nil {
fmt.Printf("Error parsing ELASTIC_CONNECTION_INFO: %s\n", err)
os.Exit(1)
_, err := elastic.ReadConfig(strings.NewReader(os.Getenv("ELASTIC_CONNECTION_INFO")))
if err != nil {
log.Fatalln("Couldn't parse ELASTIC_CONNECTION_INFO:", err)
}
 
projectID := os.Args[1]
Loading
Loading
@@ -183,46 +25,13 @@ func main() {
toSHA := os.Getenv("TO_SHA")
railsEnv := os.Getenv("RAILS_ENV")
 
repo, err := NewRepo(projectPath, fromSHA, toSHA)
repo, err := git.NewRepo(projectPath, fromSHA, toSHA)
if err != nil {
fmt.Printf("Failed to open %s: %s\n", projectPath, err)
os.Exit(1)
log.Fatalf("Failed to open %s: %s", projectPath, err)
}
 
fmt.Printf("From %s to %s\n", repo.FromHash, repo.ToHash)
fmt.Printf("Project ID: %s, rails env: %s\n", projectID, railsEnv)
insOrModFile := func(file *object.File) {
fmt.Println("Write", file.Name)
// Read & json-encode the file so we do the same work as the Ruby version
reader, err := file.Blob.Reader()
if err != nil {
os.Exit(1)
}
json.NewEncoder(ioutil.Discard).Encode(&Req{&ReaderAsJSONString{reader}})
reader.Close()
}
delFile := func(file *object.File) {
fmt.Println("Delete", file.Name)
// The Ruby version doesn't read the file either
}
// index_blobs
repo.EachFileChange(insOrModFile, insOrModFile, delFile)
commit := func(c *object.Commit) {
fmt.Println("Commit", c.ID())
// TODO: Read & json-encode the commit so we do the same disc work as the ruby version
//reader, err := c.Blob.Reader()
//if err != nil {
// os.Exit(1)
//}
//json.NewEncoder(ioutil.Discard).Encode(&Req{&ReaderAsJSONString{reader}})
//reader.Close()
}
log.Printf("Indexing from %s to %s", repo.FromHash, repo.ToHash)
log.Printf("Project ID: %s, Rails env: %s", projectID, railsEnv)
 
// index_commits
repo.EachCommit(commit)
repo.Index() // TODO: pass in something to receive the indexed data
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment