Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reimplement DB collections for mirrors, repos and snapshots #766

Merged
merged 1 commit into from
Aug 21, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ RUN_LONG_TESTS?=yes

GO_1_10_AND_HIGHER=$(shell (printf '%s\n' go1.10 $(GOVERSION) | sort -cV >/dev/null 2>&1) && echo "yes")

all: test check system-test
all: test bench check system-test

prepare:
go get -u github.com/alecthomas/gometalinter
Expand Down Expand Up @@ -57,6 +57,9 @@ else
go test -v `go list ./... | grep -v vendor/` -gocheck.v=true
endif

bench:
go test -v ./deb -run=nothing -bench=. -benchmem

mem.png: mem.dat mem.gp
gnuplot mem.gp
open mem.png
Expand Down
116 changes: 65 additions & 51 deletions deb/local.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package deb

import (
"bytes"
"errors"
"fmt"
"log"
"sync"
Expand Down Expand Up @@ -93,52 +94,68 @@ func (repo *LocalRepo) RefKey() []byte {
// LocalRepoCollection does listing, updating/adding/deleting of LocalRepos
type LocalRepoCollection struct {
*sync.RWMutex
db database.Storage
list []*LocalRepo
db database.Storage
cache map[string]*LocalRepo
}

// NewLocalRepoCollection loads LocalRepos from DB and makes up collection
func NewLocalRepoCollection(db database.Storage) *LocalRepoCollection {
return &LocalRepoCollection{
RWMutex: &sync.RWMutex{},
db: db,
cache: make(map[string]*LocalRepo),
}
}

func (collection *LocalRepoCollection) loadList() {
if collection.list != nil {
return
func (collection *LocalRepoCollection) search(filter func(*LocalRepo) bool, unique bool) []*LocalRepo {
result := []*LocalRepo(nil)
for _, r := range collection.cache {
if filter(r) {
result = append(result, r)
}
}

blobs := collection.db.FetchByPrefix([]byte("L"))
collection.list = make([]*LocalRepo, 0, len(blobs))
if unique && len(result) > 0 {
return result
}

for _, blob := range blobs {
collection.db.ProcessByPrefix([]byte("L"), func(key, blob []byte) error {
r := &LocalRepo{}
if err := r.Decode(blob); err != nil {
log.Printf("Error decoding repo: %s\n", err)
} else {
collection.list = append(collection.list, r)
log.Printf("Error decoding local repo: %s\n", err)
return nil
}
}

if filter(r) {
if _, exists := collection.cache[r.UUID]; !exists {
collection.cache[r.UUID] = r
result = append(result, r)
if unique {
return errors.New("abort")
}
}
}

return nil
})

return result
}

// Add appends new repo to collection and saves it
func (collection *LocalRepoCollection) Add(repo *LocalRepo) error {
collection.loadList()
_, err := collection.ByName(repo.Name)

for _, r := range collection.list {
if r.Name == repo.Name {
return fmt.Errorf("local repo with name %s already exists", repo.Name)
}
if err == nil {
return fmt.Errorf("local repo with name %s already exists", repo.Name)
}

err := collection.Update(repo)
err = collection.Update(repo)
if err != nil {
return err
}

collection.list = append(collection.list, repo)
collection.cache[repo.UUID] = repo
return nil
}

Expand All @@ -159,8 +176,6 @@ func (collection *LocalRepoCollection) Update(repo *LocalRepo) error {

// LoadComplete loads additional information for local repo
func (collection *LocalRepoCollection) LoadComplete(repo *LocalRepo) error {
collection.loadList()

encoded, err := collection.db.Get(repo.RefKey())
if err == database.ErrNotFound {
return nil
Expand All @@ -175,26 +190,39 @@ func (collection *LocalRepoCollection) LoadComplete(repo *LocalRepo) error {

// ByName looks up repository by name
func (collection *LocalRepoCollection) ByName(name string) (*LocalRepo, error) {
collection.loadList()

for _, r := range collection.list {
if r.Name == name {
return r, nil
}
result := collection.search(func(r *LocalRepo) bool { return r.Name == name }, true)
if len(result) == 0 {
return nil, fmt.Errorf("local repo with name %s not found", name)
}
return nil, fmt.Errorf("local repo with name %s not found", name)

return result[0], nil
}

// ByUUID looks up repository by uuid
func (collection *LocalRepoCollection) ByUUID(uuid string) (*LocalRepo, error) {
collection.loadList()
if r, ok := collection.cache[uuid]; ok {
return r, nil
}

for _, r := range collection.list {
if r.UUID == uuid {
return r, nil
}
key := (&LocalRepo{UUID: uuid}).Key()

value, err := collection.db.Get(key)
if err == database.ErrNotFound {
return nil, fmt.Errorf("local repo with uuid %s not found", uuid)
}

if err != nil {
return nil, err
}

r := &LocalRepo{}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this doesn't seem to be covered by a test (https://codecov.io/gh/aptly-dev/aptly/pull/766/diff#D2-210)

I think as this is a success path it would be good to have a unit test for it. There are some similar code bits uncovered in the other collections too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep, thanks, I will check it!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've improved test coverage and added some benchmarks for ByUUID() and ByName(). They exercise "worst case": collection is created and one lookup is performed. On master branch this always leads to loading all the objects.

For ByUUID() in the new approach loading is fast, as object is looked up directly (loading only single element). ByName() still requires scanning whole collection, in the worst case it is as bad as the old approach, on average it's 50% better.

ForEach() doesn't cache objects in memory, so this should help #761 as objects would be GCed as soon as they're scanned. ByUUID() is used a lot to lookup source of published repositories.

On master:

BenchmarkSnapshotCollectionByUUID-8    	     500	   2953932 ns/op	 1352168 B/op	   30743 allocs/op
BenchmarkSnapshotCollectionByName-8    	     500	   2922043 ns/op	 1352504 B/op	   30747 allocs/op

This branch:

BenchmarkSnapshotCollectionByUUID-8    	  300000	      4492 ns/op	    1792 B/op	      39 allocs/op
BenchmarkSnapshotCollectionByName-8    	    1000	   1433058 ns/op	  533994 B/op	   14870 allocs/op

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks good. I guess we can merge this and try to get some feedback of the users with a specific problem in #761 whether this fix helps.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also had an idea to implement very simple "index" to lookup things by name (which is really frequent lookup): just a key in DB which is like name -> UUID. ByName() could use this index optimistically - if it's missing or doesn't point to right entry, ByName() falls back to full scan.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be a way. I think we should see first how these changes are in terms of performance in a actual use case before we make the code even more complex.

err = r.Decode(value)

if err == nil {
collection.cache[r.UUID] = r
}
return nil, fmt.Errorf("local repo with uuid %s not found", uuid)

return r, err
}

// ForEach runs method for each repository
Expand All @@ -212,30 +240,16 @@ func (collection *LocalRepoCollection) ForEach(handler func(*LocalRepo) error) e

// Len returns number of remote repos
func (collection *LocalRepoCollection) Len() int {
collection.loadList()

return len(collection.list)
return len(collection.db.KeysByPrefix([]byte("L")))
}

// Drop removes remote repo from collection
func (collection *LocalRepoCollection) Drop(repo *LocalRepo) error {
collection.loadList()

repoPosition := -1

for i, r := range collection.list {
if r == repo {
repoPosition = i
break
}
}

if repoPosition == -1 {
if _, err := collection.db.Get(repo.Key()); err == database.ErrNotFound {
panic("local repo not found!")
}

collection.list[len(collection.list)-1], collection.list[repoPosition], collection.list =
nil, collection.list[len(collection.list)-1], collection.list[:len(collection.list)-1]
delete(collection.cache, repo.UUID)

err := collection.db.Delete(repo.Key())
if err != nil {
Expand Down
5 changes: 5 additions & 0 deletions deb/local_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@ func (s *LocalRepoCollectionSuite) TestByUUID(c *C) {

r, err := s.collection.ByUUID(repo.UUID)
c.Assert(err, IsNil)
c.Assert(r, Equals, repo)

collection := NewLocalRepoCollection(s.db)
r, err = collection.ByUUID(repo.UUID)
c.Assert(err, IsNil)
c.Assert(r.String(), Equals, repo.String())
}

Expand Down
Loading