Skip to content

Commit

Permalink
perf: fix performance issue with dump (#9)
Browse files Browse the repository at this point in the history
* perf: fix issue with dump

* perf: fix issue with dump
  • Loading branch information
adrienaury committed Mar 28, 2024
1 parent 7247617 commit f60195f
Show file tree
Hide file tree
Showing 4 changed files with 280 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Types of changes
## [0.3.0]

- `Added` cpu and memory profiling with `--profiling mem|cpu` flag
- `Fixed` performance issues on dump in exchange for higher RAM consumption, using `--limited-ram` flag will fall back to the 0.2.0 dump version

## [0.2.0]

Expand Down
22 changes: 17 additions & 5 deletions internal/app/cli/dump.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@ import (

func NewDumpCommand(parent string, stderr *os.File, stdout *os.File, stdin *os.File) *cobra.Command {
var (
include []string
watch bool
include []string
watch bool
limitedRAM bool
)

cmd := &cobra.Command{ //nolint:exhaustruct
Expand All @@ -39,14 +40,15 @@ func NewDumpCommand(parent string, stderr *os.File, stdout *os.File, stdin *os.F
Example: " " + parent + " dump clients",
Args: cobra.ExactArgs(1),
Run: func(_ *cobra.Command, args []string) {
if err := dump(args[0], include, watch); err != nil {
if err := dump(args[0], include, watch, limitedRAM); err != nil {
log.Fatal().Err(err).Int("return", 1).Msg("end SILO")
}
},
}

cmd.Flags().StringSliceVarP(&include, "include", "i", []string{}, "include only these columns, exclude all others")
cmd.Flags().BoolVarP(&watch, "watch", "w", false, "watch statistics about dumped entities in stderr")
cmd.Flags().BoolVar(&limitedRAM, "limited-ram", false, "limit RAM usage, slower but more efficient on RAM usage")

cmd.Flags().SortFlags = false

Expand All @@ -57,8 +59,18 @@ func NewDumpCommand(parent string, stderr *os.File, stdout *os.File, stdin *os.F
return cmd
}

func dump(path string, include []string, watch bool) error {
backend, err := infra.NewBackend(path)
func dump(path string, include []string, watch bool, limitedRAM bool) error {
var (
backend silo.Backend
err error
)

if limitedRAM {
backend, err = infra.NewBackend(path)
} else {
backend, err = infra.NewBackendFull(path)
}

if err != nil {
return fmt.Errorf("%w", err)
}
Expand Down
117 changes: 117 additions & 0 deletions internal/infra/backend_full.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
// Copyright (C) 2024 CGI France
//
// This file is part of SILO.
//
// SILO is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SILO is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SILO. If not, see <http://www.gnu.org/licenses/>.

package infra

import (
"fmt"

"github.com/cgi-fr/silo/pkg/silo"
"github.com/cockroachdb/pebble"
)

type BackendFull struct {
Backend
}

func NewBackendFull(path string) (BackendFull, error) {
backend, err := NewBackend(path)
if err != nil {
return BackendFull{backend}, err
}

return BackendFull{backend}, nil
}

func (b BackendFull) Snapshot() silo.Snapshot { //nolint:ireturn
return NewSnapshotFull(b.db)
}

type SnapshotFull struct {
db *pebble.DB
nodes map[string][]byte
loaded bool
}

const DefaultFullMapCap = 1024

func NewSnapshotFull(db *pebble.DB) silo.Snapshot { //nolint:ireturn
return &SnapshotFull{
db: db,
nodes: make(map[string][]byte, DefaultFullMapCap),
loaded: false,
}
}

func (s *SnapshotFull) Load() error {
iter, err := s.db.NewIter(&pebble.IterOptions{}) //nolint:exhaustruct
if err != nil {
return fmt.Errorf("%w", err)
}

for iter.First(); iter.Valid(); iter.Next() {
s.nodes[string(iter.Key())] = iter.Value()
}

s.loaded = true

return nil
}

func (s *SnapshotFull) Next() (silo.DataNode, bool, error) {
if !s.loaded {
if err := s.Load(); err != nil {
return silo.DataNode{Key: "", Data: ""}, false, err
}
}

for key := range s.nodes {
node, err := decodeKey([]byte(key))
if err != nil {
return silo.DataNode{Key: "", Data: ""}, false, fmt.Errorf("%w", err)
}

return node, true, nil
}

return silo.DataNode{Key: "", Data: ""}, false, nil
}

func (s *SnapshotFull) PullAll(node silo.DataNode) ([]silo.DataNode, error) {
key, err := node.Binary()
if err != nil {
return nil, fmt.Errorf("%w", err)
}

item, has := s.nodes[string(key)]
if !has {
return []silo.DataNode{}, nil
}

set, err := decode(item)
if err != nil {
return nil, fmt.Errorf("%w", err)
}

delete(s.nodes, string(key))

return set, nil
}

func (s *SnapshotFull) Close() error {
return nil
}
145 changes: 145 additions & 0 deletions internal/infra/backend_iterate_once.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
// Copyright (C) 2024 CGI France
//
// This file is part of SILO.
//
// SILO is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// SILO is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with SILO. If not, see <http://www.gnu.org/licenses/>.

package infra

import (
"errors"
"fmt"

"github.com/cgi-fr/silo/pkg/silo"
"github.com/cockroachdb/pebble"
)

type BackendInterateOnce struct {
Backend
}

func NewBackendInterateOnce(path string) (BackendInterateOnce, error) {
backend, err := NewBackend(path)
if err != nil {
return BackendInterateOnce{backend}, err
}

return BackendInterateOnce{backend}, nil
}

func (b BackendInterateOnce) Snapshot() silo.Snapshot { //nolint:ireturn
return NewSnapshotInterateOnce(b.db)
}

type SnapshotInterateOnce struct {
db *pebble.DB
iter *pebble.Iterator
pulled map[string]bool
}

const DefaultPulledMapCap = 128

func NewSnapshotInterateOnce(db *pebble.DB) silo.Snapshot { //nolint:ireturn
return SnapshotInterateOnce{
db: db,
iter: nil,
pulled: make(map[string]bool, DefaultPulledMapCap),
}
}

func (s SnapshotInterateOnce) Next() (silo.DataNode, bool, error) {
if s.iter == nil { //nolint:nestif
var err error
if s.iter, err = s.db.NewIter(&pebble.IterOptions{}); err != nil { //nolint:exhaustruct
return silo.DataNode{Key: "", Data: ""}, false, fmt.Errorf("%w", err)
}

if !s.iter.First() {
return silo.DataNode{Key: "", Data: ""}, false, nil
}

if _, pulled := s.pulled[string(s.iter.Key())]; !pulled {
node, err := decodeKey(s.iter.Key())
if err != nil {
return silo.DataNode{Key: "", Data: ""}, false, fmt.Errorf("%w", err)
}

return node, true, nil
}
}

for {
if !s.iter.Next() {
return silo.DataNode{Key: "", Data: ""}, false, nil
}

if _, pulled := s.pulled[string(s.iter.Key())]; !pulled {
node, err := decodeKey(s.iter.Key())
if err != nil {
return silo.DataNode{Key: "", Data: ""}, false, fmt.Errorf("%w", err)
}

return node, true, nil
}
}
}

func (s SnapshotInterateOnce) PullAll(node silo.DataNode) ([]silo.DataNode, error) {
key, err := node.Binary()
if err != nil {
return nil, fmt.Errorf("%w", err)
}

if _, pulled := s.pulled[string(key)]; pulled {
return []silo.DataNode{}, nil
}

s.pulled[string(key)] = true

item, closer, err := s.db.Get(key)
if errors.Is(err, pebble.ErrNotFound) {
return []silo.DataNode{}, nil
} else if err != nil {
return nil, fmt.Errorf("%w", err)
}
defer closer.Close()

set, err := decode(item)
if err != nil {
return nil, fmt.Errorf("%w", err)
}

return set, nil
}

func (s SnapshotInterateOnce) Close() error {
if s.iter == nil {
return nil
}

if err := s.iter.Close(); err != nil {
return fmt.Errorf("%w", err)
}

return nil
}

func decodeKey(rawKey []byte) (silo.DataNode, error) {
key, err := silo.DecodeDataNode(rawKey)
if err != nil {
return silo.DataNode{Key: "", Data: ""}, fmt.Errorf("%w", err)
}

return key, nil
}

0 comments on commit f60195f

Please sign in to comment.