From 7d5942c03adfe73bcd21e8910e7ec515df84ec75 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Sun, 7 Mar 2021 14:18:56 +0200 Subject: [PATCH 01/34] WIP: hack to get recording rules working and pushing to Cortex/Prometheus Signed-off-by: Danny Kopping --- pkg/ruler/manager/appender.go | 133 ++++++++++++++++++++++++++++++++++ pkg/ruler/manager/compat.go | 23 +++++- 2 files changed, 155 insertions(+), 1 deletion(-) create mode 100644 pkg/ruler/manager/appender.go diff --git a/pkg/ruler/manager/appender.go b/pkg/ruler/manager/appender.go new file mode 100644 index 000000000000..f3c4a4e147ed --- /dev/null +++ b/pkg/ruler/manager/appender.go @@ -0,0 +1,133 @@ +package manager + +import ( + "context" + "fmt" + + "github.com/cortexproject/cortex/pkg/ingester/client" + "github.com/gogo/protobuf/proto" + "github.com/golang/snappy" + "github.com/prometheus/prometheus/pkg/labels" + "github.com/prometheus/prometheus/prompb" + "github.com/prometheus/prometheus/storage" + "github.com/prometheus/prometheus/storage/remote" +) + +type TestAppendable struct { + remoteWriter remote.WriteClient +} + +type TestAppender struct { + ctx context.Context + remoteWriter remote.WriteClient + + labels []labels.Labels + samples []client.Sample +} + +// from github.com/prometheus/prometheus/storage/remote/codec.go +func labelsToLabelsProto(labels labels.Labels, buf []prompb.Label) []prompb.Label { + result := buf[:0] + if cap(buf) < len(labels) { + result = make([]prompb.Label, 0, len(labels)) + } + for _, l := range labels { + result = append(result, prompb.Label{ + Name: l.Name, + Value: l.Value, + }) + } + return result +} + +func (a *TestAppender) encodeRequest(l labels.Labels, s client.Sample) ([]byte, error) { + ts := prompb.TimeSeries{ + Labels: labelsToLabelsProto(l, nil), + Samples: []prompb.Sample{ + prompb.Sample{ + Value: s.Value, + Timestamp: s.TimestampMs, + }, + }, + } + + // Create write request + data, err := proto.Marshal(&prompb.WriteRequest{Timeseries: []prompb.TimeSeries{ts}}) + if err != nil { + return nil, err + } + + // Create HTTP request + compressed := snappy.Encode(nil, data) + return compressed, nil +} + +func (a *TestAppendable) Appender(ctx context.Context) storage.Appender { + return &TestAppender{ + ctx: ctx, + remoteWriter: a.remoteWriter, + } +} + +// Add adds a sample pair for the given series. A reference number is +// returned which can be used to add further samples in the same or later +// transactions. +// Returned reference numbers are ephemeral and may be rejected in calls +// to AddFast() at any point. Adding the sample via Add() returns a new +// reference number. +// If the reference is 0 it must not be used for caching. +func (a *TestAppender) Add(l labels.Labels, t int64, v float64) (uint64, error) { + a.labels = append(a.labels, l) + + a.samples = append(a.samples, client.Sample{ + TimestampMs: t, + Value: v, + }) + + fmt.Printf("GOT HERE! %v %v %v\n\n\n", l, t, v) + + return 0, nil +} + +// AddFast adds a sample pair for the referenced series. It is generally +// faster than adding a sample by providing its full label set. +func (a *TestAppender) AddFast(ref uint64, t int64, v float64) error { + return storage.ErrNotFound +} + +// Commit submits the collected samples and purges the batch. If Commit +// returns a non-nil error, it also rolls back all modifications made in +// the appender so far, as Rollback would do. In any case, an Appender +// must not be used anymore after Commit has been called. 
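Note for readers following the hack above: pushing a recording-rule sample over Prometheus remote_write boils down to converting the label set and sample into prompb types, marshalling a prompb.WriteRequest, and snappy-compressing the result. The standalone sketch below is not taken from this patch series; it condenses that encoding path using the same libraries the patch imports, and the metric name, label values, and timestamp are invented for illustration.

package main

import (
	"fmt"

	"github.com/gogo/protobuf/proto"
	"github.com/golang/snappy"
	"github.com/prometheus/prometheus/pkg/labels"
	"github.com/prometheus/prometheus/prompb"
)

// encodeWriteRequest builds the payload a remote_write receiver expects:
// a snappy-compressed, protobuf-encoded prompb.WriteRequest.
func encodeWriteRequest(l labels.Labels, value float64, tsMs int64) ([]byte, error) {
	pbLabels := make([]prompb.Label, 0, len(l))
	for _, lbl := range l {
		pbLabels = append(pbLabels, prompb.Label{Name: lbl.Name, Value: lbl.Value})
	}

	req := &prompb.WriteRequest{
		Timeseries: []prompb.TimeSeries{{
			Labels:  pbLabels,
			Samples: []prompb.Sample{{Value: value, Timestamp: tsMs}},
		}},
	}

	data, err := proto.Marshal(req)
	if err != nil {
		return nil, err
	}
	return snappy.Encode(nil, data), nil
}

func main() {
	// Invented series and timestamp, purely for illustration.
	lbls := labels.FromStrings("__name__", "my_recording_rule", "tenant", "fake")
	payload, err := encodeWriteRequest(lbls, 42, 1615125000000)
	if err != nil {
		panic(err)
	}
	fmt.Printf("encoded %d bytes\n", len(payload))
}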
+func (a *TestAppender) Commit() error { + + if len(a.samples) == 0 { + return nil + } + + fmt.Printf("COMMIT TO TSDB!\n\n") + req := client.ToWriteRequest(a.labels, a.samples, nil, client.API) + b, err := req.Marshal() + if err != nil { + panic(err) + } + + var dst []byte + x := snappy.Encode(dst, b) + + err = a.remoteWriter.Store(a.ctx, x) + if err != nil { + fmt.Printf("%v", err) + } + + return nil +} + +// Rollback rolls back all modifications made in the appender so far. +// Appender has to be discarded after rollback. +func (a *TestAppender) Rollback() error { + a.labels = nil + a.samples = nil + + return nil +} diff --git a/pkg/ruler/manager/compat.go b/pkg/ruler/manager/compat.go index 05a56692762a..2d3bc15e64eb 100644 --- a/pkg/ruler/manager/compat.go +++ b/pkg/ruler/manager/compat.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "io/ioutil" + "net/url" "strings" "time" @@ -11,6 +12,7 @@ import ( "github.com/go-kit/kit/log" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/notifier" "github.com/prometheus/prometheus/pkg/labels" @@ -19,6 +21,7 @@ import ( "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/rules" + "github.com/prometheus/prometheus/storage/remote" "github.com/prometheus/prometheus/template" "github.com/weaveworks/common/user" yaml "gopkg.in/yaml.v3" @@ -106,8 +109,26 @@ func MemstoreTenantManager( queryFunc := engineQueryFunc(engine, overrides, userID) memStore := NewMemStore(userID, queryFunc, metrics, 5*time.Minute, log.With(logger, "subcomponent", "MemStore")) + // cortex + //u, err := url.Parse("http://localhost:8001/api/prom/push") + + // prometheus + u, err := url.Parse("http://localhost:9090/api/v1/write") + if err != nil { + panic(err) + } + + writeClient, err := remote.NewWriteClient("cortex", &remote.ClientConfig{ + URL: &config.URL{u}, + Timeout: model.Duration(5 * time.Second), + HTTPClientConfig: config.HTTPClientConfig{}, + }) + if err != nil { + panic(err) + } + mgr := rules.NewManager(&rules.ManagerOptions{ - Appendable: NoopAppender{}, + Appendable: &TestAppendable{remoteWriter: writeClient}, Queryable: memStore, QueryFunc: queryFunc, Context: user.InjectOrgID(ctx, userID), From 2215f7b018d6bce49a9171278ef9f1279b4fda91 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Sun, 7 Mar 2021 17:30:40 +0200 Subject: [PATCH 02/34] Refactoring Adding remote_write config for ruler Signed-off-by: Danny Kopping --- pkg/loki/loki.go | 4 +-- pkg/ruler/config/config.go | 24 ++++++++++++++++++ pkg/ruler/manager/appender.go | 43 ++++++++++++++++++------------- pkg/ruler/manager/compat.go | 48 ++++++++++++++++++++--------------- pkg/ruler/ruler.go | 18 +++---------- 5 files changed, 83 insertions(+), 54 deletions(-) create mode 100644 pkg/ruler/config/config.go diff --git a/pkg/loki/loki.go b/pkg/loki/loki.go index 2e2d384a9adf..f55d6f0ab6c7 100644 --- a/pkg/loki/loki.go +++ b/pkg/loki/loki.go @@ -10,6 +10,7 @@ import ( frontend "github.com/cortexproject/cortex/pkg/frontend/v1" "github.com/cortexproject/cortex/pkg/querier/worker" "github.com/felixge/fgprof" + "github.com/grafana/loki/pkg/ruler/config" "github.com/grafana/loki/pkg/storage/stores/shipper/compactor" @@ -41,7 +42,6 @@ import ( "github.com/grafana/loki/pkg/lokifrontend" "github.com/grafana/loki/pkg/querier" "github.com/grafana/loki/pkg/querier/queryrange" - "github.com/grafana/loki/pkg/ruler" 
"github.com/grafana/loki/pkg/storage" "github.com/grafana/loki/pkg/tracing" serverutil "github.com/grafana/loki/pkg/util/server" @@ -66,7 +66,7 @@ type Config struct { TableManager chunk.TableManagerConfig `yaml:"table_manager,omitempty"` Worker worker.Config `yaml:"frontend_worker,omitempty"` Frontend lokifrontend.Config `yaml:"frontend,omitempty"` - Ruler ruler.Config `yaml:"ruler,omitempty"` + Ruler config.Config `yaml:"ruler,omitempty"` QueryRange queryrange.Config `yaml:"query_range,omitempty"` RuntimeConfig runtimeconfig.ManagerConfig `yaml:"runtime_config,omitempty"` MemberlistKV memberlist.KVConfig `yaml:"memberlist"` diff --git a/pkg/ruler/config/config.go b/pkg/ruler/config/config.go new file mode 100644 index 000000000000..9757459b801c --- /dev/null +++ b/pkg/ruler/config/config.go @@ -0,0 +1,24 @@ +package config + +import ( + "github.com/cortexproject/cortex/pkg/ruler" + "github.com/pkg/errors" +) + +type Config struct { + ruler.Config `yaml:",inline"` + + RemoteWriteConfig `yaml:"remote_write"` +} + +type RemoteWriteConfig struct { + URL string `yaml:"url"` +} + +// Override the embedded cortex variant which expects a cortex limits struct. Instead copy the relevant bits over. +func (cfg *Config) Validate() error { + if err := cfg.StoreConfig.Validate(); err != nil { + return errors.Wrap(err, "invalid storage config") + } + return nil +} diff --git a/pkg/ruler/manager/appender.go b/pkg/ruler/manager/appender.go index f3c4a4e147ed..64bf4cb74068 100644 --- a/pkg/ruler/manager/appender.go +++ b/pkg/ruler/manager/appender.go @@ -2,9 +2,10 @@ package manager import ( "context" - "fmt" "github.com/cortexproject/cortex/pkg/ingester/client" + "github.com/go-kit/kit/log" + "github.com/go-kit/kit/log/level" "github.com/gogo/protobuf/proto" "github.com/golang/snappy" "github.com/prometheus/prometheus/pkg/labels" @@ -14,12 +15,14 @@ import ( ) type TestAppendable struct { - remoteWriter remote.WriteClient + logger log.Logger + remoteWriter *remote.WriteClient } type TestAppender struct { + logger log.Logger ctx context.Context - remoteWriter remote.WriteClient + remoteWriter *remote.WriteClient labels []labels.Labels samples []client.Sample @@ -44,7 +47,7 @@ func (a *TestAppender) encodeRequest(l labels.Labels, s client.Sample) ([]byte, ts := prompb.TimeSeries{ Labels: labelsToLabelsProto(l, nil), Samples: []prompb.Sample{ - prompb.Sample{ + { Value: s.Value, Timestamp: s.TimestampMs, }, @@ -65,6 +68,7 @@ func (a *TestAppender) encodeRequest(l labels.Labels, s client.Sample) ([]byte, func (a *TestAppendable) Appender(ctx context.Context) storage.Appender { return &TestAppender{ ctx: ctx, + logger: a.logger, remoteWriter: a.remoteWriter, } } @@ -84,8 +88,6 @@ func (a *TestAppender) Add(l labels.Labels, t int64, v float64) (uint64, error) Value: v, }) - fmt.Printf("GOT HERE! %v %v %v\n\n\n", l, t, v) - return 0, nil } @@ -100,27 +102,34 @@ func (a *TestAppender) AddFast(ref uint64, t int64, v float64) error { // the appender so far, as Rollback would do. In any case, an Appender // must not be used anymore after Commit has been called. 
func (a *TestAppender) Commit() error { + if a.remoteWriter == nil { + level.Debug(a.logger).Log("msg", "no remote_write client defined, skipping commit") + } + + level.Debug(a.logger).Log("msg", "writing to remote_write target") + writer := *a.remoteWriter - if len(a.samples) == 0 { - return nil + req, err := a.prepareRequest() + if err != nil { + return err } - fmt.Printf("COMMIT TO TSDB!\n\n") - req := client.ToWriteRequest(a.labels, a.samples, nil, client.API) - b, err := req.Marshal() + err = writer.Store(a.ctx, req) if err != nil { - panic(err) + return err } - var dst []byte - x := snappy.Encode(dst, b) + return nil +} - err = a.remoteWriter.Store(a.ctx, x) +func (a *TestAppender) prepareRequest() ([]byte, error) { + req := client.ToWriteRequest(a.labels, a.samples, nil, client.API) + reqBytes, err := req.Marshal() if err != nil { - fmt.Printf("%v", err) + return nil, err } - return nil + return snappy.Encode(nil, reqBytes), nil } // Rollback rolls back all modifications made in the appender so far. diff --git a/pkg/ruler/manager/compat.go b/pkg/ruler/manager/compat.go index 2d3bc15e64eb..bfca86df9385 100644 --- a/pkg/ruler/manager/compat.go +++ b/pkg/ruler/manager/compat.go @@ -10,6 +10,8 @@ import ( "github.com/cortexproject/cortex/pkg/ruler" "github.com/go-kit/kit/log" + "github.com/go-kit/kit/log/level" + rulerConfig "github.com/grafana/loki/pkg/ruler/config" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -87,7 +89,7 @@ func (m *MultiTenantManager) ValidateRuleGroup(grp rulefmt.RuleGroup) []error { } func MemstoreTenantManager( - cfg ruler.Config, + cfg rulerConfig.Config, engine *logql.Engine, overrides RulesLimits, ) ruler.ManagerFactory { @@ -109,26 +111,8 @@ func MemstoreTenantManager( queryFunc := engineQueryFunc(engine, overrides, userID) memStore := NewMemStore(userID, queryFunc, metrics, 5*time.Minute, log.With(logger, "subcomponent", "MemStore")) - // cortex - //u, err := url.Parse("http://localhost:8001/api/prom/push") - - // prometheus - u, err := url.Parse("http://localhost:9090/api/v1/write") - if err != nil { - panic(err) - } - - writeClient, err := remote.NewWriteClient("cortex", &remote.ClientConfig{ - URL: &config.URL{u}, - Timeout: model.Duration(5 * time.Second), - HTTPClientConfig: config.HTTPClientConfig{}, - }) - if err != nil { - panic(err) - } - mgr := rules.NewManager(&rules.ManagerOptions{ - Appendable: &TestAppendable{remoteWriter: writeClient}, + Appendable: &TestAppendable{remoteWriter: newRemoteWriter(logger, cfg), logger: logger}, Queryable: memStore, QueryFunc: queryFunc, Context: user.InjectOrgID(ctx, userID), @@ -149,6 +133,30 @@ func MemstoreTenantManager( }) } +func newRemoteWriter(logger log.Logger, cfg rulerConfig.Config) *remote.WriteClient { + if cfg.RemoteWriteConfig.URL == "" { + return nil + } + + u, err := url.Parse(cfg.RemoteWriteConfig.URL) + if err != nil { + level.Warn(logger).Log("url", cfg.RemoteWriteConfig.URL, "msg", "cannot parse remote_write url", "err", err) + return nil + } + + writeClient, err := remote.NewWriteClient("cortex", &remote.ClientConfig{ + URL: &config.URL{u}, + Timeout: model.Duration(5 * time.Second), + HTTPClientConfig: config.HTTPClientConfig{}, + }) + if err != nil { + level.Warn(logger).Log("remote_write_url", cfg.RemoteWriteConfig.URL, "msg", "cannot create remote_write client", "err", err) + return nil + } + + return &writeClient +} + type GroupLoader struct{} func (GroupLoader) Parse(query string) (parser.Expr, error) { diff --git 
a/pkg/ruler/ruler.go b/pkg/ruler/ruler.go index 6fd1d9aed7ae..3d90db079aec 100644 --- a/pkg/ruler/ruler.go +++ b/pkg/ruler/ruler.go @@ -4,30 +4,18 @@ import ( "github.com/cortexproject/cortex/pkg/ruler" cRules "github.com/cortexproject/cortex/pkg/ruler/rules" "github.com/go-kit/kit/log" - "github.com/pkg/errors" + "github.com/grafana/loki/pkg/ruler/config" "github.com/prometheus/client_golang/prometheus" "github.com/grafana/loki/pkg/logql" "github.com/grafana/loki/pkg/ruler/manager" ) -type Config struct { - ruler.Config `yaml:",inline"` -} - -// Override the embedded cortex variant which expects a cortex limits struct. Instead copy the relevant bits over. -func (cfg *Config) Validate() error { - if err := cfg.StoreConfig.Validate(); err != nil { - return errors.Wrap(err, "invalid storage config") - } - return nil -} - -func NewRuler(cfg Config, engine *logql.Engine, reg prometheus.Registerer, logger log.Logger, ruleStore cRules.RuleStore, limits ruler.RulesLimits) (*ruler.Ruler, error) { +func NewRuler(cfg config.Config, engine *logql.Engine, reg prometheus.Registerer, logger log.Logger, ruleStore cRules.RuleStore, limits ruler.RulesLimits) (*ruler.Ruler, error) { mgr, err := ruler.NewDefaultMultiTenantManager( cfg.Config, manager.MemstoreTenantManager( - cfg.Config, + cfg, engine, limits, ), From fc48da7e9ef4a1e76f4454541c0100dca84d5dba Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 12 Mar 2021 10:22:40 +0200 Subject: [PATCH 03/34] Minor refactorings Signed-off-by: Danny Kopping --- pkg/loki/loki.go | 4 ++-- pkg/ruler/{config => }/config.go | 2 +- pkg/ruler/manager/appender.go | 20 ++++++++++---------- pkg/ruler/manager/compat.go | 4 ++-- pkg/ruler/ruler.go | 13 ------------- 5 files changed, 15 insertions(+), 28 deletions(-) rename pkg/ruler/{config => }/config.go (97%) diff --git a/pkg/loki/loki.go b/pkg/loki/loki.go index 6d95ecf36ed0..5675131d809c 100644 --- a/pkg/loki/loki.go +++ b/pkg/loki/loki.go @@ -11,7 +11,7 @@ import ( "github.com/cortexproject/cortex/pkg/querier/worker" "github.com/cortexproject/cortex/pkg/ruler/rulestore" "github.com/felixge/fgprof" - "github.com/grafana/loki/pkg/ruler/config" + ruler "github.com/grafana/loki/pkg/ruler" "github.com/grafana/loki/pkg/storage/stores/shipper/compactor" "github.com/grafana/loki/pkg/util/runtime" @@ -66,7 +66,7 @@ type Config struct { TableManager chunk.TableManagerConfig `yaml:"table_manager,omitempty"` Worker worker.Config `yaml:"frontend_worker,omitempty"` Frontend lokifrontend.Config `yaml:"frontend,omitempty"` - Ruler config.Config `yaml:"ruler,omitempty"` + Ruler ruler.Config `yaml:"ruler,omitempty"` QueryRange queryrange.Config `yaml:"query_range,omitempty"` RuntimeConfig runtimeconfig.ManagerConfig `yaml:"runtime_config,omitempty"` MemberlistKV memberlist.KVConfig `yaml:"memberlist"` diff --git a/pkg/ruler/config/config.go b/pkg/ruler/config.go similarity index 97% rename from pkg/ruler/config/config.go rename to pkg/ruler/config.go index 9757459b801c..cd18a2843a50 100644 --- a/pkg/ruler/config/config.go +++ b/pkg/ruler/config.go @@ -1,4 +1,4 @@ -package config +package ruler import ( "github.com/cortexproject/cortex/pkg/ruler" diff --git a/pkg/ruler/manager/appender.go b/pkg/ruler/manager/appender.go index 64bf4cb74068..76041d91af3f 100644 --- a/pkg/ruler/manager/appender.go +++ b/pkg/ruler/manager/appender.go @@ -14,12 +14,12 @@ import ( "github.com/prometheus/prometheus/storage/remote" ) -type TestAppendable struct { +type RemoteWriteAppendable struct { logger log.Logger remoteWriter *remote.WriteClient } -type 
TestAppender struct { +type RemoteWriteAppender struct { logger log.Logger ctx context.Context remoteWriter *remote.WriteClient @@ -43,7 +43,7 @@ func labelsToLabelsProto(labels labels.Labels, buf []prompb.Label) []prompb.Labe return result } -func (a *TestAppender) encodeRequest(l labels.Labels, s client.Sample) ([]byte, error) { +func (a *RemoteWriteAppender) encodeRequest(l labels.Labels, s client.Sample) ([]byte, error) { ts := prompb.TimeSeries{ Labels: labelsToLabelsProto(l, nil), Samples: []prompb.Sample{ @@ -65,8 +65,8 @@ func (a *TestAppender) encodeRequest(l labels.Labels, s client.Sample) ([]byte, return compressed, nil } -func (a *TestAppendable) Appender(ctx context.Context) storage.Appender { - return &TestAppender{ +func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { + return &RemoteWriteAppender{ ctx: ctx, logger: a.logger, remoteWriter: a.remoteWriter, @@ -80,7 +80,7 @@ func (a *TestAppendable) Appender(ctx context.Context) storage.Appender { // to AddFast() at any point. Adding the sample via Add() returns a new // reference number. // If the reference is 0 it must not be used for caching. -func (a *TestAppender) Add(l labels.Labels, t int64, v float64) (uint64, error) { +func (a *RemoteWriteAppender) Add(l labels.Labels, t int64, v float64) (uint64, error) { a.labels = append(a.labels, l) a.samples = append(a.samples, client.Sample{ @@ -93,7 +93,7 @@ func (a *TestAppender) Add(l labels.Labels, t int64, v float64) (uint64, error) // AddFast adds a sample pair for the referenced series. It is generally // faster than adding a sample by providing its full label set. -func (a *TestAppender) AddFast(ref uint64, t int64, v float64) error { +func (a *RemoteWriteAppender) AddFast(ref uint64, t int64, v float64) error { return storage.ErrNotFound } @@ -101,7 +101,7 @@ func (a *TestAppender) AddFast(ref uint64, t int64, v float64) error { // returns a non-nil error, it also rolls back all modifications made in // the appender so far, as Rollback would do. In any case, an Appender // must not be used anymore after Commit has been called. -func (a *TestAppender) Commit() error { +func (a *RemoteWriteAppender) Commit() error { if a.remoteWriter == nil { level.Debug(a.logger).Log("msg", "no remote_write client defined, skipping commit") } @@ -122,7 +122,7 @@ func (a *TestAppender) Commit() error { return nil } -func (a *TestAppender) prepareRequest() ([]byte, error) { +func (a *RemoteWriteAppender) prepareRequest() ([]byte, error) { req := client.ToWriteRequest(a.labels, a.samples, nil, client.API) reqBytes, err := req.Marshal() if err != nil { @@ -134,7 +134,7 @@ func (a *TestAppender) prepareRequest() ([]byte, error) { // Rollback rolls back all modifications made in the appender so far. // Appender has to be discarded after rollback. 
-func (a *TestAppender) Rollback() error { +func (a *RemoteWriteAppender) Rollback() error { a.labels = nil a.samples = nil diff --git a/pkg/ruler/manager/compat.go b/pkg/ruler/manager/compat.go index bfca86df9385..bdcf7dd3ca9d 100644 --- a/pkg/ruler/manager/compat.go +++ b/pkg/ruler/manager/compat.go @@ -11,7 +11,7 @@ import ( "github.com/cortexproject/cortex/pkg/ruler" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" - rulerConfig "github.com/grafana/loki/pkg/ruler/config" + rulerConfig "github.com/grafana/loki/pkg/ruler" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -112,7 +112,7 @@ func MemstoreTenantManager( memStore := NewMemStore(userID, queryFunc, metrics, 5*time.Minute, log.With(logger, "subcomponent", "MemStore")) mgr := rules.NewManager(&rules.ManagerOptions{ - Appendable: &TestAppendable{remoteWriter: newRemoteWriter(logger, cfg), logger: logger}, + Appendable: &RemoteWriteAppendable{remoteWriter: newRemoteWriter(logger, cfg), logger: logger}, Queryable: memStore, QueryFunc: queryFunc, Context: user.InjectOrgID(ctx, userID), diff --git a/pkg/ruler/ruler.go b/pkg/ruler/ruler.go index 5f312e4fb57c..d3a09caf84f0 100644 --- a/pkg/ruler/ruler.go +++ b/pkg/ruler/ruler.go @@ -4,25 +4,12 @@ import ( "github.com/cortexproject/cortex/pkg/ruler" "github.com/cortexproject/cortex/pkg/ruler/rulestore" "github.com/go-kit/kit/log" - "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/grafana/loki/pkg/logql" "github.com/grafana/loki/pkg/ruler/manager" ) -type Config struct { - ruler.Config `yaml:",inline"` -} - -// Override the embedded cortex variant which expects a cortex limits struct. Instead copy the relevant bits over. -func (cfg *Config) Validate() error { - if err := cfg.StoreConfig.Validate(); err != nil { - return errors.Wrap(err, "invalid storage config") - } - return nil -} - func NewRuler(cfg Config, engine *logql.Engine, reg prometheus.Registerer, logger log.Logger, ruleStore rulestore.RuleStore, limits ruler.RulesLimits) (*ruler.Ruler, error) { mgr, err := ruler.NewDefaultMultiTenantManager( cfg.Config, From 870aa51f47eb4afc39ab6186763dcb7da526af24 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 12 Mar 2021 10:25:22 +0200 Subject: [PATCH 04/34] Moving manager subpackage into ruler package to avoid dependency cycles This also mirrors Cortex's package structure Signed-off-by: Danny Kopping --- pkg/loki/loki.go | 2 +- pkg/loki/modules.go | 3 +-- pkg/ruler/{manager => }/appender.go | 2 +- pkg/ruler/{manager => }/compat.go | 7 +++---- pkg/ruler/{manager => }/compat_test.go | 2 +- pkg/ruler/{manager => }/memstore.go | 2 +- pkg/ruler/{manager => }/memstore_test.go | 2 +- pkg/ruler/ruler.go | 5 ++--- 8 files changed, 11 insertions(+), 14 deletions(-) rename pkg/ruler/{manager => }/appender.go (99%) rename pkg/ruler/{manager => }/compat.go (98%) rename pkg/ruler/{manager => }/compat_test.go (99%) rename pkg/ruler/{manager => }/memstore.go (99%) rename pkg/ruler/{manager => }/memstore_test.go (99%) diff --git a/pkg/loki/loki.go b/pkg/loki/loki.go index 5675131d809c..0cfa58ca745d 100644 --- a/pkg/loki/loki.go +++ b/pkg/loki/loki.go @@ -11,7 +11,6 @@ import ( "github.com/cortexproject/cortex/pkg/querier/worker" "github.com/cortexproject/cortex/pkg/ruler/rulestore" "github.com/felixge/fgprof" - ruler "github.com/grafana/loki/pkg/ruler" "github.com/grafana/loki/pkg/storage/stores/shipper/compactor" "github.com/grafana/loki/pkg/util/runtime" @@ -42,6 +41,7 @@ import ( 
"github.com/grafana/loki/pkg/lokifrontend" "github.com/grafana/loki/pkg/querier" "github.com/grafana/loki/pkg/querier/queryrange" + "github.com/grafana/loki/pkg/ruler" "github.com/grafana/loki/pkg/storage" "github.com/grafana/loki/pkg/tracing" serverutil "github.com/grafana/loki/pkg/util/server" diff --git a/pkg/loki/modules.go b/pkg/loki/modules.go index 7c8e57a548ec..e132a2cc65e8 100644 --- a/pkg/loki/modules.go +++ b/pkg/loki/modules.go @@ -15,7 +15,6 @@ import ( "github.com/cortexproject/cortex/pkg/frontend/transport" "github.com/cortexproject/cortex/pkg/frontend/v1/frontendv1pb" - "github.com/grafana/loki/pkg/ruler/manager" "github.com/grafana/loki/pkg/storage/stores/shipper/compactor" "github.com/grafana/loki/pkg/util/runtime" @@ -500,7 +499,7 @@ func (t *Loki) initRulerStorage() (_ services.Service, err error) { } } - t.RulerStorage, err = cortex_ruler.NewLegacyRuleStore(t.cfg.Ruler.StoreConfig, manager.GroupLoader{}, util_log.Logger) + t.RulerStorage, err = cortex_ruler.NewLegacyRuleStore(t.cfg.Ruler.StoreConfig, ruler.GroupLoader{}, util_log.Logger) return } diff --git a/pkg/ruler/manager/appender.go b/pkg/ruler/appender.go similarity index 99% rename from pkg/ruler/manager/appender.go rename to pkg/ruler/appender.go index 76041d91af3f..521b50503854 100644 --- a/pkg/ruler/manager/appender.go +++ b/pkg/ruler/appender.go @@ -1,4 +1,4 @@ -package manager +package ruler import ( "context" diff --git a/pkg/ruler/manager/compat.go b/pkg/ruler/compat.go similarity index 98% rename from pkg/ruler/manager/compat.go rename to pkg/ruler/compat.go index bdcf7dd3ca9d..6ec30a71e507 100644 --- a/pkg/ruler/manager/compat.go +++ b/pkg/ruler/compat.go @@ -1,4 +1,4 @@ -package manager +package ruler import ( "bytes" @@ -11,7 +11,6 @@ import ( "github.com/cortexproject/cortex/pkg/ruler" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" - rulerConfig "github.com/grafana/loki/pkg/ruler" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/config" @@ -89,7 +88,7 @@ func (m *MultiTenantManager) ValidateRuleGroup(grp rulefmt.RuleGroup) []error { } func MemstoreTenantManager( - cfg rulerConfig.Config, + cfg Config, engine *logql.Engine, overrides RulesLimits, ) ruler.ManagerFactory { @@ -133,7 +132,7 @@ func MemstoreTenantManager( }) } -func newRemoteWriter(logger log.Logger, cfg rulerConfig.Config) *remote.WriteClient { +func newRemoteWriter(logger log.Logger, cfg Config) *remote.WriteClient { if cfg.RemoteWriteConfig.URL == "" { return nil } diff --git a/pkg/ruler/manager/compat_test.go b/pkg/ruler/compat_test.go similarity index 99% rename from pkg/ruler/manager/compat_test.go rename to pkg/ruler/compat_test.go index 7cddb3f91528..69afd2f7ddfa 100644 --- a/pkg/ruler/manager/compat_test.go +++ b/pkg/ruler/compat_test.go @@ -1,4 +1,4 @@ -package manager +package ruler import ( "fmt" diff --git a/pkg/ruler/manager/memstore.go b/pkg/ruler/memstore.go similarity index 99% rename from pkg/ruler/manager/memstore.go rename to pkg/ruler/memstore.go index 54daf2d94b1e..458bc71da1de 100644 --- a/pkg/ruler/manager/memstore.go +++ b/pkg/ruler/memstore.go @@ -1,4 +1,4 @@ -package manager +package ruler import ( "context" diff --git a/pkg/ruler/manager/memstore_test.go b/pkg/ruler/memstore_test.go similarity index 99% rename from pkg/ruler/manager/memstore_test.go rename to pkg/ruler/memstore_test.go index 2d2569ee882c..f00910aaa493 100644 --- a/pkg/ruler/manager/memstore_test.go +++ b/pkg/ruler/memstore_test.go @@ -1,4 +1,4 @@ -package manager +package 
ruler import ( "context" diff --git a/pkg/ruler/ruler.go b/pkg/ruler/ruler.go index d3a09caf84f0..95edf1ada2b8 100644 --- a/pkg/ruler/ruler.go +++ b/pkg/ruler/ruler.go @@ -7,13 +7,12 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/grafana/loki/pkg/logql" - "github.com/grafana/loki/pkg/ruler/manager" ) func NewRuler(cfg Config, engine *logql.Engine, reg prometheus.Registerer, logger log.Logger, ruleStore rulestore.RuleStore, limits ruler.RulesLimits) (*ruler.Ruler, error) { mgr, err := ruler.NewDefaultMultiTenantManager( cfg.Config, - manager.MemstoreTenantManager( + MemstoreTenantManager( cfg, engine, limits, @@ -26,7 +25,7 @@ func NewRuler(cfg Config, engine *logql.Engine, reg prometheus.Registerer, logge } return ruler.NewRuler( cfg.Config, - manager.MultiTenantManagerAdapter(mgr), + MultiTenantManagerAdapter(mgr), reg, logger, ruleStore, From 5565a78ac9c3b1fd1c080c0fe521485e33bcc0f8 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 12 Mar 2021 10:43:55 +0200 Subject: [PATCH 05/34] Minor refactorings Signed-off-by: Danny Kopping --- pkg/ruler/appender.go | 4 +--- pkg/ruler/config.go | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pkg/ruler/appender.go b/pkg/ruler/appender.go index 521b50503854..a1284b161458 100644 --- a/pkg/ruler/appender.go +++ b/pkg/ruler/appender.go @@ -54,13 +54,11 @@ func (a *RemoteWriteAppender) encodeRequest(l labels.Labels, s client.Sample) ([ }, } - // Create write request data, err := proto.Marshal(&prompb.WriteRequest{Timeseries: []prompb.TimeSeries{ts}}) if err != nil { return nil, err } - // Create HTTP request compressed := snappy.Encode(nil, data) return compressed, nil } @@ -106,8 +104,8 @@ func (a *RemoteWriteAppender) Commit() error { level.Debug(a.logger).Log("msg", "no remote_write client defined, skipping commit") } - level.Debug(a.logger).Log("msg", "writing to remote_write target") writer := *a.remoteWriter + level.Debug(a.logger).Log("msg", "writing to remote_write target", "target", writer.Endpoint()) req, err := a.prepareRequest() if err != nil { diff --git a/pkg/ruler/config.go b/pkg/ruler/config.go index cd18a2843a50..f371be7c1310 100644 --- a/pkg/ruler/config.go +++ b/pkg/ruler/config.go @@ -18,7 +18,7 @@ type RemoteWriteConfig struct { // Override the embedded cortex variant which expects a cortex limits struct. Instead copy the relevant bits over. 
func (cfg *Config) Validate() error { if err := cfg.StoreConfig.Validate(); err != nil { - return errors.Wrap(err, "invalid storage config") + return errors.Wrap(err, "invalid ruler config") } return nil } From 23356a3f7d7317ddc5b957b63a3d942a3107e466 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Sat, 13 Mar 2021 17:48:51 +0200 Subject: [PATCH 06/34] Skipping commit if remote-write client is not defined Signed-off-by: Danny Kopping --- pkg/ruler/appender.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/ruler/appender.go b/pkg/ruler/appender.go index a1284b161458..fb93c3cfa769 100644 --- a/pkg/ruler/appender.go +++ b/pkg/ruler/appender.go @@ -102,6 +102,7 @@ func (a *RemoteWriteAppender) AddFast(ref uint64, t int64, v float64) error { func (a *RemoteWriteAppender) Commit() error { if a.remoteWriter == nil { level.Debug(a.logger).Log("msg", "no remote_write client defined, skipping commit") + return nil } writer := *a.remoteWriter From 8f07114a68610dbbd2644ebe241b89fc82798df6 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Tue, 27 Apr 2021 11:42:28 +0200 Subject: [PATCH 07/34] Updating use of cortex client Signed-off-by: Danny Kopping --- pkg/ruler/appender.go | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/pkg/ruler/appender.go b/pkg/ruler/appender.go index fb93c3cfa769..1d80fefe54d5 100644 --- a/pkg/ruler/appender.go +++ b/pkg/ruler/appender.go @@ -3,7 +3,9 @@ package ruler import ( "context" - "github.com/cortexproject/cortex/pkg/ingester/client" + "github.com/prometheus/prometheus/pkg/exemplar" + + "github.com/cortexproject/cortex/pkg/cortexpb" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" "github.com/gogo/protobuf/proto" @@ -25,7 +27,7 @@ type RemoteWriteAppender struct { remoteWriter *remote.WriteClient labels []labels.Labels - samples []client.Sample + samples []cortexpb.Sample } // from github.com/prometheus/prometheus/storage/remote/codec.go @@ -43,7 +45,7 @@ func labelsToLabelsProto(labels labels.Labels, buf []prompb.Label) []prompb.Labe return result } -func (a *RemoteWriteAppender) encodeRequest(l labels.Labels, s client.Sample) ([]byte, error) { +func (a *RemoteWriteAppender) encodeRequest(l labels.Labels, s cortexpb.Sample) ([]byte, error) { ts := prompb.TimeSeries{ Labels: labelsToLabelsProto(l, nil), Samples: []prompb.Sample{ @@ -71,28 +73,40 @@ func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { } } -// Add adds a sample pair for the given series. A reference number is -// returned which can be used to add further samples in the same or later -// transactions. +// Append adds a sample pair for the given series. +// An optional reference number can be provided to accelerate calls. +// A reference number is returned which can be used to add further +// samples in the same or later transactions. // Returned reference numbers are ephemeral and may be rejected in calls -// to AddFast() at any point. Adding the sample via Add() returns a new +// to Append() at any point. Adding the sample via Append() returns a new // reference number. // If the reference is 0 it must not be used for caching. 
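The hunk in progress here adapts to an upstream interface change: in the Prometheus release vendored at the time (where exemplar support had just landed), storage.Appender exposes Append and AppendExemplar instead of the old Add/AddFast pair. The minimal stand-in below uses hypothetical discardAppender/discardAppendable names, not from this series, and shows the full surface such an appender has to implement, with compile-time assertions guarding against interface drift.

package main

import (
	"context"

	"github.com/prometheus/prometheus/pkg/exemplar"
	"github.com/prometheus/prometheus/pkg/labels"
	"github.com/prometheus/prometheus/storage"
)

// discardAppender is a throwaway stand-in satisfying the newer storage.Appender
// surface: Append/AppendExemplar replace the old Add/AddFast pair.
type discardAppender struct{}

func (discardAppender) Append(_ uint64, _ labels.Labels, _ int64, _ float64) (uint64, error) {
	return 0, nil
}

func (discardAppender) AppendExemplar(_ uint64, _ labels.Labels, _ exemplar.Exemplar) (uint64, error) {
	return 0, nil
}

func (discardAppender) Commit() error   { return nil }
func (discardAppender) Rollback() error { return nil }

// discardAppendable hands out appenders; the ruler wires a real implementation
// of this into rules.ManagerOptions.Appendable.
type discardAppendable struct{}

func (discardAppendable) Appender(_ context.Context) storage.Appender {
	return discardAppender{}
}

// Compile-time assertions: these fail the build if the vendored interfaces change shape.
var (
	_ storage.Appender   = discardAppender{}
	_ storage.Appendable = discardAppendable{}
)

func main() {}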
-func (a *RemoteWriteAppender) Add(l labels.Labels, t int64, v float64) (uint64, error) { +func (a *RemoteWriteAppender) Append(_ uint64, l labels.Labels, t int64, v float64) (uint64, error) { a.labels = append(a.labels, l) - a.samples = append(a.samples, client.Sample{ + a.samples = append(a.samples, cortexpb.Sample{ TimestampMs: t, Value: v, }) + level.Warn(a.logger).Log("msg", "writing sample", "sample", fmt.Sprintf("%+v", a.samples[len(a.samples)-1])) + return 0, nil } -// AddFast adds a sample pair for the referenced series. It is generally -// faster than adding a sample by providing its full label set. -func (a *RemoteWriteAppender) AddFast(ref uint64, t int64, v float64) error { - return storage.ErrNotFound +// AppendExemplar adds an exemplar for the given series labels. +// An optional reference number can be provided to accelerate calls. +// A reference number is returned which can be used to add further +// exemplars in the same or later transactions. +// Returned reference numbers are ephemeral and may be rejected in calls +// to Append() at any point. Adding the sample via Append() returns a new +// reference number. +// If the reference is 0 it must not be used for caching. +// Note that in our current implementation of Prometheus' exemplar storage +// calls to Append should generate the reference numbers, AppendExemplar +// generating a new reference number should be considered possible erroneous behaviour and be logged. +func (a *RemoteWriteAppender) AppendExemplar(_ uint64, _ labels.Labels, _ exemplar.Exemplar) (uint64, error) { + return 0, errors.New("exemplars are unsupported") } // Commit submits the collected samples and purges the batch. If Commit @@ -122,7 +136,7 @@ func (a *RemoteWriteAppender) Commit() error { } func (a *RemoteWriteAppender) prepareRequest() ([]byte, error) { - req := client.ToWriteRequest(a.labels, a.samples, nil, client.API) + req := cortexpb.ToWriteRequest(a.labels, a.samples, nil, cortexpb.RULE) reqBytes, err := req.Marshal() if err != nil { return nil, err From f8161930ec2ef868a9cf30fb2e7b03aa607ff48a Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Sun, 16 May 2021 19:06:50 +0200 Subject: [PATCH 08/34] Memoizing appenders, using queue for samples & labels Signed-off-by: Danny Kopping --- pkg/ruler/appender.go | 124 +++++++++++++++++++++++++++++++----------- pkg/ruler/compat.go | 2 +- pkg/ruler/queue.go | 39 +++++++++++++ 3 files changed, 132 insertions(+), 33 deletions(-) create mode 100644 pkg/ruler/queue.go diff --git a/pkg/ruler/appender.go b/pkg/ruler/appender.go index 1d80fefe54d5..cf1855a53eff 100644 --- a/pkg/ruler/appender.go +++ b/pkg/ruler/appender.go @@ -2,6 +2,13 @@ package ruler import ( "context" + "errors" + + "github.com/prometheus/prometheus/storage/remote" + + "github.com/prometheus/prometheus/rules" + + "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/pkg/exemplar" @@ -13,10 +20,11 @@ import ( "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/prompb" "github.com/prometheus/prometheus/storage" - "github.com/prometheus/prometheus/storage/remote" ) type RemoteWriteAppendable struct { + groupAppender map[string]*RemoteWriteAppender + logger log.Logger remoteWriter *remote.WriteClient } @@ -25,24 +33,9 @@ type RemoteWriteAppender struct { logger log.Logger ctx context.Context remoteWriter *remote.WriteClient + groupKey string - labels []labels.Labels - samples []cortexpb.Sample -} - -// from github.com/prometheus/prometheus/storage/remote/codec.go -func 
labelsToLabelsProto(labels labels.Labels, buf []prompb.Label) []prompb.Label { - result := buf[:0] - if cap(buf) < len(labels) { - result = make([]prompb.Label, 0, len(labels)) - } - for _, l := range labels { - result = append(result, prompb.Label{ - Name: l.Name, - Value: l.Value, - }) - } - return result + queue *RemoteWriteQueue } func (a *RemoteWriteAppender) encodeRequest(l labels.Labels, s cortexpb.Sample) ([]byte, error) { @@ -66,11 +59,33 @@ func (a *RemoteWriteAppender) encodeRequest(l labels.Labels, s cortexpb.Sample) } func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { - return &RemoteWriteAppender{ - ctx: ctx, - logger: a.logger, - remoteWriter: a.remoteWriter, + + if a.groupAppender == nil { + a.groupAppender = make(map[string]*RemoteWriteAppender) + } + + groupKey := retrieveGroupKeyFromContext(ctx) + + var appender *RemoteWriteAppender + + appender, found := a.groupAppender[groupKey] + if !found { + appender = &RemoteWriteAppender{ + ctx: ctx, + logger: a.logger, + remoteWriter: a.remoteWriter, + groupKey: groupKey, + + queue: NewRemoteWriteQueue(15000), + } + + // only track reference if groupKey was retrieved + if groupKey != "" { + a.groupAppender[groupKey] = appender + } } + + return appender } // Append adds a sample pair for the given series. @@ -82,15 +97,11 @@ func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { // reference number. // If the reference is 0 it must not be used for caching. func (a *RemoteWriteAppender) Append(_ uint64, l labels.Labels, t int64, v float64) (uint64, error) { - a.labels = append(a.labels, l) - - a.samples = append(a.samples, cortexpb.Sample{ - TimestampMs: t, + a.queue.Append(l, cortexpb.Sample{ Value: v, + TimestampMs: t, }) - level.Warn(a.logger).Log("msg", "writing sample", "sample", fmt.Sprintf("%+v", a.samples[len(a.samples)-1])) - return 0, nil } @@ -114,29 +125,40 @@ func (a *RemoteWriteAppender) AppendExemplar(_ uint64, _ labels.Labels, _ exempl // the appender so far, as Rollback would do. In any case, an Appender // must not be used anymore after Commit has been called. func (a *RemoteWriteAppender) Commit() error { + if a.queue.Length() <= 0 { + return nil + } + if a.remoteWriter == nil { level.Debug(a.logger).Log("msg", "no remote_write client defined, skipping commit") return nil } writer := *a.remoteWriter - level.Debug(a.logger).Log("msg", "writing to remote_write target", "target", writer.Endpoint()) + level.Warn(a.logger).Log("msg", "writing samples to remote_write target", "target", writer.Endpoint(), "count", a.queue.Length()) req, err := a.prepareRequest() if err != nil { + level.Warn(a.logger).Log("msg", "could not prepare", "err", err) return err } err = writer.Store(a.ctx, req) if err != nil { + level.Warn(a.logger).Log("msg", "could not store", "err", err) return err } + // clear the queue on a successful response + a.queue.Clear() + return nil } func (a *RemoteWriteAppender) prepareRequest() ([]byte, error) { - req := cortexpb.ToWriteRequest(a.labels, a.samples, nil, cortexpb.RULE) + req := cortexpb.ToWriteRequest(a.queue.labels, a.queue.samples, nil, cortexpb.RULE) + defer cortexpb.ReuseSlice(req.Timeseries) + reqBytes, err := req.Marshal() if err != nil { return nil, err @@ -148,8 +170,46 @@ func (a *RemoteWriteAppender) prepareRequest() ([]byte, error) { // Rollback rolls back all modifications made in the appender so far. // Appender has to be discarded after rollback. 
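The memoization introduced in this patch keys appenders by rule group: Prometheus' rule evaluation attaches the owning group's file and name to the query context under promql.QueryOrigin{}, which is what retrieveGroupKeyFromContext (just below) unpacks. The standalone sketch that follows shows that round trip in isolation; the file and group names are invented, and the helper is a simplified mirror of the patch's function rather than a copy of it.

package main

import (
	"context"
	"fmt"

	"github.com/prometheus/prometheus/promql"
	"github.com/prometheus/prometheus/rules"
)

// groupKeyFromContext mirrors the idea of retrieveGroupKeyFromContext: pull the
// rule-group metadata the rules manager stored under promql.QueryOrigin{}.
func groupKeyFromContext(ctx context.Context) string {
	data, ok := ctx.Value(promql.QueryOrigin{}).(map[string]interface{})
	if !ok {
		return ""
	}
	rg, ok := data["ruleGroup"].(map[string]string)
	if !ok {
		return ""
	}
	return rules.GroupKey(rg["file"], rg["name"])
}

func main() {
	// Simulate the metadata a rule group's evaluation attaches to its context.
	ctx := context.WithValue(context.Background(), promql.QueryOrigin{}, map[string]interface{}{
		"ruleGroup": map[string]string{"file": "fake/rules.yaml", "name": "example-group"},
	})
	fmt.Println(groupKeyFromContext(ctx)) // fake/rules.yaml;example-group
}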
func (a *RemoteWriteAppender) Rollback() error { - a.labels = nil - a.samples = nil + a.queue.Clear() return nil } + +func retrieveGroupKeyFromContext(ctx context.Context) string { + data, found := ctx.Value(promql.QueryOrigin{}).(map[string]interface{}) + if !found { + return "" + } + + ruleGroup, found := data["ruleGroup"].(map[string]string) + if !found { + return "" + } + + file, found := ruleGroup["file"] + if !found { + return "" + } + + name, found := ruleGroup["name"] + if !found { + return "" + } + + return rules.GroupKey(file, name) +} + +// from github.com/prometheus/prometheus/storage/remote/codec.go +func labelsToLabelsProto(labels labels.Labels, buf []prompb.Label) []prompb.Label { + result := buf[:0] + if cap(buf) < len(labels) { + result = make([]prompb.Label, 0, len(labels)) + } + for _, l := range labels { + result = append(result, prompb.Label{ + Name: l.Name, + Value: l.Value, + }) + } + return result +} diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index 6ec30a71e507..76819eaeaf69 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -101,7 +101,7 @@ func MemstoreTenantManager( logger log.Logger, reg prometheus.Registerer, ) ruler.RulesManager { - // We'll ignore the passed registere and use the default registerer to avoid prefix issues and other weirdness. + // We'll ignore the passed registerer and use the default registerer to avoid prefix issues and other weirdness. // This closure prevents re-registering. if metrics == nil { metrics = NewMetrics(prometheus.DefaultRegisterer) diff --git a/pkg/ruler/queue.go b/pkg/ruler/queue.go new file mode 100644 index 000000000000..cdcc09a53e1d --- /dev/null +++ b/pkg/ruler/queue.go @@ -0,0 +1,39 @@ +package ruler + +import ( + "github.com/cortexproject/cortex/pkg/cortexpb" + "github.com/prometheus/prometheus/pkg/labels" +) + +type RemoteWriteQueue struct { + labels []labels.Labels + samples []cortexpb.Sample + + capacity int +} + +func NewRemoteWriteQueue(capacity int) *RemoteWriteQueue { + return &RemoteWriteQueue{ + capacity: capacity, + } +} + +func (q *RemoteWriteQueue) Append(labels labels.Labels, sample cortexpb.Sample) { + if len(q.samples) >= q.capacity { + // capacity exceeded, delete oldest sample + q.samples = append(q.samples[:0], q.samples[1:]...) + q.labels = append(q.labels[:0], q.labels[1:]...) 
+ } + + q.labels = append(q.labels, labels) + q.samples = append(q.samples, sample) +} + +func (q *RemoteWriteQueue) Length() int { + return len(q.samples) +} + +func (q *RemoteWriteQueue) Clear() { + q.samples = nil + q.labels = nil +} From d0be7facc32e55fd2921bf87adbc4faa84658c1f Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Mon, 17 May 2021 17:47:06 +0200 Subject: [PATCH 09/34] Adding buffer size configurability Refactoring Signed-off-by: Danny Kopping --- pkg/ruler/appender.go | 72 +++++++++++-------------------------------- pkg/ruler/compat.go | 38 ++++++++++++++--------- pkg/ruler/config.go | 14 +++++++-- pkg/ruler/metrics.go | 18 +++++++++++ pkg/ruler/queue.go | 28 +++++++++++++---- pkg/ruler/utils.go | 21 +++++++++++++ 6 files changed, 114 insertions(+), 77 deletions(-) create mode 100644 pkg/ruler/metrics.go create mode 100644 pkg/ruler/utils.go diff --git a/pkg/ruler/appender.go b/pkg/ruler/appender.go index cf1855a53eff..7c4ea98a6a8d 100644 --- a/pkg/ruler/appender.go +++ b/pkg/ruler/appender.go @@ -24,6 +24,8 @@ import ( type RemoteWriteAppendable struct { groupAppender map[string]*RemoteWriteAppender + userID string + cfg Config logger log.Logger remoteWriter *remote.WriteClient @@ -35,7 +37,7 @@ type RemoteWriteAppender struct { remoteWriter *remote.WriteClient groupKey string - queue *RemoteWriteQueue + queue *remoteWriteQueue } func (a *RemoteWriteAppender) encodeRequest(l labels.Labels, s cortexpb.Sample) ([]byte, error) { @@ -59,6 +61,7 @@ func (a *RemoteWriteAppender) encodeRequest(l labels.Labels, s cortexpb.Sample) } func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { + var appender *RemoteWriteAppender if a.groupAppender == nil { a.groupAppender = make(map[string]*RemoteWriteAppender) @@ -66,8 +69,6 @@ func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { groupKey := retrieveGroupKeyFromContext(ctx) - var appender *RemoteWriteAppender - appender, found := a.groupAppender[groupKey] if !found { appender = &RemoteWriteAppender{ @@ -76,28 +77,23 @@ func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { remoteWriter: a.remoteWriter, groupKey: groupKey, - queue: NewRemoteWriteQueue(15000), + queue: newRemoteWriteQueue(a.cfg.RemoteWrite.BufferSize, a.userID, groupKey), } // only track reference if groupKey was retrieved - if groupKey != "" { - a.groupAppender[groupKey] = appender + if groupKey == "" { + level.Warn(a.logger).Log("msg", "blank group key passed via context; creating new appender") + return appender } + + a.groupAppender[groupKey] = appender } return appender } -// Append adds a sample pair for the given series. -// An optional reference number can be provided to accelerate calls. -// A reference number is returned which can be used to add further -// samples in the same or later transactions. -// Returned reference numbers are ephemeral and may be rejected in calls -// to Append() at any point. Adding the sample via Append() returns a new -// reference number. -// If the reference is 0 it must not be used for caching. func (a *RemoteWriteAppender) Append(_ uint64, l labels.Labels, t int64, v float64) (uint64, error) { - a.queue.Append(l, cortexpb.Sample{ + a.queue.append(l, cortexpb.Sample{ Value: v, TimestampMs: t, }) @@ -105,52 +101,37 @@ func (a *RemoteWriteAppender) Append(_ uint64, l labels.Labels, t int64, v float return 0, nil } -// AppendExemplar adds an exemplar for the given series labels. -// An optional reference number can be provided to accelerate calls. 
-// A reference number is returned which can be used to add further -// exemplars in the same or later transactions. -// Returned reference numbers are ephemeral and may be rejected in calls -// to Append() at any point. Adding the sample via Append() returns a new -// reference number. -// If the reference is 0 it must not be used for caching. -// Note that in our current implementation of Prometheus' exemplar storage -// calls to Append should generate the reference numbers, AppendExemplar -// generating a new reference number should be considered possible erroneous behaviour and be logged. func (a *RemoteWriteAppender) AppendExemplar(_ uint64, _ labels.Labels, _ exemplar.Exemplar) (uint64, error) { return 0, errors.New("exemplars are unsupported") } -// Commit submits the collected samples and purges the batch. If Commit -// returns a non-nil error, it also rolls back all modifications made in -// the appender so far, as Rollback would do. In any case, an Appender -// must not be used anymore after Commit has been called. func (a *RemoteWriteAppender) Commit() error { - if a.queue.Length() <= 0 { + if a.queue.length() <= 0 { return nil } if a.remoteWriter == nil { - level.Debug(a.logger).Log("msg", "no remote_write client defined, skipping commit") + level.Warn(a.logger).Log("msg", "no remote_write client defined, skipping commit") return nil } writer := *a.remoteWriter - level.Warn(a.logger).Log("msg", "writing samples to remote_write target", "target", writer.Endpoint(), "count", a.queue.Length()) + level.Debug(a.logger).Log("msg", "writing samples to remote_write target", "target", writer.Endpoint(), "count", a.queue.length()) req, err := a.prepareRequest() if err != nil { - level.Warn(a.logger).Log("msg", "could not prepare", "err", err) + level.Error(a.logger).Log("msg", "could not prepare remote-write request", "err", err) return err } err = writer.Store(a.ctx, req) if err != nil { - level.Warn(a.logger).Log("msg", "could not store", "err", err) + level.Error(a.logger).Log("msg", "could not store recording rule samples", "err", err) return err } // clear the queue on a successful response - a.queue.Clear() + a.queue.clear() return nil } @@ -167,10 +148,8 @@ func (a *RemoteWriteAppender) prepareRequest() ([]byte, error) { return snappy.Encode(nil, reqBytes), nil } -// Rollback rolls back all modifications made in the appender so far. -// Appender has to be discarded after rollback. 
func (a *RemoteWriteAppender) Rollback() error { - a.queue.Clear() + a.queue.clear() return nil } @@ -198,18 +177,3 @@ func retrieveGroupKeyFromContext(ctx context.Context) string { return rules.GroupKey(file, name) } - -// from github.com/prometheus/prometheus/storage/remote/codec.go -func labelsToLabelsProto(labels labels.Labels, buf []prompb.Label) []prompb.Label { - result := buf[:0] - if cap(buf) < len(labels) { - result = make([]prompb.Label, 0, len(labels)) - } - for _, l := range labels { - result = append(result, prompb.Label{ - Name: l.Name, - Value: l.Value, - }) - } - return result -} diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index 76819eaeaf69..76bcf3d407b5 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -4,16 +4,16 @@ import ( "bytes" "context" "io/ioutil" - "net/url" "strings" "time" + "github.com/prometheus/prometheus/storage" + "github.com/cortexproject/cortex/pkg/ruler" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/common/config" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/notifier" "github.com/prometheus/prometheus/pkg/labels" @@ -111,7 +111,7 @@ func MemstoreTenantManager( memStore := NewMemStore(userID, queryFunc, metrics, 5*time.Minute, log.With(logger, "subcomponent", "MemStore")) mgr := rules.NewManager(&rules.ManagerOptions{ - Appendable: &RemoteWriteAppendable{remoteWriter: newRemoteWriter(logger, cfg), logger: logger}, + Appendable: newAppendable(cfg, logger, userID), Queryable: memStore, QueryFunc: queryFunc, Context: user.InjectOrgID(ctx, userID), @@ -132,24 +132,34 @@ func MemstoreTenantManager( }) } -func newRemoteWriter(logger log.Logger, cfg Config) *remote.WriteClient { - if cfg.RemoteWriteConfig.URL == "" { - return nil +func newAppendable(cfg Config, logger log.Logger, userID string) storage.Appendable { + if !cfg.RemoteWrite.Enabled() { + level.Warn(logger).Log("msg", "remote write client not configured") + return &NoopAppender{} } - u, err := url.Parse(cfg.RemoteWriteConfig.URL) - if err != nil { - level.Warn(logger).Log("url", cfg.RemoteWriteConfig.URL, "msg", "cannot parse remote_write url", "err", err) + return &RemoteWriteAppendable{ + remoteWriter: newRemoteWriter(logger, cfg), + logger: logger, + userID: userID, + cfg: cfg, + } +} + +func newRemoteWriter(logger log.Logger, cfg Config) *remote.WriteClient { + if !cfg.RemoteWrite.Enabled() { + level.Warn(logger).Log("msg", "remote write client not configured") return nil } - writeClient, err := remote.NewWriteClient("cortex", &remote.ClientConfig{ - URL: &config.URL{u}, - Timeout: model.Duration(5 * time.Second), - HTTPClientConfig: config.HTTPClientConfig{}, + writeClient, err := remote.NewWriteClient("recording_rules", &remote.ClientConfig{ + URL: cfg.RemoteWrite.Client.URL, + Timeout: cfg.RemoteWrite.Client.RemoteTimeout, + HTTPClientConfig: cfg.RemoteWrite.Client.HTTPClientConfig, + Headers: cfg.RemoteWrite.Client.Headers, }) if err != nil { - level.Warn(logger).Log("remote_write_url", cfg.RemoteWriteConfig.URL, "msg", "cannot create remote_write client", "err", err) + level.Warn(logger).Log("remote_write_url", cfg.RemoteWrite.Client.URL.String(), "msg", "cannot create remote_write client", "err", err) return nil } diff --git a/pkg/ruler/config.go b/pkg/ruler/config.go index f371be7c1310..7ff6d38eb6cc 100644 --- a/pkg/ruler/config.go +++ b/pkg/ruler/config.go @@ -3,19 +3,27 @@ package ruler import ( 
"github.com/cortexproject/cortex/pkg/ruler" "github.com/pkg/errors" + "github.com/prometheus/prometheus/config" ) type Config struct { ruler.Config `yaml:",inline"` - RemoteWriteConfig `yaml:"remote_write"` + RemoteWrite RemoteWriteConfig `yaml:"remote_write,omitempty"` } type RemoteWriteConfig struct { - URL string `yaml:"url"` + Client config.RemoteWriteConfig `yaml:"client"` + + BufferSize int `yaml:"buffer_size,omitempty"` +} + +func (c *RemoteWriteConfig) Enabled() bool { + // remote-write is considered disabled if there's no target to write to + return c.Client.URL != nil } -// Override the embedded cortex variant which expects a cortex limits struct. Instead copy the relevant bits over. +// Validate overrides the embedded cortex variant which expects a cortex limits struct. Instead copy the relevant bits over. func (cfg *Config) Validate() error { if err := cfg.StoreConfig.Validate(); err != nil { return errors.Wrap(err, "invalid ruler config") diff --git a/pkg/ruler/metrics.go b/pkg/ruler/metrics.go new file mode 100644 index 000000000000..c62df286e6e6 --- /dev/null +++ b/pkg/ruler/metrics.go @@ -0,0 +1,18 @@ +package ruler + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +var samplesDiscarded = promauto.NewCounterVec(prometheus.CounterOpts{ + Namespace: "loki", + Name: "recording_rule_queue_samples_discarded_total", + Help: "Number of samples discarded from queue - buffer is full.", +}, []string{"user_id", "group_key"}) + +var samplesBuffered = promauto.NewCounterVec(prometheus.CounterOpts{ + Namespace: "loki", + Name: "recording_rule_queue_samples_buffered_total", + Help: "Number of samples buffered in queue.", +}, []string{"user_id", "group_key"}) diff --git a/pkg/ruler/queue.go b/pkg/ruler/queue.go index cdcc09a53e1d..81cf01c89f9e 100644 --- a/pkg/ruler/queue.go +++ b/pkg/ruler/queue.go @@ -5,21 +5,35 @@ import ( "github.com/prometheus/prometheus/pkg/labels" ) -type RemoteWriteQueue struct { +// DefaultCapacity defines the default size of the samples buffer which will hold samples +// while the remote-write endpoint is unavailable +const DefaultCapacity = 10000 + +type remoteWriteQueue struct { labels []labels.Labels samples []cortexpb.Sample capacity int + userID string + key string } -func NewRemoteWriteQueue(capacity int) *RemoteWriteQueue { - return &RemoteWriteQueue{ +func newRemoteWriteQueue(capacity int, userID, key string) *remoteWriteQueue { + if capacity == 0 { + capacity = DefaultCapacity + } + + return &remoteWriteQueue{ capacity: capacity, + key: key, + userID: userID, } } -func (q *RemoteWriteQueue) Append(labels labels.Labels, sample cortexpb.Sample) { +func (q *remoteWriteQueue) append(labels labels.Labels, sample cortexpb.Sample) { if len(q.samples) >= q.capacity { + samplesDiscarded.WithLabelValues(q.userID, q.key).Inc() + // capacity exceeded, delete oldest sample q.samples = append(q.samples[:0], q.samples[1:]...) q.labels = append(q.labels[:0], q.labels[1:]...) 
@@ -27,13 +41,15 @@ func (q *RemoteWriteQueue) Append(labels labels.Labels, sample cortexpb.Sample) q.labels = append(q.labels, labels) q.samples = append(q.samples, sample) + + samplesBuffered.WithLabelValues(q.userID, q.key).Inc() } -func (q *RemoteWriteQueue) Length() int { +func (q *remoteWriteQueue) length() int { return len(q.samples) } -func (q *RemoteWriteQueue) Clear() { +func (q *remoteWriteQueue) clear() { q.samples = nil q.labels = nil } diff --git a/pkg/ruler/utils.go b/pkg/ruler/utils.go new file mode 100644 index 000000000000..908d20bcefcb --- /dev/null +++ b/pkg/ruler/utils.go @@ -0,0 +1,21 @@ +package ruler + +import ( + "github.com/prometheus/prometheus/pkg/labels" + "github.com/prometheus/prometheus/prompb" +) + +// from github.com/prometheus/prometheus/storage/remote/codec.go +func labelsToLabelsProto(labels labels.Labels, buf []prompb.Label) []prompb.Label { + result := buf[:0] + if cap(buf) < len(labels) { + result = make([]prompb.Label, 0, len(labels)) + } + for _, l := range labels { + result = append(result, prompb.Label{ + Name: l.Name, + Value: l.Value, + }) + } + return result +} From 524bbf7f292b3e80f96129c9b828d2cdf87d19d1 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Tue, 18 May 2021 17:06:42 +0200 Subject: [PATCH 10/34] Adding metric to show current buffer size Signed-off-by: Danny Kopping --- pkg/ruler/metrics.go | 16 +++++++++++----- pkg/ruler/queue.go | 5 ++++- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/pkg/ruler/metrics.go b/pkg/ruler/metrics.go index c62df286e6e6..d9acd9bc254a 100644 --- a/pkg/ruler/metrics.go +++ b/pkg/ruler/metrics.go @@ -7,12 +7,18 @@ import ( var samplesDiscarded = promauto.NewCounterVec(prometheus.CounterOpts{ Namespace: "loki", - Name: "recording_rule_queue_samples_discarded_total", - Help: "Number of samples discarded from queue - buffer is full.", + Name: "recording_rules_queue_samples_discarded_total", + Help: "Number of samples discarded from queue; buffer is full!", }, []string{"user_id", "group_key"}) -var samplesBuffered = promauto.NewCounterVec(prometheus.CounterOpts{ +var samplesBufferedTotal = promauto.NewCounterVec(prometheus.CounterOpts{ Namespace: "loki", - Name: "recording_rule_queue_samples_buffered_total", - Help: "Number of samples buffered in queue.", + Name: "recording_rules_queue_samples_buffered_total", + Help: "Number of samples buffered in queue in total.", +}, []string{"user_id", "group_key"}) + +var samplesBufferedCurrent = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: "loki", + Name: "recording_rules_queue_samples_buffered_current", + Help: "Number of samples currently buffered in queue.", }, []string{"user_id", "group_key"}) diff --git a/pkg/ruler/queue.go b/pkg/ruler/queue.go index 81cf01c89f9e..7d7e2270a4fa 100644 --- a/pkg/ruler/queue.go +++ b/pkg/ruler/queue.go @@ -42,7 +42,8 @@ func (q *remoteWriteQueue) append(labels labels.Labels, sample cortexpb.Sample) q.labels = append(q.labels, labels) q.samples = append(q.samples, sample) - samplesBuffered.WithLabelValues(q.userID, q.key).Inc() + samplesBufferedCurrent.WithLabelValues(q.userID, q.key).Set(float64(q.length())) + samplesBufferedTotal.WithLabelValues(q.userID, q.key).Inc() } func (q *remoteWriteQueue) length() int { @@ -52,4 +53,6 @@ func (q *remoteWriteQueue) length() int { func (q *remoteWriteQueue) clear() { q.samples = nil q.labels = nil + + samplesBufferedCurrent.WithLabelValues(q.userID, q.key).Set(0) } From df1f8d2e4976597309c535fd11a6c0f9677e4a7a Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Mon, 
24 May 2021 21:40:14 +0200 Subject: [PATCH 11/34] Refactoring for better responsibility separation & testability Signed-off-by: Danny Kopping --- pkg/ruler/appender.go | 83 +++++++++++++++----------------------- pkg/ruler/compat.go | 6 ++- pkg/ruler/compat_test.go | 5 +++ pkg/ruler/config.go | 4 ++ pkg/ruler/metrics.go | 6 +-- pkg/ruler/queue.go | 58 -------------------------- pkg/ruler/remote_write.go | 52 ++++++++++++++++++++++++ pkg/ruler/utils.go | 21 ---------- pkg/util/evicting_queue.go | 55 +++++++++++++++++++++++++ 9 files changed, 155 insertions(+), 135 deletions(-) delete mode 100644 pkg/ruler/queue.go create mode 100644 pkg/ruler/remote_write.go delete mode 100644 pkg/ruler/utils.go create mode 100644 pkg/util/evicting_queue.go diff --git a/pkg/ruler/appender.go b/pkg/ruler/appender.go index 7c4ea98a6a8d..8f9b108c85aa 100644 --- a/pkg/ruler/appender.go +++ b/pkg/ruler/appender.go @@ -4,7 +4,7 @@ import ( "context" "errors" - "github.com/prometheus/prometheus/storage/remote" + "github.com/grafana/loki/pkg/util" "github.com/prometheus/prometheus/rules" @@ -15,10 +15,7 @@ import ( "github.com/cortexproject/cortex/pkg/cortexpb" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" - "github.com/gogo/protobuf/proto" - "github.com/golang/snappy" "github.com/prometheus/prometheus/pkg/labels" - "github.com/prometheus/prometheus/prompb" "github.com/prometheus/prometheus/storage" ) @@ -28,36 +25,17 @@ type RemoteWriteAppendable struct { cfg Config logger log.Logger - remoteWriter *remote.WriteClient + remoteWriter remoteWriter } type RemoteWriteAppender struct { logger log.Logger ctx context.Context - remoteWriter *remote.WriteClient + remoteWriter remoteWriter + userID string groupKey string - queue *remoteWriteQueue -} - -func (a *RemoteWriteAppender) encodeRequest(l labels.Labels, s cortexpb.Sample) ([]byte, error) { - ts := prompb.TimeSeries{ - Labels: labelsToLabelsProto(l, nil), - Samples: []prompb.Sample{ - { - Value: s.Value, - Timestamp: s.TimestampMs, - }, - }, - } - - data, err := proto.Marshal(&prompb.WriteRequest{Timeseries: []prompb.TimeSeries{ts}}) - if err != nil { - return nil, err - } - - compressed := snappy.Encode(nil, data) - return compressed, nil + queue *util.EvictingQueue } func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { @@ -69,6 +47,7 @@ func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { groupKey := retrieveGroupKeyFromContext(ctx) + // create or retrieve an appender associated with this groupKey (unique ID for rule group) appender, found := a.groupAppender[groupKey] if !found { appender = &RemoteWriteAppender{ @@ -76,8 +55,9 @@ func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { logger: a.logger, remoteWriter: a.remoteWriter, groupKey: groupKey, + userID: a.userID, - queue: newRemoteWriteQueue(a.cfg.RemoteWrite.BufferSize, a.userID, groupKey), + queue: util.NewEvictingQueue(a.cfg.RemoteWrite.BufferSize, onEvict(a.userID, groupKey)), } // only track reference if groupKey was retrieved @@ -92,12 +72,24 @@ func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { return appender } +func onEvict(userID, groupKey string) func() { + return func() { + samplesEvicted.WithLabelValues(userID, groupKey).Inc() + } +} + func (a *RemoteWriteAppender) Append(_ uint64, l labels.Labels, t int64, v float64) (uint64, error) { - a.queue.append(l, cortexpb.Sample{ - Value: v, - TimestampMs: t, + a.queue.Append(queueEntry{ + labels: l, + sample: cortexpb.Sample{ 
+ Value: v, + TimestampMs: t, + }, }) + samplesBufferedCurrent.WithLabelValues(a.userID, a.groupKey).Set(float64(a.queue.Length())) + samplesBufferedTotal.WithLabelValues(a.userID, a.groupKey).Inc() + return 0, nil } @@ -106,7 +98,7 @@ func (a *RemoteWriteAppender) AppendExemplar(_ uint64, _ labels.Labels, _ exempl } func (a *RemoteWriteAppender) Commit() error { - if a.queue.length() <= 0 { + if a.queue.Length() <= 0 { return nil } @@ -115,41 +107,30 @@ func (a *RemoteWriteAppender) Commit() error { return nil } - writer := *a.remoteWriter - level.Debug(a.logger).Log("msg", "writing samples to remote_write target", "target", writer.Endpoint(), "count", a.queue.length()) + level.Debug(a.logger).Log("msg", "writing samples to remote_write target", "target", a.remoteWriter.Endpoint(), "count", a.queue.Length()) - req, err := a.prepareRequest() + req, err := a.remoteWriter.PrepareRequest(a.queue) if err != nil { level.Error(a.logger).Log("msg", "could not prepare remote-write request", "err", err) return err } - err = writer.Store(a.ctx, req) + err = a.remoteWriter.Store(a.ctx, req) if err != nil { level.Error(a.logger).Log("msg", "could not store recording rule samples", "err", err) return err } - // clear the queue on a successful response - a.queue.clear() - - return nil -} - -func (a *RemoteWriteAppender) prepareRequest() ([]byte, error) { - req := cortexpb.ToWriteRequest(a.queue.labels, a.queue.samples, nil, cortexpb.RULE) - defer cortexpb.ReuseSlice(req.Timeseries) + // Clear the queue on a successful response + a.queue.Clear() - reqBytes, err := req.Marshal() - if err != nil { - return nil, err - } + samplesBufferedCurrent.WithLabelValues(a.userID, a.groupKey).Set(0) - return snappy.Encode(nil, reqBytes), nil + return nil } func (a *RemoteWriteAppender) Rollback() error { - a.queue.clear() + a.queue.Clear() return nil } diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index 76bcf3d407b5..4ed7cc0f7cb5 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -146,7 +146,7 @@ func newAppendable(cfg Config, logger log.Logger, userID string) storage.Appenda } } -func newRemoteWriter(logger log.Logger, cfg Config) *remote.WriteClient { +func newRemoteWriter(logger log.Logger, cfg Config) remoteWriter { if !cfg.RemoteWrite.Enabled() { level.Warn(logger).Log("msg", "remote write client not configured") return nil @@ -163,7 +163,9 @@ func newRemoteWriter(logger log.Logger, cfg Config) *remote.WriteClient { return nil } - return &writeClient + return &remoteWriteClient{ + writeClient, + } } type GroupLoader struct{} diff --git a/pkg/ruler/compat_test.go b/pkg/ruler/compat_test.go index 69afd2f7ddfa..730a694c8838 100644 --- a/pkg/ruler/compat_test.go +++ b/pkg/ruler/compat_test.go @@ -1,12 +1,17 @@ package ruler import ( + "context" "fmt" "io/ioutil" "os" "strings" "testing" + "github.com/cortexproject/cortex/pkg/ruler" + "github.com/go-kit/kit/log" + "github.com/prometheus/prometheus/config" + "github.com/stretchr/testify/require" ) diff --git a/pkg/ruler/config.go b/pkg/ruler/config.go index 7ff6d38eb6cc..e8155be169cf 100644 --- a/pkg/ruler/config.go +++ b/pkg/ruler/config.go @@ -6,6 +6,10 @@ import ( "github.com/prometheus/prometheus/config" ) +// DefaultBufferSize defines the default size of the samples buffer which will hold samples +// while the remote-write endpoint is unavailable +const DefaultBufferSize = 100 + type Config struct { ruler.Config `yaml:",inline"` diff --git a/pkg/ruler/metrics.go b/pkg/ruler/metrics.go index d9acd9bc254a..e41a3b57c5f7 100644 --- 
a/pkg/ruler/metrics.go +++ b/pkg/ruler/metrics.go @@ -5,10 +5,10 @@ import ( "github.com/prometheus/client_golang/prometheus/promauto" ) -var samplesDiscarded = promauto.NewCounterVec(prometheus.CounterOpts{ +var samplesEvicted = promauto.NewCounterVec(prometheus.CounterOpts{ Namespace: "loki", - Name: "recording_rules_queue_samples_discarded_total", - Help: "Number of samples discarded from queue; buffer is full!", + Name: "recording_rules_queue_samples_evicted_total", + Help: "Number of samples evicted from queue; buffer is full!", }, []string{"user_id", "group_key"}) var samplesBufferedTotal = promauto.NewCounterVec(prometheus.CounterOpts{ diff --git a/pkg/ruler/queue.go b/pkg/ruler/queue.go deleted file mode 100644 index 7d7e2270a4fa..000000000000 --- a/pkg/ruler/queue.go +++ /dev/null @@ -1,58 +0,0 @@ -package ruler - -import ( - "github.com/cortexproject/cortex/pkg/cortexpb" - "github.com/prometheus/prometheus/pkg/labels" -) - -// DefaultCapacity defines the default size of the samples buffer which will hold samples -// while the remote-write endpoint is unavailable -const DefaultCapacity = 10000 - -type remoteWriteQueue struct { - labels []labels.Labels - samples []cortexpb.Sample - - capacity int - userID string - key string -} - -func newRemoteWriteQueue(capacity int, userID, key string) *remoteWriteQueue { - if capacity == 0 { - capacity = DefaultCapacity - } - - return &remoteWriteQueue{ - capacity: capacity, - key: key, - userID: userID, - } -} - -func (q *remoteWriteQueue) append(labels labels.Labels, sample cortexpb.Sample) { - if len(q.samples) >= q.capacity { - samplesDiscarded.WithLabelValues(q.userID, q.key).Inc() - - // capacity exceeded, delete oldest sample - q.samples = append(q.samples[:0], q.samples[1:]...) - q.labels = append(q.labels[:0], q.labels[1:]...) 
- } - - q.labels = append(q.labels, labels) - q.samples = append(q.samples, sample) - - samplesBufferedCurrent.WithLabelValues(q.userID, q.key).Set(float64(q.length())) - samplesBufferedTotal.WithLabelValues(q.userID, q.key).Inc() -} - -func (q *remoteWriteQueue) length() int { - return len(q.samples) -} - -func (q *remoteWriteQueue) clear() { - q.samples = nil - q.labels = nil - - samplesBufferedCurrent.WithLabelValues(q.userID, q.key).Set(0) -} diff --git a/pkg/ruler/remote_write.go b/pkg/ruler/remote_write.go new file mode 100644 index 000000000000..1b6a3a0d9274 --- /dev/null +++ b/pkg/ruler/remote_write.go @@ -0,0 +1,52 @@ +package ruler + +import ( + "fmt" + + "github.com/cortexproject/cortex/pkg/cortexpb" + "github.com/golang/snappy" + "github.com/grafana/loki/pkg/util" + "github.com/prometheus/prometheus/pkg/labels" + "github.com/prometheus/prometheus/storage/remote" +) + +type queueEntry struct { + labels labels.Labels + sample cortexpb.Sample +} + +type remoteWriter interface { + remote.WriteClient + PrepareRequest(queue *util.EvictingQueue) ([]byte, error) +} + +type remoteWriteClient struct { + remote.WriteClient +} + +// PrepareRequest takes the given queue and serialized it into a compressed +// proto write request that will be sent to Cortex +func (r *remoteWriteClient) PrepareRequest(queue *util.EvictingQueue) ([]byte, error) { + var labels []labels.Labels + var samples []cortexpb.Sample + + for _, entry := range queue.Entries() { + entry, ok := entry.(queueEntry) + if !ok { + return nil, fmt.Errorf("queue contains invalid entry of type: %T", entry) + } + + labels = append(labels, entry.labels) + samples = append(samples, entry.sample) + } + + req := cortexpb.ToWriteRequest(labels, samples, nil, cortexpb.RULE) + defer cortexpb.ReuseSlice(req.Timeseries) + + reqBytes, err := req.Marshal() + if err != nil { + return nil, err + } + + return snappy.Encode(nil, reqBytes), nil +} diff --git a/pkg/ruler/utils.go b/pkg/ruler/utils.go deleted file mode 100644 index 908d20bcefcb..000000000000 --- a/pkg/ruler/utils.go +++ /dev/null @@ -1,21 +0,0 @@ -package ruler - -import ( - "github.com/prometheus/prometheus/pkg/labels" - "github.com/prometheus/prometheus/prompb" -) - -// from github.com/prometheus/prometheus/storage/remote/codec.go -func labelsToLabelsProto(labels labels.Labels, buf []prompb.Label) []prompb.Label { - result := buf[:0] - if cap(buf) < len(labels) { - result = make([]prompb.Label, 0, len(labels)) - } - for _, l := range labels { - result = append(result, prompb.Label{ - Name: l.Name, - Value: l.Value, - }) - } - return result -} diff --git a/pkg/util/evicting_queue.go b/pkg/util/evicting_queue.go new file mode 100644 index 000000000000..355e79392698 --- /dev/null +++ b/pkg/util/evicting_queue.go @@ -0,0 +1,55 @@ +package util + +import ( + "sync" +) + +type EvictingQueue struct { + sync.RWMutex + + capacity int + entries []interface{} + onEvict func() +} + +func NewEvictingQueue(capacity int, onEvict func()) *EvictingQueue { + return &EvictingQueue{ + capacity: capacity, + onEvict: onEvict, + } +} + +func (q *EvictingQueue) Append(entry interface{}) { + q.Lock() + defer q.Unlock() + + if len(q.entries) >= q.capacity { + q.onEvict() + + // capacity exceeded, evict oldest entry + q.entries = append(q.entries[:0], q.entries[1:]...) 
+ } + + q.entries = append(q.entries, entry) +} + +func (q *EvictingQueue) Entries() []interface{} { + q.RLock() + defer q.RUnlock() + + return q.entries +} + +func (q *EvictingQueue) Length() int { + q.RLock() + defer q.RUnlock() + + return len(q.entries) +} + +func (q *EvictingQueue) Clear() { + q.Lock() + defer q.Unlock() + + q.entries = nil +} From df72b2fe91715d17db1e0cd918311c934b9102f4 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Mon, 24 May 2021 22:14:15 +0200 Subject: [PATCH 12/34] Adding per-tenant overrides of remote-write queue capacity Renaming "buffer size" to "queue capacity" to be more accurate Signed-off-by: Danny Kopping --- pkg/ruler/appender.go | 21 +++++++++++++++++---- pkg/ruler/compat.go | 9 ++++++--- pkg/ruler/config.go | 21 ++++++++++++++++++--- pkg/ruler/metrics.go | 20 +++++++++++++------- pkg/ruler/ruler.go | 2 +- pkg/validation/limits.go | 12 +++++++++--- 6 files changed, 64 insertions(+), 21 deletions(-) diff --git a/pkg/ruler/appender.go b/pkg/ruler/appender.go index 8f9b108c85aa..cafd83782609 100644 --- a/pkg/ruler/appender.go +++ b/pkg/ruler/appender.go @@ -23,6 +23,7 @@ type RemoteWriteAppendable struct { groupAppender map[string]*RemoteWriteAppender userID string cfg Config + overrides RulesLimits logger log.Logger remoteWriter remoteWriter @@ -50,6 +51,7 @@ func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { // create or retrieve an appender associated with this groupKey (unique ID for rule group) appender, found := a.groupAppender[groupKey] if !found { + capacity := a.queueCapacityForTenant() appender = &RemoteWriteAppender{ ctx: ctx, logger: a.logger, @@ -57,9 +59,11 @@ func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { groupKey: groupKey, userID: a.userID, - queue: util.NewEvictingQueue(a.cfg.RemoteWrite.BufferSize, onEvict(a.userID, groupKey)), + queue: util.NewEvictingQueue(capacity, onEvict(a.userID, groupKey)), } + samplesQueueCapacity.WithLabelValues(a.userID, groupKey).Set(float64(capacity)) + // only track reference if groupKey was retrieved if groupKey == "" { level.Warn(a.logger).Log("msg", "blank group key passed via context; creating new appender") @@ -72,6 +76,15 @@ func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { return appender } +func (a *RemoteWriteAppendable) queueCapacityForTenant() int { + capacity := a.cfg.RemoteWrite.QueueCapacity + if tenantCapacity := a.overrides.RulerRemoteWriteQueueCapacity(a.userID); tenantCapacity > 0 { + capacity = tenantCapacity + } + + return capacity +} + func onEvict(userID, groupKey string) func() { return func() { samplesEvicted.WithLabelValues(userID, groupKey).Inc() @@ -87,8 +100,8 @@ func (a *RemoteWriteAppender) Append(_ uint64, l labels.Labels, t int64, v float }, }) - samplesBufferedCurrent.WithLabelValues(a.userID, a.groupKey).Set(float64(a.queue.Length())) - samplesBufferedTotal.WithLabelValues(a.userID, a.groupKey).Inc() + samplesQueued.WithLabelValues(a.userID, a.groupKey).Set(float64(a.queue.Length())) + samplesQueuedTotal.WithLabelValues(a.userID, a.groupKey).Inc() return 0, nil } @@ -124,7 +137,7 @@ func (a *RemoteWriteAppender) Commit() error { // Clear the queue on a successful response a.queue.Clear() - samplesBufferedCurrent.WithLabelValues(a.userID, a.groupKey).Set(0) + samplesQueued.WithLabelValues(a.userID, a.groupKey).Set(0) return nil } diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index 4ed7cc0f7cb5..313826832fa7 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ 
-34,7 +34,9 @@ import ( // RulesLimits is the one function we need from limits.Overrides, and // is here to limit coupling. type RulesLimits interface { - EvaluationDelay(usedID string) time.Duration + ruler.RulesLimits + + RulerRemoteWriteQueueCapacity(userID string) int } // engineQueryFunc returns a new query function using the rules.EngineQueryFunc function @@ -111,7 +113,7 @@ func MemstoreTenantManager( memStore := NewMemStore(userID, queryFunc, metrics, 5*time.Minute, log.With(logger, "subcomponent", "MemStore")) mgr := rules.NewManager(&rules.ManagerOptions{ - Appendable: newAppendable(cfg, logger, userID), + Appendable: newAppendable(cfg, overrides, logger, userID), Queryable: memStore, QueryFunc: queryFunc, Context: user.InjectOrgID(ctx, userID), @@ -132,7 +134,7 @@ func MemstoreTenantManager( }) } -func newAppendable(cfg Config, logger log.Logger, userID string) storage.Appendable { +func newAppendable(cfg Config, overrides RulesLimits, logger log.Logger, userID string) storage.Appendable { if !cfg.RemoteWrite.Enabled() { level.Warn(logger).Log("msg", "remote write client not configured") return &NoopAppender{} @@ -143,6 +145,7 @@ func newAppendable(cfg Config, logger log.Logger, userID string) storage.Appenda logger: logger, userID: userID, cfg: cfg, + overrides: overrides, } } diff --git a/pkg/ruler/config.go b/pkg/ruler/config.go index e8155be169cf..9d20e45e7019 100644 --- a/pkg/ruler/config.go +++ b/pkg/ruler/config.go @@ -6,9 +6,9 @@ import ( "github.com/prometheus/prometheus/config" ) -// DefaultBufferSize defines the default size of the samples buffer which will hold samples +// DefaultQueueCapacity defines the default size of the samples queue which will hold samples // while the remote-write endpoint is unavailable -const DefaultBufferSize = 100 +const DefaultQueueCapacity = 10000 type Config struct { ruler.Config `yaml:",inline"` @@ -19,7 +19,7 @@ type Config struct { type RemoteWriteConfig struct { Client config.RemoteWriteConfig `yaml:"client"` - BufferSize int `yaml:"buffer_size,omitempty"` + QueueCapacity int `yaml:"queue_capacity,omitempty"` } func (c *RemoteWriteConfig) Enabled() bool { @@ -27,6 +27,21 @@ func (c *RemoteWriteConfig) Enabled() bool { return c.Client.URL != nil } +func (c *RemoteWriteConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { + type raw RemoteWriteConfig + var cfg raw + + // set defaults + cfg.QueueCapacity = DefaultQueueCapacity + + if err := unmarshal(&cfg); err != nil { + return err + } + + *c = RemoteWriteConfig(cfg) + return nil +} + // Validate overrides the embedded cortex variant which expects a cortex limits struct. Instead copy the relevant bits over. 
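For orientation, this is roughly the shape of a fully populated remote-write block once the YAML defaults have been applied. It is a sketch only: the target URL and timeout are placeholders rather than values taken from the patch.

    package ruler

    import (
        "net/url"
        "time"

        promConfig "github.com/prometheus/common/config"
        "github.com/prometheus/common/model"
        "github.com/prometheus/prometheus/config"
    )

    func exampleRemoteWriteConfig() RemoteWriteConfig {
        target, err := url.Parse("http://cortex:9009/api/prom/push") // placeholder endpoint
        if err != nil {
            panic(err)
        }

        return RemoteWriteConfig{
            // "client" mirrors the Prometheus remote_write client configuration
            Client: config.RemoteWriteConfig{
                URL:           &promConfig.URL{URL: target},
                RemoteTimeout: model.Duration(30 * time.Second),
            },
            // "queue_capacity" falls back to DefaultQueueCapacity when omitted from YAML
            QueueCapacity: DefaultQueueCapacity,
        }
    }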
func (cfg *Config) Validate() error { if err := cfg.StoreConfig.Validate(); err != nil { diff --git a/pkg/ruler/metrics.go b/pkg/ruler/metrics.go index e41a3b57c5f7..48aebc3adf80 100644 --- a/pkg/ruler/metrics.go +++ b/pkg/ruler/metrics.go @@ -8,17 +8,23 @@ import ( var samplesEvicted = promauto.NewCounterVec(prometheus.CounterOpts{ Namespace: "loki", Name: "recording_rules_queue_samples_evicted_total", - Help: "Number of samples evicted from queue; buffer is full!", + Help: "Number of samples evicted from queue; queue is full!", }, []string{"user_id", "group_key"}) -var samplesBufferedTotal = promauto.NewCounterVec(prometheus.CounterOpts{ +var samplesQueuedTotal = promauto.NewCounterVec(prometheus.CounterOpts{ Namespace: "loki", - Name: "recording_rules_queue_samples_buffered_total", - Help: "Number of samples buffered in queue in total.", + Name: "recording_rules_queue_samples_queued_total", + Help: "Number of samples queued in total.", }, []string{"user_id", "group_key"}) -var samplesBufferedCurrent = promauto.NewGaugeVec(prometheus.GaugeOpts{ +var samplesQueued = promauto.NewGaugeVec(prometheus.GaugeOpts{ Namespace: "loki", - Name: "recording_rules_queue_samples_buffered_current", - Help: "Number of samples currently buffered in queue.", + Name: "recording_rules_queue_samples_queued_current", + Help: "Number of samples queued to be remote-written.", +}, []string{"user_id", "group_key"}) + +var samplesQueueCapacity = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: "loki", + Name: "recording_rules_queue_samples_queue_capacity", + Help: "Number of samples that can be queued before eviction of oldest samples occurs.", }, []string{"user_id", "group_key"}) diff --git a/pkg/ruler/ruler.go b/pkg/ruler/ruler.go index 95edf1ada2b8..3dc9d0530366 100644 --- a/pkg/ruler/ruler.go +++ b/pkg/ruler/ruler.go @@ -9,7 +9,7 @@ import ( "github.com/grafana/loki/pkg/logql" ) -func NewRuler(cfg Config, engine *logql.Engine, reg prometheus.Registerer, logger log.Logger, ruleStore rulestore.RuleStore, limits ruler.RulesLimits) (*ruler.Ruler, error) { +func NewRuler(cfg Config, engine *logql.Engine, reg prometheus.Registerer, logger log.Logger, ruleStore rulestore.RuleStore, limits RulesLimits) (*ruler.Ruler, error) { mgr, err := ruler.NewDefaultMultiTenantManager( cfg.Config, MemstoreTenantManager( diff --git a/pkg/validation/limits.go b/pkg/validation/limits.go index d97cef3dfaa4..b3efac22d12d 100644 --- a/pkg/validation/limits.go +++ b/pkg/validation/limits.go @@ -60,9 +60,10 @@ type Limits struct { QuerySplitDuration model.Duration `yaml:"split_queries_by_interval" json:"split_queries_by_interval"` // Ruler defaults and limits. 
- RulerEvaluationDelay model.Duration `yaml:"ruler_evaluation_delay_duration" json:"ruler_evaluation_delay_duration"` - RulerMaxRulesPerRuleGroup int `yaml:"ruler_max_rules_per_rule_group" json:"ruler_max_rules_per_rule_group"` - RulerMaxRuleGroupsPerTenant int `yaml:"ruler_max_rule_groups_per_tenant" json:"ruler_max_rule_groups_per_tenant"` + RulerEvaluationDelay model.Duration `yaml:"ruler_evaluation_delay_duration" json:"ruler_evaluation_delay_duration"` + RulerMaxRulesPerRuleGroup int `yaml:"ruler_max_rules_per_rule_group" json:"ruler_max_rules_per_rule_group"` + RulerMaxRuleGroupsPerTenant int `yaml:"ruler_max_rule_groups_per_tenant" json:"ruler_max_rule_groups_per_tenant"` + RulerRemoteWriteQueueCapacity int `yaml:"ruler_remote_write_queue_capacity" json:"ruler_remote_write_queue_capacity"` // Global and per tenant retention RetentionPeriod model.Duration `yaml:"retention_period" json:"retention_period"` @@ -354,6 +355,11 @@ func (o *Overrides) RulerMaxRuleGroupsPerTenant(userID string) int { return o.getOverridesForUser(userID).RulerMaxRuleGroupsPerTenant } +// RulerRemoteWriteQueueCapacity returns the remote-write queue capacity for a given user. +func (o *Overrides) RulerRemoteWriteQueueCapacity(userID string) int { + return o.getOverridesForUser(userID).RulerRemoteWriteQueueCapacity +} + // RetentionPeriod returns the retention period for a given user. func (o *Overrides) RetentionPeriod(userID string) time.Duration { return time.Duration(o.getOverridesForUser(userID).RetentionPeriod) From 2eb904228a113a476a6f7ae007ef9d4237debd7d Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Mon, 24 May 2021 22:31:43 +0200 Subject: [PATCH 13/34] Adding tests for evicting queue Minor refactoring Signed-off-by: Danny Kopping --- pkg/util/evicting_queue.go | 11 +++-- pkg/util/evicting_queue_test.go | 80 +++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 pkg/util/evicting_queue_test.go diff --git a/pkg/util/evicting_queue.go b/pkg/util/evicting_queue.go index 355e79392698..e5f2b3b13239 100644 --- a/pkg/util/evicting_queue.go +++ b/pkg/util/evicting_queue.go @@ -24,15 +24,18 @@ func (q *EvictingQueue) Append(entry interface{}) { defer q.Unlock() if len(q.entries) >= q.capacity { - q.onEvict() - - // capacity exceeded, evict oldest entry - q.entries = append(q.entries[:0], q.entries[1:]...) + q.evictOldest() } q.entries = append(q.entries, entry) } +func (q *EvictingQueue) evictOldest() { + q.onEvict() + + q.entries = append(q.entries[:0], q.entries[1:]...) 
+} + func (q *EvictingQueue) Entries() []interface{} { q.RLock() defer q.RUnlock() diff --git a/pkg/util/evicting_queue_test.go b/pkg/util/evicting_queue_test.go new file mode 100644 index 000000000000..185a39de8339 --- /dev/null +++ b/pkg/util/evicting_queue_test.go @@ -0,0 +1,80 @@ +package util + +import ( + "math/rand" + "sync" + "testing" + + "github.com/stretchr/testify/require" +) + +func noopOnEvict() {} + +func TestQueueAppend(t *testing.T) { + q := NewEvictingQueue(10, noopOnEvict) + + q.Append(1) + q.Append(2) + q.Append(3) + q.Append(4) + q.Append(5) + + require.Equal(t, 5, q.Length()) +} + +func TestQueueEvict(t *testing.T) { + q := NewEvictingQueue(3, noopOnEvict) + + // appending 5 entries will cause the first (oldest) 2 entries to be evicted + entries := []interface{}{1, 2, 3, 4, 5} + for _, entry := range entries { + q.Append(entry) + } + + require.Equal(t, 3, q.Length()) + require.Equal(t, entries[2:], q.Entries()) +} + +func TestQueueClear(t *testing.T) { + q := NewEvictingQueue(3, noopOnEvict) + + q.Append(1) + q.Clear() + + require.Equal(t, 0, q.Length()) +} + +func TestQueueEvictionCallback(t *testing.T) { + var evictionCallbackCalls int + + q := NewEvictingQueue(3, func() { + evictionCallbackCalls++ + }) + + for i := 0; i < 5; i++ { + q.Append(i) + } + + require.Equal(t, 2, evictionCallbackCalls) +} + +func TestSafeConcurrentAccess(t *testing.T) { + q := NewEvictingQueue(3, noopOnEvict) + + var wg sync.WaitGroup + + for w := 0; w < 30; w++ { + wg.Add(1) + go func() { + defer wg.Done() + + for i := 0; i < 500; i++ { + q.Append(rand.Int()) + } + }() + } + + wg.Wait() + + require.Equal(t, 3, q.Length()) +} From 19874d9330cddf562524ddf8dad7c796e175c5be Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Tue, 25 May 2021 16:42:06 +0200 Subject: [PATCH 14/34] Adding more tests and refactoring Signed-off-by: Danny Kopping --- pkg/ruler/appender.go | 4 + pkg/ruler/appender_test.go | 302 ++++++++++++++++++++++++++++++++ pkg/ruler/compat.go | 6 +- pkg/ruler/compat_test.go | 18 +- pkg/ruler/config.go | 8 +- pkg/ruler/remote_write_test.go | 47 +++++ pkg/util/evicting_queue.go | 9 + pkg/util/evicting_queue_test.go | 18 ++ 8 files changed, 401 insertions(+), 11 deletions(-) create mode 100644 pkg/ruler/appender_test.go create mode 100644 pkg/ruler/remote_write_test.go diff --git a/pkg/ruler/appender.go b/pkg/ruler/appender.go index cafd83782609..ce5596251cab 100644 --- a/pkg/ruler/appender.go +++ b/pkg/ruler/appender.go @@ -40,6 +40,10 @@ type RemoteWriteAppender struct { } func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { + if !a.cfg.RemoteWrite.Enabled { + return &NoopAppender{} + } + var appender *RemoteWriteAppender if a.groupAppender == nil { diff --git a/pkg/ruler/appender_test.go b/pkg/ruler/appender_test.go new file mode 100644 index 000000000000..ce59a5bd175e --- /dev/null +++ b/pkg/ruler/appender_test.go @@ -0,0 +1,302 @@ +package ruler + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/cortexproject/cortex/pkg/cortexpb" + "github.com/go-kit/kit/log" + "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/pkg/validation" + "github.com/prometheus/prometheus/pkg/labels" + "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/rules" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +var ( + Logger = log.NewNopLogger() + UserID = "fake" + EmptyWriteRequest = []byte{} +) + +// TODO: +// tests: +// - non-metric query executed (document this) + +func 
TestGroupKeyRetrieval(t *testing.T) { + ruleFile := "/my/file" + groupName := "my-group" + + ctx := createOriginContext(ruleFile, groupName) + // group key should match value derived from context + require.Equal(t, rules.GroupKey(ruleFile, groupName), retrieveGroupKeyFromContext(ctx)) + + // group key should be blank if context does not contain expected data + require.Equal(t, "", retrieveGroupKeyFromContext(context.TODO())) +} + +// TestMemoizedAppenders tests that appenders are memoized by their associated group key +func TestMemoizedAppenders(t *testing.T) { + ctx := createOriginContext("/rule/file", "rule-group") + appendable := createBasicAppendable() + + // context passing a valid group key will allow the appender to be memoized + appender := appendable.Appender(ctx) + require.Same(t, appender, appendable.Appender(ctx)) + + // a missing or invalid group key will force a new appender to be created each time + ctx = promql.NewOriginContext(context.TODO(), nil) + appender = appendable.Appender(ctx) + require.NotSame(t, appender, appendable.Appender(ctx)) +} + +func TestAppenderSeparationByRuleGroup(t *testing.T) { + ctxA := createOriginContext("/rule/fileA", "rule-groupA") + ctxB := createOriginContext("/rule/fileB", "rule-groupB") + appendable := createBasicAppendable() + + // context passing a valid group key will allow the appender to be memoized + appenderA := appendable.Appender(ctxA) + appenderB := appendable.Appender(ctxB) + require.NotSame(t, appenderA, appenderB) +} + +func TestQueueCapacity(t *testing.T) { + ctx := createOriginContext("/rule/file", "rule-group") + appendable := createBasicAppendable() + + defaultCapacity := 100 + appendable.cfg.RemoteWrite.QueueCapacity = defaultCapacity + + appender := appendable.Appender(ctx).(*RemoteWriteAppender) + require.Equal(t, appender.queue.Capacity(), defaultCapacity) +} + +func TestQueueCapacityTenantOverride(t *testing.T) { + ctx := createOriginContext("/rule/file", "rule-group") + appendable := createBasicAppendable() + + defaultCapacity := 100 + overriddenCapacity := 999 + appendable.cfg.RemoteWrite.QueueCapacity = defaultCapacity + + overrides, err := validation.NewOverrides(validation.Limits{}, func(userID string) *validation.Limits { + return &validation.Limits{ + RulerRemoteWriteQueueCapacity: overriddenCapacity, + } + }) + require.Nil(t, err) + appendable.overrides = overrides + + appender := appendable.Appender(ctx).(*RemoteWriteAppender) + require.Equal(t, appender.queue.Capacity(), overriddenCapacity) +} + +func TestAppendSample(t *testing.T) { + ctx := createOriginContext("/rule/file", "rule-group") + appendable := createBasicAppendable() + appender := appendable.Appender(ctx).(*RemoteWriteAppender) + + labels := labels.Labels{ + labels.Label{ + Name: "cluster", + Value: "us-central1", + }, + } + ts := time.Now().Unix() + val := 91.2 + + sample := queueEntry{ + labels: labels, + sample: cortexpb.Sample{ + Value: val, + TimestampMs: ts, + }, + } + + _, err := appender.Append(0, labels, ts, val) + require.Nil(t, err) + + require.Equal(t, appender.queue.Entries()[0], sample) +} + +func TestSuccessfulRemoteWriteSample(t *testing.T) { + client := &MockRemoteWriteClient{} + + appendable := createBasicAppendable() + appendable.remoteWriter = client + + appender := appendable.Appender(context.TODO()).(*RemoteWriteAppender) + + client.On("PrepareRequest", mock.Anything).Return(EmptyWriteRequest, nil).Once() + client.On("Store", mock.Anything, mock.Anything).Return(nil).Once() + + _, err := appender.Append(0, labels.Labels{}, 
time.Now().UnixNano(), 11.2) + require.Nil(t, err) + + // commit didn't return any error, which means a successful write + err = appender.Commit() + require.Nil(t, err) + + // queue should be cleared on successful write + require.Zero(t, appender.queue.Length()) + + client.AssertExpectations(t) +} + +func TestUnsuccessfulRemoteWritePrepare(t *testing.T) { + client := &MockRemoteWriteClient{} + + appendable := createBasicAppendable() + appendable.remoteWriter = client + + appender := appendable.Appender(context.TODO()).(*RemoteWriteAppender) + + client.On("PrepareRequest", mock.Anything).Return(EmptyWriteRequest, fmt.Errorf("some error")).Once() + _, err := appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.2) + require.Nil(t, err) + + // commit fails if PrepareRequest returns an error + err = appender.Commit() + require.NotNil(t, err) + + // queue should NOT be cleared on unsuccessful write + require.NotZero(t, appender.queue.Length()) + + client.AssertExpectations(t) +} + +func TestUnsuccessfulRemoteWriteStore(t *testing.T) { + client := &MockRemoteWriteClient{} + + appendable := createBasicAppendable() + appendable.remoteWriter = client + + appender := appendable.Appender(context.TODO()).(*RemoteWriteAppender) + + client.On("PrepareRequest", mock.Anything).Return(EmptyWriteRequest, nil).Once() + client.On("Store", mock.Anything, mock.Anything).Return(fmt.Errorf("some error")).Once() + _, err := appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.2) + require.Nil(t, err) + + // commit fails if Store returns an error + err = appender.Commit() + require.NotNil(t, err) + + // queue should NOT be cleared on unsuccessful write + require.NotZero(t, appender.queue.Length()) + + client.AssertExpectations(t) +} + +func TestEmptyRemoteWrite(t *testing.T) { + client := &MockRemoteWriteClient{} + + appendable := createBasicAppendable() + appendable.remoteWriter = client + + appender := appendable.Appender(context.TODO()).(*RemoteWriteAppender) + + // queue should be empty + require.Zero(t, appender.queue.Length()) + + // no error returned + err := appender.Commit() + require.Nil(t, err) + + // PrepareRequest & Store were not called either + client.AssertExpectations(t) +} + +func TestAppenderRollback(t *testing.T) { + appendable := createBasicAppendable() + appender := appendable.Appender(context.TODO()).(*RemoteWriteAppender) + + appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.2) + appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.2) + appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.2) + + require.Equal(t, 3, appender.queue.Length()) + + require.Nil(t, appender.Rollback()) + require.Zero(t, appender.queue.Length()) +} + +func TestAppenderEvictOldest(t *testing.T) { + queueCapacity := 2 + + appendable := createBasicAppendable() + appendable.cfg.RemoteWrite.QueueCapacity = queueCapacity + + appender := appendable.Appender(context.TODO()).(*RemoteWriteAppender) + + appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.2) + appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.3) + appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.4) + + // capacity is enforced + require.Equal(t, queueCapacity, appender.queue.Length()) + + // only two newest samples are kept + require.Equal(t, appender.queue.Entries()[0].(queueEntry).sample.Value, 11.3) + require.Equal(t, appender.queue.Entries()[1].(queueEntry).sample.Value, 11.4) +} + +// context is created by ruler, passing along details of the rule being executed +// see 
github.com/prometheus/prometheus/rules/manager.go +// -> func (g *Group) run(ctx context.Context) +func createOriginContext(ruleFile, groupName string) context.Context { + return promql.NewOriginContext(context.TODO(), map[string]interface{}{ + "ruleGroup": map[string]string{ + "file": ruleFile, + "name": groupName, + }, + }) +} + +func createBasicAppendable() RemoteWriteAppendable { + return RemoteWriteAppendable{ + userID: "fake", + cfg: Config{ + RemoteWrite: RemoteWriteConfig{ + Enabled: true, + QueueCapacity: 10, + }, + }, + logger: log.NewNopLogger(), + overrides: fakeLimits(), + } +} + +func fakeLimits() RulesLimits { + o, err := validation.NewOverrides(validation.Limits{}, nil) + if err != nil { + panic(err) + } + return o +} + +type MockRemoteWriteClient struct { + mock.Mock +} + +// Store stores the given samples in the remote storage. +func (c *MockRemoteWriteClient) Store(ctx context.Context, data []byte) error { + args := c.Called(ctx, data) + return args.Error(0) +} + +// Name uniquely identifies the remote storage. +func (c *MockRemoteWriteClient) Name() string { return "" } + +// Endpoint is the remote read or write endpoint for the storage client. +func (c *MockRemoteWriteClient) Endpoint() string { return "" } + +func (c *MockRemoteWriteClient) PrepareRequest(queue *util.EvictingQueue) ([]byte, error) { + args := c.Called(queue) + return args.Get(0).([]byte), args.Error(1) +} diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index 313826832fa7..f595083c286d 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -135,7 +135,7 @@ func MemstoreTenantManager( } func newAppendable(cfg Config, overrides RulesLimits, logger log.Logger, userID string) storage.Appendable { - if !cfg.RemoteWrite.Enabled() { + if !cfg.RemoteWrite.Enabled { level.Warn(logger).Log("msg", "remote write client not configured") return &NoopAppender{} } @@ -150,8 +150,8 @@ func newAppendable(cfg Config, overrides RulesLimits, logger log.Logger, userID } func newRemoteWriter(logger log.Logger, cfg Config) remoteWriter { - if !cfg.RemoteWrite.Enabled() { - level.Warn(logger).Log("msg", "remote write client not configured") + if !cfg.RemoteWrite.Enabled { + level.Warn(logger).Log("msg", "remote write is not enabled") return nil } diff --git a/pkg/ruler/compat_test.go b/pkg/ruler/compat_test.go index 730a694c8838..6ff276db4cff 100644 --- a/pkg/ruler/compat_test.go +++ b/pkg/ruler/compat_test.go @@ -10,8 +10,7 @@ import ( "github.com/cortexproject/cortex/pkg/ruler" "github.com/go-kit/kit/log" - "github.com/prometheus/prometheus/config" - + "github.com/grafana/loki/pkg/validation" "github.com/stretchr/testify/require" ) @@ -270,3 +269,18 @@ groups: } } + +// TestNoopAppender tests that a NoopAppender is created when remote-write is disabled +func TestNoopAppender(t *testing.T) { + cfg := Config{ + Config: ruler.Config{}, + RemoteWrite: RemoteWriteConfig{ + Enabled: false, + }, + } + require.False(t, cfg.RemoteWrite.Enabled) + + appendable := newAppendable(cfg, &validation.Overrides{}, log.NewNopLogger(), "fake") + appender := appendable.Appender(context.TODO()) + require.IsType(t, NoopAppender{}, appender) +} diff --git a/pkg/ruler/config.go b/pkg/ruler/config.go index 9d20e45e7019..07be27ad180f 100644 --- a/pkg/ruler/config.go +++ b/pkg/ruler/config.go @@ -17,16 +17,12 @@ type Config struct { } type RemoteWriteConfig struct { - Client config.RemoteWriteConfig `yaml:"client"` + Client config.RemoteWriteConfig `yaml:"client"` + Enabled bool `yaml:"enabled"` QueueCapacity int 
`yaml:"queue_capacity,omitempty"` } -func (c *RemoteWriteConfig) Enabled() bool { - // remote-write is considered disabled if there's no target to write to - return c.Client.URL != nil -} - func (c *RemoteWriteConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { type raw RemoteWriteConfig var cfg raw diff --git a/pkg/ruler/remote_write_test.go b/pkg/ruler/remote_write_test.go new file mode 100644 index 000000000000..8548aede802c --- /dev/null +++ b/pkg/ruler/remote_write_test.go @@ -0,0 +1,47 @@ +package ruler + +import ( + "testing" + "time" + + "github.com/cortexproject/cortex/pkg/cortexpb" + "github.com/go-kit/kit/log" + "github.com/golang/snappy" + "github.com/prometheus/prometheus/pkg/labels" + "github.com/stretchr/testify/require" +) + +func TestPrepareRequest(t *testing.T) { + ctx := createOriginContext("/rule/file", "rule-group") + appendable := createBasicAppendable() + appendable.remoteWriter = newRemoteWriter(log.NewNopLogger(), appendable.cfg) + + appender := appendable.Appender(ctx).(*RemoteWriteAppender) + + lbs := labels.Labels{ + labels.Label{ + Name: "cluster", + Value: "us-central1", + }, + } + sample := cortexpb.Sample{ + Value: 70, + TimestampMs: time.Now().Unix(), + } + + appender.Append(0, lbs, sample.TimestampMs, sample.Value) + + bytes, err := appender.remoteWriter.PrepareRequest(appender.queue) + require.Nil(t, err) + + var req cortexpb.WriteRequest + + reqBytes, err := snappy.Decode(nil, bytes) + require.Nil(t, err) + + require.Nil(t, req.Unmarshal(reqBytes)) + + require.Equal(t, req.Timeseries[0].Labels[0].Name, lbs[0].Name) + require.Equal(t, req.Timeseries[0].Labels[0].Value, lbs[0].Value) + require.Equal(t, req.Timeseries[0].Samples[0], sample) +} diff --git a/pkg/util/evicting_queue.go b/pkg/util/evicting_queue.go index e5f2b3b13239..8d9909899c80 100644 --- a/pkg/util/evicting_queue.go +++ b/pkg/util/evicting_queue.go @@ -13,6 +13,11 @@ type EvictingQueue struct { } func NewEvictingQueue(capacity int, onEvict func()) *EvictingQueue { + if capacity <= 0 { + // a queue of 0 (or smaller) capacity is invalid + return nil + } + return &EvictingQueue{ capacity: capacity, onEvict: onEvict, @@ -50,6 +55,10 @@ func (q *EvictingQueue) Length() int { return len(q.entries) } +func (q *EvictingQueue) Capacity() int { + return q.capacity +} + func (q *EvictingQueue) Clear() { q.Lock() defer q.Unlock() diff --git a/pkg/util/evicting_queue_test.go b/pkg/util/evicting_queue_test.go index 185a39de8339..aa76ad78cf8c 100644 --- a/pkg/util/evicting_queue_test.go +++ b/pkg/util/evicting_queue_test.go @@ -22,6 +22,24 @@ func TestQueueAppend(t *testing.T) { require.Equal(t, 5, q.Length()) } +func TestQueueCapacity(t *testing.T) { + q := NewEvictingQueue(9, noopOnEvict) + require.Equal(t, 9, q.Capacity()) + + q.capacity = 11 + require.Equal(t, 11, q.Capacity()) +} + +func TestZeroCapacityQueue(t *testing.T) { + q := NewEvictingQueue(0, noopOnEvict) + require.Nil(t, q) +} + +func TestNegativeCapacityQueue(t *testing.T) { + q := NewEvictingQueue(-1, noopOnEvict) + require.Nil(t, q) +} + func TestQueueEvict(t *testing.T) { q := NewEvictingQueue(3, noopOnEvict) From 0200090b829151424394b4a7aedbff590154c01d Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Tue, 25 May 2021 22:15:44 +0200 Subject: [PATCH 15/34] Adding queue benchmark Signed-off-by: Danny Kopping --- pkg/util/evicting_queue_test.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pkg/util/evicting_queue_test.go b/pkg/util/evicting_queue_test.go index aa76ad78cf8c..87f6a20f6d53 100644 --- 
a/pkg/util/evicting_queue_test.go +++ b/pkg/util/evicting_queue_test.go @@ -1,6 +1,7 @@ package util import ( + "math" "math/rand" "sync" "testing" @@ -96,3 +97,17 @@ func TestSafeConcurrentAccess(t *testing.T) { require.Equal(t, 3, q.Length()) } + +func BenchmarkAppendAndEvict(b *testing.B) { + capacity := 5000 + q := NewEvictingQueue(capacity, noopOnEvict) + + b.ResetTimer() + b.ReportAllocs() + + for n := 0; n < b.N; n++ { + q.Append(n) + } + + require.EqualValues(b, math.Min(float64(b.N), float64(capacity)), q.Length()) +} From 58000105aa65419140d753de90e86acdcee1ec3c Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Tue, 25 May 2021 22:21:23 +0200 Subject: [PATCH 16/34] Reducing redundancy in metric names Signed-off-by: Danny Kopping --- pkg/ruler/metrics.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/ruler/metrics.go b/pkg/ruler/metrics.go index 48aebc3adf80..b23cfffbd7c7 100644 --- a/pkg/ruler/metrics.go +++ b/pkg/ruler/metrics.go @@ -7,24 +7,24 @@ import ( var samplesEvicted = promauto.NewCounterVec(prometheus.CounterOpts{ Namespace: "loki", - Name: "recording_rules_queue_samples_evicted_total", + Name: "recording_rules_samples_evicted_total", Help: "Number of samples evicted from queue; queue is full!", }, []string{"user_id", "group_key"}) var samplesQueuedTotal = promauto.NewCounterVec(prometheus.CounterOpts{ Namespace: "loki", - Name: "recording_rules_queue_samples_queued_total", + Name: "recording_rules_samples_queued_total", Help: "Number of samples queued in total.", }, []string{"user_id", "group_key"}) var samplesQueued = promauto.NewGaugeVec(prometheus.GaugeOpts{ Namespace: "loki", - Name: "recording_rules_queue_samples_queued_current", + Name: "recording_rules_samples_queued_current", Help: "Number of samples queued to be remote-written.", }, []string{"user_id", "group_key"}) var samplesQueueCapacity = promauto.NewGaugeVec(prometheus.GaugeOpts{ Namespace: "loki", - Name: "recording_rules_queue_samples_queue_capacity", + Name: "recording_rules_samples_queue_capacity", Help: "Number of samples that can be queued before eviction of oldest samples occurs.", }, []string{"user_id", "group_key"}) From 02a0943ebffffb1d01573a7ea67fd60c7e96aad0 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Tue, 25 May 2021 22:55:12 +0200 Subject: [PATCH 17/34] Testing that only metric queries can be run Signed-off-by: Danny Kopping --- pkg/ruler/appender_test.go | 4 ---- pkg/ruler/compat_test.go | 26 ++++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/pkg/ruler/appender_test.go b/pkg/ruler/appender_test.go index ce59a5bd175e..f4545242daeb 100644 --- a/pkg/ruler/appender_test.go +++ b/pkg/ruler/appender_test.go @@ -23,10 +23,6 @@ var ( EmptyWriteRequest = []byte{} ) -// TODO: -// tests: -// - non-metric query executed (document this) - func TestGroupKeyRetrieval(t *testing.T) { ruleFile := "/my/file" groupName := "my-group" diff --git a/pkg/ruler/compat_test.go b/pkg/ruler/compat_test.go index 6ff276db4cff..d0f82a9c0b7c 100644 --- a/pkg/ruler/compat_test.go +++ b/pkg/ruler/compat_test.go @@ -7,9 +7,12 @@ import ( "os" "strings" "testing" + "time" "github.com/cortexproject/cortex/pkg/ruler" "github.com/go-kit/kit/log" + "github.com/grafana/loki/pkg/iter" + "github.com/grafana/loki/pkg/logql" "github.com/grafana/loki/pkg/validation" "github.com/stretchr/testify/require" ) @@ -284,3 +287,26 @@ func TestNoopAppender(t *testing.T) { appender := appendable.Appender(context.TODO()) require.IsType(t, NoopAppender{}, appender) } + 
+// TestNonMetricQuery tests that only metric queries can be executed in the query function, +// as both alert and recording rules rely on metric queries being run +func TestNonMetricQuery(t *testing.T) { + overrides, err := validation.NewOverrides(validation.Limits{}, nil) + require.Nil(t, err) + + engine := logql.NewEngine(logql.EngineOpts{}, &FakeQuerier{}, overrides) + queryFunc := engineQueryFunc(engine, overrides, "fake") + + _, err = queryFunc(context.TODO(), `{job="nginx"}`, time.Now()) + require.Error(t, err, "rule result is not a vector or scalar") +} + +type FakeQuerier struct{} + +func (q *FakeQuerier) SelectLogs(context.Context, logql.SelectLogParams) (iter.EntryIterator, error) { + return iter.NoopIterator, nil +} + +func (q *FakeQuerier) SelectSamples(context.Context, logql.SelectSampleParams) (iter.SampleIterator, error) { + return iter.NoopIterator, nil +} From 0d188f7ce8664b525f120570e0ab7e926c78c563 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Tue, 25 May 2021 23:01:21 +0200 Subject: [PATCH 18/34] Minor fixes pre-review Signed-off-by: Danny Kopping --- pkg/ruler/appender_test.go | 1 - pkg/ruler/compat.go | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/ruler/appender_test.go b/pkg/ruler/appender_test.go index f4545242daeb..30c4b6923af6 100644 --- a/pkg/ruler/appender_test.go +++ b/pkg/ruler/appender_test.go @@ -55,7 +55,6 @@ func TestAppenderSeparationByRuleGroup(t *testing.T) { ctxB := createOriginContext("/rule/fileB", "rule-groupB") appendable := createBasicAppendable() - // context passing a valid group key will allow the appender to be memoized appenderA := appendable.Appender(ctxA) appenderB := appendable.Appender(ctxB) require.NotSame(t, appenderA, appenderB) diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index f595083c286d..ee808c764409 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -136,7 +136,7 @@ func MemstoreTenantManager( func newAppendable(cfg Config, overrides RulesLimits, logger log.Logger, userID string) storage.Appendable { if !cfg.RemoteWrite.Enabled { - level.Warn(logger).Log("msg", "remote write client not configured") + level.Warn(logger).Log("msg", "remote write is not enabled") return &NoopAppender{} } From 858934a7cd9d156c373772274c64f917084b4559 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Wed, 26 May 2021 00:03:18 +0200 Subject: [PATCH 19/34] Appeasing the linter Signed-off-by: Danny Kopping --- pkg/ruler/appender_test.go | 21 +++++++++++---------- pkg/ruler/compat_test.go | 3 ++- pkg/ruler/remote_write.go | 3 ++- pkg/ruler/remote_write_test.go | 2 +- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/pkg/ruler/appender_test.go b/pkg/ruler/appender_test.go index 30c4b6923af6..5fbba131db8b 100644 --- a/pkg/ruler/appender_test.go +++ b/pkg/ruler/appender_test.go @@ -8,13 +8,14 @@ import ( "github.com/cortexproject/cortex/pkg/cortexpb" "github.com/go-kit/kit/log" - "github.com/grafana/loki/pkg/util" - "github.com/grafana/loki/pkg/validation" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/rules" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" + + "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/pkg/validation" ) var ( @@ -211,9 +212,9 @@ func TestAppenderRollback(t *testing.T) { appendable := createBasicAppendable() appender := appendable.Appender(context.TODO()).(*RemoteWriteAppender) - appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.2) - 
appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.2) - appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.2) + appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.2) //nolint:errcheck + appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.2) //nolint:errcheck + appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.2) //nolint:errcheck require.Equal(t, 3, appender.queue.Length()) @@ -229,9 +230,9 @@ func TestAppenderEvictOldest(t *testing.T) { appender := appendable.Appender(context.TODO()).(*RemoteWriteAppender) - appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.2) - appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.3) - appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.4) + appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.2) //nolint:errcheck + appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.3) //nolint:errcheck + appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.4) //nolint:errcheck // capacity is enforced require.Equal(t, queueCapacity, appender.queue.Length()) @@ -255,14 +256,14 @@ func createOriginContext(ruleFile, groupName string) context.Context { func createBasicAppendable() RemoteWriteAppendable { return RemoteWriteAppendable{ - userID: "fake", + userID: UserID, cfg: Config{ RemoteWrite: RemoteWriteConfig{ Enabled: true, QueueCapacity: 10, }, }, - logger: log.NewNopLogger(), + logger: Logger, overrides: fakeLimits(), } } diff --git a/pkg/ruler/compat_test.go b/pkg/ruler/compat_test.go index d0f82a9c0b7c..4894bc0d8306 100644 --- a/pkg/ruler/compat_test.go +++ b/pkg/ruler/compat_test.go @@ -11,10 +11,11 @@ import ( "github.com/cortexproject/cortex/pkg/ruler" "github.com/go-kit/kit/log" + "github.com/stretchr/testify/require" + "github.com/grafana/loki/pkg/iter" "github.com/grafana/loki/pkg/logql" "github.com/grafana/loki/pkg/validation" - "github.com/stretchr/testify/require" ) func Test_Load(t *testing.T) { diff --git a/pkg/ruler/remote_write.go b/pkg/ruler/remote_write.go index 1b6a3a0d9274..1e9bf627ee21 100644 --- a/pkg/ruler/remote_write.go +++ b/pkg/ruler/remote_write.go @@ -5,9 +5,10 @@ import ( "github.com/cortexproject/cortex/pkg/cortexpb" "github.com/golang/snappy" - "github.com/grafana/loki/pkg/util" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/storage/remote" + + "github.com/grafana/loki/pkg/util" ) type queueEntry struct { diff --git a/pkg/ruler/remote_write_test.go b/pkg/ruler/remote_write_test.go index 8548aede802c..bacf2a9f329a 100644 --- a/pkg/ruler/remote_write_test.go +++ b/pkg/ruler/remote_write_test.go @@ -29,7 +29,7 @@ func TestPrepareRequest(t *testing.T) { TimestampMs: time.Now().Unix(), } - appender.Append(0, lbs, sample.TimestampMs, sample.Value) + appender.Append(0, lbs, sample.TimestampMs, sample.Value) //nolint:errcheck bytes, err := appender.remoteWriter.PrepareRequest(appender.queue) require.Nil(t, err) From 67c78faf14e290be6dda32c73bf8720eb3b1a160 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 27 May 2021 15:43:58 +0200 Subject: [PATCH 20/34] Guarding against unprotected nil pointer dereference in Prometheus remote.Client Adding remote-write client validation Signed-off-by: Danny Kopping --- pkg/ruler/appender.go | 3 ++- pkg/ruler/appender_test.go | 11 +++++++++++ pkg/ruler/compat.go | 8 ++++---- pkg/ruler/compat_test.go | 25 +++++++++++++++++++++++++ pkg/ruler/config.go | 12 ++++++++++++ 5 files changed, 54 insertions(+), 5 deletions(-) diff --git a/pkg/ruler/appender.go 
b/pkg/ruler/appender.go index ce5596251cab..fad9220f300c 100644 --- a/pkg/ruler/appender.go +++ b/pkg/ruler/appender.go @@ -40,7 +40,8 @@ type RemoteWriteAppender struct { } func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { - if !a.cfg.RemoteWrite.Enabled { + if err := a.cfg.RemoteWrite.Validate(); err != nil { + level.Warn(a.logger).Log("msg", err) return &NoopAppender{} } diff --git a/pkg/ruler/appender_test.go b/pkg/ruler/appender_test.go index 5fbba131db8b..d51701dc231f 100644 --- a/pkg/ruler/appender_test.go +++ b/pkg/ruler/appender_test.go @@ -3,11 +3,14 @@ package ruler import ( "context" "fmt" + "net/url" "testing" "time" "github.com/cortexproject/cortex/pkg/cortexpb" "github.com/go-kit/kit/log" + promConfig "github.com/prometheus/common/config" + "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/rules" @@ -255,12 +258,20 @@ func createOriginContext(ruleFile, groupName string) context.Context { } func createBasicAppendable() RemoteWriteAppendable { + target, err := url.Parse("http://some/target") + if err != nil { + panic(err) + } + return RemoteWriteAppendable{ userID: UserID, cfg: Config{ RemoteWrite: RemoteWriteConfig{ Enabled: true, QueueCapacity: 10, + Client: config.RemoteWriteConfig{ + URL: &promConfig.URL{target}, + }, }, }, logger: Logger, diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index ee808c764409..3b5732000cef 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -135,8 +135,8 @@ func MemstoreTenantManager( } func newAppendable(cfg Config, overrides RulesLimits, logger log.Logger, userID string) storage.Appendable { - if !cfg.RemoteWrite.Enabled { - level.Warn(logger).Log("msg", "remote write is not enabled") + if err := cfg.RemoteWrite.Validate(); err != nil { + level.Warn(logger).Log("msg", err) return &NoopAppender{} } @@ -150,8 +150,8 @@ func newAppendable(cfg Config, overrides RulesLimits, logger log.Logger, userID } func newRemoteWriter(logger log.Logger, cfg Config) remoteWriter { - if !cfg.RemoteWrite.Enabled { - level.Warn(logger).Log("msg", "remote write is not enabled") + if err := cfg.RemoteWrite.Validate(); err != nil { + level.Warn(logger).Log("msg", err) return nil } diff --git a/pkg/ruler/compat_test.go b/pkg/ruler/compat_test.go index 4894bc0d8306..a4544b3d3d31 100644 --- a/pkg/ruler/compat_test.go +++ b/pkg/ruler/compat_test.go @@ -11,6 +11,7 @@ import ( "github.com/cortexproject/cortex/pkg/ruler" "github.com/go-kit/kit/log" + "github.com/prometheus/prometheus/config" "github.com/stretchr/testify/require" "github.com/grafana/loki/pkg/iter" @@ -274,6 +275,30 @@ groups: } } +// TestNoopAppender tests that a NoopAppender is created when remote-write is disabled +func TestInvalidRemoteWriteConfig(t *testing.T) { + // if remote-write is not enabled, validation fails + cfg := Config{ + Config: ruler.Config{}, + RemoteWrite: RemoteWriteConfig{ + Enabled: false, + }, + } + require.Error(t, cfg.RemoteWrite.Validate()) + + // if no remote-write URL is configured, validation fails + cfg = Config{ + Config: ruler.Config{}, + RemoteWrite: RemoteWriteConfig{ + Enabled: true, + Client: config.RemoteWriteConfig{ + URL: nil, + }, + }, + } + require.Error(t, cfg.RemoteWrite.Validate()) +} + // TestNoopAppender tests that a NoopAppender is created when remote-write is disabled func TestNoopAppender(t *testing.T) { cfg := Config{ diff --git a/pkg/ruler/config.go b/pkg/ruler/config.go index 
07be27ad180f..efd177aa3f8e 100644 --- a/pkg/ruler/config.go +++ b/pkg/ruler/config.go @@ -23,6 +23,18 @@ type RemoteWriteConfig struct { QueueCapacity int `yaml:"queue_capacity,omitempty"` } +func (c *RemoteWriteConfig) Validate() error { + if !c.Enabled { + return errors.New("remote-write is not enabled") + } + + if c.Client.URL == nil { + return errors.New("remote-write URL is not configured") + } + + return nil +} + func (c *RemoteWriteConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { type raw RemoteWriteConfig var cfg raw From 469ce9b1e4269dfb21071311c4380b3b833a473d Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 27 May 2021 16:05:22 +0200 Subject: [PATCH 21/34] Appeasing the linter Signed-off-by: Danny Kopping --- pkg/ruler/appender_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/ruler/appender_test.go b/pkg/ruler/appender_test.go index d51701dc231f..d2d771d6ef51 100644 --- a/pkg/ruler/appender_test.go +++ b/pkg/ruler/appender_test.go @@ -270,7 +270,7 @@ func createBasicAppendable() RemoteWriteAppendable { Enabled: true, QueueCapacity: 10, Client: config.RemoteWriteConfig{ - URL: &promConfig.URL{target}, + URL: &promConfig.URL{URL: target}, }, }, }, From 52489cd3aa109bd3d64e0a8c9d4c8c06a6a0eb6f Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Fri, 28 May 2021 16:23:49 +0200 Subject: [PATCH 22/34] Setting tenant ID header on remote-write client Adding User-Agent string Signed-off-by: Danny Kopping --- pkg/ruler/appender.go | 17 +++--- pkg/ruler/appender_test.go | 13 +++-- pkg/ruler/compat.go | 32 ++---------- pkg/ruler/remote_write.go | 28 ++++++++++ pkg/ruler/remote_write_test.go | 6 ++- pkg/util/mapmerge.go | 35 +++++++++++++ pkg/util/mapmerge_test.go | 96 ++++++++++++++++++++++++++++++++++ 7 files changed, 184 insertions(+), 43 deletions(-) create mode 100644 pkg/util/mapmerge.go create mode 100644 pkg/util/mapmerge_test.go diff --git a/pkg/ruler/appender.go b/pkg/ruler/appender.go index fad9220f300c..702990a140fe 100644 --- a/pkg/ruler/appender.go +++ b/pkg/ruler/appender.go @@ -21,12 +21,11 @@ import ( type RemoteWriteAppendable struct { groupAppender map[string]*RemoteWriteAppender - userID string - cfg Config - overrides RulesLimits - logger log.Logger - remoteWriter remoteWriter + userID string + cfg Config + overrides RulesLimits + logger log.Logger } type RemoteWriteAppender struct { @@ -56,11 +55,17 @@ func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { // create or retrieve an appender associated with this groupKey (unique ID for rule group) appender, found := a.groupAppender[groupKey] if !found { + client, err := newRemoteWriter(a.cfg, a.userID) + if err != nil { + level.Error(a.logger).Log("msg", "error creating remote-write client; setting appender as noop", "err", err, "tenant", a.userID) + return &NoopAppender{} + } + capacity := a.queueCapacityForTenant() appender = &RemoteWriteAppender{ ctx: ctx, logger: a.logger, - remoteWriter: a.remoteWriter, + remoteWriter: client, groupKey: groupKey, userID: a.userID, diff --git a/pkg/ruler/appender_test.go b/pkg/ruler/appender_test.go index d2d771d6ef51..8c7701d35621 100644 --- a/pkg/ruler/appender_test.go +++ b/pkg/ruler/appender_test.go @@ -23,7 +23,7 @@ import ( var ( Logger = log.NewNopLogger() - UserID = "fake" + FakeUserID = "fake" EmptyWriteRequest = []byte{} ) @@ -127,9 +127,9 @@ func TestSuccessfulRemoteWriteSample(t *testing.T) { client := &MockRemoteWriteClient{} appendable := createBasicAppendable() - appendable.remoteWriter = 
client appender := appendable.Appender(context.TODO()).(*RemoteWriteAppender) + appender.remoteWriter = client client.On("PrepareRequest", mock.Anything).Return(EmptyWriteRequest, nil).Once() client.On("Store", mock.Anything, mock.Anything).Return(nil).Once() @@ -151,9 +151,9 @@ func TestUnsuccessfulRemoteWritePrepare(t *testing.T) { client := &MockRemoteWriteClient{} appendable := createBasicAppendable() - appendable.remoteWriter = client appender := appendable.Appender(context.TODO()).(*RemoteWriteAppender) + appender.remoteWriter = client client.On("PrepareRequest", mock.Anything).Return(EmptyWriteRequest, fmt.Errorf("some error")).Once() _, err := appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.2) @@ -173,9 +173,9 @@ func TestUnsuccessfulRemoteWriteStore(t *testing.T) { client := &MockRemoteWriteClient{} appendable := createBasicAppendable() - appendable.remoteWriter = client appender := appendable.Appender(context.TODO()).(*RemoteWriteAppender) + appender.remoteWriter = client client.On("PrepareRequest", mock.Anything).Return(EmptyWriteRequest, nil).Once() client.On("Store", mock.Anything, mock.Anything).Return(fmt.Errorf("some error")).Once() @@ -196,9 +196,8 @@ func TestEmptyRemoteWrite(t *testing.T) { client := &MockRemoteWriteClient{} appendable := createBasicAppendable() - appendable.remoteWriter = client - appender := appendable.Appender(context.TODO()).(*RemoteWriteAppender) + appender.remoteWriter = client // queue should be empty require.Zero(t, appender.queue.Length()) @@ -264,7 +263,7 @@ func createBasicAppendable() RemoteWriteAppendable { } return RemoteWriteAppendable{ - userID: UserID, + userID: FakeUserID, cfg: Config{ RemoteWrite: RemoteWriteConfig{ Enabled: true, diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index 3b5732000cef..ca103bc86a35 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -22,7 +22,6 @@ import ( "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/promql/parser" "github.com/prometheus/prometheus/rules" - "github.com/prometheus/prometheus/storage/remote" "github.com/prometheus/prometheus/template" "github.com/weaveworks/common/user" yaml "gopkg.in/yaml.v3" @@ -141,33 +140,10 @@ func newAppendable(cfg Config, overrides RulesLimits, logger log.Logger, userID } return &RemoteWriteAppendable{ - remoteWriter: newRemoteWriter(logger, cfg), - logger: logger, - userID: userID, - cfg: cfg, - overrides: overrides, - } -} - -func newRemoteWriter(logger log.Logger, cfg Config) remoteWriter { - if err := cfg.RemoteWrite.Validate(); err != nil { - level.Warn(logger).Log("msg", err) - return nil - } - - writeClient, err := remote.NewWriteClient("recording_rules", &remote.ClientConfig{ - URL: cfg.RemoteWrite.Client.URL, - Timeout: cfg.RemoteWrite.Client.RemoteTimeout, - HTTPClientConfig: cfg.RemoteWrite.Client.HTTPClientConfig, - Headers: cfg.RemoteWrite.Client.Headers, - }) - if err != nil { - level.Warn(logger).Log("remote_write_url", cfg.RemoteWrite.Client.URL.String(), "msg", "cannot create remote_write client", "err", err) - return nil - } - - return &remoteWriteClient{ - writeClient, + logger: logger, + userID: userID, + cfg: cfg, + overrides: overrides, } } diff --git a/pkg/ruler/remote_write.go b/pkg/ruler/remote_write.go index 1e9bf627ee21..a2bd5f93107b 100644 --- a/pkg/ruler/remote_write.go +++ b/pkg/ruler/remote_write.go @@ -5,12 +5,16 @@ import ( "github.com/cortexproject/cortex/pkg/cortexpb" "github.com/golang/snappy" + "github.com/pkg/errors" "github.com/prometheus/prometheus/pkg/labels" 
"github.com/prometheus/prometheus/storage/remote" "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/pkg/util/build" ) +var UserAgent = fmt.Sprintf("loki-remote-write/%s", build.Version) + type queueEntry struct { labels labels.Labels sample cortexpb.Sample @@ -18,6 +22,7 @@ type queueEntry struct { type remoteWriter interface { remote.WriteClient + PrepareRequest(queue *util.EvictingQueue) ([]byte, error) } @@ -25,6 +30,29 @@ type remoteWriteClient struct { remote.WriteClient } +func newRemoteWriter(cfg Config, userID string) (remoteWriter, error) { + if err := cfg.RemoteWrite.Validate(); err != nil { + return nil, errors.Wrap(err, "validation error") + } + + writeClient, err := remote.NewWriteClient("recording_rules", &remote.ClientConfig{ + URL: cfg.RemoteWrite.Client.URL, + Timeout: cfg.RemoteWrite.Client.RemoteTimeout, + HTTPClientConfig: cfg.RemoteWrite.Client.HTTPClientConfig, + Headers: util.MergeMaps(cfg.RemoteWrite.Client.Headers, map[string]string{ + "X-Scope-OrgID": userID, + "User-Agent": UserAgent, + }), + }) + if err != nil { + return nil, errors.Wrapf(err, "could not create remote-write client for tenant: %v", userID) + } + + return &remoteWriteClient{ + writeClient, + }, nil +} + // PrepareRequest takes the given queue and serialized it into a compressed // proto write request that will be sent to Cortex func (r *remoteWriteClient) PrepareRequest(queue *util.EvictingQueue) ([]byte, error) { diff --git a/pkg/ruler/remote_write_test.go b/pkg/ruler/remote_write_test.go index bacf2a9f329a..70f4cd720015 100644 --- a/pkg/ruler/remote_write_test.go +++ b/pkg/ruler/remote_write_test.go @@ -5,7 +5,6 @@ import ( "time" "github.com/cortexproject/cortex/pkg/cortexpb" - "github.com/go-kit/kit/log" "github.com/golang/snappy" "github.com/prometheus/prometheus/pkg/labels" "github.com/stretchr/testify/require" @@ -14,9 +13,12 @@ import ( func TestPrepareRequest(t *testing.T) { ctx := createOriginContext("/rule/file", "rule-group") appendable := createBasicAppendable() - appendable.remoteWriter = newRemoteWriter(log.NewNopLogger(), appendable.cfg) appender := appendable.Appender(ctx).(*RemoteWriteAppender) + client, err := newRemoteWriter(appendable.cfg, FakeUserID) + require.Nil(t, err) + + appender.remoteWriter = client lbs := labels.Labels{ labels.Label{ diff --git a/pkg/util/mapmerge.go b/pkg/util/mapmerge.go new file mode 100644 index 000000000000..6dbfa3e1f50e --- /dev/null +++ b/pkg/util/mapmerge.go @@ -0,0 +1,35 @@ +package util + +// CopyMap makes a copy of the given map +func CopyMap(m map[string]string) map[string]string { + var newMap = make(map[string]string) + + if m == nil { + return nil + } + + for k, v := range m { + newMap[k] = v + } + + return newMap +} + +// MergeMaps merges the overlay map onto the base map, with overlay taking precedence +// NOTE: this treats the given base and overlay maps as immutable, and returns a copy +func MergeMaps(base map[string]string, overlay map[string]string) map[string]string { + if base == nil { + return CopyMap(overlay) + } + + newMap := CopyMap(base) + if overlay == nil { + return newMap + } + + for k, v := range overlay { + newMap[k] = v + } + + return newMap +} diff --git a/pkg/util/mapmerge_test.go b/pkg/util/mapmerge_test.go new file mode 100644 index 000000000000..b8ba2c88f889 --- /dev/null +++ b/pkg/util/mapmerge_test.go @@ -0,0 +1,96 @@ +package util + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestMerge(t *testing.T) { + var base = map[string]string{ + "a": "b", + "c": "10", + "y": 
"z", + } + + var overlay = map[string]string{ + "a": "z", + "c": "10", + "d": "e", + } + + merged := MergeMaps(base, overlay) + require.Equal(t, merged, map[string]string{ + "a": "z", + "c": "10", + "d": "e", + "y": "z", + }) +} + +func TestCopy(t *testing.T) { + var base = map[string]string{ + "a": "b", + "c": "10", + "y": "z", + } + + copy := CopyMap(base) + require.EqualValues(t, base, copy) + require.NotSame(t, base, copy) +} + +func TestNilCopy(t *testing.T) { + var base map[string]string + + copy := CopyMap(base) + require.EqualValues(t, base, copy) + require.NotSame(t, base, copy) +} + +func TestNilBase(t *testing.T) { + var overlay = map[string]string{ + "a": "z", + "c": "10", + "d": "e", + } + + merged := MergeMaps(nil, overlay) + require.Equal(t, merged, overlay) +} + +func TestNilOverlay(t *testing.T) { + var base = map[string]string{ + "a": "b", + "c": "10", + "y": "z", + } + + merged := MergeMaps(base, nil) + require.Equal(t, merged, base) +} + +// TestImmutability tests that both given maps are unaltered +func TestImmutability(t *testing.T) { + var base = map[string]string{ + "a": "b", + "c": "10", + "y": "z", + } + + var overlay = map[string]string{ + "a": "z", + "c": "10", + "d": "e", + } + + beforeBase := CopyMap(base) + beforeOverlay := CopyMap(overlay) + require.EqualValues(t, base, beforeBase) + require.EqualValues(t, overlay, beforeOverlay) + + MergeMaps(base, overlay) + + require.EqualValues(t, base, beforeBase) + require.EqualValues(t, overlay, beforeOverlay) +} From 4f218cf12537a7ff20d75c180dbe8986b38169e5 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Mon, 31 May 2021 17:38:53 +0200 Subject: [PATCH 23/34] Updating benchmark to use complex struct rather than int to be more reflective of usage Signed-off-by: Danny Kopping --- pkg/util/evicting_queue_test.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pkg/util/evicting_queue_test.go b/pkg/util/evicting_queue_test.go index 87f6a20f6d53..46a3f33d52a6 100644 --- a/pkg/util/evicting_queue_test.go +++ b/pkg/util/evicting_queue_test.go @@ -98,6 +98,11 @@ func TestSafeConcurrentAccess(t *testing.T) { require.Equal(t, 3, q.Length()) } +type queueEntry struct { + key string + value interface{} +} + func BenchmarkAppendAndEvict(b *testing.B) { capacity := 5000 q := NewEvictingQueue(capacity, noopOnEvict) @@ -106,7 +111,10 @@ func BenchmarkAppendAndEvict(b *testing.B) { b.ReportAllocs() for n := 0; n < b.N; n++ { - q.Append(n) + q.Append(&queueEntry{ + key: "hello", + value: "world", + }) } require.EqualValues(b, math.Min(float64(b.N), float64(capacity)), q.Length()) From 4d3bebd2e84e3e1a0782f2d70b2c736d63e3d6f2 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Mon, 31 May 2021 18:05:12 +0200 Subject: [PATCH 24/34] Registering flags Removing extraneous checks Signed-off-by: Danny Kopping --- pkg/ruler/appender.go | 18 ++++----------- pkg/ruler/compat.go | 4 ++-- pkg/ruler/compat_test.go | 2 +- pkg/ruler/config.go | 48 +++++++++++++++++++-------------------- pkg/ruler/remote_write.go | 4 ---- 5 files changed, 32 insertions(+), 44 deletions(-) diff --git a/pkg/ruler/appender.go b/pkg/ruler/appender.go index 702990a140fe..798bad47623f 100644 --- a/pkg/ruler/appender.go +++ b/pkg/ruler/appender.go @@ -4,19 +4,16 @@ import ( "context" "errors" - "github.com/grafana/loki/pkg/util" - - "github.com/prometheus/prometheus/rules" - - "github.com/prometheus/prometheus/promql" - - "github.com/prometheus/prometheus/pkg/exemplar" - "github.com/cortexproject/cortex/pkg/cortexpb" "github.com/go-kit/kit/log" 
"github.com/go-kit/kit/log/level" + "github.com/prometheus/prometheus/pkg/exemplar" "github.com/prometheus/prometheus/pkg/labels" + "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/storage" + + "github.com/grafana/loki/pkg/util" ) type RemoteWriteAppendable struct { @@ -39,11 +36,6 @@ type RemoteWriteAppender struct { } func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { - if err := a.cfg.RemoteWrite.Validate(); err != nil { - level.Warn(a.logger).Log("msg", err) - return &NoopAppender{} - } - var appender *RemoteWriteAppender if a.groupAppender == nil { diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index ca103bc86a35..02b2854a7bd6 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -134,8 +134,8 @@ func MemstoreTenantManager( } func newAppendable(cfg Config, overrides RulesLimits, logger log.Logger, userID string) storage.Appendable { - if err := cfg.RemoteWrite.Validate(); err != nil { - level.Warn(logger).Log("msg", err) + if !cfg.RemoteWrite.Enabled { + level.Info(logger).Log("msg", "remote-write is disabled") return &NoopAppender{} } diff --git a/pkg/ruler/compat_test.go b/pkg/ruler/compat_test.go index a4544b3d3d31..4f3a7335927f 100644 --- a/pkg/ruler/compat_test.go +++ b/pkg/ruler/compat_test.go @@ -284,7 +284,7 @@ func TestInvalidRemoteWriteConfig(t *testing.T) { Enabled: false, }, } - require.Error(t, cfg.RemoteWrite.Validate()) + require.Nil(t, cfg.RemoteWrite.Validate()) // if no remote-write URL is configured, validation fails cfg = Config{ diff --git a/pkg/ruler/config.go b/pkg/ruler/config.go index efd177aa3f8e..a6365137f464 100644 --- a/pkg/ruler/config.go +++ b/pkg/ruler/config.go @@ -1,6 +1,9 @@ package ruler import ( + "flag" + "fmt" + "github.com/cortexproject/cortex/pkg/ruler" "github.com/pkg/errors" "github.com/prometheus/prometheus/config" @@ -16,44 +19,41 @@ type Config struct { RemoteWrite RemoteWriteConfig `yaml:"remote_write,omitempty"` } -type RemoteWriteConfig struct { - Client config.RemoteWriteConfig `yaml:"client"` - Enabled bool `yaml:"enabled"` - - QueueCapacity int `yaml:"queue_capacity,omitempty"` +func (c *Config) RegisterFlags(f *flag.FlagSet) { + c.Config.RegisterFlags(f) + c.RemoteWrite.RegisterFlags(f) } -func (c *RemoteWriteConfig) Validate() error { - if !c.Enabled { - return errors.New("remote-write is not enabled") +// Validate overrides the embedded cortex variant which expects a cortex limits struct. Instead copy the relevant bits over. 
+func (c *Config) Validate() error { + if err := c.StoreConfig.Validate(); err != nil { + return fmt.Errorf("invalid ruler store config: %w", err) } - if c.Client.URL == nil { - return errors.New("remote-write URL is not configured") + if err := c.RemoteWrite.Validate(); err != nil { + return fmt.Errorf("invalid ruler remote-write config: %w", err) } return nil } -func (c *RemoteWriteConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { - type raw RemoteWriteConfig - var cfg raw +type RemoteWriteConfig struct { + Client config.RemoteWriteConfig `yaml:"client"` + Enabled bool `yaml:"enabled"` - // set defaults - cfg.QueueCapacity = DefaultQueueCapacity + QueueCapacity int `yaml:"queue_capacity,omitempty"` +} - if err := unmarshal(&cfg); err != nil { - return err +func (c *RemoteWriteConfig) Validate() error { + if c.Enabled && c.Client.URL == nil { + return errors.New("remote-write enabled but client URL is not configured") } - *c = RemoteWriteConfig(cfg) return nil } -// Validate overrides the embedded cortex variant which expects a cortex limits struct. Instead copy the relevant bits over. -func (cfg *Config) Validate() error { - if err := cfg.StoreConfig.Validate(); err != nil { - return errors.Wrap(err, "invalid ruler config") - } - return nil +// RegisterFlags adds the flags required to config this to the given FlagSet. +func (c *RemoteWriteConfig) RegisterFlags(f *flag.FlagSet) { + f.BoolVar(&c.Enabled, "ruler.remote-write.enabled", false, "Remote-write recording rule samples to Prometheus-compatible remote-write receiver.") + f.IntVar(&c.QueueCapacity, "ruler.remote-write.queue-capacity", DefaultQueueCapacity, "Capacity of remote-write queues; if a queue exceeds its capacity it will evict oldest samples.") } diff --git a/pkg/ruler/remote_write.go b/pkg/ruler/remote_write.go index a2bd5f93107b..c830b5bbef73 100644 --- a/pkg/ruler/remote_write.go +++ b/pkg/ruler/remote_write.go @@ -31,10 +31,6 @@ type remoteWriteClient struct { } func newRemoteWriter(cfg Config, userID string) (remoteWriter, error) { - if err := cfg.RemoteWrite.Validate(); err != nil { - return nil, errors.Wrap(err, "validation error") - } - writeClient, err := remote.NewWriteClient("recording_rules", &remote.ClientConfig{ URL: cfg.RemoteWrite.Client.URL, Timeout: cfg.RemoteWrite.Client.RemoteTimeout, From cc736d75ee72eb9702ddb3c57fd7c109f0a119aa Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Mon, 31 May 2021 21:34:33 +0200 Subject: [PATCH 25/34] Adding metric to track remote-write commit errors Signed-off-by: Danny Kopping --- pkg/ruler/appender.go | 2 ++ pkg/ruler/metrics.go | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/pkg/ruler/appender.go b/pkg/ruler/appender.go index 798bad47623f..49d8b981abd3 100644 --- a/pkg/ruler/appender.go +++ b/pkg/ruler/appender.go @@ -127,12 +127,14 @@ func (a *RemoteWriteAppender) Commit() error { req, err := a.remoteWriter.PrepareRequest(a.queue) if err != nil { level.Error(a.logger).Log("msg", "could not prepare remote-write request", "err", err) + remoteWriteErrors.WithLabelValues(a.userID, a.groupKey).Inc() return err } err = a.remoteWriter.Store(a.ctx, req) if err != nil { level.Error(a.logger).Log("msg", "could not store recording rule samples", "err", err) + remoteWriteErrors.WithLabelValues(a.userID, a.groupKey).Inc() return err } diff --git a/pkg/ruler/metrics.go b/pkg/ruler/metrics.go index b23cfffbd7c7..47c6d3084427 100644 --- a/pkg/ruler/metrics.go +++ b/pkg/ruler/metrics.go @@ -28,3 +28,9 @@ var samplesQueueCapacity = 
promauto.NewGaugeVec(prometheus.GaugeOpts{ Name: "recording_rules_samples_queue_capacity", Help: "Number of samples that can be queued before eviction of oldest samples occurs.", }, []string{"user_id", "group_key"}) + +var remoteWriteErrors = promauto.NewCounterVec(prometheus.CounterOpts{ + Namespace: "loki", + Name: "recording_rules_remote_write_errors", + Help: "Number of samples that failed to be remote-written due to error.", +}, []string{"user_id", "group_key"}) From 7e95a8cd6dc0125cb34c5e18f3178e4a777e9935 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Mon, 31 May 2021 23:10:31 +0200 Subject: [PATCH 26/34] Refactoring based on review Signed-off-by: Danny Kopping --- pkg/ruler/appender.go | 71 ++++++++++++++++++++------------------ pkg/ruler/appender_test.go | 14 ++++---- pkg/ruler/compat.go | 7 +--- 3 files changed, 46 insertions(+), 46 deletions(-) diff --git a/pkg/ruler/appender.go b/pkg/ruler/appender.go index 49d8b981abd3..556dc74bab3f 100644 --- a/pkg/ruler/appender.go +++ b/pkg/ruler/appender.go @@ -25,6 +25,16 @@ type RemoteWriteAppendable struct { logger log.Logger } +func newRemoteWriteAppendable(cfg Config, overrides RulesLimits, logger log.Logger, userID string) *RemoteWriteAppendable { + return &RemoteWriteAppendable{ + logger: logger, + userID: userID, + cfg: cfg, + overrides: overrides, + groupAppender: make(map[string]*RemoteWriteAppender), + } +} + type RemoteWriteAppender struct { logger log.Logger ctx context.Context @@ -36,45 +46,40 @@ type RemoteWriteAppender struct { } func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { - var appender *RemoteWriteAppender - - if a.groupAppender == nil { - a.groupAppender = make(map[string]*RemoteWriteAppender) - } - groupKey := retrieveGroupKeyFromContext(ctx) // create or retrieve an appender associated with this groupKey (unique ID for rule group) appender, found := a.groupAppender[groupKey] - if !found { - client, err := newRemoteWriter(a.cfg, a.userID) - if err != nil { - level.Error(a.logger).Log("msg", "error creating remote-write client; setting appender as noop", "err", err, "tenant", a.userID) - return &NoopAppender{} - } - - capacity := a.queueCapacityForTenant() - appender = &RemoteWriteAppender{ - ctx: ctx, - logger: a.logger, - remoteWriter: client, - groupKey: groupKey, - userID: a.userID, - - queue: util.NewEvictingQueue(capacity, onEvict(a.userID, groupKey)), - } - - samplesQueueCapacity.WithLabelValues(a.userID, groupKey).Set(float64(capacity)) - - // only track reference if groupKey was retrieved - if groupKey == "" { - level.Warn(a.logger).Log("msg", "blank group key passed via context; creating new appender") - return appender - } - - a.groupAppender[groupKey] = appender + if found { + return appender + } + + client, err := newRemoteWriter(a.cfg, a.userID) + if err != nil { + level.Error(a.logger).Log("msg", "error creating remote-write client; setting appender as noop", "err", err, "tenant", a.userID) + return &NoopAppender{} + } + + capacity := a.queueCapacityForTenant() + appender = &RemoteWriteAppender{ + ctx: ctx, + logger: a.logger, + remoteWriter: client, + groupKey: groupKey, + userID: a.userID, + + queue: util.NewEvictingQueue(capacity, onEvict(a.userID, groupKey)), + } + + samplesQueueCapacity.WithLabelValues(a.userID, groupKey).Set(float64(capacity)) + + // only track reference if groupKey was retrieved + if groupKey == "" { + level.Warn(a.logger).Log("msg", "blank group key passed via context; creating new appender") + return appender } + a.groupAppender[groupKey] = 
appender return appender } diff --git a/pkg/ruler/appender_test.go b/pkg/ruler/appender_test.go index 8c7701d35621..56758e44e845 100644 --- a/pkg/ruler/appender_test.go +++ b/pkg/ruler/appender_test.go @@ -256,15 +256,14 @@ func createOriginContext(ruleFile, groupName string) context.Context { }) } -func createBasicAppendable() RemoteWriteAppendable { +func createBasicAppendable() *RemoteWriteAppendable { target, err := url.Parse("http://some/target") if err != nil { panic(err) } - return RemoteWriteAppendable{ - userID: FakeUserID, - cfg: Config{ + return newRemoteWriteAppendable( + Config{ RemoteWrite: RemoteWriteConfig{ Enabled: true, QueueCapacity: 10, @@ -273,9 +272,10 @@ func createBasicAppendable() RemoteWriteAppendable { }, }, }, - logger: Logger, - overrides: fakeLimits(), - } + fakeLimits(), + Logger, + FakeUserID, + ) } func fakeLimits() RulesLimits { diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index 02b2854a7bd6..3c8886925fa5 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -139,12 +139,7 @@ func newAppendable(cfg Config, overrides RulesLimits, logger log.Logger, userID return &NoopAppender{} } - return &RemoteWriteAppendable{ - logger: logger, - userID: userID, - cfg: cfg, - overrides: overrides, - } + return newRemoteWriteAppendable(cfg, overrides, logger, userID) } type GroupLoader struct{} From a7a418630ce42accc64c91dc40d819ee1d4665f5 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Mon, 31 May 2021 23:10:53 +0200 Subject: [PATCH 27/34] Performance improvements based on review Signed-off-by: Danny Kopping --- pkg/ruler/remote_write.go | 15 +++++++++------ pkg/util/evicting_queue.go | 3 ++- pkg/util/mapmerge.go | 2 +- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/pkg/ruler/remote_write.go b/pkg/ruler/remote_write.go index c830b5bbef73..68a1b4a79273 100644 --- a/pkg/ruler/remote_write.go +++ b/pkg/ruler/remote_write.go @@ -28,6 +28,9 @@ type remoteWriter interface { type remoteWriteClient struct { remote.WriteClient + + labels []labels.Labels + samples []cortexpb.Sample } func newRemoteWriter(cfg Config, userID string) (remoteWriter, error) { @@ -45,15 +48,15 @@ func newRemoteWriter(cfg Config, userID string) (remoteWriter, error) { } return &remoteWriteClient{ - writeClient, + WriteClient: writeClient, }, nil } // PrepareRequest takes the given queue and serialized it into a compressed // proto write request that will be sent to Cortex func (r *remoteWriteClient) PrepareRequest(queue *util.EvictingQueue) ([]byte, error) { - var labels []labels.Labels - var samples []cortexpb.Sample + r.labels = make([]labels.Labels, 0, queue.Length()) + r.samples = make([]cortexpb.Sample, 0, queue.Length()) for _, entry := range queue.Entries() { entry, ok := entry.(queueEntry) @@ -61,11 +64,11 @@ func (r *remoteWriteClient) PrepareRequest(queue *util.EvictingQueue) ([]byte, e return nil, fmt.Errorf("queue contains invalid entry of type: %T", entry) } - labels = append(labels, entry.labels) - samples = append(samples, entry.sample) + r.labels = append(r.labels, entry.labels) + r.samples = append(r.samples, entry.sample) } - req := cortexpb.ToWriteRequest(labels, samples, nil, cortexpb.RULE) + req := cortexpb.ToWriteRequest(r.labels, r.samples, nil, cortexpb.RULE) defer cortexpb.ReuseSlice(req.Timeseries) reqBytes, err := req.Marshal() diff --git a/pkg/util/evicting_queue.go b/pkg/util/evicting_queue.go index 8d9909899c80..33d5dc4d83d3 100644 --- a/pkg/util/evicting_queue.go +++ b/pkg/util/evicting_queue.go @@ -21,6 +21,7 @@ func 
NewEvictingQueue(capacity int, onEvict func()) *EvictingQueue { return &EvictingQueue{ capacity: capacity, onEvict: onEvict, + entries: make([]interface{}, 0, capacity), } } @@ -63,5 +64,5 @@ func (q *EvictingQueue) Clear() { q.Lock() defer q.Unlock() - q.entries = nil + q.entries = q.entries[:0] } diff --git a/pkg/util/mapmerge.go b/pkg/util/mapmerge.go index 6dbfa3e1f50e..f218644003dc 100644 --- a/pkg/util/mapmerge.go +++ b/pkg/util/mapmerge.go @@ -2,7 +2,7 @@ package util // CopyMap makes a copy of the given map func CopyMap(m map[string]string) map[string]string { - var newMap = make(map[string]string) + var newMap = make(map[string]string, len(m)) if m == nil { return nil From 1e8b8af280a13ec0dcbc005062cda4d6a9b0eb5e Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Mon, 31 May 2021 23:18:35 +0200 Subject: [PATCH 28/34] Return error on invalid queue capacity Signed-off-by: Danny Kopping --- pkg/ruler/appender.go | 8 +++++++- pkg/util/evicting_queue.go | 7 ++++--- pkg/util/evicting_queue_test.go | 27 ++++++++++++++++++--------- 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/pkg/ruler/appender.go b/pkg/ruler/appender.go index 556dc74bab3f..601f40bce500 100644 --- a/pkg/ruler/appender.go +++ b/pkg/ruler/appender.go @@ -61,6 +61,12 @@ func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { } capacity := a.queueCapacityForTenant() + queue, err := util.NewEvictingQueue(capacity, onEvict(a.userID, groupKey)) + if err != nil { + level.Error(a.logger).Log("msg", "queue creation error; setting appender as noop", "err", err, "tenant", a.userID) + return &NoopAppender{} + } + appender = &RemoteWriteAppender{ ctx: ctx, logger: a.logger, @@ -68,7 +74,7 @@ func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { groupKey: groupKey, userID: a.userID, - queue: util.NewEvictingQueue(capacity, onEvict(a.userID, groupKey)), + queue: queue, } samplesQueueCapacity.WithLabelValues(a.userID, groupKey).Set(float64(capacity)) diff --git a/pkg/util/evicting_queue.go b/pkg/util/evicting_queue.go index 33d5dc4d83d3..754b854d6442 100644 --- a/pkg/util/evicting_queue.go +++ b/pkg/util/evicting_queue.go @@ -1,6 +1,7 @@ package util import ( + "errors" "sync" ) @@ -12,17 +13,17 @@ type EvictingQueue struct { onEvict func() } -func NewEvictingQueue(capacity int, onEvict func()) *EvictingQueue { +func NewEvictingQueue(capacity int, onEvict func()) (*EvictingQueue, error) { if capacity <= 0 { // a queue of 0 (or smaller) capacity is invalid - return nil + return nil, errors.New("queue cannot have a zero or negative capacity") } return &EvictingQueue{ capacity: capacity, onEvict: onEvict, entries: make([]interface{}, 0, capacity), - } + }, nil } func (q *EvictingQueue) Append(entry interface{}) { diff --git a/pkg/util/evicting_queue_test.go b/pkg/util/evicting_queue_test.go index 46a3f33d52a6..c33af3e2ac1d 100644 --- a/pkg/util/evicting_queue_test.go +++ b/pkg/util/evicting_queue_test.go @@ -12,7 +12,8 @@ import ( func noopOnEvict() {} func TestQueueAppend(t *testing.T) { - q := NewEvictingQueue(10, noopOnEvict) + q, err := NewEvictingQueue(10, noopOnEvict) + require.Nil(t, err) q.Append(1) q.Append(2) @@ -24,7 +25,8 @@ func TestQueueAppend(t *testing.T) { } func TestQueueCapacity(t *testing.T) { - q := NewEvictingQueue(9, noopOnEvict) + q, err := NewEvictingQueue(9, noopOnEvict) + require.Nil(t, err) require.Equal(t, 9, q.Capacity()) q.capacity = 11 @@ -32,17 +34,20 @@ func TestQueueCapacity(t *testing.T) { } func TestZeroCapacityQueue(t *testing.T) { - 
q := NewEvictingQueue(0, noopOnEvict) + q, err := NewEvictingQueue(0, noopOnEvict) + require.Error(t, err) require.Nil(t, q) } func TestNegativeCapacityQueue(t *testing.T) { - q := NewEvictingQueue(-1, noopOnEvict) + q, err := NewEvictingQueue(-1, noopOnEvict) + require.Error(t, err) require.Nil(t, q) } func TestQueueEvict(t *testing.T) { - q := NewEvictingQueue(3, noopOnEvict) + q, err := NewEvictingQueue(3, noopOnEvict) + require.Nil(t, err) // appending 5 entries will cause the first (oldest) 2 entries to be evicted entries := []interface{}{1, 2, 3, 4, 5} @@ -55,7 +60,8 @@ func TestQueueEvict(t *testing.T) { } func TestQueueClear(t *testing.T) { - q := NewEvictingQueue(3, noopOnEvict) + q, err := NewEvictingQueue(3, noopOnEvict) + require.Nil(t, err) q.Append(1) q.Clear() @@ -66,9 +72,10 @@ func TestQueueClear(t *testing.T) { func TestQueueEvictionCallback(t *testing.T) { var evictionCallbackCalls int - q := NewEvictingQueue(3, func() { + q, err := NewEvictingQueue(3, func() { evictionCallbackCalls++ }) + require.Nil(t, err) for i := 0; i < 5; i++ { q.Append(i) @@ -78,7 +85,8 @@ func TestQueueEvictionCallback(t *testing.T) { } func TestSafeConcurrentAccess(t *testing.T) { - q := NewEvictingQueue(3, noopOnEvict) + q, err := NewEvictingQueue(3, noopOnEvict) + require.Nil(t, err) var wg sync.WaitGroup @@ -105,7 +113,8 @@ type queueEntry struct { func BenchmarkAppendAndEvict(b *testing.B) { capacity := 5000 - q := NewEvictingQueue(capacity, noopOnEvict) + q, err := NewEvictingQueue(capacity, noopOnEvict) + require.Nil(b, err) b.ResetTimer() b.ReportAllocs() From 9dee0f481eea5ea6b23b851ddc09b96b76406225 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Tue, 1 Jun 2021 09:10:54 +0200 Subject: [PATCH 29/34] Removing global queue capacity config - using limits Minor refactoring Signed-off-by: Danny Kopping --- pkg/ruler/appender.go | 11 +----- pkg/ruler/appender_test.go | 66 ++++++++++++++++------------------ pkg/ruler/config.go | 3 -- pkg/ruler/remote_write_test.go | 4 +-- pkg/validation/limits.go | 1 + 5 files changed, 34 insertions(+), 51 deletions(-) diff --git a/pkg/ruler/appender.go b/pkg/ruler/appender.go index 601f40bce500..428c9dcf1879 100644 --- a/pkg/ruler/appender.go +++ b/pkg/ruler/appender.go @@ -60,7 +60,7 @@ func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { return &NoopAppender{} } - capacity := a.queueCapacityForTenant() + capacity := a.overrides.RulerRemoteWriteQueueCapacity(a.userID) queue, err := util.NewEvictingQueue(capacity, onEvict(a.userID, groupKey)) if err != nil { level.Error(a.logger).Log("msg", "queue creation error; setting appender as noop", "err", err, "tenant", a.userID) @@ -89,15 +89,6 @@ func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { return appender } -func (a *RemoteWriteAppendable) queueCapacityForTenant() int { - capacity := a.cfg.RemoteWrite.QueueCapacity - if tenantCapacity := a.overrides.RulerRemoteWriteQueueCapacity(a.userID); tenantCapacity > 0 { - capacity = tenantCapacity - } - - return capacity -} - func onEvict(userID, groupKey string) func() { return func() { samplesEvicted.WithLabelValues(userID, groupKey).Inc() diff --git a/pkg/ruler/appender_test.go b/pkg/ruler/appender_test.go index 56758e44e845..e140b1e114c6 100644 --- a/pkg/ruler/appender_test.go +++ b/pkg/ruler/appender_test.go @@ -22,9 +22,10 @@ import ( ) var ( - Logger = log.NewNopLogger() - FakeUserID = "fake" - EmptyWriteRequest = []byte{} + logger = log.NewNopLogger() + fakeUserID = "fake" + emptyWriteRequest = 
[]byte{} + queueCapacity = 10 ) func TestGroupKeyRetrieval(t *testing.T) { @@ -42,7 +43,7 @@ func TestGroupKeyRetrieval(t *testing.T) { // TestMemoizedAppenders tests that appenders are memoized by their associated group key func TestMemoizedAppenders(t *testing.T) { ctx := createOriginContext("/rule/file", "rule-group") - appendable := createBasicAppendable() + appendable := createBasicAppendable(queueCapacity) // context passing a valid group key will allow the appender to be memoized appender := appendable.Appender(ctx) @@ -57,7 +58,7 @@ func TestMemoizedAppenders(t *testing.T) { func TestAppenderSeparationByRuleGroup(t *testing.T) { ctxA := createOriginContext("/rule/fileA", "rule-groupA") ctxB := createOriginContext("/rule/fileB", "rule-groupB") - appendable := createBasicAppendable() + appendable := createBasicAppendable(queueCapacity) appenderA := appendable.Appender(ctxA) appenderB := appendable.Appender(ctxB) @@ -66,23 +67,17 @@ func TestAppenderSeparationByRuleGroup(t *testing.T) { func TestQueueCapacity(t *testing.T) { ctx := createOriginContext("/rule/file", "rule-group") - appendable := createBasicAppendable() - - defaultCapacity := 100 - appendable.cfg.RemoteWrite.QueueCapacity = defaultCapacity + appendable := createBasicAppendable(queueCapacity) appender := appendable.Appender(ctx).(*RemoteWriteAppender) - require.Equal(t, appender.queue.Capacity(), defaultCapacity) + require.Equal(t, appender.queue.Capacity(), queueCapacity) } func TestQueueCapacityTenantOverride(t *testing.T) { ctx := createOriginContext("/rule/file", "rule-group") - appendable := createBasicAppendable() + appendable := createBasicAppendable(queueCapacity) - defaultCapacity := 100 overriddenCapacity := 999 - appendable.cfg.RemoteWrite.QueueCapacity = defaultCapacity - overrides, err := validation.NewOverrides(validation.Limits{}, func(userID string) *validation.Limits { return &validation.Limits{ RulerRemoteWriteQueueCapacity: overriddenCapacity, @@ -97,7 +92,7 @@ func TestQueueCapacityTenantOverride(t *testing.T) { func TestAppendSample(t *testing.T) { ctx := createOriginContext("/rule/file", "rule-group") - appendable := createBasicAppendable() + appendable := createBasicAppendable(queueCapacity) appender := appendable.Appender(ctx).(*RemoteWriteAppender) labels := labels.Labels{ @@ -126,12 +121,12 @@ func TestAppendSample(t *testing.T) { func TestSuccessfulRemoteWriteSample(t *testing.T) { client := &MockRemoteWriteClient{} - appendable := createBasicAppendable() + appendable := createBasicAppendable(queueCapacity) appender := appendable.Appender(context.TODO()).(*RemoteWriteAppender) appender.remoteWriter = client - client.On("PrepareRequest", mock.Anything).Return(EmptyWriteRequest, nil).Once() + client.On("PrepareRequest", mock.Anything).Return(emptyWriteRequest, nil).Once() client.On("Store", mock.Anything, mock.Anything).Return(nil).Once() _, err := appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.2) @@ -150,12 +145,12 @@ func TestSuccessfulRemoteWriteSample(t *testing.T) { func TestUnsuccessfulRemoteWritePrepare(t *testing.T) { client := &MockRemoteWriteClient{} - appendable := createBasicAppendable() + appendable := createBasicAppendable(queueCapacity) appender := appendable.Appender(context.TODO()).(*RemoteWriteAppender) appender.remoteWriter = client - client.On("PrepareRequest", mock.Anything).Return(EmptyWriteRequest, fmt.Errorf("some error")).Once() + client.On("PrepareRequest", mock.Anything).Return(emptyWriteRequest, fmt.Errorf("some error")).Once() _, err := 
appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.2) require.Nil(t, err) @@ -172,12 +167,12 @@ func TestUnsuccessfulRemoteWritePrepare(t *testing.T) { func TestUnsuccessfulRemoteWriteStore(t *testing.T) { client := &MockRemoteWriteClient{} - appendable := createBasicAppendable() + appendable := createBasicAppendable(queueCapacity) appender := appendable.Appender(context.TODO()).(*RemoteWriteAppender) appender.remoteWriter = client - client.On("PrepareRequest", mock.Anything).Return(EmptyWriteRequest, nil).Once() + client.On("PrepareRequest", mock.Anything).Return(emptyWriteRequest, nil).Once() client.On("Store", mock.Anything, mock.Anything).Return(fmt.Errorf("some error")).Once() _, err := appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.2) require.Nil(t, err) @@ -195,7 +190,7 @@ func TestUnsuccessfulRemoteWriteStore(t *testing.T) { func TestEmptyRemoteWrite(t *testing.T) { client := &MockRemoteWriteClient{} - appendable := createBasicAppendable() + appendable := createBasicAppendable(queueCapacity) appender := appendable.Appender(context.TODO()).(*RemoteWriteAppender) appender.remoteWriter = client @@ -211,7 +206,7 @@ func TestEmptyRemoteWrite(t *testing.T) { } func TestAppenderRollback(t *testing.T) { - appendable := createBasicAppendable() + appendable := createBasicAppendable(queueCapacity) appender := appendable.Appender(context.TODO()).(*RemoteWriteAppender) appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.2) //nolint:errcheck @@ -225,10 +220,8 @@ func TestAppenderRollback(t *testing.T) { } func TestAppenderEvictOldest(t *testing.T) { - queueCapacity := 2 - - appendable := createBasicAppendable() - appendable.cfg.RemoteWrite.QueueCapacity = queueCapacity + capacity := 2 + appendable := createBasicAppendable(capacity) appender := appendable.Appender(context.TODO()).(*RemoteWriteAppender) @@ -237,7 +230,7 @@ func TestAppenderEvictOldest(t *testing.T) { appender.Append(0, labels.Labels{}, time.Now().UnixNano(), 11.4) //nolint:errcheck // capacity is enforced - require.Equal(t, queueCapacity, appender.queue.Length()) + require.Equal(t, capacity, appender.queue.Length()) // only two newest samples are kept require.Equal(t, appender.queue.Entries()[0].(queueEntry).sample.Value, 11.3) @@ -256,7 +249,7 @@ func createOriginContext(ruleFile, groupName string) context.Context { }) } -func createBasicAppendable() *RemoteWriteAppendable { +func createBasicAppendable(queueCapacity int) *RemoteWriteAppendable { target, err := url.Parse("http://some/target") if err != nil { panic(err) @@ -265,21 +258,22 @@ func createBasicAppendable() *RemoteWriteAppendable { return newRemoteWriteAppendable( Config{ RemoteWrite: RemoteWriteConfig{ - Enabled: true, - QueueCapacity: 10, + Enabled: true, Client: config.RemoteWriteConfig{ URL: &promConfig.URL{URL: target}, }, }, }, - fakeLimits(), - Logger, - FakeUserID, + fakeLimits(queueCapacity), + logger, + fakeUserID, ) } -func fakeLimits() RulesLimits { - o, err := validation.NewOverrides(validation.Limits{}, nil) +func fakeLimits(queueCapacity int) RulesLimits { + o, err := validation.NewOverrides(validation.Limits{ + RulerRemoteWriteQueueCapacity: queueCapacity, + }, nil) if err != nil { panic(err) } diff --git a/pkg/ruler/config.go b/pkg/ruler/config.go index a6365137f464..4199edb0899e 100644 --- a/pkg/ruler/config.go +++ b/pkg/ruler/config.go @@ -40,8 +40,6 @@ func (c *Config) Validate() error { type RemoteWriteConfig struct { Client config.RemoteWriteConfig `yaml:"client"` Enabled bool `yaml:"enabled"` - - QueueCapacity 
int `yaml:"queue_capacity,omitempty"` } func (c *RemoteWriteConfig) Validate() error { @@ -55,5 +53,4 @@ func (c *RemoteWriteConfig) Validate() error { // RegisterFlags adds the flags required to config this to the given FlagSet. func (c *RemoteWriteConfig) RegisterFlags(f *flag.FlagSet) { f.BoolVar(&c.Enabled, "ruler.remote-write.enabled", false, "Remote-write recording rule samples to Prometheus-compatible remote-write receiver.") - f.IntVar(&c.QueueCapacity, "ruler.remote-write.queue-capacity", DefaultQueueCapacity, "Capacity of remote-write queues; if a queue exceeds its capacity it will evict oldest samples.") } diff --git a/pkg/ruler/remote_write_test.go b/pkg/ruler/remote_write_test.go index 70f4cd720015..5ea5fc946221 100644 --- a/pkg/ruler/remote_write_test.go +++ b/pkg/ruler/remote_write_test.go @@ -12,10 +12,10 @@ import ( func TestPrepareRequest(t *testing.T) { ctx := createOriginContext("/rule/file", "rule-group") - appendable := createBasicAppendable() + appendable := createBasicAppendable(10) appender := appendable.Appender(ctx).(*RemoteWriteAppender) - client, err := newRemoteWriter(appendable.cfg, FakeUserID) + client, err := newRemoteWriter(appendable.cfg, "fake") require.Nil(t, err) appender.remoteWriter = client diff --git a/pkg/validation/limits.go b/pkg/validation/limits.go index b3efac22d12d..88950df26bc0 100644 --- a/pkg/validation/limits.go +++ b/pkg/validation/limits.go @@ -123,6 +123,7 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) { f.IntVar(&l.RulerMaxRulesPerRuleGroup, "ruler.max-rules-per-rule-group", 0, "Maximum number of rules per rule group per-tenant. 0 to disable.") f.IntVar(&l.RulerMaxRuleGroupsPerTenant, "ruler.max-rule-groups-per-tenant", 0, "Maximum number of rule groups per-tenant. 0 to disable.") + f.IntVar(&l.RulerRemoteWriteQueueCapacity, "ruler.remote-write.queue-capacity", 10000, "Capacity of remote-write queues; if a queue exceeds its capacity it will evict oldest samples.") f.StringVar(&l.PerTenantOverrideConfig, "limits.per-user-override-config", "", "File name of per-user overrides.") _ = l.RetentionPeriod.Set("744h") From bf473c8ef61f84436ce3c787823e19e50d29abef Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Tue, 1 Jun 2021 09:58:47 +0200 Subject: [PATCH 30/34] Reusing memory in request preparation Refactoring for testability Signed-off-by: Danny Kopping --- pkg/ruler/remote_write.go | 30 ++++++++++--- pkg/ruler/remote_write_test.go | 78 +++++++++++++++++++++++++++++++--- 2 files changed, 95 insertions(+), 13 deletions(-) diff --git a/pkg/ruler/remote_write.go b/pkg/ruler/remote_write.go index 68a1b4a79273..d50d54d2a271 100644 --- a/pkg/ruler/remote_write.go +++ b/pkg/ruler/remote_write.go @@ -52,22 +52,40 @@ func newRemoteWriter(cfg Config, userID string) (remoteWriter, error) { }, nil } -// PrepareRequest takes the given queue and serialized it into a compressed -// proto write request that will be sent to Cortex -func (r *remoteWriteClient) PrepareRequest(queue *util.EvictingQueue) ([]byte, error) { - r.labels = make([]labels.Labels, 0, queue.Length()) - r.samples = make([]cortexpb.Sample, 0, queue.Length()) +func (r *remoteWriteClient) prepare(queue *util.EvictingQueue) error { + // reuse slices, resize if they are not big enough + if cap(r.labels) < queue.Length() { + r.labels = make([]labels.Labels, 0, queue.Length()) + } + if cap(r.samples) < queue.Length() { + r.samples = make([]cortexpb.Sample, 0, queue.Length()) + } + + r.labels = r.labels[:0] + r.samples = r.samples[:0] for _, entry := range queue.Entries() { entry, ok := 
entry.(queueEntry) if !ok { - return nil, fmt.Errorf("queue contains invalid entry of type: %T", entry) + return fmt.Errorf("queue contains invalid entry of type: %T", entry) } r.labels = append(r.labels, entry.labels) r.samples = append(r.samples, entry.sample) } + return nil +} + +// PrepareRequest takes the given queue and serialized it into a compressed +// proto write request that will be sent to Cortex +func (r *remoteWriteClient) PrepareRequest(queue *util.EvictingQueue) ([]byte, error) { + // prepare labels and samples from queue + err := r.prepare(queue) + if err != nil { + return nil, err + } + req := cortexpb.ToWriteRequest(r.labels, r.samples, nil, cortexpb.RULE) defer cortexpb.ReuseSlice(req.Timeseries) diff --git a/pkg/ruler/remote_write_test.go b/pkg/ruler/remote_write_test.go index 5ea5fc946221..43b77bbf374c 100644 --- a/pkg/ruler/remote_write_test.go +++ b/pkg/ruler/remote_write_test.go @@ -1,24 +1,76 @@ package ruler import ( + "math/rand" "testing" "time" "github.com/cortexproject/cortex/pkg/cortexpb" "github.com/golang/snappy" "github.com/prometheus/prometheus/pkg/labels" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" -) -func TestPrepareRequest(t *testing.T) { - ctx := createOriginContext("/rule/file", "rule-group") - appendable := createBasicAppendable(10) + "github.com/grafana/loki/pkg/util" +) - appender := appendable.Appender(ctx).(*RemoteWriteAppender) - client, err := newRemoteWriter(appendable.cfg, "fake") +func TestPrepare(t *testing.T) { + client := remoteWriteClient{} + queue, err := util.NewEvictingQueue(1000, func() {}) require.Nil(t, err) - appender.remoteWriter = client + lbs := labels.Labels{ + labels.Label{ + Name: "cluster", + Value: "us-central1", + }, + } + sample := cortexpb.Sample{ + Value: rand.Float64(), + TimestampMs: time.Now().Unix(), + } + + // first start with 10 items + for i := 0; i < 10; i++ { + queue.Append(queueEntry{labels: lbs, sample: sample}) + } + require.Nil(t, client.prepare(queue)) + + assert.Equal(t, len(client.labels), queue.Length()) + assert.Equal(t, len(client.samples), queue.Length()) + assert.Equal(t, cap(client.labels), 10) + assert.Equal(t, cap(client.samples), 10) + + queue.Clear() + + // then resize the internal slices to 100 + for i := 0; i < 100; i++ { + queue.Append(queueEntry{labels: lbs, sample: sample}) + } + require.Nil(t, client.prepare(queue)) + + assert.Equal(t, len(client.labels), queue.Length()) + assert.Equal(t, len(client.samples), queue.Length()) + assert.Equal(t, cap(client.labels), 100) + assert.Equal(t, cap(client.samples), 100) + + queue.Clear() + + // then reuse the existing slice (no resize necessary since 5 < 100) + for i := 0; i < 5; i++ { + queue.Append(queueEntry{labels: lbs, sample: sample}) + } + require.Nil(t, client.prepare(queue)) + + assert.Equal(t, len(client.labels), queue.Length()) + assert.Equal(t, len(client.samples), queue.Length()) + // cap remains 100 since no resize was necessary + assert.Equal(t, cap(client.labels), 100) + assert.Equal(t, cap(client.samples), 100) +} + +func TestPrepareRequest(t *testing.T) { + appender := createBasicAppender(t) lbs := labels.Labels{ labels.Label{ @@ -47,3 +99,15 @@ func TestPrepareRequest(t *testing.T) { require.Equal(t, req.Timeseries[0].Labels[0].Value, lbs[0].Value) require.Equal(t, req.Timeseries[0].Samples[0], sample) } + +func createBasicAppender(t *testing.T) *RemoteWriteAppender { + ctx := createOriginContext("/rule/file", "rule-group") + appendable := createBasicAppendable(100) + + appender := 
appendable.Appender(ctx).(*RemoteWriteAppender) + client, err := newRemoteWriter(appendable.cfg, "fake") + require.Nil(t, err) + + appender.remoteWriter = client + return appender +} From 082f54e046670329bd6bfd97c210504043d7044f Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Wed, 2 Jun 2021 11:59:43 +0200 Subject: [PATCH 31/34] Moving remote-write metrics into struct Refactoring Unexporting and refactoring memstore metrics to match Signed-off-by: Danny Kopping --- pkg/ruler/appender.go | 29 +++++++++++++++------------ pkg/ruler/appender_test.go | 3 +++ pkg/ruler/compat.go | 22 ++++++++++++++------- pkg/ruler/compat_test.go | 2 +- pkg/ruler/memstore.go | 36 +++++++++++++++++----------------- pkg/ruler/metrics.go | 36 ---------------------------------- pkg/ruler/remote_write.go | 40 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 94 insertions(+), 74 deletions(-) delete mode 100644 pkg/ruler/metrics.go diff --git a/pkg/ruler/appender.go b/pkg/ruler/appender.go index 428c9dcf1879..a1985c84cb9e 100644 --- a/pkg/ruler/appender.go +++ b/pkg/ruler/appender.go @@ -23,15 +23,18 @@ type RemoteWriteAppendable struct { cfg Config overrides RulesLimits logger log.Logger + + metrics *remoteWriteMetrics } -func newRemoteWriteAppendable(cfg Config, overrides RulesLimits, logger log.Logger, userID string) *RemoteWriteAppendable { +func newRemoteWriteAppendable(cfg Config, overrides RulesLimits, logger log.Logger, userID string, metrics *remoteWriteMetrics) *RemoteWriteAppendable { return &RemoteWriteAppendable{ logger: logger, userID: userID, cfg: cfg, overrides: overrides, groupAppender: make(map[string]*RemoteWriteAppender), + metrics: metrics, } } @@ -42,7 +45,8 @@ type RemoteWriteAppender struct { userID string groupKey string - queue *util.EvictingQueue + queue *util.EvictingQueue + metrics *remoteWriteMetrics } func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { @@ -61,7 +65,7 @@ func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { } capacity := a.overrides.RulerRemoteWriteQueueCapacity(a.userID) - queue, err := util.NewEvictingQueue(capacity, onEvict(a.userID, groupKey)) + queue, err := util.NewEvictingQueue(capacity, a.onEvict(a.userID, groupKey)) if err != nil { level.Error(a.logger).Log("msg", "queue creation error; setting appender as noop", "err", err, "tenant", a.userID) return &NoopAppender{} @@ -74,10 +78,11 @@ func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { groupKey: groupKey, userID: a.userID, - queue: queue, + queue: queue, + metrics: a.metrics, } - samplesQueueCapacity.WithLabelValues(a.userID, groupKey).Set(float64(capacity)) + a.metrics.samplesQueueCapacity.WithLabelValues(a.userID).Set(float64(capacity)) // only track reference if groupKey was retrieved if groupKey == "" { @@ -89,9 +94,9 @@ func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { return appender } -func onEvict(userID, groupKey string) func() { +func (a *RemoteWriteAppendable) onEvict(userID, groupKey string) func() { return func() { - samplesEvicted.WithLabelValues(userID, groupKey).Inc() + a.metrics.samplesEvicted.WithLabelValues(userID, groupKey).Inc() } } @@ -104,8 +109,8 @@ func (a *RemoteWriteAppender) Append(_ uint64, l labels.Labels, t int64, v float }, }) - samplesQueued.WithLabelValues(a.userID, a.groupKey).Set(float64(a.queue.Length())) - samplesQueuedTotal.WithLabelValues(a.userID, a.groupKey).Inc() + a.metrics.samplesQueued.WithLabelValues(a.userID, 
a.groupKey).Set(float64(a.queue.Length())) + a.metrics.samplesQueuedTotal.WithLabelValues(a.userID, a.groupKey).Inc() return 0, nil } @@ -129,21 +134,21 @@ func (a *RemoteWriteAppender) Commit() error { req, err := a.remoteWriter.PrepareRequest(a.queue) if err != nil { level.Error(a.logger).Log("msg", "could not prepare remote-write request", "err", err) - remoteWriteErrors.WithLabelValues(a.userID, a.groupKey).Inc() + a.metrics.remoteWriteErrors.WithLabelValues(a.userID, a.groupKey).Inc() return err } err = a.remoteWriter.Store(a.ctx, req) if err != nil { level.Error(a.logger).Log("msg", "could not store recording rule samples", "err", err) - remoteWriteErrors.WithLabelValues(a.userID, a.groupKey).Inc() + a.metrics.remoteWriteErrors.WithLabelValues(a.userID, a.groupKey).Inc() return err } // Clear the queue on a successful response a.queue.Clear() - samplesQueued.WithLabelValues(a.userID, a.groupKey).Set(0) + a.metrics.samplesQueued.WithLabelValues(a.userID, a.groupKey).Set(0) return nil } diff --git a/pkg/ruler/appender_test.go b/pkg/ruler/appender_test.go index e140b1e114c6..f289eec37a8b 100644 --- a/pkg/ruler/appender_test.go +++ b/pkg/ruler/appender_test.go @@ -9,6 +9,7 @@ import ( "github.com/cortexproject/cortex/pkg/cortexpb" "github.com/go-kit/kit/log" + "github.com/prometheus/client_golang/prometheus" promConfig "github.com/prometheus/common/config" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/pkg/labels" @@ -26,6 +27,7 @@ var ( fakeUserID = "fake" emptyWriteRequest = []byte{} queueCapacity = 10 + metrics = newRemoteWriteMetrics(prometheus.DefaultRegisterer) ) func TestGroupKeyRetrieval(t *testing.T) { @@ -267,6 +269,7 @@ func createBasicAppendable(queueCapacity int) *RemoteWriteAppendable { fakeLimits(queueCapacity), logger, fakeUserID, + metrics, ) } diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index 3c8886925fa5..ed87cd8d8044 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -93,7 +93,8 @@ func MemstoreTenantManager( engine *logql.Engine, overrides RulesLimits, ) ruler.ManagerFactory { - var metrics *Metrics + var msMetrics *memstoreMetrics + var rwMetrics *remoteWriteMetrics return ruler.ManagerFactory(func( ctx context.Context, @@ -104,15 +105,22 @@ func MemstoreTenantManager( ) ruler.RulesManager { // We'll ignore the passed registerer and use the default registerer to avoid prefix issues and other weirdness. // This closure prevents re-registering. 
- if metrics == nil { - metrics = NewMetrics(prometheus.DefaultRegisterer) + registerer := prometheus.DefaultRegisterer + + if msMetrics == nil { + msMetrics = NewMetrics(registerer) + } + + if rwMetrics == nil { + rwMetrics = newRemoteWriteMetrics(registerer) } + logger = log.With(logger, "user", userID) queryFunc := engineQueryFunc(engine, overrides, userID) - memStore := NewMemStore(userID, queryFunc, metrics, 5*time.Minute, log.With(logger, "subcomponent", "MemStore")) + memStore := NewMemStore(userID, queryFunc, msMetrics, 5*time.Minute, log.With(logger, "subcomponent", "MemStore")) mgr := rules.NewManager(&rules.ManagerOptions{ - Appendable: newAppendable(cfg, overrides, logger, userID), + Appendable: newAppendable(cfg, overrides, logger, userID, rwMetrics), Queryable: memStore, QueryFunc: queryFunc, Context: user.InjectOrgID(ctx, userID), @@ -133,13 +141,13 @@ func MemstoreTenantManager( }) } -func newAppendable(cfg Config, overrides RulesLimits, logger log.Logger, userID string) storage.Appendable { +func newAppendable(cfg Config, overrides RulesLimits, logger log.Logger, userID string, metrics *remoteWriteMetrics) storage.Appendable { if !cfg.RemoteWrite.Enabled { level.Info(logger).Log("msg", "remote-write is disabled") return &NoopAppender{} } - return newRemoteWriteAppendable(cfg, overrides, logger, userID) + return newRemoteWriteAppendable(cfg, overrides, logger, userID, metrics) } type GroupLoader struct{} diff --git a/pkg/ruler/compat_test.go b/pkg/ruler/compat_test.go index 4f3a7335927f..8ac490ac6f8f 100644 --- a/pkg/ruler/compat_test.go +++ b/pkg/ruler/compat_test.go @@ -309,7 +309,7 @@ func TestNoopAppender(t *testing.T) { } require.False(t, cfg.RemoteWrite.Enabled) - appendable := newAppendable(cfg, &validation.Overrides{}, log.NewNopLogger(), "fake") + appendable := newAppendable(cfg, &validation.Overrides{}, log.NewNopLogger(), "fake", metrics) appender := appendable.Appender(context.TODO()) require.IsType(t, NoopAppender{}, appender) } diff --git a/pkg/ruler/memstore.go b/pkg/ruler/memstore.go index 02052cb42c37..8e7364fe8fc0 100644 --- a/pkg/ruler/memstore.go +++ b/pkg/ruler/memstore.go @@ -46,23 +46,23 @@ func ForStateMetric(base labels.Labels, alertName string) labels.Labels { return b.Labels() } -type Metrics struct { - Evaluations *prometheus.CounterVec - Samples prometheus.Gauge // in memory samples - CacheHits *prometheus.CounterVec // cache hits on in memory samples +type memstoreMetrics struct { + evaluations *prometheus.CounterVec + samples prometheus.Gauge // in memory samples + cacheHits *prometheus.CounterVec // cache hits on in memory samples } -func NewMetrics(r prometheus.Registerer) *Metrics { - return &Metrics{ - Evaluations: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ +func NewMetrics(r prometheus.Registerer) *memstoreMetrics { + return &memstoreMetrics{ + evaluations: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ Namespace: "loki", Name: "ruler_memory_for_state_evaluations_total", }, []string{"status", "tenant"}), - Samples: promauto.With(r).NewGauge(prometheus.GaugeOpts{ + samples: promauto.With(r).NewGauge(prometheus.GaugeOpts{ Namespace: "loki", Name: "ruler_memory_samples", }), - CacheHits: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ + cacheHits: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ Namespace: "loki", Name: "ruler_memory_for_state_cache_hits_total", }, []string{"tenant"}), @@ -77,7 +77,7 @@ type MemStore struct { mtx sync.Mutex userID string queryFunc rules.QueryFunc - metrics *Metrics + metrics 
*memstoreMetrics mgr RuleIter logger log.Logger rules map[string]*RuleCache @@ -87,7 +87,7 @@ type MemStore struct { cleanupInterval time.Duration } -func NewMemStore(userID string, queryFunc rules.QueryFunc, metrics *Metrics, cleanupInterval time.Duration, logger log.Logger) *MemStore { +func NewMemStore(userID string, queryFunc rules.QueryFunc, metrics *memstoreMetrics, cleanupInterval time.Duration, logger log.Logger) *MemStore { s := &MemStore{ userID: userID, metrics: metrics, @@ -243,7 +243,7 @@ func (m *memStoreQuerier) Select(sortSeries bool, params *storage.SelectHints, m smpl, cached := cache.Get(m.ts, ls) if cached { - m.metrics.CacheHits.WithLabelValues(m.userID).Inc() + m.metrics.cacheHits.WithLabelValues(m.userID).Inc() level.Debug(m.logger).Log("msg", "result cached", "rule", ruleKey) // Assuming the result is cached but the desired series is not in the result, it wouldn't be considered active. if smpl == nil { @@ -265,10 +265,10 @@ func (m *memStoreQuerier) Select(sortSeries bool, params *storage.SelectHints, m vec, err := m.queryFunc(m.ctx, rule.Query().String(), m.ts.Add(-rule.HoldDuration())) if err != nil { level.Info(m.logger).Log("msg", "error querying for rule", "rule", ruleKey, "err", err.Error()) - m.metrics.Evaluations.WithLabelValues(statusFailure, m.userID).Inc() + m.metrics.evaluations.WithLabelValues(statusFailure, m.userID).Inc() return storage.NoopSeriesSet() } - m.metrics.Evaluations.WithLabelValues(statusSuccess, m.userID).Inc() + m.metrics.evaluations.WithLabelValues(statusSuccess, m.userID).Inc() level.Debug(m.logger).Log("msg", "rule state successfully restored", "rule", ruleKey, "len", len(vec)) // translate the result into the ALERTS_FOR_STATE series for caching, @@ -322,11 +322,11 @@ func (*memStoreQuerier) Close() error { return nil } type RuleCache struct { mtx sync.Mutex - metrics *Metrics + metrics *memstoreMetrics data map[int64]map[uint64]promql.Sample } -func NewRuleCache(metrics *Metrics) *RuleCache { +func NewRuleCache(metrics *memstoreMetrics) *RuleCache { return &RuleCache{ data: make(map[int64]map[uint64]promql.Sample), metrics: metrics, @@ -345,7 +345,7 @@ func (c *RuleCache) Set(ts time.Time, vec promql.Vector) { for _, sample := range vec { tsMap[sample.Metric.Hash()] = sample } - c.metrics.Samples.Add(float64(len(vec))) + c.metrics.samples.Add(float64(len(vec))) } // Get returns ok if that timestamp's result is cached. 
@@ -377,7 +377,7 @@ func (c *RuleCache) CleanupOldSamples(olderThan time.Time) (empty bool) { for ts, tsMap := range c.data { if ts < ns { delete(c.data, ts) - c.metrics.Samples.Add(-float64(len(tsMap))) + c.metrics.samples.Add(-float64(len(tsMap))) } } diff --git a/pkg/ruler/metrics.go b/pkg/ruler/metrics.go deleted file mode 100644 index 47c6d3084427..000000000000 --- a/pkg/ruler/metrics.go +++ /dev/null @@ -1,36 +0,0 @@ -package ruler - -import ( - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" -) - -var samplesEvicted = promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", - Name: "recording_rules_samples_evicted_total", - Help: "Number of samples evicted from queue; queue is full!", -}, []string{"user_id", "group_key"}) - -var samplesQueuedTotal = promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", - Name: "recording_rules_samples_queued_total", - Help: "Number of samples queued in total.", -}, []string{"user_id", "group_key"}) - -var samplesQueued = promauto.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: "loki", - Name: "recording_rules_samples_queued_current", - Help: "Number of samples queued to be remote-written.", -}, []string{"user_id", "group_key"}) - -var samplesQueueCapacity = promauto.NewGaugeVec(prometheus.GaugeOpts{ - Namespace: "loki", - Name: "recording_rules_samples_queue_capacity", - Help: "Number of samples that can be queued before eviction of oldest samples occurs.", -}, []string{"user_id", "group_key"}) - -var remoteWriteErrors = promauto.NewCounterVec(prometheus.CounterOpts{ - Namespace: "loki", - Name: "recording_rules_remote_write_errors", - Help: "Number of samples that failed to be remote-written due to error.", -}, []string{"user_id", "group_key"}) diff --git a/pkg/ruler/remote_write.go b/pkg/ruler/remote_write.go index d50d54d2a271..7753a2af80ab 100644 --- a/pkg/ruler/remote_write.go +++ b/pkg/ruler/remote_write.go @@ -6,6 +6,8 @@ import ( "github.com/cortexproject/cortex/pkg/cortexpb" "github.com/golang/snappy" "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/storage/remote" @@ -33,6 +35,14 @@ type remoteWriteClient struct { samples []cortexpb.Sample } +type remoteWriteMetrics struct { + samplesEvicted *prometheus.CounterVec + samplesQueuedTotal *prometheus.CounterVec + samplesQueued *prometheus.GaugeVec + samplesQueueCapacity *prometheus.GaugeVec + remoteWriteErrors *prometheus.CounterVec +} + func newRemoteWriter(cfg Config, userID string) (remoteWriter, error) { writeClient, err := remote.NewWriteClient("recording_rules", &remote.ClientConfig{ URL: cfg.RemoteWrite.Client.URL, @@ -96,3 +106,33 @@ func (r *remoteWriteClient) PrepareRequest(queue *util.EvictingQueue) ([]byte, e return snappy.Encode(nil, reqBytes), nil } + +func newRemoteWriteMetrics(r prometheus.Registerer) *remoteWriteMetrics { + return &remoteWriteMetrics{ + samplesEvicted: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ + Namespace: "loki", + Name: "recording_rules_samples_evicted_total", + Help: "Number of samples evicted from queue; queue is full!", + }, []string{"tenant", "group_key"}), + samplesQueuedTotal: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ + Namespace: "loki", + Name: "recording_rules_samples_queued_total", + Help: "Number of samples queued in total.", + }, []string{"tenant", "group_key"}), + 
samplesQueued: promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{ + Namespace: "loki", + Name: "recording_rules_samples_queued_current", + Help: "Number of samples queued to be remote-written.", + }, []string{"tenant", "group_key"}), + samplesQueueCapacity: promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{ + Namespace: "loki", + Name: "recording_rules_samples_queue_capacity", + Help: "Number of samples that can be queued before eviction of oldest samples occurs.", + }, []string{"tenant"}), + remoteWriteErrors: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ + Namespace: "loki", + Name: "recording_rules_remote_write_errors", + Help: "Number of samples that failed to be remote-written due to error.", + }, []string{"tenant", "group_key"}), + } +} From 14a2d74acf09fe12a0919cc5652bfdecdda22a81 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Wed, 2 Jun 2021 12:02:21 +0200 Subject: [PATCH 32/34] Applying review suggestions Signed-off-by: Danny Kopping --- pkg/ruler/config.go | 4 ---- pkg/ruler/remote_write.go | 2 +- pkg/util/evicting_queue.go | 3 ++- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/pkg/ruler/config.go b/pkg/ruler/config.go index 4199edb0899e..8a40c77e5d95 100644 --- a/pkg/ruler/config.go +++ b/pkg/ruler/config.go @@ -9,10 +9,6 @@ import ( "github.com/prometheus/prometheus/config" ) -// DefaultQueueCapacity defines the default size of the samples queue which will hold samples -// while the remote-write endpoint is unavailable -const DefaultQueueCapacity = 10000 - type Config struct { ruler.Config `yaml:",inline"` diff --git a/pkg/ruler/remote_write.go b/pkg/ruler/remote_write.go index 7753a2af80ab..13b4f910df53 100644 --- a/pkg/ruler/remote_write.go +++ b/pkg/ruler/remote_write.go @@ -87,7 +87,7 @@ func (r *remoteWriteClient) prepare(queue *util.EvictingQueue) error { return nil } -// PrepareRequest takes the given queue and serialized it into a compressed +// PrepareRequest takes the given queue and serializes it into a compressed // proto write request that will be sent to Cortex func (r *remoteWriteClient) PrepareRequest(queue *util.EvictingQueue) ([]byte, error) { // prepare labels and samples from queue diff --git a/pkg/util/evicting_queue.go b/pkg/util/evicting_queue.go index 754b854d6442..dd331f052bae 100644 --- a/pkg/util/evicting_queue.go +++ b/pkg/util/evicting_queue.go @@ -40,7 +40,8 @@ func (q *EvictingQueue) Append(entry interface{}) { func (q *EvictingQueue) evictOldest() { q.onEvict() - q.entries = append(q.entries[:0], q.entries[1:]...) + start := (len(q.entries) - q.Capacity()) + 1 + q.entries = append(q.entries[:0], q.entries[start:]...) 
} func (q *EvictingQueue) Entries() []interface{} { From 22f628ce050f721f315c931e96e03e3a63096c66 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Wed, 2 Jun 2021 18:16:58 +0200 Subject: [PATCH 33/34] Allowing for runtime changing of per-tenant remote-write queue capacity Signed-off-by: Danny Kopping --- pkg/ruler/appender.go | 19 ++++++++++++++++--- pkg/ruler/appender_test.go | 22 +++++++++++++++++++++ pkg/util/evicting_queue.go | 39 ++++++++++++++++++++++++++++++-------- 3 files changed, 69 insertions(+), 11 deletions(-) diff --git a/pkg/ruler/appender.go b/pkg/ruler/appender.go index a1985c84cb9e..21bacc8b430d 100644 --- a/pkg/ruler/appender.go +++ b/pkg/ruler/appender.go @@ -52,9 +52,16 @@ type RemoteWriteAppender struct { func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { groupKey := retrieveGroupKeyFromContext(ctx) + capacity := a.overrides.RulerRemoteWriteQueueCapacity(a.userID) + // create or retrieve an appender associated with this groupKey (unique ID for rule group) appender, found := a.groupAppender[groupKey] if found { + err := appender.WithQueueCapacity(capacity) + if err != nil { + level.Warn(a.logger).Log("msg", "attempting to set capacity failed", "err", err) + } + return appender } @@ -64,7 +71,6 @@ func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { return &NoopAppender{} } - capacity := a.overrides.RulerRemoteWriteQueueCapacity(a.userID) queue, err := util.NewEvictingQueue(capacity, a.onEvict(a.userID, groupKey)) if err != nil { level.Error(a.logger).Log("msg", "queue creation error; setting appender as noop", "err", err, "tenant", a.userID) @@ -82,8 +88,6 @@ func (a *RemoteWriteAppendable) Appender(ctx context.Context) storage.Appender { metrics: a.metrics, } - a.metrics.samplesQueueCapacity.WithLabelValues(a.userID).Set(float64(capacity)) - // only track reference if groupKey was retrieved if groupKey == "" { level.Warn(a.logger).Log("msg", "blank group key passed via context; creating new appender") @@ -159,6 +163,15 @@ func (a *RemoteWriteAppender) Rollback() error { return nil } +func (a *RemoteWriteAppender) WithQueueCapacity(capacity int) error { + if err := a.queue.SetCapacity(capacity); err != nil { + return err + } + + a.metrics.samplesQueueCapacity.WithLabelValues(a.userID).Set(float64(capacity)) + return nil +} + func retrieveGroupKeyFromContext(ctx context.Context) string { data, found := ctx.Value(promql.QueryOrigin{}).(map[string]interface{}) if !found { diff --git a/pkg/ruler/appender_test.go b/pkg/ruler/appender_test.go index f289eec37a8b..34e959af7248 100644 --- a/pkg/ruler/appender_test.go +++ b/pkg/ruler/appender_test.go @@ -57,6 +57,28 @@ func TestMemoizedAppenders(t *testing.T) { require.NotSame(t, appender, appendable.Appender(ctx)) } +// TestMemoizedAppendersWithRuntimeCapacityChange tests that memoized appenders can reconfigure their capacity +func TestMemoizedAppendersWithRuntimeCapacityChange(t *testing.T) { + ctx := createOriginContext("/rule/file", "rule-group") + appendable := createBasicAppendable(queueCapacity) + + appender := appendable.Appender(ctx) + + // appender is configured with default queue capacity initially + capacity := appender.(*RemoteWriteAppender).queue.Capacity() + require.Equal(t, queueCapacity, capacity) + + newCapacity := 123 + + // reconfigure the overrides to simulate a runtime config change + appendable.overrides = fakeLimits(newCapacity) + + // appender is reconfigured with new queue capacity when retrieved again + appender = appendable.Appender(ctx) + 
capacity = appender.(*RemoteWriteAppender).queue.Capacity() + require.Equal(t, newCapacity, capacity) +} + func TestAppenderSeparationByRuleGroup(t *testing.T) { ctxA := createOriginContext("/rule/fileA", "rule-groupA") ctxB := createOriginContext("/rule/fileB", "rule-groupB") diff --git a/pkg/util/evicting_queue.go b/pkg/util/evicting_queue.go index dd331f052bae..f18238ce5884 100644 --- a/pkg/util/evicting_queue.go +++ b/pkg/util/evicting_queue.go @@ -14,16 +14,21 @@ type EvictingQueue struct { } func NewEvictingQueue(capacity int, onEvict func()) (*EvictingQueue, error) { - if capacity <= 0 { - // a queue of 0 (or smaller) capacity is invalid - return nil, errors.New("queue cannot have a zero or negative capacity") + if err := validateCapacity(capacity); err != nil { + return nil, err + } + + queue := &EvictingQueue{ + onEvict: onEvict, + entries: make([]interface{}, 0, capacity), } - return &EvictingQueue{ - capacity: capacity, - onEvict: onEvict, - entries: make([]interface{}, 0, capacity), - }, nil + err := queue.SetCapacity(capacity) + if err != nil { + return nil, err + } + + return queue, nil } func (q *EvictingQueue) Append(entry interface{}) { @@ -62,9 +67,27 @@ func (q *EvictingQueue) Capacity() int { return q.capacity } +func (q *EvictingQueue) SetCapacity(capacity int) error { + if err := validateCapacity(capacity); err != nil { + return err + } + + q.capacity = capacity + return nil +} + func (q *EvictingQueue) Clear() { q.Lock() defer q.Unlock() q.entries = q.entries[:0] } + +func validateCapacity(capacity int) error { + if capacity <= 0 { + // a queue of 0 (or smaller) capacity is invalid + return errors.New("queue cannot have a zero or negative capacity") + } + + return nil +} From a320a4620736f854e762e727aa39c09227111c6f Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Wed, 2 Jun 2021 18:28:13 +0200 Subject: [PATCH 34/34] Appeasing the linter Signed-off-by: Danny Kopping --- pkg/ruler/compat.go | 2 +- pkg/ruler/memstore.go | 2 +- pkg/ruler/memstore_test.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index ed87cd8d8044..1ef56ca8ed35 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -108,7 +108,7 @@ func MemstoreTenantManager( registerer := prometheus.DefaultRegisterer if msMetrics == nil { - msMetrics = NewMetrics(registerer) + msMetrics = newMemstoreMetrics(registerer) } if rwMetrics == nil { diff --git a/pkg/ruler/memstore.go b/pkg/ruler/memstore.go index 8e7364fe8fc0..82f0aac9c39a 100644 --- a/pkg/ruler/memstore.go +++ b/pkg/ruler/memstore.go @@ -52,7 +52,7 @@ type memstoreMetrics struct { cacheHits *prometheus.CounterVec // cache hits on in memory samples } -func NewMetrics(r prometheus.Registerer) *memstoreMetrics { +func newMemstoreMetrics(r prometheus.Registerer) *memstoreMetrics { return &memstoreMetrics{ evaluations: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ Namespace: "loki", diff --git a/pkg/ruler/memstore_test.go b/pkg/ruler/memstore_test.go index f00910aaa493..2849257cc855 100644 --- a/pkg/ruler/memstore_test.go +++ b/pkg/ruler/memstore_test.go @@ -15,7 +15,7 @@ import ( ) var ( - NilMetrics = NewMetrics(nil) + NilMetrics = newMemstoreMetrics(nil) NilLogger = log.NewNopLogger() )
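Patch 32's evictOldest change and patch 33's SetCapacity interact: once capacity can shrink at runtime, evicting a single entry is no longer enough, so the queue drops however many of its oldest entries are needed to get back under the current capacity. A condensed sketch of that behaviour, assuming the same semantics as pkg/util/evicting_queue.go but without the locking and the onEvict callback:

package main

import (
	"errors"
	"fmt"
)

// evictingQueue mirrors pkg/util/evicting_queue.go in miniature: appends
// beyond capacity evict the oldest entries, and the capacity itself can be
// lowered while the queue still holds samples.
type evictingQueue struct {
	capacity int
	entries  []interface{}
}

func newEvictingQueue(capacity int) (*evictingQueue, error) {
	q := &evictingQueue{}
	if err := q.setCapacity(capacity); err != nil {
		return nil, err
	}
	return q, nil
}

func (q *evictingQueue) setCapacity(capacity int) error {
	if capacity <= 0 {
		return errors.New("queue cannot have a zero or negative capacity")
	}
	q.capacity = capacity
	return nil
}

func (q *evictingQueue) append(entry interface{}) {
	if len(q.entries) >= q.capacity {
		// drop as many of the oldest entries as needed to leave room for
		// exactly one more; after a capacity reduction this trims the backlog
		start := (len(q.entries) - q.capacity) + 1
		q.entries = append(q.entries[:0], q.entries[start:]...)
	}
	q.entries = append(q.entries, entry)
}

func main() {
	q, _ := newEvictingQueue(5)
	for i := 0; i < 5; i++ {
		q.append(i)
	}
	fmt.Println(q.entries) // [0 1 2 3 4]

	_ = q.setCapacity(3) // runtime shrink, e.g. via tenant overrides
	q.append(5)
	fmt.Println(q.entries) // [3 4 5]
}
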
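Patch 33's appender change is essentially "memoize per rule group, but refresh tunables on every lookup": Appender reads the tenant's current RulerRemoteWriteQueueCapacity from the overrides on each call and pushes it into the cached appender via WithQueueCapacity, so a runtime-config change takes effect without discarding queued samples. A stripped-down sketch of that pattern, using illustrative names (tenantLimits, groupAppender) rather than the real types:

package main

import "fmt"

// tenantLimits stands in for the runtime-reloadable overrides; the real code
// calls overrides.RulerRemoteWriteQueueCapacity(userID).
type tenantLimits func(tenant string) int

type groupAppender struct {
	tenant   string
	capacity int
}

func (a *groupAppender) setCapacity(c int) { a.capacity = c }

// appendable memoizes one appender per rule-group key but re-applies the
// current capacity on every lookup, mirroring RemoteWriteAppendable.Appender.
type appendable struct {
	tenant    string
	overrides tenantLimits
	byGroup   map[string]*groupAppender
}

func (a *appendable) Appender(groupKey string) *groupAppender {
	capacity := a.overrides(a.tenant)

	if app, ok := a.byGroup[groupKey]; ok {
		app.setCapacity(capacity) // refresh tunables on the memoized appender
		return app
	}

	app := &groupAppender{tenant: a.tenant, capacity: capacity}
	a.byGroup[groupKey] = app
	return app
}

func main() {
	current := 10000
	a := &appendable{
		tenant:    "fake",
		overrides: func(string) int { return current },
		byGroup:   map[string]*groupAppender{},
	}

	first := a.Appender("rule-group-a")
	fmt.Println(first.capacity) // 10000

	current = 123 // simulate a runtime override change
	second := a.Appender("rule-group-a")
	fmt.Println(second == first, second.capacity) // true 123
}
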