Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prometheus support on v1/sys/metrics endpoint #5308

Merged
merged 12 commits into from
Feb 14, 2019
44 changes: 33 additions & 11 deletions command/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
metrics "github.com/armon/go-metrics"
"github.com/armon/go-metrics/circonus"
"github.com/armon/go-metrics/datadog"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/errwrap"
log "github.com/hashicorp/go-hclog"
multierror "github.com/hashicorp/go-multierror"
Expand Down Expand Up @@ -466,10 +467,12 @@ func (c *ServerCommand) Run(args []string) int {
"in a Docker container, provide the IPC_LOCK cap to the container."))
}

if err := c.setupTelemetry(config); err != nil {
inMemMetrics, err := c.setupTelemetry(config)
if err != nil {
c.UI.Error(fmt.Sprintf("Error initializing telemetry: %s", err))
return 1
}
prometheusEnabled := config.Telemetry != nil && config.Telemetry.PrometheusRetentionTime > 0

// Initialize the backend
factory, exists := c.PhysicalBackends[config.Storage.Type]
Expand Down Expand Up @@ -561,6 +564,8 @@ func (c *ServerCommand) Run(args []string) int {
AllLoggers: allLoggers,
BuiltinRegistry: builtinplugins.Registry,
DisableKeyEncodingChecks: config.DisablePrintableCheck,
InMemSink: inMemMetrics,
PrometheusEnabled: prometheusEnabled,
}
if c.flagDev {
coreConfig.DevToken = c.flagDevRootTokenID
Expand Down Expand Up @@ -1666,8 +1671,8 @@ func (c *ServerCommand) detectRedirect(detect physical.RedirectDetect,
return url.String(), nil
}

// setupTelemetry is used to setup the telemetry sub-systems
func (c *ServerCommand) setupTelemetry(config *server.Config) error {
// setupTelemetry is used to setup the telemetry sub-systems and returns the in-memory sink to be used in http configuration
func (c *ServerCommand) setupTelemetry(config *server.Config) (*metrics.InmemSink, error) {
/* Setup telemetry
Aggregate on 10 second intervals for 1 minute. Expose the
metrics over stderr when there is a SIGUSR1 received.
Expand All @@ -1676,10 +1681,10 @@ func (c *ServerCommand) setupTelemetry(config *server.Config) error {
metrics.DefaultInmemSignal(inm)

var telConfig *server.Telemetry
if config.Telemetry == nil {
telConfig = &server.Telemetry{}
} else {
if config.Telemetry != nil {
telConfig = config.Telemetry
} else {
telConfig = &server.Telemetry{}
}

metricsConf := metrics.DefaultConfig("vault")
Expand All @@ -1690,7 +1695,7 @@ func (c *ServerCommand) setupTelemetry(config *server.Config) error {
if telConfig.StatsiteAddr != "" {
sink, err := metrics.NewStatsiteSink(telConfig.StatsiteAddr)
if err != nil {
return err
return nil, err
}
fanout = append(fanout, sink)
}
Expand All @@ -1699,7 +1704,7 @@ func (c *ServerCommand) setupTelemetry(config *server.Config) error {
if telConfig.StatsdAddr != "" {
sink, err := metrics.NewStatsdSink(telConfig.StatsdAddr)
if err != nil {
return err
return nil, err
}
fanout = append(fanout, sink)
}
Expand Down Expand Up @@ -1735,12 +1740,29 @@ func (c *ServerCommand) setupTelemetry(config *server.Config) error {

sink, err := circonus.NewCirconusSink(cfg)
if err != nil {
return err
return nil, err
}
sink.Start()
fanout = append(fanout, sink)
}

// Configure the Prometheus sink
if telConfig.PrometheusRetentionTime.Nanoseconds() > 0 {
uepoch marked this conversation as resolved.
Show resolved Hide resolved
prometheusOpts := prometheus.PrometheusOpts{
Expiration: telConfig.PrometheusRetentionTime,
}
sink, err := prometheus.NewPrometheusSinkFrom(prometheusOpts)
if err != nil {
return nil, err
}
// Hostname enabled will create poor quality metrics name
if !telConfig.DisableHostname {
c.UI.Warn("telemetry.disable_hostname has been set to false. Recommended setting is true for Prometheus to avoid poorly named metrics.")

}
fanout = append(fanout, sink)
}

if telConfig.DogStatsDAddr != "" {
var tags []string

Expand All @@ -1750,7 +1772,7 @@ func (c *ServerCommand) setupTelemetry(config *server.Config) error {

sink, err := datadog.NewDogStatsdSink(telConfig.DogStatsDAddr, metricsConf.HostName)
if err != nil {
return errwrap.Wrapf("failed to start DogStatsD sink: {{err}}", err)
return nil, errwrap.Wrapf("failed to start DogStatsD sink: {{err}}", err)
}
sink.SetTags(tags)
fanout = append(fanout, sink)
Expand All @@ -1764,7 +1786,7 @@ func (c *ServerCommand) setupTelemetry(config *server.Config) error {
metricsConf.EnableHostname = false
metrics.NewGlobal(metricsConf, inm)
}
return nil
return inm, nil
}

func (c *ServerCommand) Reload(lock *sync.RWMutex, reloadFuncs *map[string][]reload.ReloadFunc, configPath []string) error {
Expand Down
20 changes: 19 additions & 1 deletion command/server/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,10 @@ func DevConfig(ha, transactional bool) *Config {

EnableUI: true,

Telemetry: &Telemetry{},
Telemetry: &Telemetry{
PrometheusRetentionTime: 24 * time.Hour,
ncabatoff marked this conversation as resolved.
Show resolved Hide resolved
DisableHostname: true,
},
}

switch {
Expand Down Expand Up @@ -233,6 +236,13 @@ type Telemetry struct {
// DogStatsdTags are the global tags that should be sent with each packet to dogstatsd
// It is a list of strings, where each string looks like "my_tag_name:my_tag_value"
DogStatsDTags []string `hcl:"dogstatsd_tags"`

// Prometheus:
// PrometheusRetentionTime is the retention time for prometheus metrics if greater than 0.
// A value of 0 disable Prometheus support.
uepoch marked this conversation as resolved.
Show resolved Hide resolved
// Default: 0
ncabatoff marked this conversation as resolved.
Show resolved Hide resolved
PrometheusRetentionTime time.Duration `hcl:-`
PrometheusRetentionTimeRaw interface{} `hcl:"prometheus_retention_time"`
}

func (s *Telemetry) GoString() string {
Expand Down Expand Up @@ -816,5 +826,13 @@ func parseTelemetry(result *Config, list *ast.ObjectList) error {
if err := hcl.DecodeObject(&result.Telemetry, item.Val); err != nil {
return multierror.Prefix(err, "telemetry:")
}

if result.Telemetry.PrometheusRetentionTimeRaw != nil {
var err error
if result.Telemetry.PrometheusRetentionTime, err = parseutil.ParseDurationSecond(result.Telemetry.PrometheusRetentionTimeRaw); err != nil {
return err
}
}

return nil
}
10 changes: 10 additions & 0 deletions vault/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,10 @@ type Core struct {
// Stores loggers so we can reset the level
allLoggers []log.Logger
allLoggersLock sync.RWMutex

// Telemetry objects
inMemSink *metrics.InmemSink
prometheusEnabled bool
}

// CoreConfig is used to parameterize a core
Expand Down Expand Up @@ -470,6 +474,10 @@ type CoreConfig struct {
DisableKeyEncodingChecks bool

AllLoggers []log.Logger

// Telemetry objects
InMemSink *metrics.InmemSink
PrometheusEnabled bool
}

func (c *CoreConfig) Clone() *CoreConfig {
Expand Down Expand Up @@ -576,6 +584,8 @@ func NewCore(conf *CoreConfig) (*Core, error) {
activeContextCancelFunc: new(atomic.Value),
allLoggers: conf.AllLoggers,
builtinRegistry: conf.BuiltinRegistry,
inMemSink: conf.InMemSink,
prometheusEnabled: conf.PrometheusEnabled,
}

atomic.StoreUint32(c.sealed, 1)
Expand Down
61 changes: 61 additions & 0 deletions vault/logical_system.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package vault

import (
"bytes"
"context"
"crypto/sha256"
"crypto/sha512"
Expand All @@ -18,6 +19,8 @@ import (
"sync"
"time"

"github.com/prometheus/common/expfmt"

"github.com/hashicorp/errwrap"
log "github.com/hashicorp/go-hclog"
memdb "github.com/hashicorp/go-memdb"
Expand All @@ -33,6 +36,7 @@ import (
"github.com/hashicorp/vault/logical"
"github.com/hashicorp/vault/logical/framework"
"github.com/mitchellh/mapstructure"
"github.com/prometheus/client_golang/prometheus"
uepoch marked this conversation as resolved.
Show resolved Hide resolved
)

var (
Expand Down Expand Up @@ -145,6 +149,7 @@ func NewSystemBackend(core *Core, logger log.Logger) *SystemBackend {
b.Backend.Paths = append(b.Backend.Paths, b.capabilitiesPaths()...)
b.Backend.Paths = append(b.Backend.Paths, b.internalPaths()...)
b.Backend.Paths = append(b.Backend.Paths, b.remountPath())
b.Backend.Paths = append(b.Backend.Paths, b.metricsPath())

if core.rawEnabled {
b.Backend.Paths = append(b.Backend.Paths, &framework.Path{
Expand Down Expand Up @@ -2473,6 +2478,58 @@ func (b *SystemBackend) responseWrappingUnwrap(ctx context.Context, te *logical.
return response, nil
}

func (b *SystemBackend) handleMetrics(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {

format := data.Get("format").(string)

acceptHeaders := req.Headers["Accept"]
if format == "prometheus" || (len(acceptHeaders) > 0 && strings.HasPrefix(acceptHeaders[0], "application/openmetrics-text")) {
if !b.Core.prometheusEnabled {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the reason for having this guard? It's an ACL'd call so presumably if you're allowed to access it you should be allowed to get that data in whatever format you want.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used the same logic as other telemetry backends, with it being disabled by default, unless you specify a valid retention in telemetry config

Do you feel we should add a by-default value and enable it for everybody ? @jefferai @ncabatoff

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used the same logic as other telemetry backends, with it being disabled by default, unless you specify a valid retention in telemetry config

Right, but then unlike the other types you're plumbing a value all the way through core. Prometheus is just a format, I don't really see a reason to do this. If someone has access to the metrics, it seems like it ought to be able to be fetched in whatever format they want.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair point, will modify to set a default value

err := "prometheus support is not enabled"
return nil, fmt.Errorf(err)
}

metricsFamilies, err := prometheus.DefaultGatherer.Gather()
uepoch marked this conversation as resolved.
Show resolved Hide resolved
if err != nil && len(metricsFamilies) == 0 {
return nil, fmt.Errorf("no prometheus metrics could be decoded: %s", err)
}

// Initialize a byte buffer.
buf := &bytes.Buffer{}
defer buf.Reset()

e := expfmt.NewEncoder(buf, expfmt.FmtText)
for _, mf := range metricsFamilies {
err := e.Encode(mf)
if err != nil {
return nil, fmt.Errorf("error during the encoding of metrics: %s", err)
}
}
return &logical.Response{
Data: map[string]interface{}{
logical.HTTPContentType: expfmt.FmtText,
logical.HTTPRawBody: buf.Bytes(),
logical.HTTPStatusCode: 200,
},
}, nil
}
summary, err := b.Core.inMemSink.DisplayMetrics(nil, nil)
if err != nil {
return nil, fmt.Errorf("error while fetching the in-memory metrics: %s", err)
}
content, err := json.Marshal(summary)
if err != nil {
return nil, fmt.Errorf("error while marshalling the in-memory metrics: %s", err)
}
return &logical.Response{
Data: map[string]interface{}{
logical.HTTPContentType: "application/json",
logical.HTTPRawBody: content,
logical.HTTPStatusCode: 200,
},
}, nil
}

func (b *SystemBackend) handleWrappingLookup(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {
// This ordering of lookups has been validated already in the wrapping
// validation func, we're just doing this for a safety check
Expand Down Expand Up @@ -3853,4 +3910,8 @@ This path responds to the following HTTP methods.
"Information about a token's resultant ACL. Internal API; its location, inputs, and outputs may change.",
"",
},
"metrics": {
"Export the metrics aggregated for telemetry purpose.",
"",
},
}
18 changes: 18 additions & 0 deletions vault/logical_system_paths.go
Original file line number Diff line number Diff line change
Expand Up @@ -1100,6 +1100,24 @@ func (b *SystemBackend) remountPath() *framework.Path {
}
}

func (b *SystemBackend) metricsPath() *framework.Path {
return &framework.Path{
Pattern: "metrics",
Fields: map[string]*framework.FieldSchema{
"format": &framework.FieldSchema{
Type: framework.TypeString,
Description: "Format to export metrics into. Currently accept only \"prometheus\"",
},
},
Callbacks: map[logical.Operation]framework.OperationFunc{
logical.ReadOperation: b.handleMetrics,
},
HelpSynopsis: strings.TrimSpace(sysHelp["metrics"][0]),
HelpDescription: strings.TrimSpace(sysHelp["metrics"][1]),
}

}

func (b *SystemBackend) authPaths() []*framework.Path {
return []*framework.Path{
{
Expand Down
4 changes: 4 additions & 0 deletions vault/mount.go
Original file line number Diff line number Diff line change
Expand Up @@ -843,6 +843,7 @@ func (c *Core) loadMounts(ctx context.Context) error {
for _, coreMount := range c.mounts.Entries {
if coreMount.Type == requiredMount.Type {
foundRequired = true
coreMount.Config = requiredMount.Config
break
}
}
Expand Down Expand Up @@ -1267,6 +1268,9 @@ func (c *Core) requiredMountTable() *MountTable {
UUID: sysUUID,
Accessor: sysAccessor,
BackendAwareUUID: sysBackendUUID,
Config: MountConfig{
uepoch marked this conversation as resolved.
Show resolved Hide resolved
PassthroughRequestHeaders: []string{"Accept"},
},
}

identityUUID, err := uuid.GenerateUUID()
Expand Down
Loading