Skip to content

Commit

Permalink
Prometheus support on v1/sys/metrics endpoint (#5308)
Browse files Browse the repository at this point in the history
* initial commit for prometheus and sys/metrics support

* Throw an error if prometheusRetentionTime is 0,add prometheus in devmode

* return when format=prometheus is used and prom is disable

* parse prometheus_retention_time from string instead of int

* Initialize config.Telemetry if nil

* address PR issues

* add sys/metrics framework.Path in a factory

* Apply requiredMountTable entries's MountConfig to existing core table

* address pr comments

* enable prometheus sink by default

* Move Metric-related code in a separate metricsutil helper
  • Loading branch information
uepoch authored and briankassouf committed Feb 14, 2019
1 parent 9408c3b commit 5dd50ef
Show file tree
Hide file tree
Showing 14 changed files with 1,529 additions and 17 deletions.
61 changes: 46 additions & 15 deletions command/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"encoding/base64"
"encoding/hex"
"fmt"
"github.com/hashicorp/vault/helper/metricsutil"
"io"
"io/ioutil"
"net"
Expand All @@ -23,6 +24,7 @@ import (
metrics "github.com/armon/go-metrics"
"github.com/armon/go-metrics/circonus"
"github.com/armon/go-metrics/datadog"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/errwrap"
log "github.com/hashicorp/go-hclog"
multierror "github.com/hashicorp/go-multierror"
Expand Down Expand Up @@ -469,7 +471,8 @@ func (c *ServerCommand) Run(args []string) int {
"in a Docker container, provide the IPC_LOCK cap to the container."))
}

if err := c.setupTelemetry(config); err != nil {
metricsHelper, err := c.setupTelemetry(config)
if err != nil {
c.UI.Error(fmt.Sprintf("Error initializing telemetry: %s", err))
return 1
}
Expand Down Expand Up @@ -563,6 +566,7 @@ func (c *ServerCommand) Run(args []string) int {
AllLoggers: allLoggers,
BuiltinRegistry: builtinplugins.Registry,
DisableKeyEncodingChecks: config.DisablePrintableCheck,
MetricsHelper: metricsHelper,
}
if c.flagDev {
coreConfig.DevToken = c.flagDevRootTokenID
Expand Down Expand Up @@ -1691,8 +1695,8 @@ func (c *ServerCommand) detectRedirect(detect physical.RedirectDetect,
return url.String(), nil
}

// setupTelemetry is used to setup the telemetry sub-systems
func (c *ServerCommand) setupTelemetry(config *server.Config) error {
// setupTelemetry is used to setup the telemetry sub-systems and returns the in-memory sink to be used in http configuration
func (c *ServerCommand) setupTelemetry(config *server.Config) (*metricsutil.MetricsHelper, error) {
/* Setup telemetry
Aggregate on 10 second intervals for 1 minute. Expose the
metrics over stderr when there is a SIGUSR1 received.
Expand All @@ -1701,21 +1705,40 @@ func (c *ServerCommand) setupTelemetry(config *server.Config) error {
metrics.DefaultInmemSignal(inm)

var telConfig *server.Telemetry
if config.Telemetry == nil {
telConfig = &server.Telemetry{}
} else {
if config.Telemetry != nil {
telConfig = config.Telemetry
} else {
telConfig = &server.Telemetry{}
}

metricsConf := metrics.DefaultConfig("vault")
metricsConf.EnableHostname = !telConfig.DisableHostname

// Configure the statsite sink
var fanout metrics.FanoutSink
var prometheusEnabled bool

// Configure the Prometheus sink
if telConfig.PrometheusRetentionTime != 0 {
prometheusEnabled = true
prometheusOpts := prometheus.PrometheusOpts{
Expiration: telConfig.PrometheusRetentionTime,
}

sink, err := prometheus.NewPrometheusSinkFrom(prometheusOpts)
if err != nil {
return nil, err
}
fanout = append(fanout, sink)
}

metricHelper := metricsutil.NewMetricsHelper(inm, prometheusEnabled)


if telConfig.StatsiteAddr != "" {
sink, err := metrics.NewStatsiteSink(telConfig.StatsiteAddr)
if err != nil {
return err
return nil, err
}
fanout = append(fanout, sink)
}
Expand All @@ -1724,7 +1747,7 @@ func (c *ServerCommand) setupTelemetry(config *server.Config) error {
if telConfig.StatsdAddr != "" {
sink, err := metrics.NewStatsdSink(telConfig.StatsdAddr)
if err != nil {
return err
return nil, err
}
fanout = append(fanout, sink)
}
Expand Down Expand Up @@ -1760,7 +1783,7 @@ func (c *ServerCommand) setupTelemetry(config *server.Config) error {

sink, err := circonus.NewCirconusSink(cfg)
if err != nil {
return err
return nil, err
}
sink.Start()
fanout = append(fanout, sink)
Expand All @@ -1775,21 +1798,29 @@ func (c *ServerCommand) setupTelemetry(config *server.Config) error {

sink, err := datadog.NewDogStatsdSink(telConfig.DogStatsDAddr, metricsConf.HostName)
if err != nil {
return errwrap.Wrapf("failed to start DogStatsD sink: {{err}}", err)
return nil, errwrap.Wrapf("failed to start DogStatsD sink: {{err}}", err)
}
sink.SetTags(tags)
fanout = append(fanout, sink)
}

// Initialize the global sink
if len(fanout) > 0 {
fanout = append(fanout, inm)
metrics.NewGlobal(metricsConf, fanout)
if len(fanout) > 1 {
// Hostname enabled will create poor quality metrics name for prometheus
if !telConfig.DisableHostname {
c.UI.Warn("telemetry.disable_hostname has been set to false. Recommended setting is true for Prometheus to avoid poorly named metrics.")
}
} else {
metricsConf.EnableHostname = false
metrics.NewGlobal(metricsConf, inm)
}
return nil
fanout = append(fanout, inm)
_, err := metrics.NewGlobal(metricsConf, fanout)

if err != nil {
return nil, err
}

return metricHelper, nil
}

func (c *ServerCommand) Reload(lock *sync.RWMutex, reloadFuncs *map[string][]reload.ReloadFunc, configPath []string) error {
Expand Down
25 changes: 24 additions & 1 deletion command/server/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ import (
"github.com/hashicorp/vault/helper/parseutil"
)

const (
prometheusDefaultRetentionTime = 24 * time.Hour
)

// Config is the configuration for the vault server.
type Config struct {
Listeners []*Listener `hcl:"-"`
Expand Down Expand Up @@ -98,7 +102,10 @@ func DevConfig(ha, transactional bool) *Config {

EnableUI: true,

Telemetry: &Telemetry{},
Telemetry: &Telemetry{
PrometheusRetentionTime: prometheusDefaultRetentionTime,
DisableHostname: true,
},
}

switch {
Expand Down Expand Up @@ -233,6 +240,12 @@ type Telemetry struct {
// DogStatsdTags are the global tags that should be sent with each packet to dogstatsd
// It is a list of strings, where each string looks like "my_tag_name:my_tag_value"
DogStatsDTags []string `hcl:"dogstatsd_tags"`

// Prometheus:
// PrometheusRetentionTime is the retention time for prometheus metrics if greater than 0.
// Default: 24h
PrometheusRetentionTime time.Duration `hcl:-`
PrometheusRetentionTimeRaw interface{} `hcl:"prometheus_retention_time"`
}

func (s *Telemetry) GoString() string {
Expand Down Expand Up @@ -864,5 +877,15 @@ func parseTelemetry(result *Config, list *ast.ObjectList) error {
if err := hcl.DecodeObject(&result.Telemetry, item.Val); err != nil {
return multierror.Prefix(err, "telemetry:")
}

if result.Telemetry.PrometheusRetentionTimeRaw != nil {
var err error
if result.Telemetry.PrometheusRetentionTime, err = parseutil.ParseDurationSecond(result.Telemetry.PrometheusRetentionTimeRaw); err != nil {
return err
}
} else {
result.Telemetry.PrometheusRetentionTime = prometheusDefaultRetentionTime
}

return nil
}
104 changes: 104 additions & 0 deletions helper/metricsutil/metricsutil.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package metricsutil

import (
"bytes"
"encoding/json"
"fmt"
"github.com/armon/go-metrics"
"github.com/hashicorp/vault/logical"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/expfmt"
"strings"
)

const (
OpenMetricsMIMEType = "application/openmetrics-text"
)

const (
PrometheusMetricFormat = "prometheus"
)

type MetricsHelper struct {
inMemSink *metrics.InmemSink
PrometheusEnabled bool
}

func NewMetricsHelper(inMem *metrics.InmemSink, enablePrometheus bool) *MetricsHelper{
return &MetricsHelper{inMem, enablePrometheus}
}

func FormatFromRequest(req *logical.Request) (string) {
acceptHeaders := req.Headers["Accept"]
if len(acceptHeaders) > 0 {
acceptHeader := acceptHeaders[0]
if strings.HasPrefix(acceptHeader, OpenMetricsMIMEType) {
return "prometheus"
}
}
return ""
}

func (m *MetricsHelper) ResponseForFormat(format string) (*logical.Response, error) {
switch format {
case PrometheusMetricFormat:
return m.PrometheusResponse()
case "":
return m.GenericResponse()
default:
return nil, fmt.Errorf("metric response format \"%s\" unknown", format)
}
}

func (m *MetricsHelper) PrometheusResponse() (*logical.Response, error) {
if !m.PrometheusEnabled {
return &logical.Response{
Data: map[string]interface{}{
logical.HTTPContentType: "text/plain",
logical.HTTPRawBody: "prometheus is not enabled",
logical.HTTPStatusCode: 400,
},
}, nil
}
metricsFamilies, err := prometheus.DefaultGatherer.Gather()
if err != nil && len(metricsFamilies) == 0 {
return nil, fmt.Errorf("no prometheus metrics could be decoded: %s", err)
}

// Initialize a byte buffer.
buf := &bytes.Buffer{}
defer buf.Reset()

e := expfmt.NewEncoder(buf, expfmt.FmtText)
for _, mf := range metricsFamilies {
err := e.Encode(mf)
if err != nil {
return nil, fmt.Errorf("error during the encoding of metrics: %s", err)
}
}
return &logical.Response{
Data: map[string]interface{}{
logical.HTTPContentType: string(expfmt.FmtText),
logical.HTTPRawBody: buf.Bytes(),
logical.HTTPStatusCode: 200,
},
}, nil
}

func (m *MetricsHelper) GenericResponse() (*logical.Response, error) {
summary, err := m.inMemSink.DisplayMetrics(nil,nil)
if err != nil {
return nil, fmt.Errorf("error while fetching the in-memory metrics: %s", err)
}
content, err := json.Marshal(summary)
if err != nil {
return nil, fmt.Errorf("error while marshalling the in-memory metrics: %s", err)
}
return &logical.Response{
Data: map[string]interface{}{
logical.HTTPContentType: "application/json",
logical.HTTPRawBody: content,
logical.HTTPStatusCode: 200,
},
}, nil
}
10 changes: 9 additions & 1 deletion vault/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"crypto/x509"
"errors"
"fmt"
"github.com/hashicorp/vault/helper/metricsutil"
"net"
"net/http"
"net/url"
Expand Down Expand Up @@ -419,7 +420,10 @@ type Core struct {

// loadCaseSensitiveIdentityStore enforces the loading of identity store
// artifacts in a case sensitive manner. To be used only in testing.
loadCaseSensitiveIdentityStore bool
loadCaseSensitiveIdentityStore bool

// Telemetry objects
metricsHelper *metricsutil.MetricsHelper
}

// CoreConfig is used to parameterize a core
Expand Down Expand Up @@ -488,6 +492,9 @@ type CoreConfig struct {
DisableKeyEncodingChecks bool

AllLoggers []log.Logger

// Telemetry objects
MetricsHelper *metricsutil.MetricsHelper
}

func (c *CoreConfig) Clone() *CoreConfig {
Expand Down Expand Up @@ -596,6 +603,7 @@ func NewCore(conf *CoreConfig) (*Core, error) {
builtinRegistry: conf.BuiltinRegistry,
neverBecomeActive: new(uint32),
clusterLeaderParams: new(atomic.Value),
metricsHelper: conf.MetricsHelper,
}

atomic.StoreUint32(c.sealed, 1)
Expand Down
14 changes: 14 additions & 0 deletions vault/logical_system.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/hashicorp/vault/helper/consts"
"github.com/hashicorp/vault/helper/identity"
"github.com/hashicorp/vault/helper/jsonutil"
"github.com/hashicorp/vault/helper/metricsutil"
"github.com/hashicorp/vault/helper/namespace"
"github.com/hashicorp/vault/helper/parseutil"
"github.com/hashicorp/vault/helper/strutil"
Expand Down Expand Up @@ -145,6 +146,7 @@ func NewSystemBackend(core *Core, logger log.Logger) *SystemBackend {
b.Backend.Paths = append(b.Backend.Paths, b.capabilitiesPaths()...)
b.Backend.Paths = append(b.Backend.Paths, b.internalPaths()...)
b.Backend.Paths = append(b.Backend.Paths, b.remountPath())
b.Backend.Paths = append(b.Backend.Paths, b.metricsPath())

if core.rawEnabled {
b.Backend.Paths = append(b.Backend.Paths, &framework.Path{
Expand Down Expand Up @@ -2512,6 +2514,14 @@ func (b *SystemBackend) responseWrappingUnwrap(ctx context.Context, te *logical.
return response, nil
}

func (b *SystemBackend) handleMetrics(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {
format := data.Get("format").(string)
if format == "" {
format = metricsutil.FormatFromRequest(req)
}
return b.Core.metricsHelper.ResponseForFormat(format)
}

func (b *SystemBackend) handleWrappingLookup(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {
// This ordering of lookups has been validated already in the wrapping
// validation func, we're just doing this for a safety check
Expand Down Expand Up @@ -3884,4 +3894,8 @@ This path responds to the following HTTP methods.
"Information about a token's resultant ACL. Internal API; its location, inputs, and outputs may change.",
"",
},
"metrics": {
"Export the metrics aggregated for telemetry purpose.",
"",
},
}
Loading

0 comments on commit 5dd50ef

Please sign in to comment.