Skip to content

Commit

Permalink
Add support for native histograms in OM parser (#1040)
Browse files Browse the repository at this point in the history
* Start on native histogram parser
* Fix regex for nh sample
* Get nh sample appended
* Complete parsing for simple native histogram
* Add parsing for native histograms with labels, fix linting
* Mitigate type and style errors
* Add test for parsing coexisting native and classic hist with simple label set
* Solve error in Python 3.9 tests
* Add test for native + classic histograms with more than one label set and adapt logic accordingly
* Separate native histogram from value field, improve conditional/try blocks
* Clean up debug lines, add warnings, delete unnecessary lines

Signed-off-by: Arianna Vespri <arianna.vespri@yahoo.it>
  • Loading branch information
vesari committed Sep 20, 2024
1 parent 3b183b4 commit d7c9cd8
Show file tree
Hide file tree
Showing 8 changed files with 245 additions and 37 deletions.
4 changes: 3 additions & 1 deletion prometheus_client/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
SummaryMetricFamily, UnknownMetricFamily, UntypedMetricFamily,
)
from .registry import CollectorRegistry, REGISTRY
from .samples import Exemplar, Sample, Timestamp
from .samples import BucketSpan, Exemplar, NativeHistogram, Sample, Timestamp

__all__ = (
'BucketSpan',
'CollectorRegistry',
'Counter',
'CounterMetricFamily',
Expand All @@ -21,6 +22,7 @@
'Info',
'InfoMetricFamily',
'Metric',
'NativeHistogram',
'REGISTRY',
'Sample',
'StateSetMetricFamily',
Expand Down
8 changes: 4 additions & 4 deletions prometheus_client/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,8 @@ def describe(self) -> Iterable[Metric]:

def collect(self) -> Iterable[Metric]:
metric = self._get_metric()
for suffix, labels, value, timestamp, exemplar in self._samples():
metric.add_sample(self._name + suffix, labels, value, timestamp, exemplar)
for suffix, labels, value, timestamp, exemplar, native_histogram_value in self._samples():
metric.add_sample(self._name + suffix, labels, value, timestamp, exemplar, native_histogram_value)
return [metric]

def __str__(self) -> str:
Expand Down Expand Up @@ -246,8 +246,8 @@ def _multi_samples(self) -> Iterable[Sample]:
metrics = self._metrics.copy()
for labels, metric in metrics.items():
series_labels = list(zip(self._labelnames, labels))
for suffix, sample_labels, value, timestamp, exemplar in metric._samples():
yield Sample(suffix, dict(series_labels + list(sample_labels.items())), value, timestamp, exemplar)
for suffix, sample_labels, value, timestamp, exemplar, native_histogram_value in metric._samples():
yield Sample(suffix, dict(series_labels + list(sample_labels.items())), value, timestamp, exemplar, native_histogram_value)

def _child_samples(self) -> Iterable[Sample]: # pragma: no cover
raise NotImplementedError('_child_samples() must be implemented by %r' % self)
Expand Down
7 changes: 3 additions & 4 deletions prometheus_client/metrics_core.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import re
from typing import Dict, List, Optional, Sequence, Tuple, Union

from .samples import Exemplar, Sample, Timestamp
from .samples import Exemplar, NativeHistogram, Sample, Timestamp

METRIC_TYPES = (
'counter', 'gauge', 'summary', 'histogram',
Expand Down Expand Up @@ -36,11 +36,11 @@ def __init__(self, name: str, documentation: str, typ: str, unit: str = ''):
self.type: str = typ
self.samples: List[Sample] = []

def add_sample(self, name: str, labels: Dict[str, str], value: float, timestamp: Optional[Union[Timestamp, float]] = None, exemplar: Optional[Exemplar] = None) -> None:
def add_sample(self, name: str, labels: Dict[str, str], value: float, timestamp: Optional[Union[Timestamp, float]] = None, exemplar: Optional[Exemplar] = None, native_histogram: Optional[NativeHistogram] = None) -> None:
"""Add a sample to the metric.
Internal-only, do not use."""
self.samples.append(Sample(name, labels, value, timestamp, exemplar))
self.samples.append(Sample(name, labels, value, timestamp, exemplar, native_histogram))

def __eq__(self, other: object) -> bool:
return (isinstance(other, Metric)
Expand Down Expand Up @@ -284,7 +284,6 @@ def add_metric(self,
Sample(self.name + '_sum', dict(zip(self._labelnames, labels)), sum_value, timestamp))



class GaugeHistogramMetricFamily(Metric):
"""A single gauge histogram and its samples.
Expand Down
2 changes: 1 addition & 1 deletion prometheus_client/multiprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def _accumulate_metrics(metrics, accumulate):
buckets = defaultdict(lambda: defaultdict(float))
samples_setdefault = samples.setdefault
for s in metric.samples:
name, labels, value, timestamp, exemplar = s
name, labels, value, timestamp, exemplar, native_histogram_value = s
if metric.type == 'gauge':
without_pid_key = (name, tuple(l for l in labels if l[0] != 'pid'))
if metric._multiprocess_mode in ('min', 'livemin'):
Expand Down
4 changes: 3 additions & 1 deletion prometheus_client/openmetrics/exposition.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
def _is_valid_exemplar_metric(metric, sample):
    """Return True if ``sample`` of ``metric`` is allowed to carry an exemplar.

    Exemplars are accepted on:
      * ``_total`` samples of counters,
      * ``_bucket`` samples of gauge histograms,
      * ``_bucket`` samples of histograms, and histogram samples whose name
        equals the metric name (the suffix-less form used by native
        histogram samples).

    Args:
        metric: object exposing ``type`` and ``name`` attributes.
        sample: object exposing a ``name`` attribute.
    """
    # Compare the type with ``==``: ``x in ('histogram')`` is a substring test
    # on a plain string (the parentheses do not make a tuple), so e.g. the
    # type 'gauge' would match 'gaugehistogram'.
    if metric.type == 'counter':
        return sample.name.endswith('_total')
    if metric.type == 'gaugehistogram':
        return sample.name.endswith('_bucket')
    if metric.type == 'histogram':
        # The bare-name alternative is scoped to histograms only; without the
        # grouping, ``a and b or c`` lets any metric type through when the
        # sample name equals the metric name.
        return sample.name.endswith('_bucket') or sample.name == metric.name
    return False

Expand Down
156 changes: 133 additions & 23 deletions prometheus_client/openmetrics/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import re

from ..metrics_core import Metric, METRIC_LABEL_NAME_RE
from ..samples import Exemplar, Sample, Timestamp
from ..samples import BucketSpan, Exemplar, NativeHistogram, Sample, Timestamp
from ..utils import floatToGoString


Expand Down Expand Up @@ -364,6 +364,99 @@ def _parse_remaining_text(text):
return val, ts, exemplar


def _parse_nh_sample(text, suffixes):
    """Try to parse ``text`` as a native histogram sample line.

    Two shapes are recognised:
      * ``name{labels} {nh fields}`` - native histogram with labels
      * ``name {nh fields}``         - native histogram without labels

    Neither the label set nor the value may contain nested ``{``/``}``;
    such lines are not recognised as native histograms.

    Args:
        text: one sample line, without the trailing newline.
        suffixes: tuple of suffixes (as accepted by ``str.endswith``) that a
            native histogram sample name must not end with.

    Returns:
        A ``Sample`` whose value, timestamp and exemplar are ``None`` and
        whose ``native_histogram`` field holds the parsed value; labels are
        ``None`` for the label-less shape. Returns ``None`` when ``text`` is
        not a native histogram sample.

    Raises:
        ValueError: if the sample name ends with one of ``suffixes``.
    """
    with_labels_re = re.compile(r'[^{} ]+{[^{}]+} {[^{}]+}$')
    without_labels_re = re.compile(r'^[^{} ]+ {[^{}]+}$')
    first_brace = text.find("{")

    if with_labels_re.match(text):
        # The last "{" opens the histogram value; the two characters before
        # it are the "}" closing the label set and the separating space.
        value_pos = text.rindex("{")
        labels = _parse_labels(text[first_brace + 1:value_pos - 2])
        name = text[:first_brace]
        if name.endswith(suffixes):
            raise ValueError("the sample name of a native histogram with labels should have no suffixes", name)
        return Sample(name, labels, None, None, None, _parse_nh_struct(text[value_pos:]))

    if not without_labels_re.match(text):
        # Not a native histogram: let the caller fall back to the regular
        # sample parser.
        return None

    # Without labels the first "{" opens the histogram value and is preceded
    # by the single space that follows the name.
    name = text[:first_brace - 1]
    if name.endswith(suffixes):
        raise ValueError("the sample name of a native histogram should have no suffixes", name)
    return Sample(name, None, None, None, None, _parse_nh_struct(text[first_brace:]))


def _parse_nh_struct(text):
    """Parse the ``{...}`` value of a native histogram sample.

    ``text`` is expected to look like
    ``{count:24,sum:100,schema:0,zero_threshold:0.001,zero_count:4,``
    ``positive_spans:[0:2,1:2],positive_deltas:[2,1,-3,3]}``.

    Args:
        text: the brace-delimited native histogram value.

    Returns:
        A ``NativeHistogram``. Span fields are tuples of ``BucketSpan`` and
        delta fields are tuples of ints; each is ``None`` when the
        corresponding key is absent (or not in the expected list syntax).

    Raises:
        KeyError: if ``count``, ``sum``, ``schema``, ``zero_threshold`` or
            ``zero_count`` is missing.
        ValueError: if one of those fields is not a valid number.
    """
    pattern = r'(\w+):\s*([^,}]+)'
    # One or more comma-separated "offset:length" pairs. The offset may be
    # negative; a fixed two-span pattern would silently discard histograms
    # with any other number of spans.
    re_spans = re.compile(r'(positive_spans|negative_spans):\[(-?\d+:\d+(?:,-?\d+:\d+)*)\]')
    re_deltas = re.compile(r'(positive_deltas|negative_deltas):\[(-?\d+(?:,-?\d+)*)\]')

    items = dict(re.findall(pattern, text))
    spans = dict(re_spans.findall(text))
    deltas = dict(re_deltas.findall(text))

    def parse_spans(key):
        # Turn "o1:l1,o2:l2,..." into a tuple of BucketSpan, or None if absent.
        spans_text = spans.get(key)
        if spans_text is None:
            return None
        parsed = []
        for span_text in spans_text.split(','):
            offset, length = span_text.split(':')
            parsed.append(BucketSpan(int(offset), int(length)))
        return tuple(parsed)

    def parse_deltas(key):
        # Turn "d1,d2,..." into a tuple of ints, or None if absent.
        deltas_text = deltas.get(key)
        if deltas_text is None:
            return None
        return tuple(int(delta) for delta in deltas_text.split(','))

    count_value = int(items['count'])
    # The sum of observations is not necessarily integral: keep ints as ints
    # (unchanged result for integral input) and fall back to float otherwise.
    try:
        sum_value = int(items['sum'])
    except ValueError:
        sum_value = float(items['sum'])
    schema = int(items['schema'])
    zero_threshold = float(items['zero_threshold'])
    zero_count = int(items['zero_count'])

    return NativeHistogram(
        count_value=count_value,
        sum_value=sum_value,
        schema=schema,
        zero_threshold=zero_threshold,
        zero_count=zero_count,
        pos_spans=parse_spans('positive_spans'),
        neg_spans=parse_spans('negative_spans'),
        pos_deltas=parse_deltas('positive_deltas'),
        neg_deltas=parse_deltas('negative_deltas'),
    )


def _group_for_sample(sample, name, typ):
if typ == 'info':
# We can't distinguish between groups for info metrics.
Expand Down Expand Up @@ -406,6 +499,8 @@ def do_checks():
for s in samples:
suffix = s.name[len(name):]
g = _group_for_sample(s, name, 'histogram')
if len(suffix) == 0:
continue
if g != group or s.timestamp != timestamp:
if group is not None:
do_checks()
Expand Down Expand Up @@ -486,6 +581,8 @@ def build_metric(name, documentation, typ, unit, samples):
metric.samples = samples
return metric

is_nh = False
typ = None
for line in fd:
if line[-1] == '\n':
line = line[:-1]
Expand Down Expand Up @@ -518,7 +615,7 @@ def build_metric(name, documentation, typ, unit, samples):
group_timestamp_samples = set()
samples = []
allowed_names = [parts[2]]

if parts[1] == 'HELP':
if documentation is not None:
raise ValueError("More than one HELP for metric: " + line)
Expand All @@ -537,8 +634,18 @@ def build_metric(name, documentation, typ, unit, samples):
else:
raise ValueError("Invalid line: " + line)
else:
sample = _parse_sample(line)
if sample.name not in allowed_names:
if typ == 'histogram':
# set to true to account for native histograms naming exceptions/sanitizing differences
is_nh = True
sample = _parse_nh_sample(line, tuple(type_suffixes['histogram']))
# It's not a native histogram
if sample is None:
is_nh = False
sample = _parse_sample(line)
else:
is_nh = False
sample = _parse_sample(line)
if sample.name not in allowed_names and not is_nh:
if name is not None:
yield build_metric(name, documentation, typ, unit, samples)
# Start an unknown metric.
Expand Down Expand Up @@ -570,26 +677,29 @@ def build_metric(name, documentation, typ, unit, samples):
or _isUncanonicalNumber(sample.labels['quantile']))):
raise ValueError("Invalid quantile label: " + line)

g = tuple(sorted(_group_for_sample(sample, name, typ).items()))
if group is not None and g != group and g in seen_groups:
raise ValueError("Invalid metric grouping: " + line)
if group is not None and g == group:
if (sample.timestamp is None) != (group_timestamp is None):
raise ValueError("Mix of timestamp presence within a group: " + line)
if group_timestamp is not None and group_timestamp > sample.timestamp and typ != 'info':
raise ValueError("Timestamps went backwards within a group: " + line)
if not is_nh:
g = tuple(sorted(_group_for_sample(sample, name, typ).items()))
if group is not None and g != group and g in seen_groups:
raise ValueError("Invalid metric grouping: " + line)
if group is not None and g == group:
if (sample.timestamp is None) != (group_timestamp is None):
raise ValueError("Mix of timestamp presence within a group: " + line)
if group_timestamp is not None and group_timestamp > sample.timestamp and typ != 'info':
raise ValueError("Timestamps went backwards within a group: " + line)
else:
group_timestamp_samples = set()

series_id = (sample.name, tuple(sorted(sample.labels.items())))
if sample.timestamp != group_timestamp or series_id not in group_timestamp_samples:
# Not a duplicate due to timestamp truncation.
samples.append(sample)
group_timestamp_samples.add(series_id)

group = g
group_timestamp = sample.timestamp
seen_groups.add(g)
else:
group_timestamp_samples = set()

series_id = (sample.name, tuple(sorted(sample.labels.items())))
if sample.timestamp != group_timestamp or series_id not in group_timestamp_samples:
# Not a duplicate due to timestamp truncation.
samples.append(sample)
group_timestamp_samples.add(series_id)

group = g
group_timestamp = sample.timestamp
seen_groups.add(g)

if typ == 'stateset' and sample.value not in [0, 1]:
raise ValueError("Stateset samples can only have values zero and one: " + line)
Expand All @@ -606,7 +716,7 @@ def build_metric(name, documentation, typ, unit, samples):
(typ in ['histogram', 'gaugehistogram'] and sample.name.endswith('_bucket'))
or (typ in ['counter'] and sample.name.endswith('_total'))):
raise ValueError("Invalid line only histogram/gaugehistogram buckets and counters can have exemplars: " + line)

if name is not None:
yield build_metric(name, documentation, typ, unit, samples)

Expand Down
22 changes: 21 additions & 1 deletion prometheus_client/samples.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, NamedTuple, Optional, Union
from typing import Dict, NamedTuple, Optional, Sequence, Tuple, Union


class Timestamp:
Expand Down Expand Up @@ -34,6 +34,25 @@ def __lt__(self, other: "Timestamp") -> bool:
return self.nsec < other.nsec if self.sec == other.sec else self.sec < other.sec


# BucketSpan is experimental and subject to change at any time.
class BucketSpan(NamedTuple):
    """One ``offset:length`` span of a native histogram.

    NOTE(review): field semantics are presumed to follow Prometheus native
    histogram spans (offset = gap to the previous span or starting bucket
    index, length = number of consecutive buckets) - confirm against the spec.
    """
    offset: int
    length: int


# NativeHistogram is experimental and subject to change at any time.
class NativeHistogram(NamedTuple):
    """Value of a native histogram sample.

    The span and delta fields are optional and default to ``None``.
    """
    count_value: float
    sum_value: float
    schema: int
    zero_threshold: float
    zero_count: float
    # Any number of spans is allowed, not exactly two; a 2-tuple of
    # BucketSpan still satisfies this annotation.
    pos_spans: Optional[Sequence[BucketSpan]] = None
    neg_spans: Optional[Sequence[BucketSpan]] = None
    pos_deltas: Optional[Sequence[int]] = None
    neg_deltas: Optional[Sequence[int]] = None


# Timestamp and exemplar are optional.
# Value can be an int or a float.
# Timestamp can be a float containing a unixtime in seconds,
Expand All @@ -51,3 +70,4 @@ class Sample(NamedTuple):
value: float
timestamp: Optional[Union[float, Timestamp]] = None
exemplar: Optional[Exemplar] = None
native_histogram: Optional[NativeHistogram] = None
Loading

0 comments on commit d7c9cd8

Please sign in to comment.