[receiver/splunkenterprise] fixed flaky search for avg iops metrics #35082

Merged (2 commits, Sep 11, 2024)
27 changes: 27 additions & 0 deletions .chloggen/35081-fix-iops-search.yaml
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: 'bug_fix'

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: splunkenterprise

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Fix a flaky search for iops metrics by removing the hard-coded data.mount_point filter.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [35081]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
2 changes: 1 addition & 1 deletion receiver/splunkenterprisereceiver/search_result.go
@@ -12,7 +12,7 @@ var searchDict = map[string]string{
`SplunkSchedulerAvgRunTime`: `search=search earliest=-10m latest=now index=_internal host=* sourcetype=scheduler (status="completed" OR status="skipped" OR status="deferred" OR status="success") | eval runTime = avg(run_time) | stats avg(runTime) AS runTime by host | eval host = if(isnull(host), "(UNKNOWN)", host) | eval run_time_avg = round(runTime, 2) | fields host, run_time_avg`,
`SplunkIndexerRawWriteSeconds`: `search=search earliest=-10m latest=now index=_internal host=* source=*metrics.log sourcetype=splunkd group=pipeline name=indexerpipe processor=indexer | eval ingest_pipe = if(isnotnull(ingest_pipe), ingest_pipe, "none") | search ingest_pipe=* | stats sum(write_cpu_seconds) AS "raw_data_write_seconds" by host | fields host, raw_data_write_seconds`,
`SplunkIndexerCpuSeconds`: `search=search earliest=-10m latest=now index=_internal host=* source=*metrics.log sourcetype=splunkd group=pipeline name=indexerpipe processor=indexer | eval ingest_pipe = if(isnotnull(ingest_pipe), ingest_pipe, "none") | search ingest_pipe=* | stats sum(service_cpu_seconds) AS "service_cpu_seconds" by host | fields host, service_cpu_seconds`,
- `SplunkIoAvgIops`: `search=search earliest=-10m latest=now index=_introspection sourcetype=splunk_resource_usage component=IOStats host=* | eval mount_point = 'data.mount_point' | eval reads_ps = 'data.reads_ps' | eval writes_ps = 'data.writes_ps' | eval interval = 'data.interval' | eval total_io = reads_ps %2B writes_ps| eval op_count = (interval * total_io)| search data.mount_point="/opt/splunk/var" | stats avg(op_count) as iops by host| eval iops = round(iops) | fields host, iops`,
+ `SplunkIoAvgIops`: `search=search earliest=-10m latest=now index=_introspection sourcetype=splunk_resource_usage component=IOStats host=* | eval mount_point = 'data.mount_point' | eval reads_ps = 'data.reads_ps' | eval writes_ps = 'data.writes_ps' | eval interval = 'data.interval' | eval total_io = reads_ps %2B writes_ps| eval op_count = (interval * total_io)| stats avg(op_count) as iops by host| eval iops = round(iops) | fields host, iops`,
`SplunkPipelineQueues`: `search=search earliest=-10m latest=now index=_telemetry | stats count(index) | appendcols [| rest splunk_server_group=dmc_group_indexer splunk_server_group="dmc_group_indexer" /services/server/introspection/queues | search title=parsingQueue* OR title=aggQueue* OR title=typingQueue* OR title=indexQueue* | eval fill_perc=round(current_size_bytes / max_size_bytes * 100,2) | fields splunk_server, title, fill_perc | rex field=title %22%28%3F%3Cqueue_name%3E%5E%5Cw%2B%29%28%3F%3A%5C.%28%3F%3Cpipeline_number%3E%5Cd%2B%29%29%3F%22 | eval fill_perc = if(isnotnull(pipeline_number), "pset".pipeline_number.": ".fill_perc, fill_perc) | chart values(fill_perc) over splunk_server by queue_name | eval pset_count = mvcount(parsingQueue)] | eval host = splunk_server | stats sum(pset_count) as "pipeline_sets", sum(parsingQueue) as "parse_queue_ratio", sum(aggQueue) as "agg_queue_ratio", sum(typingQueue) as "typing_queue_ratio", sum(indexQueue) as "index_queue_ratio" by host | fields host, pipeline_sets, parse_queue_ratio, agg_queue_ratio, typing_queue_ratio, index_queue_ratio`,
`SplunkBucketsSearchableStatus`: `search=search earliest=-10m latest=now index=_telemetry | stats count(index) | appendcols [| rest splunk_server_group=dmc_group_cluster_master splunk_server_group=* /services/cluster/master/peers | eval splunk_server = label | fields splunk_server, label, is_searchable, status, site, bucket_count, host_port_pair, last_heartbeat, replication_port, base_generation_id, title, bucket_count_by_index.* | eval is_searchable = if(is_searchable == 1 or is_searchable == "1", "Yes", "No")] | sort - last_heartbeat | search label="***" | search is_searchable="*" | search status="*" | search site="*" | eval host = splunk_server | stats values(is_searchable) as is_searchable, values(status) as status, avg(bucket_count) as bucket_count by host | fields host, is_searchable, status, bucket_count`,
`SplunkIndexesData`: `search=search earliest=-10m latest=now index=_telemetry | stats count(index) | appendcols [| rest splunk_server_group=dmc_group_indexer splunk_server_group="*" /services/data/indexes] | join title splunk_server type=outer [ rest splunk_server_group=dmc_group_indexer splunk_server_group="*" /services/data/indexes-extended ] | eval elapsedTime = now() - strptime(minTime,"%25Y-%25m-%25dT%25H%3A%25M%3A%25S%25z") | eval dataAge = ceiling(elapsedTime / 86400) | eval indexSizeGB = if(currentDBSizeMB >= 1 AND totalEventCount >=1, currentDBSizeMB/1024, null()) | eval maxSizeGB = maxTotalDataSizeMB / 1024 | eval sizeUsagePerc = indexSizeGB / maxSizeGB * 100 | stats dc(splunk_server) AS splunk_server_count count(indexSizeGB) as "non_empty_instances" sum(indexSizeGB) AS total_size_gb avg(indexSizeGB) as average_size_gb avg(sizeUsagePerc) as average_usage_perc median(dataAge) as median_data_age max(dataAge) as oldest_data_age latest(bucket_dirs.home.warm_bucket_count) as warm_bucket_count latest(bucket_dirs.home.hot_bucket_count) as hot_bucket_count by title, datatype | eval warm_bucket_count = if(isnotnull(warm_bucket_count), warm_bucket_count, 0)| eval hot_bucket_count = if(isnotnull(hot_bucket_count), hot_bucket_count, 0)| eval bucket_count = (warm_bucket_count %2B hot_bucket_count)| eval total_size_gb = if(isnotnull(total_size_gb), round(total_size_gb, 2), 0) | eval average_size_gb = if(isnotnull(average_size_gb), round(average_size_gb, 2), 0) | eval average_usage_perc = if(isnotnull(average_usage_perc), round(average_usage_perc, 2), 0) | eval median_data_age = if(isNum(median_data_age), median_data_age, 0) | eval oldest_data_age = if(isNum(oldest_data_age), oldest_data_age, 0) | fields title splunk_server_count non_empty_instances total_size_gb average_size_gb average_usage_perc median_data_age bucket_count warm_bucket_count hot_bucket_count`,
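
The behavioral change above is confined to the SplunkIoAvgIops entry: the fix drops the trailing | search data.mount_point="/opt/splunk/var" clause, so the per-host IOPS average is computed across all mount points rather than a single hard-coded path, which presumably matched no events on deployments without that mount point and left the search returning empty (flaky) results.

A note on reading these entries: the query strings are percent-encoded because they are sent as a form-encoded search body to Splunk's REST API, which is why the SPL addition reads_ps + writes_ps appears as reads_ps %2B writes_ps. The following minimal Go sketch (illustrative only, not part of the receiver) decodes an abridged version of the fixed query back into readable SPL:

package main

import (
	"fmt"
	"net/url"
	"strings"
)

func main() {
	// Abridged form of the SplunkIoAvgIops entry above (illustrative, not the
	// full query). A literal "+" in a form-encoded body would decode as a
	// space, so the SPL addition operator is sent as %2B.
	raw := `search=search earliest=-10m latest=now index=_introspection ` +
		`sourcetype=splunk_resource_usage component=IOStats host=* ` +
		`| eval total_io = reads_ps %2B writes_ps ` +
		`| eval op_count = (interval * total_io) ` +
		`| stats avg(op_count) as iops by host`

	// Strip the form key and decode the percent-escapes to recover plain SPL.
	spl, err := url.QueryUnescape(strings.TrimPrefix(raw, "search="))
	if err != nil {
		panic(err)
	}
	fmt.Println(spl) // prints the query with "reads_ps + writes_ps" restored
}

Running it prints the query in the form you would paste into the Splunk search UI when reproducing or debugging one of these searches by hand.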