Metrics now have expiration if not updated

This commit is contained in:
Jan Lošťák
2025-02-22 21:24:03 +01:00
parent 2ed310eef7
commit f78df9d3e3
10 changed files with 455 additions and 232 deletions

139
metrics/pve_ttl_metrics.go Normal file
View File

@@ -0,0 +1,139 @@
package metrics
import (
"fmt"
"sort"
"strings"
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
// TTLMetric is an interface for metrics that support time-to-live cleanup.
// Any metric that implements Cleanup() can be registered in the TTLRegistry.
type TTLMetric interface {
// Cleanup removes stale metric label sets that have exceeded their TTL.
Cleanup()
}
// TTLGaugeVec wraps a Prometheus GaugeVec and tracks the last update time
// for each set of labels. When a set of labels is not updated within the TTL,
// it is automatically removed from the underlying GaugeVec.
type TTLGaugeVec struct {
gaugeVec *prometheus.GaugeVec // Underlying Prometheus GaugeVec.
ttl time.Duration // Duration after which an unused label set is considered stale.
lastUpdate sync.Map // Map storing last update time for each label set (key is a sorted labels string).
}
// NewTTLGaugeVec creates a new TTLGaugeVec using the provided GaugeOpts, label names, and TTL.
// The underlying GaugeVec is registered using promauto.
func NewTTLGaugeVec(opts prometheus.GaugeOpts, labelNames []string, ttl time.Duration) *TTLGaugeVec {
return &TTLGaugeVec{
gaugeVec: promauto.NewGaugeVec(opts, labelNames),
ttl: ttl,
}
}
// With returns the gauge for the given label set and records the current time
// as the last update for those labels.
func (t *TTLGaugeVec) With(labels prometheus.Labels) prometheus.Gauge {
key := labelsKey(labels)
t.lastUpdate.Store(key, time.Now())
return t.gaugeVec.With(labels)
}
// Delete removes the metric associated with the given label set from both the underlying
// GaugeVec and the lastUpdate tracking map. It returns true if the deletion was successful.
func (t *TTLGaugeVec) Delete(labels prometheus.Labels) bool {
key := labelsKey(labels)
t.lastUpdate.Delete(key)
return t.gaugeVec.Delete(labels)
}
// Cleanup iterates over all tracked label sets and deletes those that have not been updated
// within the TTL duration.
func (t *TTLGaugeVec) Cleanup() {
now := time.Now()
t.lastUpdate.Range(func(key, value interface{}) bool {
if last, ok := value.(time.Time); ok {
if now.Sub(last) > t.ttl {
labels := parseLabels(key.(string))
t.gaugeVec.Delete(labels)
t.lastUpdate.Delete(key)
}
}
return true
})
}
// labelsKey creates a deterministic key from a Prometheus labels map.
// It sorts the keys and concatenates them in the format "key=value" separated by commas.
func labelsKey(labels prometheus.Labels) string {
var keys []string
for k := range labels {
keys = append(keys, k)
}
sort.Strings(keys)
var parts []string
for _, k := range keys {
parts = append(parts, fmt.Sprintf("%s=%s", k, labels[k]))
}
return strings.Join(parts, ",")
}
// parseLabels converts a sorted key string back into a Prometheus labels map.
// The key is expected to be in the format produced by labelsKey.
func parseLabels(key string) prometheus.Labels {
labels := prometheus.Labels{}
parts := strings.Split(key, ",")
for _, part := range parts {
kv := strings.SplitN(part, "=", 2)
if len(kv) == 2 {
labels[kv[0]] = kv[1]
}
}
return labels
}
// TTLRegistry manages multiple TTLMetric instances and periodically cleans them up.
type TTLRegistry struct {
mu sync.RWMutex
metrics []TTLMetric
}
// NewTTLRegistry creates and returns a new TTLRegistry.
func NewTTLRegistry() *TTLRegistry {
return &TTLRegistry{
metrics: make([]TTLMetric, 0),
}
}
// Register adds a TTLMetric to the registry for periodic cleanup.
func (r *TTLRegistry) Register(metric TTLMetric) {
r.mu.Lock()
defer r.mu.Unlock()
r.metrics = append(r.metrics, metric)
}
// Cleanup calls the Cleanup method on each registered TTLMetric.
func (r *TTLRegistry) Cleanup() {
r.mu.RLock()
defer r.mu.RUnlock()
for _, metric := range r.metrics {
metric.Cleanup()
}
}
// StartCleanupLoop starts a background goroutine that periodically cleans up stale metrics.
// The cleanup is performed at the specified interval.
func (r *TTLRegistry) StartCleanupLoop(interval time.Duration) {
go func() {
ticker := time.NewTicker(interval)
defer ticker.Stop()
for range ticker.C {
r.Cleanup()
}
}()
}