Metrics now have expiration if not updated
This commit is contained in:
@@ -2,9 +2,9 @@ package metrics
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"lostak.dev/pve-exporter/proxmox"
|
||||
)
|
||||
@@ -12,212 +12,254 @@ import (
|
||||
// PveNodeStatusCollector collects PVE node status metrics.
|
||||
type PveNodeStatusCollector struct {
|
||||
apiClient *proxmox.PveApiClient // PVE API client instance.
|
||||
registry *TTLRegistry // TTL metrics registry.
|
||||
|
||||
state *prometheus.GaugeVec // Node state prometheus gauge.
|
||||
uptime *prometheus.GaugeVec // Node uptime in seconds prometheus gauge.
|
||||
cpus *prometheus.GaugeVec // Node CPU count prometheus gauge.
|
||||
cpuUsage *prometheus.GaugeVec // Node CPU usage in percent prometheus gauge.
|
||||
memBytes *prometheus.GaugeVec // Node total RAM capacity in bytes prometheus gauge.
|
||||
memBytesUsed *prometheus.GaugeVec // Node RAM usage in bytes prometheus gauge.
|
||||
memBytesFree *prometheus.GaugeVec // Node RAM free in bytes prometheus gauge.
|
||||
ksmShared *prometheus.GaugeVec // Node Kernel samepage shared in bytes prometheus gauge.
|
||||
cgroupMode *prometheus.GaugeVec // Node CGroups mode prometheus gauge.
|
||||
load1 *prometheus.GaugeVec // Node load1 unix like (CPU seconds) prometheus gauge.
|
||||
load5 *prometheus.GaugeVec // Node load5 unix like (CPU seconds) prometheus gauge.
|
||||
load15 *prometheus.GaugeVec // Node load15 unix like (CPU seconds) prometheus gauge.
|
||||
fSFree *prometheus.GaugeVec // Node filesystem free space in bytes prometheus gauge.
|
||||
fSUsed *prometheus.GaugeVec // Node filesystem used space in bytes prometheus gauge.
|
||||
fSTotal *prometheus.GaugeVec // Node filesystem total space in bytes prometheus gauge.
|
||||
fSAvail *prometheus.GaugeVec // Node filesystem available capacity in bytes prometheus gauge.
|
||||
cpuInfo *prometheus.GaugeVec // Node CPU info prometheus gauge.
|
||||
systemInfo *prometheus.GaugeVec // Node system info prometheus gauge.
|
||||
time *prometheus.GaugeVec // Node time prometheus gauge.
|
||||
localTime *prometheus.GaugeVec // Node localtime prometheus gauge.
|
||||
state *TTLGaugeVec // Node state prometheus gauge.
|
||||
uptime *TTLGaugeVec // Node uptime in seconds prometheus gauge.
|
||||
cpus *TTLGaugeVec // Node CPU count prometheus gauge.
|
||||
cpuUsage *TTLGaugeVec // Node CPU usage in percent prometheus gauge.
|
||||
memBytes *TTLGaugeVec // Node total RAM capacity in bytes prometheus gauge.
|
||||
memBytesUsed *TTLGaugeVec // Node RAM usage in bytes prometheus gauge.
|
||||
memBytesFree *TTLGaugeVec // Node RAM free in bytes prometheus gauge.
|
||||
ksmShared *TTLGaugeVec // Node Kernel samepage shared in bytes prometheus gauge.
|
||||
cgroupMode *TTLGaugeVec // Node CGroups mode prometheus gauge.
|
||||
load1 *TTLGaugeVec // Node load1 unix like (CPU seconds) prometheus gauge.
|
||||
load5 *TTLGaugeVec // Node load5 unix like (CPU seconds) prometheus gauge.
|
||||
load15 *TTLGaugeVec // Node load15 unix like (CPU seconds) prometheus gauge.
|
||||
fSFree *TTLGaugeVec // Node filesystem free space in bytes prometheus gauge.
|
||||
fSUsed *TTLGaugeVec // Node filesystem used space in bytes prometheus gauge.
|
||||
fSTotal *TTLGaugeVec // Node filesystem total space in bytes prometheus gauge.
|
||||
fSAvail *TTLGaugeVec // Node filesystem available capacity in bytes prometheus gauge.
|
||||
cpuInfo *TTLGaugeVec // Node CPU info prometheus gauge.
|
||||
systemInfo *TTLGaugeVec // Node system info prometheus gauge.
|
||||
time *TTLGaugeVec // Node time prometheus gauge.
|
||||
localTime *TTLGaugeVec // Node localtime prometheus gauge.
|
||||
}
|
||||
|
||||
// NewPveNodeStatusCollector creates a new instance of the PVE node status collector.
|
||||
func NewPveNodeStatusCollector(apiClient *proxmox.PveApiClient) *PveNodeStatusCollector {
|
||||
func NewPveNodeStatusCollector(apiClient *proxmox.PveApiClient, registry *TTLRegistry) *PveNodeStatusCollector {
|
||||
c := PveNodeStatusCollector{apiClient: apiClient}
|
||||
c.registry = registry
|
||||
|
||||
// Node state.
|
||||
c.state = promauto.NewGaugeVec(
|
||||
c.state = NewTTLGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "pve_node_state",
|
||||
Help: "Node state.",
|
||||
},
|
||||
[]string{"cluster", "node"},
|
||||
1*time.Minute,
|
||||
)
|
||||
c.registry.Register(c.state)
|
||||
|
||||
// Node uptime.
|
||||
c.uptime = promauto.NewGaugeVec(
|
||||
c.uptime = NewTTLGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "pve_node_uptime",
|
||||
Help: "Node uptime.",
|
||||
},
|
||||
[]string{"cluster", "node"},
|
||||
1*time.Minute,
|
||||
)
|
||||
c.registry.Register(c.uptime)
|
||||
|
||||
// Node CPU count.
|
||||
c.cpus = promauto.NewGaugeVec(
|
||||
c.cpus = NewTTLGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "pve_node_cpu_count",
|
||||
Help: "Node CPU count.",
|
||||
},
|
||||
[]string{"cluster", "node"},
|
||||
1*time.Minute,
|
||||
)
|
||||
c.registry.Register(c.cpus)
|
||||
|
||||
// Node CPU usage.
|
||||
c.cpuUsage = promauto.NewGaugeVec(
|
||||
c.cpuUsage = NewTTLGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "pve_node_cpu_usage",
|
||||
Help: "Cluster node CPU usage %.",
|
||||
},
|
||||
[]string{"cluster", "node"},
|
||||
1*time.Minute,
|
||||
)
|
||||
c.registry.Register(c.cpuUsage)
|
||||
|
||||
// Node total memory in bytes.
|
||||
c.memBytes = promauto.NewGaugeVec(
|
||||
c.memBytes = NewTTLGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "pve_node_memory_total_bytes",
|
||||
Help: "Node total memory in bytes.",
|
||||
},
|
||||
[]string{"cluster", "node"},
|
||||
1*time.Minute,
|
||||
)
|
||||
c.registry.Register(c.memBytes)
|
||||
|
||||
// Node memory used in bytes.
|
||||
c.memBytesUsed = promauto.NewGaugeVec(
|
||||
c.memBytesUsed = NewTTLGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "pve_node_memory_used_bytes",
|
||||
Help: "Node used memory in bytes.",
|
||||
},
|
||||
[]string{"cluster", "node"},
|
||||
1*time.Minute,
|
||||
)
|
||||
c.registry.Register(c.memBytesUsed)
|
||||
|
||||
// Node memory free in bytes.
|
||||
c.memBytesFree = promauto.NewGaugeVec(
|
||||
c.memBytesFree = NewTTLGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "pve_node_memory_free_bytes",
|
||||
Help: "Node free memory in bytes.",
|
||||
},
|
||||
[]string{"cluster", "node"},
|
||||
1*time.Minute,
|
||||
)
|
||||
c.registry.Register(c.memBytesFree)
|
||||
|
||||
// Node kernel samepage merging (KSM) shared memory in bytes.
|
||||
c.ksmShared = promauto.NewGaugeVec(
|
||||
c.ksmShared = NewTTLGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "pve_node_ksm_bytes",
|
||||
Help: "Node kernel samepage shares in bytes.",
|
||||
},
|
||||
[]string{"cluster", "node"},
|
||||
1*time.Minute,
|
||||
)
|
||||
c.registry.Register(c.ksmShared)
|
||||
|
||||
// Node cgroup mode.
|
||||
c.cgroupMode = promauto.NewGaugeVec(
|
||||
c.cgroupMode = NewTTLGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "pve_node_cgroup_mode",
|
||||
Help: "Node cgroup mode.",
|
||||
},
|
||||
[]string{"cluster", "node"},
|
||||
1*time.Minute,
|
||||
)
|
||||
c.registry.Register(c.cgroupMode)
|
||||
|
||||
// Node load 1.
|
||||
c.load1 = promauto.NewGaugeVec(
|
||||
c.load1 = NewTTLGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "pve_node_load1",
|
||||
Help: "Node CPU load 1 minute average.",
|
||||
},
|
||||
[]string{"cluster", "node"},
|
||||
1*time.Minute,
|
||||
)
|
||||
c.registry.Register(c.load1)
|
||||
|
||||
// Node load 5.
|
||||
c.load5 = promauto.NewGaugeVec(
|
||||
c.load5 = NewTTLGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "pve_node_load5",
|
||||
Help: "Node CPU load 5 minutes average.",
|
||||
},
|
||||
[]string{"cluster", "node"},
|
||||
1*time.Minute,
|
||||
)
|
||||
c.registry.Register(c.load5)
|
||||
|
||||
// Node load 15.
|
||||
c.load15 = promauto.NewGaugeVec(
|
||||
c.load15 = NewTTLGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "pve_node_load15",
|
||||
Help: "Node CPU load 15 minutes average.",
|
||||
},
|
||||
[]string{"cluster", "node"},
|
||||
1*time.Minute,
|
||||
)
|
||||
c.registry.Register(c.load15)
|
||||
|
||||
// Node root filesystem free bytes.
|
||||
c.fSFree = promauto.NewGaugeVec(
|
||||
c.fSFree = NewTTLGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "pve_node_rootfs_free_bytes",
|
||||
Help: "Node RootFS free bytes.",
|
||||
},
|
||||
[]string{"cluster", "node"},
|
||||
1*time.Minute,
|
||||
)
|
||||
c.registry.Register(c.fSFree)
|
||||
|
||||
// Node root filesystem used bytes.
|
||||
c.fSUsed = promauto.NewGaugeVec(
|
||||
c.fSUsed = NewTTLGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "pve_node_rootfs_used_bytes",
|
||||
Help: "Node root filesystem used bytes.",
|
||||
},
|
||||
[]string{"cluster", "node"},
|
||||
1*time.Minute,
|
||||
)
|
||||
c.registry.Register(c.fSUsed)
|
||||
|
||||
// Node root filesystem total bytes.
|
||||
c.fSTotal = promauto.NewGaugeVec(
|
||||
c.fSTotal = NewTTLGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "pve_node_rootfs_total_bytes",
|
||||
Help: "Node root filesystem total bytes.",
|
||||
},
|
||||
[]string{"cluster", "node"},
|
||||
1*time.Minute,
|
||||
)
|
||||
c.registry.Register(c.fSTotal)
|
||||
|
||||
// Node root filesystem avail bytes.
|
||||
c.fSAvail = promauto.NewGaugeVec(
|
||||
c.fSAvail = NewTTLGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "pve_node_rootfs_avail_bytes",
|
||||
Help: "Node root filesystem avail bytes.",
|
||||
},
|
||||
[]string{"cluster", "node"},
|
||||
1*time.Minute,
|
||||
)
|
||||
c.registry.Register(c.fSAvail)
|
||||
|
||||
// Node CPU info.
|
||||
c.cpuInfo = promauto.NewGaugeVec(
|
||||
c.cpuInfo = NewTTLGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "pve_node_cpuinfo",
|
||||
Help: "Node CPU info.",
|
||||
},
|
||||
[]string{"cluster", "node", "flags", "cores", "model", "sockets", "cpus", "hvm"},
|
||||
1*time.Minute,
|
||||
)
|
||||
c.registry.Register(c.cpuInfo)
|
||||
|
||||
// Node system info metrics.
|
||||
c.systemInfo = promauto.NewGaugeVec(
|
||||
c.systemInfo = NewTTLGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "pve_node_systeminfo",
|
||||
Help: "Node system info.",
|
||||
},
|
||||
[]string{"cluster", "node", "kversion", "pveversion", "machine", "sysname", "release"},
|
||||
1*time.Minute,
|
||||
)
|
||||
c.registry.Register(c.systemInfo)
|
||||
|
||||
// Node time info.
|
||||
c.time = promauto.NewGaugeVec(
|
||||
c.time = NewTTLGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "pve_node_time",
|
||||
Help: "Node time.",
|
||||
},
|
||||
[]string{"cluster", "node"},
|
||||
1*time.Minute,
|
||||
)
|
||||
c.registry.Register(c.time)
|
||||
|
||||
// Node localtime info.
|
||||
c.localTime = promauto.NewGaugeVec(
|
||||
c.localTime = NewTTLGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "pve_node_localtime",
|
||||
Help: "Node localtime.",
|
||||
},
|
||||
[]string{"cluster", "node"},
|
||||
1*time.Minute,
|
||||
)
|
||||
c.registry.Register(c.localTime)
|
||||
|
||||
return &c
|
||||
}
|
||||
@@ -229,27 +271,6 @@ func (c *PveNodeStatusCollector) CollectMetrics() error {
|
||||
return err
|
||||
}
|
||||
|
||||
c.state.Reset()
|
||||
c.uptime.Reset()
|
||||
c.cpus.Reset()
|
||||
c.cpuUsage.Reset()
|
||||
c.memBytes.Reset()
|
||||
c.memBytesUsed.Reset()
|
||||
c.memBytesFree.Reset()
|
||||
c.ksmShared.Reset()
|
||||
c.cgroupMode.Reset()
|
||||
c.load1.Reset()
|
||||
c.load5.Reset()
|
||||
c.load15.Reset()
|
||||
c.fSFree.Reset()
|
||||
c.fSUsed.Reset()
|
||||
c.fSTotal.Reset()
|
||||
c.fSAvail.Reset()
|
||||
c.cpuInfo.Reset()
|
||||
c.systemInfo.Reset()
|
||||
c.time.Reset()
|
||||
c.localTime.Reset()
|
||||
|
||||
for _, node := range cluster.NodeStatuses {
|
||||
labels := prometheus.Labels{
|
||||
"cluster": cluster.GetClusterName(),
|
||||
|
||||
Reference in New Issue
Block a user