Metrics now have expiration if not updated

This commit is contained in:
Jan Lošťák
2025-02-22 21:24:03 +01:00
parent 2ed310eef7
commit f78df9d3e3
10 changed files with 455 additions and 232 deletions

View File

@@ -1,155 +1,184 @@
package metrics
import (
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"lostak.dev/pve-exporter/proxmox"
)
// PveContainerCollector collects PVE container metrics through the PVE API
// client and exposes them as gauge vectors labelled by cluster, node, vmid
// and name.
//
// NOTE(review): every gauge field below is listed twice, first as
// *prometheus.GaugeVec and then as *TTLGaugeVec. This looks like the removed
// and added lines of a diff shown together; presumably only the *TTLGaugeVec
// declarations are current — confirm against the real file.
type PveContainerCollector struct {
apiClient *proxmox.PveApiClient // PVE API client instance.
registry *TTLRegistry // TTL metrics registry the gauges are registered with.
state *prometheus.GaugeVec // Container state prometheus gauge.
uptime *prometheus.GaugeVec // Container uptime prometheus gauge.
state *TTLGaugeVec // Container state TTL gauge.
uptime *TTLGaugeVec // Container uptime TTL gauge.
cpu *prometheus.GaugeVec // Container count of CPUs prometheus gauge.
cpuUsage *prometheus.GaugeVec // Container CPU usage % prometheus gauge.
cpu *TTLGaugeVec // Container count of CPUs TTL gauge.
cpuUsage *TTLGaugeVec // Container CPU usage % TTL gauge.
memBytes *prometheus.GaugeVec // Container memory in bytes prometheus gauge.
memBytesUsed *prometheus.GaugeVec // Container memory usage in bytes prometheus gauge.
memBytes *TTLGaugeVec // Container memory in bytes TTL gauge.
memBytesUsed *TTLGaugeVec // Container memory usage in bytes TTL gauge.
netReceive *prometheus.GaugeVec // Container network RX in bytes prometheus gauge.
netTransmit *prometheus.GaugeVec // Container network TX in bytes prometheus gauge.
netReceive *TTLGaugeVec // Container network RX in bytes TTL gauge.
netTransmit *TTLGaugeVec // Container network TX in bytes TTL gauge.
diskWrite *prometheus.GaugeVec // Container disk written in bytes prometheus gauge.
diskRead *prometheus.GaugeVec // Container disk read in bytes prometheus gauge.
diskWrite *TTLGaugeVec // Container disk written in bytes TTL gauge.
diskRead *TTLGaugeVec // Container disk read in bytes TTL gauge.
disk *prometheus.GaugeVec // Container disk space usage in bytes prometheus gauge.
diskMax *prometheus.GaugeVec // Container disk size in bytes prometheus gauge.
swap *prometheus.GaugeVec // Container swap usage in bytes prometheus gauge.
disk *TTLGaugeVec // Container disk space usage in bytes TTL gauge.
diskMax *TTLGaugeVec // Container disk size in bytes TTL gauge.
swap *TTLGaugeVec // Container swap usage in bytes TTL gauge.
}
// NewPveContainerCollector creates a PVE container collector bound to the
// given API client and TTL registry. Each container gauge is labelled with
// cluster, node, vmid and name, is created with a one-minute duration argument
// (presumably the expiry TTL — confirm against NewTTLGaugeVec) and is then
// registered with the supplied registry.
//
// NOTE(review): this listing shows the pre-change lines (single-argument
// signature, promauto.NewGaugeVec) together with their replacements (registry
// parameter, NewTTLGaugeVec); presumably only the latter are current — confirm
// against the real file.
func NewPveContainerCollector(apiClient *proxmox.PveApiClient) *PveContainerCollector {
func NewPveContainerCollector(apiClient *proxmox.PveApiClient, registry *TTLRegistry) *PveContainerCollector {
c := PveContainerCollector{apiClient: apiClient}
c.registry = registry
// Container state.
c.state = promauto.NewGaugeVec(
c.state = NewTTLGaugeVec(
prometheus.GaugeOpts{
Name: "pve_ct_state",
Help: "Container state.",
},
[]string{"cluster", "node", "vmid", "name"},
1*time.Minute,
)
c.registry.Register(c.state)
// Container uptime.
c.uptime = promauto.NewGaugeVec(
c.uptime = NewTTLGaugeVec(
prometheus.GaugeOpts{
Name: "pve_ct_uptime",
Help: "Container uptime.",
},
[]string{"cluster", "node", "vmid", "name"},
1*time.Minute,
)
c.registry.Register(c.uptime)
// Container CPU count.
c.cpu = promauto.NewGaugeVec(
c.cpu = NewTTLGaugeVec(
prometheus.GaugeOpts{
Name: "pve_ct_cpu_count",
Help: "Container CPU count.",
},
[]string{"cluster", "node", "vmid", "name"},
1*time.Minute,
)
c.registry.Register(c.cpu)
// Container CPU usage.
c.cpuUsage = promauto.NewGaugeVec(
c.cpuUsage = NewTTLGaugeVec(
prometheus.GaugeOpts{
Name: "pve_ct_cpu_usage",
Help: "Container CPU usage.",
},
[]string{"cluster", "node", "vmid", "name"},
1*time.Minute,
)
c.registry.Register(c.cpuUsage)
// Container memory total.
c.memBytes = promauto.NewGaugeVec(
c.memBytes = NewTTLGaugeVec(
prometheus.GaugeOpts{
Name: "pve_ct_mem_total_bytes",
Help: "Container total memory in bytes.",
},
[]string{"cluster", "node", "vmid", "name"},
1*time.Minute,
)
c.registry.Register(c.memBytes)
// Container memory usage.
c.memBytesUsed = promauto.NewGaugeVec(
c.memBytesUsed = NewTTLGaugeVec(
prometheus.GaugeOpts{
Name: "pve_ct_mem_used_bytes",
Help: "Container used memory in bytes.",
},
[]string{"cluster", "node", "vmid", "name"},
1*time.Minute,
)
c.registry.Register(c.memBytesUsed)
// Container network RX.
c.netReceive = promauto.NewGaugeVec(
c.netReceive = NewTTLGaugeVec(
prometheus.GaugeOpts{
Name: "pve_ct_network_in_bytes",
Help: "Container network RX bytes.",
},
[]string{"cluster", "node", "vmid", "name"},
1*time.Minute,
)
c.registry.Register(c.netReceive)
// Container network TX.
c.netTransmit = promauto.NewGaugeVec(
c.netTransmit = NewTTLGaugeVec(
prometheus.GaugeOpts{
Name: "pve_ct_network_out_bytes",
Help: "Container network TX bytes.",
},
[]string{"cluster", "node", "vmid", "name"},
1*time.Minute,
)
c.registry.Register(c.netTransmit)
// Container disk written.
c.diskWrite = promauto.NewGaugeVec(
c.diskWrite = NewTTLGaugeVec(
prometheus.GaugeOpts{
Name: "pve_ct_disk_wr_bytes",
Help: "Container disk written bytes.",
},
[]string{"cluster", "node", "vmid", "name"},
1*time.Minute,
)
c.registry.Register(c.diskWrite)
// Container disk read.
c.diskRead = promauto.NewGaugeVec(
c.diskRead = NewTTLGaugeVec(
prometheus.GaugeOpts{
Name: "pve_ct_disk_rd_bytes",
Help: "Container disk read bytes.",
},
[]string{"cluster", "node", "vmid", "name"},
1*time.Minute,
)
c.registry.Register(c.diskRead)
// Container disk usage.
c.disk = promauto.NewGaugeVec(
c.disk = NewTTLGaugeVec(
prometheus.GaugeOpts{
Name: "pve_ct_disk_usage_bytes",
// The help text used to repeat the disk-read description; this gauge
// reports disk space usage.
Help: "Container disk usage bytes.",
},
[]string{"cluster", "node", "vmid", "name"},
1*time.Minute,
)
c.registry.Register(c.disk)
// Container disk size.
c.diskMax = promauto.NewGaugeVec(
c.diskMax = NewTTLGaugeVec(
prometheus.GaugeOpts{
Name: "pve_ct_disk_size_bytes",
Help: "Container disk size bytes.",
},
[]string{"cluster", "node", "vmid", "name"},
1*time.Minute,
)
c.registry.Register(c.diskMax)
// Container swap usage.
c.swap = promauto.NewGaugeVec(
c.swap = NewTTLGaugeVec(
prometheus.GaugeOpts{
Name: "pve_ct_swap_used_bytes",
Help: "Container swap usage bytes.",
},
[]string{"cluster", "node", "vmid", "name"},
1*time.Minute,
)
c.registry.Register(c.swap)
return &c
}
@@ -161,20 +190,6 @@ func (c *PveContainerCollector) CollectMetrics() error {
return err
}
c.state.Reset()
c.cpu.Reset()
c.memBytes.Reset()
c.diskMax.Reset()
c.uptime.Reset()
c.cpuUsage.Reset()
c.memBytesUsed.Reset()
c.netReceive.Reset()
c.netTransmit.Reset()
c.diskRead.Reset()
c.diskWrite.Reset()
c.disk.Reset()
c.swap.Reset()
for _, node := range cluster.NodeStatuses {
containers, err := c.apiClient.GetNodeContainerList(node.Name)
if err != nil {