Files
pve-exporter/metrics/pve_node_status_collector.go
Jan Lošťák a1ab163804 Initial commit
2024-05-27 21:27:07 +02:00

326 lines
9.0 KiB
Go

package metrics
import (
"strconv"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
log "github.com/sirupsen/logrus"
"lostak.dev/pve-exporter/proxmox"
)
// PveNodeStatusCollector gathers per-node status data through the PVE API
// client and publishes it as Prometheus gauges.
type PveNodeStatusCollector struct {
	// PVE API client instance used to query cluster and node data.
	apiClient *proxmox.PveApiClient

	// Node state and node uptime (in seconds) gauges.
	state, uptime *prometheus.GaugeVec
	// Node CPU count and node CPU usage (in percent) gauges.
	cpus, cpuUsage *prometheus.GaugeVec
	// Node RAM gauges in bytes: total capacity, used and free.
	memBytes, memBytesUsed, memBytesFree *prometheus.GaugeVec
	// Node kernel samepage shared memory gauge, in bytes.
	ksmShared *prometheus.GaugeVec
	// Node cgroup mode gauge.
	cgroupMode *prometheus.GaugeVec
	// Node load average gauges for the 1, 5 and 15 minute windows.
	load1, load5, load15 *prometheus.GaugeVec
	// Node root filesystem gauges in bytes: free, used, total and available.
	fSFree, fSUsed, fSTotal, fSAvail *prometheus.GaugeVec
	// Node CPU info and node system info gauges; the details are carried in labels.
	cpuInfo, systemInfo *prometheus.GaugeVec
	// Node time and node local time gauges.
	time, localTime *prometheus.GaugeVec
}
// NewPveNodeStatusCollector creates a node status collector that queries the
// given PVE API client. All gauges are created with promauto.NewGaugeVec and
// carry at least the "cluster" and "node" labels.
func NewPveNodeStatusCollector(apiClient *proxmox.PveApiClient) *PveNodeStatusCollector {
	// newGauge builds a gauge vector labelled by cluster and node, followed by
	// any metric-specific extra labels.
	newGauge := func(name, help string, extraLabels ...string) *prometheus.GaugeVec {
		labelNames := append([]string{"cluster", "node"}, extraLabels...)
		return promauto.NewGaugeVec(prometheus.GaugeOpts{Name: name, Help: help}, labelNames)
	}

	c := &PveNodeStatusCollector{apiClient: apiClient}

	// General node state.
	c.state = newGauge("pve_node_state", "Node state.")
	c.uptime = newGauge("pve_node_uptime", "Node uptime.")

	// CPU count and usage.
	c.cpus = newGauge("pve_node_cpu_count", "Node CPU count.")
	c.cpuUsage = newGauge("pve_node_cpu_usage", "Cluster node CPU usage %.")

	// Memory.
	c.memBytes = newGauge("pve_node_memory_total_bytes", "Node total memory in bytes.")
	c.memBytesUsed = newGauge("pve_node_memory_used_bytes", "Node used memory in bytes.")
	c.memBytesFree = newGauge("pve_node_memory_free_bytes", "Node free memory in bytes.")
	c.ksmShared = newGauge("pve_node_ksm_bytes", "Node kernel samepage shares in bytes.")

	// Cgroup mode.
	c.cgroupMode = newGauge("pve_node_cgroup_mode", "Node cgroup mode.")

	// Load averages.
	c.load1 = newGauge("pve_node_load1", "Node CPU load 1 minute average.")
	c.load5 = newGauge("pve_node_load5", "Node CPU load 5 minutes average.")
	c.load15 = newGauge("pve_node_load15", "Node CPU load 15 minutes average.")

	// Root filesystem.
	c.fSFree = newGauge("pve_node_rootfs_free_bytes", "Node RootFS free bytes.")
	c.fSUsed = newGauge("pve_node_rootfs_used_bytes", "Node root filesystem used bytes.")
	c.fSTotal = newGauge("pve_node_rootfs_total_bytes", "Node root filesystem total bytes.")
	c.fSAvail = newGauge("pve_node_rootfs_avail_bytes", "Node root filesystem avail bytes.")

	// Info gauges: the descriptive values travel in the extra labels.
	c.cpuInfo = newGauge("pve_node_cpuinfo", "Node CPU info.",
		"flags", "cores", "model", "sockets", "cpus", "hvm")
	c.systemInfo = newGauge("pve_node_systeminfo", "Node system info.",
		"kversion", "pveversion", "machine", "sysname", "release")

	// Node clocks.
	c.time = newGauge("pve_node_time", "Node time.")
	c.localTime = newGauge("pve_node_localtime", "Node localtime.")

	return c
}
// CollectMetrics implements the PveMetricsCollector interface.
//
// It fetches the cluster status and then, for every node listed there, the
// node time and the detailed node status, and updates the corresponding
// gauges. The first API error aborts the run and is returned unchanged, so
// nodes after the failing one are not updated in that run. Load average
// parsing problems are only logged and never fail the collection.
//
// NOTE(review): the cpuUsage and cgroupMode gauges are registered by the
// constructor but are not populated here; the status fields that would feed
// them are not visible in this file — confirm and wire them up.
func (c *PveNodeStatusCollector) CollectMetrics() error {
	cluster, err := c.apiClient.GetClusterStatus()
	if err != nil {
		return err
	}

	// Load average gauges, in the order their values are read from
	// status.LoadAvg below (index 0 -> load1, 1 -> load5, 2 -> load15).
	loadGauges := []struct {
		name  string
		gauge *prometheus.GaugeVec
	}{
		{"load1", c.load1},
		{"load5", c.load5},
		{"load15", c.load15},
	}

	for _, node := range cluster.NodeStatuses {
		clusterName := cluster.GetClusterName()
		labels := prometheus.Labels{
			"cluster": clusterName,
			"node":    node.Name,
		}

		// Node clocks.
		nodeTime, err := c.apiClient.GetNodeTime(node.Name)
		if err != nil {
			return err
		}
		c.time.With(labels).Set(float64(nodeTime.Time))
		c.localTime.With(labels).Set(float64(nodeTime.LocalTime))

		// Detailed node status.
		status, err := c.apiClient.GetNodeStatusDetail(node.Name)
		if err != nil {
			return err
		}

		c.state.With(labels).Set(float64(node.Online))
		c.uptime.With(labels).Set(float64(status.Uptime))
		// The CPU count gauge was previously registered but never set.
		c.cpus.With(labels).Set(float64(status.CPUInfo.CPUs))
		c.memBytes.With(labels).Set(float64(status.Memory.Total))
		c.memBytesUsed.With(labels).Set(float64(status.Memory.Used))
		c.memBytesFree.With(labels).Set(float64(status.Memory.Free))
		c.ksmShared.With(labels).Set(float64(status.Ksm.Shared))
		c.fSFree.With(labels).Set(float64(status.Rootfs.Free))
		c.fSUsed.With(labels).Set(float64(status.Rootfs.Used))
		c.fSTotal.With(labels).Set(float64(status.Rootfs.Total))
		c.fSAvail.With(labels).Set(float64(status.Rootfs.Avail))

		// CPU load averages. Every index is bounds-checked so that a short
		// list is reported instead of causing an index-out-of-range panic.
		if n := len(status.LoadAvg); n == 0 {
			log.Error("CPU load stats are empty.")
		} else if n < len(loadGauges) {
			log.Errorf("CPU load stats are incomplete: expected %d values, got %d.", len(loadGauges), n)
		}
		for i, lg := range loadGauges {
			if i >= len(status.LoadAvg) {
				break
			}
			value, err := strconv.ParseFloat(status.LoadAvg[i], 64)
			if err != nil {
				log.Errorf("Unable to parse %s. Error: %s.", lg.name, err)
				continue
			}
			lg.gauge.With(labels).Set(value)
		}

		// Node CPU info: constant 1, the details are carried in the labels.
		cpuLabels := prometheus.Labels{
			"cluster": clusterName,
			"node":    node.Name,
			"flags":   status.CPUInfo.Flags,
			"cores":   strconv.Itoa(status.CPUInfo.Cores),
			"model":   status.CPUInfo.Model,
			"sockets": strconv.Itoa(status.CPUInfo.Sockets),
			"cpus":    strconv.Itoa(status.CPUInfo.CPUs),
			"hvm":     status.CPUInfo.HVM,
		}
		c.cpuInfo.With(cpuLabels).Set(1)

		// Node system info: constant 1, the details are carried in the labels.
		sysLabels := prometheus.Labels{
			"cluster":    clusterName,
			"node":       node.Name,
			"kversion":   status.Kversion,
			"pveversion": status.PveVersion,
			"machine":    status.CurrentKernel.Machine,
			"sysname":    status.CurrentKernel.Sysname,
			"release":    status.CurrentKernel.Release,
		}
		c.systemInfo.With(sysLabels).Set(1)
	}

	return nil
}
// GetName returns the display name of this collector.
// It is part of the PveMetricsCollector interface implementation.
func (c *PveNodeStatusCollector) GetName() string {
	const collectorName = "Node State"
	return collectorName
}