package metrics

import (
	"strconv"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	log "github.com/sirupsen/logrus"
	"lostak.dev/pve-exporter/proxmox"
)

// PveNodeStatusCollector is the PVE node status collector. It holds the PVE
// API client together with one Prometheus gauge vector per exported node
// metric.
type PveNodeStatusCollector struct {
	apiClient *proxmox.PveApiClient // PVE API client instance.

	state        *prometheus.GaugeVec // Node state gauge (fed from the node's Online value).
	uptime       *prometheus.GaugeVec // Node uptime gauge.
	cpus         *prometheus.GaugeVec // Node CPU count gauge.
	cpuUsage     *prometheus.GaugeVec // Node CPU usage gauge.
	memBytes     *prometheus.GaugeVec // Node total RAM capacity in bytes gauge.
	memBytesUsed *prometheus.GaugeVec // Node RAM usage in bytes gauge.
	memBytesFree *prometheus.GaugeVec // Node RAM free in bytes gauge.
	ksmShared    *prometheus.GaugeVec // Node kernel samepage (KSM) shared bytes gauge.
	cgroupMode   *prometheus.GaugeVec // Node cgroup mode gauge. NOTE(review): created and reset, but never assigned a value in this file.
	load1        *prometheus.GaugeVec // Node 1 minute load average gauge.
	load5        *prometheus.GaugeVec // Node 5 minutes load average gauge.
	load15       *prometheus.GaugeVec // Node 15 minutes load average gauge.
	fSFree       *prometheus.GaugeVec // Node root filesystem free space in bytes gauge.
	fSUsed       *prometheus.GaugeVec // Node root filesystem used space in bytes gauge.
	fSTotal      *prometheus.GaugeVec // Node root filesystem total space in bytes gauge.
	fSAvail      *prometheus.GaugeVec // Node root filesystem available capacity in bytes gauge.
	cpuInfo      *prometheus.GaugeVec // Node CPU info gauge; the information is carried in the labels.
	systemInfo   *prometheus.GaugeVec // Node system info gauge; the information is carried in the labels.
	time         *prometheus.GaugeVec // Node time gauge.
	localTime    *prometheus.GaugeVec // Node localtime gauge.
}

// NewPveNodeStatusCollector creates a new instance of the PVE node status collector.
func NewPveNodeStatusCollector(apiClient *proxmox.PveApiClient) *PveNodeStatusCollector { c := PveNodeStatusCollector{apiClient: apiClient} // Node state. c.state = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "pve_node_state", Help: "Node state.", }, []string{"cluster", "node"}, ) // Node uptime. c.uptime = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "pve_node_uptime", Help: "Node uptime.", }, []string{"cluster", "node"}, ) // Node cpu count. c.cpus = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "pve_node_cpu_count", Help: "Node CPU count.", }, []string{"cluster", "node"}, ) // Node CPU usage. c.cpuUsage = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "pve_node_cpu_usage", Help: "Cluster node CPU usage %.", }, []string{"cluster", "node"}, ) // Node memory in bytes. c.memBytes = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "pve_node_memory_total_bytes", Help: "Node total memory in bytes.", }, []string{"cluster", "node"}, ) // Cluster node memory used in bytes. c.memBytesUsed = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "pve_node_memory_used_bytes", Help: "Node used memory in bytes.", }, []string{"cluster", "node"}, ) // Node memory free in bytes. c.memBytesFree = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "pve_node_memory_free_bytes", Help: "Node free memory in bytes.", }, []string{"cluster", "node"}, ) // Kernel samepage shared in bytes. c.ksmShared = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "pve_node_ksm_bytes", Help: "Node kernel samepage shares in bytes.", }, []string{"cluster", "node"}, ) // Node memory cgroup mode. c.cgroupMode = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "pve_node_cgroup_mode", Help: "Node cgroup mode.", }, []string{"cluster", "node"}, ) // Node load 1. c.load1 = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "pve_node_load1", Help: "Node CPU load 1 minute average.", }, []string{"cluster", "node"}, ) // Node load 5. 
c.load5 = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "pve_node_load5", Help: "Node CPU load 5 minutes average.", }, []string{"cluster", "node"}, ) // Cluster node load 15. c.load15 = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "pve_node_load15", Help: "Node CPU load 15 minutes average.", }, []string{"cluster", "node"}, ) // Node root FS free bytes. c.fSFree = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "pve_node_rootfs_free_bytes", Help: "Node RootFS free bytes.", }, []string{"cluster", "node"}, ) // Node root filesystem used bytes. c.fSUsed = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "pve_node_rootfs_used_bytes", Help: "Node root filesystem used bytes.", }, []string{"cluster", "node"}, ) // Node root filesystem total bytes. c.fSTotal = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "pve_node_rootfs_total_bytes", Help: "Node root filesystem total bytes.", }, []string{"cluster", "node"}, ) // Node root filesystem avail bytes. c.fSAvail = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "pve_node_rootfs_avail_bytes", Help: "Node root filesystem avail bytes.", }, []string{"cluster", "node"}, ) // Node CPU info. c.cpuInfo = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "pve_node_cpuinfo", Help: "Node CPU info.", }, []string{"cluster", "node", "flags", "cores", "model", "sockets", "cpus", "hvm"}, ) // Node system info metrics. c.systemInfo = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "pve_node_systeminfo", Help: "Node system info.", }, []string{"cluster", "node", "kversion", "pveversion", "machine", "sysname", "release"}, ) // Node time info. c.time = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "pve_node_time", Help: "Node time.", }, []string{"cluster", "node"}, ) // Node localtime info. c.localTime = promauto.NewGaugeVec( prometheus.GaugeOpts{ Name: "pve_node_localtime", Help: "Node localtime.", }, []string{"cluster", "node"}, ) return &c } // PveMetricsCollector interface implementation. 
func (c *PveNodeStatusCollector) CollectMetrics() error { cluster, err := c.apiClient.GetClusterStatus() if err != nil { return err } c.state.Reset() c.uptime.Reset() c.cpus.Reset() c.cpuUsage.Reset() c.memBytes.Reset() c.memBytesUsed.Reset() c.memBytesFree.Reset() c.ksmShared.Reset() c.cgroupMode.Reset() c.load1.Reset() c.load5.Reset() c.load15.Reset() c.fSFree.Reset() c.fSUsed.Reset() c.fSTotal.Reset() c.fSAvail.Reset() c.cpuInfo.Reset() c.systemInfo.Reset() c.time.Reset() c.localTime.Reset() for _, node := range cluster.NodeStatuses { labels := prometheus.Labels{ "cluster": cluster.GetClusterName(), "node": node.Name, } time, err := c.apiClient.GetNodeTime(node.Name) if err != nil { return err } else { c.time.With(labels).Set(float64(time.Time)) c.localTime.With(labels).Set(float64(time.LocalTime)) } status, err := c.apiClient.GetNodeStatusDetail(node.Name) if err != nil { return err } else { c.state.With(labels).Set(float64(node.Online)) c.cpus.With(labels).Set(float64(status.CPUInfo.CPUs)) c.cpuUsage.With(labels).Set(float64(status.CPU)) c.uptime.With(labels).Set(float64(status.Uptime)) c.memBytes.With(labels).Set(float64(status.Memory.Total)) c.memBytesUsed.With(labels).Set(float64(status.Memory.Used)) c.memBytesFree.With(labels).Set(float64(status.Memory.Free)) c.ksmShared.With(labels).Set(float64(status.Ksm.Shared)) c.fSFree.With(labels).Set(float64(status.Rootfs.Free)) c.fSUsed.With(labels).Set(float64(status.Rootfs.Used)) c.fSTotal.With(labels).Set(float64(status.Rootfs.Total)) c.fSAvail.With(labels).Set(float64(status.Rootfs.Avail)) // CPU load avg. if len(status.LoadAvg) > 0 { // Node load 1 metrics. f, err := strconv.ParseFloat(status.LoadAvg[0], 64) if err != nil { log.Errorf("Unable to parse load1. Error: %s.", err) } else { c.load1.With(labels).Set(f) } // Node load 5 metrics. f, err = strconv.ParseFloat(status.LoadAvg[1], 64) if err != nil { log.Errorf("Unable to parse load5. 
Error: %s.", err) } else { c.load5.With(labels).Set(f) } // Node load 15 metrics. f, err = strconv.ParseFloat(status.LoadAvg[2], 64) if err != nil { log.Errorf("Unable to parse load15. Error: %s.", err) } else { c.load15.With(labels).Set(f) } } else { log.Error("CPU load stats are empty.") } // Node CPU info. cpuLabels := prometheus.Labels{ "cluster": cluster.GetClusterName(), "node": node.Name, "flags": status.CPUInfo.Flags, "cores": strconv.Itoa(status.CPUInfo.Cores), "model": status.CPUInfo.Model, "sockets": strconv.Itoa(status.CPUInfo.Sockets), "cpus": strconv.Itoa(status.CPUInfo.CPUs), "hvm": status.CPUInfo.HVM, } c.cpuInfo.With(cpuLabels).Set(1) // Node system info. sysLabels := prometheus.Labels{ "cluster": cluster.GetClusterName(), "node": node.Name, "kversion": status.Kversion, "pveversion": status.PveVersion, "machine": status.CurrentKernel.Machine, "sysname": status.CurrentKernel.Sysname, "release": status.CurrentKernel.Release, } c.systemInfo.With(sysLabels).Set(1) } } return nil } // PveMetricsCollector interface implementation. func (c *PveNodeStatusCollector) GetName() string { return "Node State" }