package metrics

import (
	"strconv"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"lostak.dev/pve-exporter/proxmox"
)

// PveVirtualMachineCollector collects PVE virtual machine metrics and exposes
// them as prometheus gauges.
type PveVirtualMachineCollector struct {
	apiClient *proxmox.PveApiClient // PVE API client instance.

	state              *prometheus.GaugeVec // Virtual machine state prometheus gauge.
	uptime             *prometheus.GaugeVec // Virtual machine uptime prometheus gauge.
	cpu                *prometheus.GaugeVec // Virtual machine count of CPUs prometheus gauge.
	cpuUsage           *prometheus.GaugeVec // Virtual machine CPU usage % prometheus gauge.
	memBytes           *prometheus.GaugeVec // Virtual machine memory in bytes prometheus gauge.
	memBytesUsed       *prometheus.GaugeVec // Virtual machine memory usage in bytes prometheus gauge.
	disk               *prometheus.GaugeVec // Virtual machine disk space usage in bytes prometheus gauge.
	diskMax            *prometheus.GaugeVec // Virtual machine disk size in bytes prometheus gauge.
	swap               *prometheus.GaugeVec // Virtual machine swap usage in bytes prometheus gauge. Not created or written by the code in this file.
	netReceive         *prometheus.GaugeVec // Virtual machine network receive in bytes prometheus gauge.
	netTransmit        *prometheus.GaugeVec // Virtual machine network transmit in bytes prometheus gauge.
	diskReadOps        *prometheus.GaugeVec // Virtual machine disk read ops prometheus gauge.
	diskWriteOps       *prometheus.GaugeVec // Virtual machine disk write ops prometheus gauge.
	diskReadBytes      *prometheus.GaugeVec // Virtual machine disk read bytes prometheus gauge.
	diskWriteBytes     *prometheus.GaugeVec // Virtual machine disk write bytes prometheus gauge.
	diskReadTimeNs     *prometheus.GaugeVec // Virtual machine disk read time total prometheus gauge.
	diskWriteTimeNs    *prometheus.GaugeVec // Virtual machine disk write time total prometheus gauge.
	diskFailedReadOps  *prometheus.GaugeVec // Virtual machine disk failed read ops prometheus gauge.
	diskFailedWriteOps *prometheus.GaugeVec // Virtual machine disk failed write ops prometheus gauge.
	agent              *prometheus.GaugeVec // Virtual machine agent enabled prometheus gauge.
}

// NewPveVirtualMachineCollector creates a new PVE virtual machine collector
// that reads from apiClient. Every gauge is created through promauto, so it is
// registered as a side effect of this call.
func NewPveVirtualMachineCollector(apiClient *proxmox.PveApiClient) *PveVirtualMachineCollector {
	// Label sets shared by the gauges below: per VM, per VM network
	// interface and per VM block device.
	vmLabels := []string{"cluster", "node", "vmid", "name"}
	nicLabels := []string{"cluster", "node", "vmid", "name", "interface"}
	deviceLabels := []string{"cluster", "node", "vmid", "name", "device"}

	newGauge := func(name, help string, labels []string) *prometheus.GaugeVec {
		return promauto.NewGaugeVec(prometheus.GaugeOpts{Name: name, Help: help}, labels)
	}

	c := PveVirtualMachineCollector{apiClient: apiClient}

	// Per virtual machine gauges.
	c.state = newGauge("pve_vm_state", "Virtual machine state.", vmLabels)
	c.uptime = newGauge("pve_vm_uptime", "Virtual machine uptime.", vmLabels)
	c.agent = newGauge("pve_vm_agent", "Virtual machine agent state.", vmLabels)
	c.cpu = newGauge("pve_vm_cpu_count", "Virtual machine CPU count.", vmLabels)
	c.cpuUsage = newGauge("pve_vm_cpu_usage", "Virtual machine CPU usage.", vmLabels)
	c.memBytes = newGauge("pve_vm_mem_total_bytes", "Virtual machine total memory in bytes.", vmLabels)
	c.memBytesUsed = newGauge("pve_vm_mem_used_bytes", "Virtual machine used memory in bytes.", vmLabels)
	// NOTE(review): this gauge is registered but CollectMetrics never sets it,
	// so it exports no series - confirm whether the VM list exposes disk usage.
	c.disk = newGauge("pve_vm_disk_usage_bytes", "Virtual machine disk usage in bytes.", vmLabels)
	c.diskMax = newGauge("pve_vm_disk_size_bytes", "Virtual machine disk size bytes.", vmLabels)

	// Per network interface gauges.
	c.netReceive = newGauge("pve_vm_network_in_bytes", "Virtual machine network receive in bytes.", nicLabels)
	c.netTransmit = newGauge("pve_vm_network_out_bytes", "Virtual machine network transmit in bytes.", nicLabels)

	// Per block device gauges.
	c.diskReadOps = newGauge("pve_vm_disk_rd_operations", "Virtual machine disk read ops.", deviceLabels)
	c.diskWriteOps = newGauge("pve_vm_disk_wr_operations", "Virtual machine disk write ops.", deviceLabels)
	c.diskReadBytes = newGauge("pve_vm_disk_rd_bytes", "Virtual machine disk read bytes.", deviceLabels)
	c.diskWriteBytes = newGauge("pve_vm_disk_wr_bytes", "Virtual machine disk write bytes.", deviceLabels)
	c.diskFailedReadOps = newGauge("pve_vm_disk_failed_rd_ops", "Virtual machine failed disk read ops.", deviceLabels)
	c.diskFailedWriteOps = newGauge("pve_vm_disk_failed_wr_ops", "Virtual machine failed disk write ops.", deviceLabels)
	c.diskReadTimeNs = newGauge("pve_vm_disk_rd_time_total_ns", "Virtual machine disk read time total in nanoseconds.", deviceLabels)
	c.diskWriteTimeNs = newGauge("pve_vm_disk_wr_time_total_ns", "Virtual machine disk write time total in nanoseconds.", deviceLabels)

	return &c
}

// CollectMetrics implements the PveMetricsCollector interface.
//
// It fetches the cluster status, clears every gauge this collector writes and
// then repopulates them from the QEMU list of each node. Templates are
// skipped. Uptime, usage, agent, network and block device metrics are only
// written for running virtual machines, which costs one extra API call per
// running machine.
//
// The first API error aborts the collection and is returned unchanged; gauges
// written before the failure keep their values.
func (c *PveVirtualMachineCollector) CollectMetrics() error {
	cluster, err := c.apiClient.GetClusterStatus()
	if err != nil {
		return err
	}

	// Clear only after the cluster status is known so that a failed status
	// call leaves the previous scrape's values in place.
	c.resetGauges()

	// Assumes GetClusterName is a plain getter, so one call serves all labels.
	clusterName := cluster.GetClusterName()

	for _, node := range cluster.NodeStatuses {
		qemus, err := c.apiClient.GetNodeQemuList(node.Name)
		if err != nil {
			return err
		}
		if qemus == nil {
			// No list and no error: nothing to report for this node.
			continue
		}

		for _, qemu := range *qemus {
			// Skip templates because they are always offline.
			if qemu.Template == 1 {
				continue
			}

			vmid := strconv.Itoa(qemu.VMID)
			labels := prometheus.Labels{
				"cluster": clusterName,
				"node":    node.Name,
				"vmid":    vmid,
				"name":    qemu.Name,
			}

			c.state.With(labels).Set(qemu.GetStatusNumeric())
			c.cpu.With(labels).Set(float64(qemu.CPUs))
			c.memBytes.With(labels).Set(float64(qemu.MaxMem))
			c.diskMax.With(labels).Set(float64(qemu.MaxDisk))

			// Metrics only on running virtual machines.
			if !qemu.IsRunning() {
				continue
			}

			c.uptime.With(labels).Set(float64(qemu.Uptime))
			c.cpuUsage.With(labels).Set(float64(qemu.CPU))
			c.memBytesUsed.With(labels).Set(float64(qemu.Mem))

			detail, err := c.apiClient.GetNodeQemu(node.Name, vmid)
			if err != nil {
				return err
			}

			c.agent.With(labels).Set(float64(detail.Agent))

			for iface, nic := range detail.Nics {
				nicLabels := extendLabels(labels, "interface", iface)
				c.netReceive.With(nicLabels).Set(float64(nic.NetIn))
				c.netTransmit.With(nicLabels).Set(float64(nic.NetOut))
			}

			for device, stat := range detail.BlockStat {
				deviceLabels := extendLabels(labels, "device", device)
				c.diskReadOps.With(deviceLabels).Set(float64(stat.RdOperations))
				c.diskWriteOps.With(deviceLabels).Set(float64(stat.WrOperations))
				c.diskReadBytes.With(deviceLabels).Set(float64(stat.RdBytes))
				c.diskWriteBytes.With(deviceLabels).Set(float64(stat.WrBytes))
				c.diskFailedReadOps.With(deviceLabels).Set(float64(stat.FailedRdOperations))
				c.diskFailedWriteOps.With(deviceLabels).Set(float64(stat.FailedWrOperations))
				c.diskReadTimeNs.With(deviceLabels).Set(float64(stat.RdTotalTimeNs))
				c.diskWriteTimeNs.With(deviceLabels).Set(float64(stat.WrTotalTimeNs))
			}
		}
	}

	return nil
}

// GetName implements the PveMetricsCollector interface and returns the
// collector's display name.
func (c *PveVirtualMachineCollector) GetName() string {
	return "Virtual Machine"
}

// resetGauges drops all label combinations from every gauge CollectMetrics
// writes, so virtual machines, interfaces and devices that no longer exist
// (or are no longer running) stop being exported.
func (c *PveVirtualMachineCollector) resetGauges() {
	gauges := []*prometheus.GaugeVec{
		c.state,
		c.cpu,
		c.memBytes,
		c.diskMax,
		c.uptime,
		c.cpuUsage,
		c.memBytesUsed,
		// agent is only written for running machines; without a reset its
		// series would outlive a stopped or deleted machine.
		c.agent,
		c.netReceive,
		c.netTransmit,
		c.diskReadOps,
		c.diskWriteOps,
		c.diskReadBytes,
		c.diskWriteBytes,
		c.diskFailedReadOps,
		c.diskFailedWriteOps,
		c.diskReadTimeNs,
		c.diskWriteTimeNs,
	}
	for _, gauge := range gauges {
		gauge.Reset()
	}
}

// extendLabels returns a copy of base with one additional key/value pair.
// base itself is not modified.
func extendLabels(base prometheus.Labels, key, value string) prometheus.Labels {
	extended := make(prometheus.Labels, len(base)+1)
	for k, v := range base {
		extended[k] = v
	}
	extended[key] = value
	return extended
}