package metrics import ( "strconv" "time" "github.com/prometheus/client_golang/prometheus" "lostak.dev/pve-exporter/proxmox" ) // PVE virtual machine collector. type PveVirtualMachineCollector struct { apiClient *proxmox.PveApiClient // PVE API client instance. registry *TTLRegistry // TTL metrics registry. state *TTLGaugeVec // Virtual machine state prometheus gauge. uptime *TTLGaugeVec // Virtual machine uptime prometheus gauge. cpu *TTLGaugeVec // Virtual machine count of CPUs prometheus gauge. cpuUsage *TTLGaugeVec // Virtual machine CPU usage % prometheus gauge. memBytes *TTLGaugeVec // Virtual machine memory in bytes prometheus gauge. memBytesUsed *TTLGaugeVec // Virtual machine memory usage in bytes prometheus gauge. disk *TTLGaugeVec // Virtual machine disk space usage in bytes prometheus gauge. diskMax *TTLGaugeVec // Virtual machine disk size in bytes prometheus gauge. swap *TTLGaugeVec // Virtual machine swap usage in bytes prometheus gauge. netReceive *TTLGaugeVec // Virtual machine network receive in bytes prometheus gauge. netTransmit *TTLGaugeVec // Virtual machine network transmit in bytes prometheus gauge. diskReadOps *TTLGaugeVec // Virtual machine disk read ops prometheus gauge. diskWriteOps *TTLGaugeVec // Virtual machine disk write ops prometheus gauge. diskReadBytes *TTLGaugeVec // Virtual machine disk read bytes prometheus gauge. diskWriteBytes *TTLGaugeVec // Virtual machine disk write bytes prometheus gauge. diskReadTimeNs *TTLGaugeVec // Virtual machine disk read time total prometheus gauge. diskWriteTimeNs *TTLGaugeVec // Virtual machine disk write time total prometheus gauge. diskFailedReadOps *TTLGaugeVec // Virtual machine disk failed read ops prometheus gauge. diskFailedWriteOps *TTLGaugeVec // Virtual machine disk failed write ops prometheus gauge. agent *TTLGaugeVec // Virtual machine agent enabled prometheus gauge. } // Create new instance of PVE virtual machine collector. func NewPveVirtualMachineCollector(apiClient *proxmox.PveApiClient, registry *TTLRegistry) *PveVirtualMachineCollector { c := PveVirtualMachineCollector{apiClient: apiClient} c.registry = registry // Virtual machine state. c.state = NewTTLGaugeVec( prometheus.GaugeOpts{ Name: "pve_vm_state", Help: "Virtual machine state.", }, []string{"cluster", "node", "vmid", "name"}, 5*time.Minute, ) c.registry.Register(c.state) // Virtual machine uptime. c.uptime = NewTTLGaugeVec( prometheus.GaugeOpts{ Name: "pve_vm_uptime", Help: "Virtual machine uptime.", }, []string{"cluster", "node", "vmid", "name"}, 5*time.Minute, ) c.registry.Register(c.uptime) // Virtual machine agent state. c.agent = NewTTLGaugeVec( prometheus.GaugeOpts{ Name: "pve_vm_agent", Help: "Virtual machine agent state.", }, []string{"cluster", "node", "vmid", "name"}, 5*time.Minute, ) c.registry.Register(c.agent) // Virtual machine CPU count. c.cpu = NewTTLGaugeVec( prometheus.GaugeOpts{ Name: "pve_vm_cpu_count", Help: "Virtual machine CPU count.", }, []string{"cluster", "node", "vmid", "name"}, 5*time.Minute, ) c.registry.Register(c.cpu) // Virtual machine CPU usage. c.cpuUsage = NewTTLGaugeVec( prometheus.GaugeOpts{ Name: "pve_vm_cpu_usage", Help: "Virtual machine CPU usage.", }, []string{"cluster", "node", "vmid", "name"}, 5*time.Minute, ) c.registry.Register(c.cpuUsage) // Virtual machine memory total. c.memBytes = NewTTLGaugeVec( prometheus.GaugeOpts{ Name: "pve_vm_mem_total_bytes", Help: "Virtual machine total memory in bytes.", }, []string{"cluster", "node", "vmid", "name"}, 5*time.Minute, ) c.registry.Register(c.memBytes) // Virtual machine memory usage. c.memBytesUsed = NewTTLGaugeVec( prometheus.GaugeOpts{ Name: "pve_vm_mem_used_bytes", Help: "Virtual machine used memory in bytes.", }, []string{"cluster", "node", "vmid", "name"}, 5*time.Minute, ) c.registry.Register(c.memBytesUsed) // Virtual machine disk size. c.disk = NewTTLGaugeVec( prometheus.GaugeOpts{ Name: "pve_vm_disk_usage_bytes", Help: "Virtual machine disk read bytes.", }, []string{"cluster", "node", "vmid", "name"}, 5*time.Minute, ) c.registry.Register(c.disk) // Virtual machine disk size. c.diskMax = NewTTLGaugeVec( prometheus.GaugeOpts{ Name: "pve_vm_disk_size_bytes", Help: "Virtual machine disk size bytes.", }, []string{"cluster", "node", "vmid", "name"}, 5*time.Minute, ) c.registry.Register(c.diskMax) // Virtual machine network receive bytes. c.netReceive = NewTTLGaugeVec( prometheus.GaugeOpts{ Name: "pve_vm_network_in_bytes", Help: "Virtual machine network receive in bytes.", }, []string{"cluster", "node", "vmid", "name", "interface"}, 5*time.Minute, ) c.registry.Register(c.netReceive) // Virtual machine network transmit bytes. c.netTransmit = NewTTLGaugeVec( prometheus.GaugeOpts{ Name: "pve_vm_network_out_bytes", Help: "Virtual machine network transmit in bytes.", }, []string{"cluster", "node", "vmid", "name", "interface"}, 5*time.Minute, ) c.registry.Register(c.netTransmit) // Virtual machine disk read ops. c.diskReadOps = NewTTLGaugeVec( prometheus.GaugeOpts{ Name: "pve_vm_disk_rd_operations", Help: "Virtual machine disk read ops.", }, []string{"cluster", "node", "vmid", "name", "device"}, 5*time.Minute, ) c.registry.Register(c.diskReadOps) // Virtual machine disk write ops. c.diskWriteOps = NewTTLGaugeVec( prometheus.GaugeOpts{ Name: "pve_vm_disk_wr_operations", Help: "Virtual machine disk write ops.", }, []string{"cluster", "node", "vmid", "name", "device"}, 5*time.Minute, ) c.registry.Register(c.diskWriteOps) // Virtual machine disk read bytes. c.diskReadBytes = NewTTLGaugeVec( prometheus.GaugeOpts{ Name: "pve_vm_disk_rd_bytes", Help: "Virtual machine disk read bytes.", }, []string{"cluster", "node", "vmid", "name", "device"}, 5*time.Minute, ) c.registry.Register(c.diskReadBytes) // Virtual machine disk write bytes. c.diskWriteBytes = NewTTLGaugeVec( prometheus.GaugeOpts{ Name: "pve_vm_disk_wr_bytes", Help: "Virtual machine disk write bytes.", }, []string{"cluster", "node", "vmid", "name", "device"}, 5*time.Minute, ) c.registry.Register(c.diskWriteBytes) // Virtual machine failed disk read ops. c.diskFailedReadOps = NewTTLGaugeVec( prometheus.GaugeOpts{ Name: "pve_vm_disk_failed_rd_ops", Help: "Virtual machine failed disk read ops.", }, []string{"cluster", "node", "vmid", "name", "device"}, 5*time.Minute, ) c.registry.Register(c.diskFailedReadOps) // Virtual machine failed disk write ops. c.diskFailedWriteOps = NewTTLGaugeVec( prometheus.GaugeOpts{ Name: "pve_vm_disk_failed_wr_ops", Help: "Virtual machine failed disk write ops.", }, []string{"cluster", "node", "vmid", "name", "device"}, 5*time.Minute, ) c.registry.Register(c.diskFailedWriteOps) // Virtual machine disk read time total nanoseconds. c.diskReadTimeNs = NewTTLGaugeVec( prometheus.GaugeOpts{ Name: "pve_vm_disk_rd_time_total_ns", Help: "Virtual machine disk read time total in nanoseconds.", }, []string{"cluster", "node", "vmid", "name", "device"}, 5*time.Minute, ) c.registry.Register(c.diskReadTimeNs) // Virtual machine disk write time total nanoseconds. c.diskWriteTimeNs = NewTTLGaugeVec( prometheus.GaugeOpts{ Name: "pve_vm_disk_wr_time_total_ns", Help: "Virtual machine disk write time total in nanoseconds.", }, []string{"cluster", "node", "vmid", "name", "device"}, 5*time.Minute, ) c.registry.Register(c.diskWriteTimeNs) return &c } // PveMetricsCollector interface implementation. func (c *PveVirtualMachineCollector) CollectMetrics() error { cluster, err := c.apiClient.GetClusterStatus() if err != nil { return err } for _, node := range cluster.NodeStatuses { qemus, err := c.apiClient.GetNodeQemuList(node.Name) if err != nil { return err } for _, qemu := range *qemus { // Skip templates because they are always offline. if qemu.Template == 1 { continue } labels := prometheus.Labels{ "cluster": cluster.GetClusterName(), "node": node.Name, "vmid": strconv.Itoa(qemu.VMID), "name": qemu.Name, } c.state.With(labels).Set(qemu.GetStatusNumeric()) c.cpu.With(labels).Set(float64(qemu.CPUs)) c.memBytes.With(labels).Set(float64(qemu.MaxMem)) c.diskMax.With(labels).Set(float64(qemu.MaxDisk)) // Metrics only on running virtual machines. if qemu.IsRunning() { c.uptime.With(labels).Set(float64(qemu.Uptime)) c.cpuUsage.With(labels).Set(float64(qemu.CPU)) c.memBytesUsed.With(labels).Set(float64(qemu.Mem)) detail, err := c.apiClient.GetNodeQemu(node.Name, strconv.Itoa(qemu.VMID)) if err != nil { return err } c.agent.With(labels).Set(float64(detail.Agent)) for iface, value := range detail.Nics { labels := prometheus.Labels{ "cluster": cluster.GetClusterName(), "node": node.Name, "vmid": strconv.Itoa(qemu.VMID), "name": qemu.Name, "interface": iface, } c.netReceive.With(labels).Set(float64(value.NetIn)) c.netTransmit.With(labels).Set(float64(value.NetOut)) } for device, value := range detail.BlockStat { labels := prometheus.Labels{ "cluster": cluster.GetClusterName(), "node": node.Name, "vmid": strconv.Itoa(qemu.VMID), "name": qemu.Name, "device": device, } c.diskReadOps.With(labels).Set(float64(value.RdOperations)) c.diskWriteOps.With(labels).Set(float64(value.WrOperations)) c.diskReadBytes.With(labels).Set(float64(value.RdBytes)) c.diskWriteBytes.With(labels).Set(float64(value.WrBytes)) c.diskFailedReadOps.With(labels).Set(float64(value.FailedRdOperations)) c.diskFailedWriteOps.With(labels).Set(float64(value.FailedWrOperations)) c.diskReadTimeNs.With(labels).Set(float64(value.RdTotalTimeNs)) c.diskWriteTimeNs.With(labels).Set(float64(value.WrTotalTimeNs)) } } } } return nil } // PveMetricsCollector interface implementation. func (c *PveVirtualMachineCollector) GetName() string { return "Virtual Machine" }