312 lines
9.3 KiB
Go
312 lines
9.3 KiB
Go
package metrics
|
|
|
|
import (
|
|
"strconv"
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/client_golang/prometheus/promauto"
|
|
"lostak.dev/pve-exporter/proxmox"
|
|
)
|
|
|
|
// PVE virtual machine collector.
//
// Collects Proxmox VE QEMU guest metrics via the PVE HTTP API and exposes
// them as Prometheus gauge vectors. All gauges are created and registered
// by NewPveVirtualMachineCollector and populated by CollectMetrics.
type PveVirtualMachineCollector struct {
	apiClient *proxmox.PveApiClient // PVE API client instance.

	state  *prometheus.GaugeVec // Virtual machine state prometheus gauge.
	uptime *prometheus.GaugeVec // Virtual machine uptime prometheus gauge.

	cpu      *prometheus.GaugeVec // Virtual machine count of CPUs prometheus gauge.
	cpuUsage *prometheus.GaugeVec // Virtual machine CPU usage % prometheus gauge.

	memBytes     *prometheus.GaugeVec // Virtual machine memory in bytes prometheus gauge.
	memBytesUsed *prometheus.GaugeVec // Virtual machine memory usage in bytes prometheus gauge.

	disk    *prometheus.GaugeVec // Virtual machine disk space usage in bytes prometheus gauge.
	diskMax *prometheus.GaugeVec // Virtual machine disk size in bytes prometheus gauge.
	// NOTE(review): swap is never initialized by the constructor and never
	// set by CollectMetrics — any use of it would panic on a nil pointer.
	// Confirm whether it should be wired up or removed.
	swap *prometheus.GaugeVec // Virtual machine swap usage in bytes prometheus gauge.

	// Per-interface network counters (extra "interface" label).
	netReceive  *prometheus.GaugeVec // Virtual machine network receive in bytes prometheus gauge.
	netTransmit *prometheus.GaugeVec // Virtual machine network transmit in bytes prometheus gauge.

	// Per-device block I/O counters (extra "device" label).
	diskReadOps  *prometheus.GaugeVec // Virtual machine disk read ops prometheus gauge.
	diskWriteOps *prometheus.GaugeVec // Virtual machine disk write ops prometheus gauge.

	diskReadBytes  *prometheus.GaugeVec // Virtual machine disk read bytes prometheus gauge.
	diskWriteBytes *prometheus.GaugeVec // Virtual machine disk write bytes prometheus gauge.

	diskReadTimeNs  *prometheus.GaugeVec // Virtual machine disk read time total prometheus gauge.
	diskWriteTimeNs *prometheus.GaugeVec // Virtual machine disk write time total prometheus gauge.

	diskFailedReadOps  *prometheus.GaugeVec // Virtual machine disk failed read ops prometheus gauge.
	diskFailedWriteOps *prometheus.GaugeVec // Virtual machine disk failed write ops prometheus gauge.

	agent *prometheus.GaugeVec // Virtual machine agent enabled prometheus gauge.
}
|
|
|
|
// Create new instance of PVE virtual machine collector.
|
|
func NewPveVirtualMachineCollector(apiClient *proxmox.PveApiClient) *PveVirtualMachineCollector {
|
|
c := PveVirtualMachineCollector{apiClient: apiClient}
|
|
|
|
// Virtual machine state.
|
|
c.state = promauto.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "pve_vm_state",
|
|
Help: "Virtual machine state.",
|
|
},
|
|
[]string{"cluster", "node", "vmid", "name"},
|
|
)
|
|
|
|
// Virtual machine uptime.
|
|
c.uptime = promauto.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "pve_vm_uptime",
|
|
Help: "Virtual machine uptime.",
|
|
},
|
|
[]string{"cluster", "node", "vmid", "name"},
|
|
)
|
|
|
|
// Virtual machine agent state.
|
|
c.agent = promauto.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "pve_vm_agent",
|
|
Help: "Virtual machine agent state.",
|
|
},
|
|
[]string{"cluster", "node", "vmid", "name"},
|
|
)
|
|
|
|
// Virtual machine CPU count.
|
|
c.cpu = promauto.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "pve_vm_cpu_count",
|
|
Help: "Virtual machine CPU count.",
|
|
},
|
|
[]string{"cluster", "node", "vmid", "name"},
|
|
)
|
|
|
|
// Virtual machine CPU usage.
|
|
c.cpuUsage = promauto.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "pve_vm_cpu_usage",
|
|
Help: "Virtual machine CPU usage.",
|
|
},
|
|
[]string{"cluster", "node", "vmid", "name"},
|
|
)
|
|
|
|
// Virtual machine memory total.
|
|
c.memBytes = promauto.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "pve_vm_mem_total_bytes",
|
|
Help: "Virtual machine total memory in bytes.",
|
|
},
|
|
[]string{"cluster", "node", "vmid", "name"},
|
|
)
|
|
|
|
// Virtual machine memory usage.
|
|
c.memBytesUsed = promauto.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "pve_vm_mem_used_bytes",
|
|
Help: "Virtual machine used memory in bytes.",
|
|
},
|
|
[]string{"cluster", "node", "vmid", "name"},
|
|
)
|
|
|
|
// Virtual machine disk size.
|
|
c.disk = promauto.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "pve_vm_disk_usage_bytes",
|
|
Help: "Virtual machine disk read bytes.",
|
|
},
|
|
[]string{"cluster", "node", "vmid", "name"},
|
|
)
|
|
|
|
// Virtual machine disk size.
|
|
c.diskMax = promauto.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "pve_vm_disk_size_bytes",
|
|
Help: "Virtual machine disk size bytes.",
|
|
},
|
|
[]string{"cluster", "node", "vmid", "name"},
|
|
)
|
|
|
|
// Virtual machine network receive bytes.
|
|
c.netReceive = promauto.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "pve_vm_network_in_bytes",
|
|
Help: "Virtual machine network receive in bytes.",
|
|
},
|
|
[]string{"cluster", "node", "vmid", "name", "interface"},
|
|
)
|
|
|
|
// Virtual machine network transmit bytes.
|
|
c.netTransmit = promauto.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "pve_vm_network_out_bytes",
|
|
Help: "Virtual machine network transmit in bytes.",
|
|
},
|
|
[]string{"cluster", "node", "vmid", "name", "interface"},
|
|
)
|
|
|
|
// Virtual machine disk read ops.
|
|
c.diskReadOps = promauto.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "pve_vm_disk_rd_operations",
|
|
Help: "Virtual machine disk read ops.",
|
|
},
|
|
[]string{"cluster", "node", "vmid", "name", "device"},
|
|
)
|
|
|
|
// Virtual machine disk write ops.
|
|
c.diskWriteOps = promauto.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "pve_vm_disk_wr_operations",
|
|
Help: "Virtual machine disk write ops.",
|
|
},
|
|
[]string{"cluster", "node", "vmid", "name", "device"},
|
|
)
|
|
|
|
// Virtual machine disk read bytes.
|
|
c.diskReadBytes = promauto.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "pve_vm_disk_rd_bytes",
|
|
Help: "Virtual machine disk read bytes.",
|
|
},
|
|
[]string{"cluster", "node", "vmid", "name", "device"},
|
|
)
|
|
|
|
// Virtual machine disk write bytes.
|
|
c.diskWriteBytes = promauto.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "pve_vm_disk_wr_bytes",
|
|
Help: "Virtual machine disk write bytes.",
|
|
},
|
|
[]string{"cluster", "node", "vmid", "name", "device"},
|
|
)
|
|
|
|
// Virtual machine failed disk read ops.
|
|
c.diskFailedReadOps = promauto.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "pve_vm_disk_failed_rd_ops",
|
|
Help: "Virtual machine failed disk read ops.",
|
|
},
|
|
[]string{"cluster", "node", "vmid", "name", "device"},
|
|
)
|
|
|
|
// Virtual machine failed disk write ops.
|
|
c.diskFailedWriteOps = promauto.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "pve_vm_disk_failed_wr_ops",
|
|
Help: "Virtual machine failed disk write ops.",
|
|
},
|
|
[]string{"cluster", "node", "vmid", "name", "device"},
|
|
)
|
|
|
|
// Virtual machine disk read time total nanoseconds.
|
|
c.diskReadTimeNs = promauto.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "pve_vm_disk_rd_time_total_ns",
|
|
Help: "Virtual machine disk read time total in nanoseconds.",
|
|
},
|
|
[]string{"cluster", "node", "vmid", "name", "device"},
|
|
)
|
|
|
|
// Virtual machine disk write time total nanoseconds.
|
|
c.diskWriteTimeNs = promauto.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "pve_vm_disk_wr_time_total_ns",
|
|
Help: "Virtual machine disk write time total in nanoseconds.",
|
|
},
|
|
[]string{"cluster", "node", "vmid", "name", "device"},
|
|
)
|
|
|
|
return &c
|
|
}
|
|
|
|
// PveMetricsCollector interface implementation.
|
|
func (c *PveVirtualMachineCollector) CollectMetrics() error {
|
|
cluster, err := c.apiClient.GetClusterStatus()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, node := range cluster.NodeStatuses {
|
|
qemus, err := c.apiClient.GetNodeQemuList(node.Name)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, qemu := range *qemus {
|
|
// Skip templates because they are always offline.
|
|
if qemu.Template == 1 {
|
|
continue
|
|
}
|
|
|
|
labels := prometheus.Labels{
|
|
"cluster": cluster.GetClusterName(),
|
|
"node": node.Name,
|
|
"vmid": strconv.Itoa(qemu.VMID),
|
|
"name": qemu.Name,
|
|
}
|
|
|
|
c.state.With(labels).Set(qemu.GetStatusNumeric())
|
|
c.cpu.With(labels).Set(float64(qemu.CPUs))
|
|
c.memBytes.With(labels).Set(float64(qemu.MaxMem))
|
|
c.diskMax.With(labels).Set(float64(qemu.MaxDisk))
|
|
|
|
// Metrics only on running virtual machines.
|
|
if qemu.IsRunning() {
|
|
c.uptime.With(labels).Set(float64(qemu.Uptime))
|
|
c.cpuUsage.With(labels).Set(float64(qemu.CPU))
|
|
c.memBytesUsed.With(labels).Set(float64(qemu.Mem))
|
|
|
|
detail, err := c.apiClient.GetNodeQemu(node.Name, strconv.Itoa(qemu.VMID))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
c.agent.With(labels).Set(float64(detail.Agent))
|
|
|
|
for iface, value := range detail.Nics {
|
|
labels := prometheus.Labels{
|
|
"cluster": cluster.GetClusterName(),
|
|
"node": node.Name,
|
|
"vmid": strconv.Itoa(qemu.VMID),
|
|
"name": qemu.Name,
|
|
"interface": iface,
|
|
}
|
|
|
|
c.netReceive.With(labels).Set(float64(value.NetIn))
|
|
c.netTransmit.With(labels).Set(float64(value.NetOut))
|
|
}
|
|
|
|
for device, value := range detail.BlockStat {
|
|
labels := prometheus.Labels{
|
|
"cluster": cluster.GetClusterName(),
|
|
"node": node.Name,
|
|
"vmid": strconv.Itoa(qemu.VMID),
|
|
"name": qemu.Name,
|
|
"device": device,
|
|
}
|
|
|
|
c.diskReadOps.With(labels).Set(float64(value.RdOperations))
|
|
c.diskWriteOps.With(labels).Set(float64(value.WrOperations))
|
|
|
|
c.diskReadBytes.With(labels).Set(float64(value.RdBytes))
|
|
c.diskWriteBytes.With(labels).Set(float64(value.WrBytes))
|
|
|
|
c.diskFailedReadOps.With(labels).Set(float64(value.FailedRdOperations))
|
|
c.diskFailedWriteOps.With(labels).Set(float64(value.FailedWrOperations))
|
|
|
|
c.diskReadTimeNs.With(labels).Set(float64(value.RdTotalTimeNs))
|
|
c.diskWriteTimeNs.With(labels).Set(float64(value.WrTotalTimeNs))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// PveMetricsCollector interface implementation.
|
|
func (c *PveVirtualMachineCollector) GetName() string {
|
|
return "Virtual Machine"
|
|
}
|