Initial commit

This commit is contained in:
Jan Lošťák
2024-05-27 21:27:07 +02:00
commit a1ab163804
22 changed files with 2920 additions and 0 deletions

View File

@@ -0,0 +1,59 @@
package metrics
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"lostak.dev/pve-exporter/proxmox"
)
// PveClusterStateCollector exports cluster-wide state metrics
// (node count and quorum) obtained from the PVE cluster status API.
type PveClusterStateCollector struct {
	apiClient *proxmox.PveApiClient // PVE API client instance used to query cluster status.
	nodes     *prometheus.GaugeVec  // Count of cluster nodes, labelled by cluster name.
	quorate   *prometheus.GaugeVec  // Cluster quorum state (quorate flag), labelled by cluster name.
}
// NewPveClusterStateCollector creates a cluster state collector backed by the
// given PVE API client and registers its gauges with the default registry.
func NewPveClusterStateCollector(apiClient *proxmox.PveApiClient) *PveClusterStateCollector {
	// Both gauges are keyed by cluster name only.
	gauge := func(name, help string) *prometheus.GaugeVec {
		return promauto.NewGaugeVec(
			prometheus.GaugeOpts{Name: name, Help: help},
			[]string{"cluster"},
		)
	}
	return &PveClusterStateCollector{
		apiClient: apiClient,
		// Cluster node count gauge.
		nodes: gauge("pve_cluster_nodes", "Cluster nodes count."),
		// Cluster quorum state gauge.
		quorate: gauge("pve_cluster_quorate", "Cluster quorum state."),
	}
}
// CollectMetrics implements PveMetricsCollector: it fetches the cluster
// status once and updates the node count and quorum gauges.
func (c *PveClusterStateCollector) CollectMetrics() error {
	cluster, err := c.apiClient.GetClusterStatus()
	if err != nil {
		return err
	}
	labels := prometheus.Labels{"cluster": cluster.Name}
	c.nodes.With(labels).Set(float64(cluster.Nodes))
	c.quorate.With(labels).Set(float64(cluster.Quorate))
	return nil
}
// GetName implements PveMetricsCollector; the name is used in log messages.
func (c *PveClusterStateCollector) GetName() string {
	const collectorName = "Cluster State"
	return collectorName
}

View File

@@ -0,0 +1,126 @@
package metrics
import (
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
log "github.com/sirupsen/logrus"
"lostak.dev/pve-exporter/configuration"
"lostak.dev/pve-exporter/proxmox"
"lostak.dev/pve-exporter/utils"
)
// PveMetricsCollector is implemented by every metrics collector that can be
// registered with and driven by PveMetricsManager.
type PveMetricsCollector interface {
	CollectMetrics() error // Called by the manager on each metrics collection pass.
	GetName() string       // Human-readable name, used for logging (e.g. when collection fails).
}
// PveMetricsManager owns the registered collectors and drives periodic
// metrics collection on a fixed interval via a ticker goroutine.
type PveMetricsManager struct {
	apiClient      *proxmox.PveApiClient  // Proxmox virtual environment API client instance.
	collectors     []PveMetricsCollector  // Registered metrics collector instances.
	latencySummary *prometheus.SummaryVec // Per-collector collection latency summary (milliseconds).
	interval       int                    // Collection interval in seconds (multiplied by time.Second in Start).
	stop           chan struct{}          // Closed by Stop to terminate the ticker goroutine.
}
// NewPveMetricsManager creates a metrics manager, registers the collectors
// enabled in the configuration, and prepares the collection latency summary.
func NewPveMetricsManager(apiClient *proxmox.PveApiClient, conf *configuration.PveConfiguration) *PveMetricsManager {
	c := PveMetricsManager{
		apiClient: apiClient,
		interval:  conf.Interval,
		// BUGFIX: the stop channel must be allocated here. It was previously
		// left nil, so Stop() would panic (close of nil channel) and the
		// ticker goroutine could never be signalled to exit.
		stop: make(chan struct{}),
	}
	metricsCf := conf.Metrics
	// Cluster state metrics collector.
	if metricsCf.ClusterState {
		c.RegisterCollector(NewPveClusterStateCollector(apiClient))
	}
	// Node state metrics collector.
	if metricsCf.NodeStatus {
		c.RegisterCollector(NewPveNodeStatusCollector(apiClient))
	}
	// Node subscription state collector.
	if metricsCf.Subscription {
		c.RegisterCollector(NewPveSubscriptionCollector(apiClient))
	}
	// Node disk collector.
	if metricsCf.Disk {
		c.RegisterCollector(NewPveNodeDiskCollector(apiClient))
	}
	// Node SDN collector.
	if metricsCf.SDN {
		c.RegisterCollector(NewPveSdnCollector(apiClient))
	}
	// Node storage collector.
	if metricsCf.Storage {
		c.RegisterCollector(NewPveStorageCollector(apiClient))
	}
	// Node container collector.
	if metricsCf.LXC {
		c.RegisterCollector(NewPveContainerCollector(apiClient))
	}
	// Node virtual machine collector.
	if metricsCf.QEMU {
		c.RegisterCollector(NewPveVirtualMachineCollector(apiClient))
	}
	// Metrics collection latency summary, labelled per collector.
	c.latencySummary = promauto.NewSummaryVec(prometheus.SummaryOpts{
		Name: "pve_metrics_collection_latency_ms",
		Help: "Summary of metrics collection latency milliseconds from PVE API.",
	}, []string{"collector"})
	return &c
}
// collectMetrics runs one collection pass: every registered collector is
// invoked in turn, its latency is recorded on success, and failures are
// logged without aborting the remaining collectors.
func (c *PveMetricsManager) collectMetrics() {
	for _, col := range c.collectors {
		name := col.GetName()
		start := time.Now()
		log.Tracef("Collecting %s metrics...", name)
		if err := col.CollectMetrics(); err != nil {
			log.Errorf("Failed to collect '%s' metrics. Error: %s.", name, err)
			continue
		}
		elapsed := time.Since(start)
		log.Tracef("Finished collecting '%s' metrics after %s.", name, utils.HumanDuration(elapsed))
		c.latencySummary.With(prometheus.Labels{"collector": name}).Observe(float64(elapsed.Milliseconds()))
	}
}
// RegisterCollector adds a collector to the manager's collection loop.
func (c *PveMetricsManager) RegisterCollector(col PveMetricsCollector) {
	c.collectors = append(c.collectors, col)
	log.Infof("Metrics collector '%s' registered successfully.", col.GetName())
}
// Start performs one immediate collection pass (so metrics are available
// before the first tick) and then launches a goroutine that collects on
// every interval tick until Stop is called.
func (c *PveMetricsManager) Start() {
	ticker := time.NewTicker(time.Second * time.Duration(c.interval))
	c.collectMetrics()
	go func() {
		// BUGFIX: release ticker resources when the goroutine exits;
		// previously the ticker was never stopped after Stop().
		defer ticker.Stop()
		for {
			select {
			case <-ticker.C:
				c.collectMetrics()
			case <-c.stop:
				return
			}
		}
	}()
}
// Stop signals the collection goroutine started by Start to terminate.
// Must be called at most once: a second call would close an already-closed
// channel and panic.
func (c *PveMetricsManager) Stop() {
close(c.stop)
}

View File

@@ -0,0 +1,209 @@
package metrics
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"lostak.dev/pve-exporter/proxmox"
)
// PveContainerCollector exports per-container (LXC) metrics for every node
// in the cluster. All gauges are labelled by cluster, node, vmid and name.
type PveContainerCollector struct {
	apiClient    *proxmox.PveApiClient // PVE API client instance.
	state        *prometheus.GaugeVec  // Container state (numeric status).
	uptime       *prometheus.GaugeVec  // Container uptime; only set while running.
	cpu          *prometheus.GaugeVec  // Container CPU count.
	cpuUsage     *prometheus.GaugeVec  // Container CPU usage; only set while running.
	memBytes     *prometheus.GaugeVec  // Container total memory in bytes.
	memBytesUsed *prometheus.GaugeVec  // Container used memory in bytes; only set while running.
	netReceive   *prometheus.GaugeVec  // Container network RX in bytes; only set while running.
	netTransmit  *prometheus.GaugeVec  // Container network TX in bytes; only set while running.
	diskWrite    *prometheus.GaugeVec  // Container disk bytes written; only set while running.
	diskRead     *prometheus.GaugeVec  // Container disk bytes read; only set while running.
	disk         *prometheus.GaugeVec  // Container disk space usage in bytes; only set while running.
	diskMax      *prometheus.GaugeVec  // Container disk size in bytes.
	swap         *prometheus.GaugeVec  // Container swap usage in bytes; only set while running.
}
// NewPveContainerCollector creates a container (LXC) collector backed by the
// given PVE API client and registers all gauges with the default registry.
func NewPveContainerCollector(apiClient *proxmox.PveApiClient) *PveContainerCollector {
	// All container gauges share the same identifying label set.
	gauge := func(name, help string) *prometheus.GaugeVec {
		return promauto.NewGaugeVec(
			prometheus.GaugeOpts{Name: name, Help: help},
			[]string{"cluster", "node", "vmid", "name"},
		)
	}
	c := PveContainerCollector{apiClient: apiClient}
	c.state = gauge("pve_ct_state", "Container state.")
	c.uptime = gauge("pve_ct_uptime", "Container uptime.")
	c.cpu = gauge("pve_ct_cpu_count", "Container CPU count.")
	c.cpuUsage = gauge("pve_ct_cpu_usage", "Container CPU usage.")
	c.memBytes = gauge("pve_ct_mem_total_bytes", "Container total memory in bytes.")
	c.memBytesUsed = gauge("pve_ct_mem_used_bytes", "Container used memory in bytes.")
	c.netReceive = gauge("pve_ct_network_receive_bytes", "Container network RX bytes.")
	c.netTransmit = gauge("pve_ct_network_transmit_bytes", "Container network TX bytes.")
	c.diskWrite = gauge("pve_ct_disk_written_bytes", "Container disk written bytes.")
	c.diskRead = gauge("pve_ct_disk_read_bytes", "Container disk read bytes.")
	// BUGFIX: the usage gauge's help text previously said
	// "Container disk read bytes." (copy/paste error).
	c.disk = gauge("pve_ct_disk_usage_bytes", "Container disk usage bytes.")
	c.diskMax = gauge("pve_ct_disk_size_bytes", "Container disk size bytes.")
	c.swap = gauge("pve_ct_swap_used_bytes", "Container swap usage bytes.")
	return &c
}
// CollectMetrics implements PveMetricsCollector: it walks every node in the
// cluster and exports per-container metrics, skipping templates and only
// reporting runtime counters for running containers.
func (c *PveContainerCollector) CollectMetrics() error {
	cluster, err := c.apiClient.GetClusterStatus()
	if err != nil {
		return err
	}
	for _, node := range cluster.NodeStatuses {
		containers, err := c.apiClient.GetNodeContainerList(node.Name)
		if err != nil {
			return err
		}
		for _, ct := range *containers {
			// Templates are always offline; skip them entirely.
			if ct.Template == 1 {
				continue
			}
			labels := prometheus.Labels{
				"cluster": cluster.GetClusterName(),
				"node":    node.Name,
				"vmid":    ct.VMID,
				"name":    ct.Name,
			}
			// Metrics that are meaningful regardless of run state.
			c.state.With(labels).Set(ct.GetStatusNumeric())
			c.cpu.With(labels).Set(float64(ct.CPUs))
			c.memBytes.With(labels).Set(float64(ct.MaxMem))
			c.diskMax.With(labels).Set(float64(ct.MaxDisk))
			// Runtime counters are only reported while the container runs.
			if !ct.IsRunning() {
				continue
			}
			c.uptime.With(labels).Set(float64(ct.Uptime))
			c.cpuUsage.With(labels).Set(float64(ct.CPU))
			c.memBytesUsed.With(labels).Set(float64(ct.Mem))
			c.netReceive.With(labels).Set(float64(ct.NetIn))
			c.netTransmit.With(labels).Set(float64(ct.NetOut))
			c.diskRead.With(labels).Set(float64(ct.DiskRead))
			c.diskWrite.With(labels).Set(float64(ct.DiskWrite))
			c.disk.With(labels).Set(float64(ct.Disk))
			c.swap.With(labels).Set(float64(ct.Swap))
		}
	}
	return nil
}
// GetName implements PveMetricsCollector; the name is used in log messages.
func (c *PveContainerCollector) GetName() string {
	const collectorName = "Container"
	return collectorName
}

View File

@@ -0,0 +1,101 @@
package metrics
import (
"strconv"
"strings"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"lostak.dev/pve-exporter/proxmox"
)
// PveNodeDiskCollector exports physical disk metrics (SMART health, wearout,
// size) for every node in the cluster.
// (Original header comment wrongly said "cluster state collector".)
type PveNodeDiskCollector struct {
	apiClient *proxmox.PveApiClient // PVE API client instance.
	healthy   *prometheus.GaugeVec  // Node disk SMART passed state.
	wearout   *prometheus.GaugeVec  // Node disk wearout %; only set when the API reports a number.
	sizeBytes *prometheus.GaugeVec  // Node disk size in bytes.
}
// NewPveNodeDiskCollector creates a node disk collector backed by the given
// PVE API client and registers its gauges with the default registry.
func NewPveNodeDiskCollector(apiClient *proxmox.PveApiClient) *PveNodeDiskCollector {
	// Every disk gauge shares the same identifying label set.
	diskLabels := []string{"cluster", "node", "wwn", "type", "model", "serial", "vendor", "used", "osd_id"}
	gauge := func(name, help string) *prometheus.GaugeVec {
		return promauto.NewGaugeVec(prometheus.GaugeOpts{Name: name, Help: help}, diskLabels)
	}
	return &PveNodeDiskCollector{
		apiClient: apiClient,
		// Node disk healthy (SMART passed) state.
		healthy: gauge("pve_node_disk_healthy", "Node disk healthy state."),
		// Node disk wearout percentage.
		wearout: gauge("pve_node_disk_wearout", "Node disk wearout percent."),
		// Node disk capacity.
		sizeBytes: gauge("pve_node_disk_size_bytes", "Node disk size in bytes."),
	}
}
// CollectMetrics implements PveMetricsCollector: it enumerates the disks of
// every cluster node and exports their health, wearout and size.
func (c *PveNodeDiskCollector) CollectMetrics() error {
	cluster, err := c.apiClient.GetClusterStatus()
	if err != nil {
		return err
	}
	clusterName := cluster.GetClusterName()
	for _, node := range cluster.NodeStatuses {
		disks, err := c.apiClient.GetNodeDisksList(node.Name)
		if err != nil {
			return err
		}
		for _, d := range *disks {
			labels := prometheus.Labels{
				"cluster": clusterName,
				"node":    node.Name,
				"wwn":     d.WWN,
				"type":    d.Type,
				"model":   d.Model,
				"serial":  d.Serial,
				"vendor":  strings.TrimSpace(d.Vendor),
				"used":    d.Used,
				"osd_id":  strconv.Itoa(d.OSDID),
			}
			// Health and size are always reported.
			c.healthy.With(labels).Set(d.GetSmartPassedState())
			c.sizeBytes.With(labels).Set(float64(d.Size))
			// WearOut is an interface value; export it only when the API
			// returned a number (it may hold a non-numeric placeholder).
			if wearout, ok := d.WearOut.(float64); ok {
				c.wearout.With(labels).Set(wearout)
			}
		}
	}
	return nil
}
// GetName implements PveMetricsCollector; the name is used in log messages.
func (c *PveNodeDiskCollector) GetName() string {
	const collectorName = "Node Disks"
	return collectorName
}

View File

@@ -0,0 +1,65 @@
package metrics
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"lostak.dev/pve-exporter/proxmox"
)
// PveSdnCollector exports the state of software-defined-network (SDN) zones
// per cluster node.
type PveSdnCollector struct {
	apiClient *proxmox.PveApiClient // PVE API client instance.
	state     *prometheus.GaugeVec  // SDN zone state, labelled by cluster, node, sdn and sdn_id.
}
// NewPveSdnCollector creates an SDN collector backed by the given PVE API
// client and registers its gauge with the default registry.
func NewPveSdnCollector(apiClient *proxmox.PveApiClient) *PveSdnCollector {
	// Single gauge reporting the up/down state of each SDN per node.
	state := promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "pve_sdn_state",
			Help: "Node software defined network state.",
		},
		[]string{"cluster", "node", "sdn", "sdn_id"},
	)
	return &PveSdnCollector{apiClient: apiClient, state: state}
}
// CollectMetrics implements PveMetricsCollector: it looks up the SDN
// resources of every cluster node and exports their state.
func (c *PveSdnCollector) CollectMetrics() error {
	cluster, err := c.apiClient.GetClusterStatus()
	if err != nil {
		return err
	}
	resources, err := c.apiClient.GetClusterResources()
	if err != nil {
		return err
	}
	for _, node := range cluster.NodeStatuses {
		sdns := resources.FindNodeSDN(node.Name)
		// Robustness: guard against a nil result before dereferencing.
		// (Ranging over an empty slice is a no-op, so the previous
		// len(*sdns) > 0 check is unnecessary — but a nil pointer here
		// would have panicked.)
		if sdns == nil {
			continue
		}
		for _, sdn := range *sdns {
			labels := prometheus.Labels{
				"cluster": cluster.GetClusterName(),
				"node":    node.Name,
				"sdn":     sdn.SDN,
				"sdn_id":  sdn.ID,
			}
			c.state.With(labels).Set(sdn.GetStatusNumeric())
		}
	}
	return nil
}
// GetName implements PveMetricsCollector; the name is used in log messages.
func (c *PveSdnCollector) GetName() string {
	const collectorName = "SDN"
	return collectorName
}

View File

@@ -0,0 +1,325 @@
package metrics
import (
"strconv"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
log "github.com/sirupsen/logrus"
"lostak.dev/pve-exporter/proxmox"
)
// PveNodeStatusCollector exports per-node status metrics: state, uptime,
// memory, root filesystem, load averages, clock, CPU info and system info.
// (Original header comment wrongly said "cluster state collector".)
// NOTE(review): cpus, cpuUsage and cgroupMode gauges are created by the
// constructor but never set in CollectMetrics — confirm whether intentional.
type PveNodeStatusCollector struct {
	apiClient    *proxmox.PveApiClient // PVE API client instance.
	state        *prometheus.GaugeVec  // Node online state.
	uptime       *prometheus.GaugeVec  // Node uptime in seconds.
	cpus         *prometheus.GaugeVec  // Node CPU count gauge (currently never set).
	cpuUsage     *prometheus.GaugeVec  // Node CPU usage gauge (currently never set).
	memBytes     *prometheus.GaugeVec  // Node total RAM capacity in bytes.
	memBytesUsed *prometheus.GaugeVec  // Node RAM usage in bytes.
	memBytesFree *prometheus.GaugeVec  // Node RAM free in bytes.
	ksmShared    *prometheus.GaugeVec  // Node kernel samepage merging shared bytes.
	cgroupMode   *prometheus.GaugeVec  // Node cgroups mode gauge (currently never set).
	load1        *prometheus.GaugeVec  // Node 1-minute load average.
	load5        *prometheus.GaugeVec  // Node 5-minute load average.
	load15       *prometheus.GaugeVec  // Node 15-minute load average.
	fSFree       *prometheus.GaugeVec  // Node root filesystem free space in bytes.
	fSUsed       *prometheus.GaugeVec  // Node root filesystem used space in bytes.
	fSTotal      *prometheus.GaugeVec  // Node root filesystem total space in bytes.
	fSAvail      *prometheus.GaugeVec  // Node root filesystem available capacity in bytes.
	cpuInfo      *prometheus.GaugeVec  // Node CPU info (payload in labels, value 1).
	systemInfo   *prometheus.GaugeVec  // Node system info (payload in labels, value 1).
	time         *prometheus.GaugeVec  // Node time.
	localTime    *prometheus.GaugeVec  // Node localtime.
}
// NewPveNodeStatusCollector creates a node status collector backed by the
// given PVE API client and registers all gauges with the default registry.
func NewPveNodeStatusCollector(apiClient *proxmox.PveApiClient) *PveNodeStatusCollector {
	// nodeGauge builds a gauge keyed by cluster and node only, which covers
	// all metrics here except the two info-style gauges below.
	nodeGauge := func(name, help string) *prometheus.GaugeVec {
		return promauto.NewGaugeVec(
			prometheus.GaugeOpts{Name: name, Help: help},
			[]string{"cluster", "node"},
		)
	}
	c := PveNodeStatusCollector{apiClient: apiClient}
	c.state = nodeGauge("pve_node_state", "Node state.")
	c.uptime = nodeGauge("pve_node_uptime", "Node uptime.")
	c.cpus = nodeGauge("pve_node_cpu_count", "Node CPU count.")
	c.cpuUsage = nodeGauge("pve_node_cpu_usage", "Cluster node CPU usage %.")
	c.memBytes = nodeGauge("pve_node_memory_total_bytes", "Node total memory in bytes.")
	c.memBytesUsed = nodeGauge("pve_node_memory_used_bytes", "Node used memory in bytes.")
	c.memBytesFree = nodeGauge("pve_node_memory_free_bytes", "Node free memory in bytes.")
	c.ksmShared = nodeGauge("pve_node_ksm_bytes", "Node kernel samepage shares in bytes.")
	c.cgroupMode = nodeGauge("pve_node_cgroup_mode", "Node cgroup mode.")
	c.load1 = nodeGauge("pve_node_load1", "Node CPU load 1 minute average.")
	c.load5 = nodeGauge("pve_node_load5", "Node CPU load 5 minutes average.")
	c.load15 = nodeGauge("pve_node_load15", "Node CPU load 15 minutes average.")
	c.fSFree = nodeGauge("pve_node_rootfs_free_bytes", "Node RootFS free bytes.")
	c.fSUsed = nodeGauge("pve_node_rootfs_used_bytes", "Node root filesystem used bytes.")
	c.fSTotal = nodeGauge("pve_node_rootfs_total_bytes", "Node root filesystem total bytes.")
	c.fSAvail = nodeGauge("pve_node_rootfs_avail_bytes", "Node root filesystem avail bytes.")
	// Info-style gauges: payload is carried in labels, value is always 1.
	c.cpuInfo = promauto.NewGaugeVec(
		prometheus.GaugeOpts{Name: "pve_node_cpuinfo", Help: "Node CPU info."},
		[]string{"cluster", "node", "flags", "cores", "model", "sockets", "cpus", "hvm"},
	)
	c.systemInfo = promauto.NewGaugeVec(
		prometheus.GaugeOpts{Name: "pve_node_systeminfo", Help: "Node system info."},
		[]string{"cluster", "node", "kversion", "pveversion", "machine", "sysname", "release"},
	)
	c.time = nodeGauge("pve_node_time", "Node time.")
	c.localTime = nodeGauge("pve_node_localtime", "Node localtime.")
	return &c
}
// CollectMetrics implements PveMetricsCollector. For every cluster node it
// exports clock, state, memory, root filesystem, load-average, CPU info and
// system info metrics.
func (c *PveNodeStatusCollector) CollectMetrics() error {
	cluster, err := c.apiClient.GetClusterStatus()
	if err != nil {
		return err
	}
	for _, node := range cluster.NodeStatuses {
		labels := prometheus.Labels{
			"cluster": cluster.GetClusterName(),
			"node":    node.Name,
		}
		// Node clock.
		nodeTime, err := c.apiClient.GetNodeTime(node.Name)
		if err != nil {
			return err
		}
		c.time.With(labels).Set(float64(nodeTime.Time))
		c.localTime.With(labels).Set(float64(nodeTime.LocalTime))
		// Detailed node status.
		status, err := c.apiClient.GetNodeStatusDetail(node.Name)
		if err != nil {
			return err
		}
		c.state.With(labels).Set(float64(node.Online))
		c.uptime.With(labels).Set(float64(status.Uptime))
		c.memBytes.With(labels).Set(float64(status.Memory.Total))
		c.memBytesUsed.With(labels).Set(float64(status.Memory.Used))
		c.memBytesFree.With(labels).Set(float64(status.Memory.Free))
		c.ksmShared.With(labels).Set(float64(status.Ksm.Shared))
		c.fSFree.With(labels).Set(float64(status.Rootfs.Free))
		c.fSUsed.With(labels).Set(float64(status.Rootfs.Used))
		c.fSTotal.With(labels).Set(float64(status.Rootfs.Total))
		c.fSAvail.With(labels).Set(float64(status.Rootfs.Avail))
		// CPU load averages.
		// BUGFIX: the original indexed LoadAvg[1] and LoadAvg[2] after only
		// checking len > 0, panicking with index-out-of-range when the API
		// returns fewer than three entries. Each entry is now bounds-checked.
		if len(status.LoadAvg) == 0 {
			log.Error("CPU load stats are empty.")
		} else {
			// setLoad parses the idx-th load average (if present) into gauge.
			setLoad := func(idx int, name string, gauge *prometheus.GaugeVec) {
				if idx >= len(status.LoadAvg) {
					log.Errorf("Missing %s value in CPU load stats.", name)
					return
				}
				f, err := strconv.ParseFloat(status.LoadAvg[idx], 64)
				if err != nil {
					log.Errorf("Unable to parse %s. Error: %s.", name, err)
					return
				}
				gauge.With(labels).Set(f)
			}
			setLoad(0, "load1", c.load1)
			setLoad(1, "load5", c.load5)
			setLoad(2, "load15", c.load15)
		}
		// Node CPU info (payload in labels, value 1).
		cpuLabels := prometheus.Labels{
			"cluster": cluster.GetClusterName(),
			"node":    node.Name,
			"flags":   status.CPUInfo.Flags,
			"cores":   strconv.Itoa(status.CPUInfo.Cores),
			"model":   status.CPUInfo.Model,
			"sockets": strconv.Itoa(status.CPUInfo.Sockets),
			"cpus":    strconv.Itoa(status.CPUInfo.CPUs),
			"hvm":     status.CPUInfo.HVM,
		}
		c.cpuInfo.With(cpuLabels).Set(1)
		// Node system info (payload in labels, value 1).
		sysLabels := prometheus.Labels{
			"cluster":    cluster.GetClusterName(),
			"node":       node.Name,
			"kversion":   status.Kversion,
			"pveversion": status.PveVersion,
			"machine":    status.CurrentKernel.Machine,
			"sysname":    status.CurrentKernel.Sysname,
			"release":    status.CurrentKernel.Release,
		}
		c.systemInfo.With(sysLabels).Set(1)
	}
	return nil
}
// GetName implements PveMetricsCollector; the name is used in log messages.
func (c *PveNodeStatusCollector) GetName() string {
	const collectorName = "Node State"
	return collectorName
}

View File

@@ -0,0 +1,104 @@
package metrics
import (
"strconv"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"lostak.dev/pve-exporter/proxmox"
)
// PveStorageCollector exports per-storage metrics (state and capacity) for
// every node in the cluster; disabled storages are skipped.
type PveStorageCollector struct {
	apiClient *proxmox.PveApiClient // PVE API client instance.
	state     *prometheus.GaugeVec  // Storage active/up state.
	total     *prometheus.GaugeVec  // Storage total capacity in bytes.
	avail     *prometheus.GaugeVec  // Storage available capacity in bytes.
	used      *prometheus.GaugeVec  // Storage used capacity in bytes.
}
// NewPveStorageCollector creates a storage collector backed by the given PVE
// API client and registers its gauges with the default registry.
func NewPveStorageCollector(apiClient *proxmox.PveApiClient) *PveStorageCollector {
	// All storage gauges share the same label dimensions.
	gauge := func(name, help string) *prometheus.GaugeVec {
		return promauto.NewGaugeVec(
			prometheus.GaugeOpts{Name: name, Help: help},
			[]string{"cluster", "node", "storage", "type", "content", "shared"},
		)
	}
	return &PveStorageCollector{
		apiClient: apiClient,
		// Storage up/down state.
		state: gauge("pve_storage_up", "Node storage UP state."),
		// Storage capacity gauges.
		total: gauge("pve_storage_total_bytes", "Node storage total capacity in bytes."),
		avail: gauge("pve_storage_avail_bytes", "Node storage available capacity in bytes."),
		used:  gauge("pve_storage_used_bytes", "Node storage used capacity in bytes."),
	}
}
// CollectMetrics implements PveMetricsCollector: it enumerates the storages
// of every cluster node and exports state and capacity for enabled ones.
func (c *PveStorageCollector) CollectMetrics() error {
	cluster, err := c.apiClient.GetClusterStatus()
	if err != nil {
		return err
	}
	clusterName := cluster.GetClusterName()
	for _, node := range cluster.NodeStatuses {
		storages, err := c.apiClient.GetNodeStorages(node.Name)
		if err != nil {
			return err
		}
		for _, st := range *storages {
			// Disabled storages are not reported at all.
			if st.Enabled == 0 {
				continue
			}
			labels := prometheus.Labels{
				"cluster": clusterName,
				"node":    node.Name,
				"storage": st.Storage,
				"type":    st.Type,
				"content": st.Content,
				"shared":  strconv.Itoa(st.Shared),
			}
			c.state.With(labels).Set(float64(st.Active))
			c.total.With(labels).Set(float64(st.Total))
			c.avail.With(labels).Set(float64(st.Avail))
			c.used.With(labels).Set(float64(st.Used))
		}
	}
	return nil
}
// GetName implements PveMetricsCollector; the name is used in log messages.
func (c *PveStorageCollector) GetName() string {
	const collectorName = "Storage"
	return collectorName
}

View File

@@ -0,0 +1,135 @@
package metrics
import (
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"lostak.dev/pve-exporter/proxmox"
)
// PveSubscriptionCollector exports subscription metrics (status, dates,
// sockets and an info gauge) for every node in the cluster.
type PveSubscriptionCollector struct {
	apiClient   *proxmox.PveApiClient // PVE API client instance.
	info        *prometheus.GaugeVec  // Subscription info: product name / server id in labels, value 1.
	status      *prometheus.GaugeVec  // Subscription active status (numeric).
	nextDueDate *prometheus.GaugeVec  // Subscription next due date as unix timestamp; only set when present.
	regDate     *prometheus.GaugeVec  // Subscription registration date as unix timestamp; only set when present.
	sockets     *prometheus.GaugeVec  // Subscription licensed socket count.
}
// NewPveSubscriptionCollector creates a node subscription collector backed by
// the given PVE API client and registers its gauges with the default registry.
func NewPveSubscriptionCollector(apiClient *proxmox.PveApiClient) *PveSubscriptionCollector {
	// Most gauges are keyed by cluster and node only.
	nodeGauge := func(name, help string) *prometheus.GaugeVec {
		return promauto.NewGaugeVec(
			prometheus.GaugeOpts{Name: name, Help: help},
			[]string{"cluster", "node"},
		)
	}
	c := PveSubscriptionCollector{apiClient: apiClient}
	// Info-style gauge: subscription details carried in labels, value 1.
	c.info = promauto.NewGaugeVec(
		prometheus.GaugeOpts{Name: "pve_node_subscription_info", Help: "Node subscription info."},
		[]string{"cluster", "node", "productname", "serverid"},
	)
	c.status = nodeGauge("pve_node_subscription_status", "Node subscription status.")
	c.regDate = nodeGauge("pve_node_subscription_regdate", "Node subscription registration date.")
	c.nextDueDate = nodeGauge("pve_node_subscription_nextduedate", "Node subscription next due date.")
	c.sockets = nodeGauge("pve_node_subscription_sockets", "Node subscription count of sockets.")
	return &c
}
// CollectMetrics implements PveMetricsCollector: for every cluster node it
// fetches the subscription record and exports info, status, sockets and the
// optional registration / next-due dates (as unix timestamps).
func (c *PveSubscriptionCollector) CollectMetrics() error {
	cluster, err := c.apiClient.GetClusterStatus()
	if err != nil {
		return err
	}
	clusterName := cluster.GetClusterName()
	for _, node := range cluster.NodeStatuses {
		sub, err := c.apiClient.GetNodeSubscription(node.Name)
		if err != nil {
			return err
		}
		labels := prometheus.Labels{"cluster": clusterName, "node": node.Name}
		// Subscription details as an info-style gauge (value always 1).
		c.info.With(prometheus.Labels{
			"cluster":     clusterName,
			"node":        node.Name,
			"productname": sub.ProductName,
			"serverid":    sub.ServerID,
		}).Set(1)
		c.status.With(labels).Set(sub.GetActiveNumeric())
		c.sockets.With(labels).Set(float64(sub.Sockets))
		// Registration date (optional field, datetime format).
		if sub.Registration != "" {
			regTime, err := time.Parse("2006-01-02 15:04:05", sub.Registration)
			if err != nil {
				return err
			}
			c.regDate.With(labels).Set(float64(regTime.Unix()))
		}
		// Next due date (optional field, date-only format).
		if sub.NextDueDate != "" {
			dueTime, err := time.Parse("2006-01-02", sub.NextDueDate)
			if err != nil {
				return err
			}
			c.nextDueDate.With(labels).Set(float64(dueTime.Unix()))
		}
	}
	return nil
}
// GetName implements PveMetricsCollector; the name is used in log messages.
func (c *PveSubscriptionCollector) GetName() string {
	const collectorName = "Node Subscription"
	return collectorName
}

View File

@@ -0,0 +1,311 @@
package metrics
import (
"strconv"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"lostak.dev/pve-exporter/proxmox"
)
// PveVirtualMachineCollector exports per-virtual-machine (QEMU) metrics for
// every node in the cluster. Network gauges carry an extra "interface" label.
type PveVirtualMachineCollector struct {
	apiClient          *proxmox.PveApiClient // PVE API client instance.
	state              *prometheus.GaugeVec  // Virtual machine state.
	uptime             *prometheus.GaugeVec  // Virtual machine uptime.
	cpu                *prometheus.GaugeVec  // Virtual machine CPU count.
	cpuUsage           *prometheus.GaugeVec  // Virtual machine CPU usage %.
	memBytes           *prometheus.GaugeVec  // Virtual machine total memory in bytes.
	memBytesUsed       *prometheus.GaugeVec  // Virtual machine memory usage in bytes.
	disk               *prometheus.GaugeVec  // Virtual machine disk space usage in bytes.
	diskMax            *prometheus.GaugeVec  // Virtual machine disk size in bytes.
	swap               *prometheus.GaugeVec  // Virtual machine swap usage in bytes.
	netReceive         *prometheus.GaugeVec  // Virtual machine network receive in bytes (per interface).
	netTransmit        *prometheus.GaugeVec  // Virtual machine network transmit in bytes (per interface).
	diskReadOps        *prometheus.GaugeVec  // Virtual machine disk read operations.
	diskWriteOps       *prometheus.GaugeVec  // Virtual machine disk write operations.
	diskReadBytes      *prometheus.GaugeVec  // Virtual machine disk bytes read.
	diskWriteBytes     *prometheus.GaugeVec  // Virtual machine disk bytes written.
	diskReadTimeNs     *prometheus.GaugeVec  // Virtual machine total disk read time (ns).
	diskWriteTimeNs    *prometheus.GaugeVec  // Virtual machine total disk write time (ns).
	diskFailedReadOps  *prometheus.GaugeVec  // Virtual machine failed disk read operations.
	diskFailedWriteOps *prometheus.GaugeVec  // Virtual machine failed disk write operations.
	agent              *prometheus.GaugeVec  // Virtual machine guest agent enabled state.
}
// Create new instance of PVE virtual machine collector.
//
// Registers all per-VM gauges with the default Prometheus registry via
// promauto and stores the API client used later by CollectMetrics.
//
// NOTE(review): the struct also declares a `swap` gauge that is not
// initialized here and never set in CollectMetrics — confirm whether it
// should be wired up or removed from the struct.
func NewPveVirtualMachineCollector(apiClient *proxmox.PveApiClient) *PveVirtualMachineCollector {
	c := PveVirtualMachineCollector{apiClient: apiClient}
	// Virtual machine state.
	c.state = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "pve_vm_state",
			Help: "Virtual machine state.",
		},
		[]string{"cluster", "node", "vmid", "name"},
	)
	// Virtual machine uptime.
	c.uptime = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "pve_vm_uptime",
			Help: "Virtual machine uptime.",
		},
		[]string{"cluster", "node", "vmid", "name"},
	)
	// Virtual machine agent state.
	c.agent = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "pve_vm_agent",
			Help: "Virtual machine agent state.",
		},
		[]string{"cluster", "node", "vmid", "name"},
	)
	// Virtual machine CPU count.
	c.cpu = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "pve_vm_cpu_count",
			Help: "Virtual machine CPU count.",
		},
		[]string{"cluster", "node", "vmid", "name"},
	)
	// Virtual machine CPU usage.
	c.cpuUsage = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "pve_vm_cpu_usage",
			Help: "Virtual machine CPU usage.",
		},
		[]string{"cluster", "node", "vmid", "name"},
	)
	// Virtual machine memory total.
	c.memBytes = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "pve_vm_mem_total_bytes",
			Help: "Virtual machine total memory in bytes.",
		},
		[]string{"cluster", "node", "vmid", "name"},
	)
	// Virtual machine memory usage.
	c.memBytesUsed = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "pve_vm_mem_used_bytes",
			Help: "Virtual machine used memory in bytes.",
		},
		[]string{"cluster", "node", "vmid", "name"},
	)
	// Virtual machine disk usage.
	c.disk = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "pve_vm_disk_usage_bytes",
			// Fixed: previously said "disk read bytes", which described a
			// different metric (pve_vm_disk_rd_bytes).
			Help: "Virtual machine disk space usage in bytes.",
		},
		[]string{"cluster", "node", "vmid", "name"},
	)
	// Virtual machine disk size.
	c.diskMax = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "pve_vm_disk_size_bytes",
			Help: "Virtual machine disk size in bytes.",
		},
		[]string{"cluster", "node", "vmid", "name"},
	)
	// Virtual machine network receive bytes.
	c.netReceive = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "pve_vm_network_in_bytes",
			Help: "Virtual machine network receive in bytes.",
		},
		[]string{"cluster", "node", "vmid", "name", "interface"},
	)
	// Virtual machine network transmit bytes.
	c.netTransmit = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "pve_vm_network_out_bytes",
			Help: "Virtual machine network transmit in bytes.",
		},
		[]string{"cluster", "node", "vmid", "name", "interface"},
	)
	// Virtual machine disk read ops.
	c.diskReadOps = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "pve_vm_disk_rd_operations",
			Help: "Virtual machine disk read ops.",
		},
		[]string{"cluster", "node", "vmid", "name", "device"},
	)
	// Virtual machine disk write ops.
	c.diskWriteOps = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "pve_vm_disk_wr_operations",
			Help: "Virtual machine disk write ops.",
		},
		[]string{"cluster", "node", "vmid", "name", "device"},
	)
	// Virtual machine disk read bytes.
	c.diskReadBytes = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "pve_vm_disk_rd_bytes",
			Help: "Virtual machine disk read bytes.",
		},
		[]string{"cluster", "node", "vmid", "name", "device"},
	)
	// Virtual machine disk write bytes.
	c.diskWriteBytes = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "pve_vm_disk_wr_bytes",
			Help: "Virtual machine disk write bytes.",
		},
		[]string{"cluster", "node", "vmid", "name", "device"},
	)
	// Virtual machine failed disk read ops.
	c.diskFailedReadOps = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "pve_vm_disk_failed_rd_ops",
			Help: "Virtual machine failed disk read ops.",
		},
		[]string{"cluster", "node", "vmid", "name", "device"},
	)
	// Virtual machine failed disk write ops.
	c.diskFailedWriteOps = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "pve_vm_disk_failed_wr_ops",
			Help: "Virtual machine failed disk write ops.",
		},
		[]string{"cluster", "node", "vmid", "name", "device"},
	)
	// Virtual machine disk read time total nanoseconds.
	c.diskReadTimeNs = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "pve_vm_disk_rd_time_total_ns",
			Help: "Virtual machine disk read time total in nanoseconds.",
		},
		[]string{"cluster", "node", "vmid", "name", "device"},
	)
	// Virtual machine disk write time total nanoseconds.
	c.diskWriteTimeNs = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "pve_vm_disk_wr_time_total_ns",
			Help: "Virtual machine disk write time total in nanoseconds.",
		},
		[]string{"cluster", "node", "vmid", "name", "device"},
	)
	return &c
}
// PveMetricsCollector interface implementation.
//
// CollectMetrics queries the cluster status, then iterates every node's
// QEMU guest list and updates the per-VM gauges. Templates are skipped
// (they are always offline). Runtime metrics (uptime, CPU/memory usage,
// agent state, per-interface network and per-device block stats) are
// only collected for running VMs, which requires one extra API call per
// running VM. Returns the first API error encountered; gauges already
// set in this pass keep their values.
func (c *PveVirtualMachineCollector) CollectMetrics() error {
	cluster, err := c.apiClient.GetClusterStatus()
	if err != nil {
		return err
	}
	// Hoisted: the cluster name is the same for every label set below.
	clusterName := cluster.GetClusterName()
	for _, node := range cluster.NodeStatuses {
		qemus, err := c.apiClient.GetNodeQemuList(node.Name)
		if err != nil {
			return err
		}
		for _, qemu := range *qemus {
			// Skip templates because they are always offline.
			if qemu.Template == 1 {
				continue
			}
			// Hoisted: previously strconv.Itoa(qemu.VMID) was recomputed
			// for every label set (up to 3x per running VM).
			vmid := strconv.Itoa(qemu.VMID)
			labels := prometheus.Labels{
				"cluster": clusterName,
				"node":    node.Name,
				"vmid":    vmid,
				"name":    qemu.Name,
			}
			c.state.With(labels).Set(qemu.GetStatusNumeric())
			c.cpu.With(labels).Set(float64(qemu.CPUs))
			c.memBytes.With(labels).Set(float64(qemu.MaxMem))
			c.diskMax.With(labels).Set(float64(qemu.MaxDisk))
			// Metrics only on running virtual machines.
			if qemu.IsRunning() {
				c.uptime.With(labels).Set(float64(qemu.Uptime))
				c.cpuUsage.With(labels).Set(float64(qemu.CPU))
				c.memBytesUsed.With(labels).Set(float64(qemu.Mem))
				detail, err := c.apiClient.GetNodeQemu(node.Name, vmid)
				if err != nil {
					return err
				}
				c.agent.With(labels).Set(float64(detail.Agent))
				// Per-network-interface traffic counters.
				for iface, value := range detail.Nics {
					netLabels := prometheus.Labels{
						"cluster":   clusterName,
						"node":      node.Name,
						"vmid":      vmid,
						"name":      qemu.Name,
						"interface": iface,
					}
					c.netReceive.With(netLabels).Set(float64(value.NetIn))
					c.netTransmit.With(netLabels).Set(float64(value.NetOut))
				}
				// Per-block-device I/O counters.
				for device, value := range detail.BlockStat {
					devLabels := prometheus.Labels{
						"cluster": clusterName,
						"node":    node.Name,
						"vmid":    vmid,
						"name":    qemu.Name,
						"device":  device,
					}
					c.diskReadOps.With(devLabels).Set(float64(value.RdOperations))
					c.diskWriteOps.With(devLabels).Set(float64(value.WrOperations))
					c.diskReadBytes.With(devLabels).Set(float64(value.RdBytes))
					c.diskWriteBytes.With(devLabels).Set(float64(value.WrBytes))
					c.diskFailedReadOps.With(devLabels).Set(float64(value.FailedRdOperations))
					c.diskFailedWriteOps.With(devLabels).Set(float64(value.FailedWrOperations))
					c.diskReadTimeNs.With(devLabels).Set(float64(value.RdTotalTimeNs))
					c.diskWriteTimeNs.With(devLabels).Set(float64(value.WrTotalTimeNs))
				}
			}
		}
	}
	return nil
}
// PveMetricsCollector interface implementation.
//
// GetName returns the human-readable name of this collector, used for
// identifying it (e.g. in log output).
func (c *PveVirtualMachineCollector) GetName() string {
	const collectorName = "Virtual Machine"
	return collectorName
}