Unverified Commit 51586efa authored by Luiz Aoqui's avatar Luiz Aoqui
Browse files

metrics: use client RPC IP in node_rpc_ip label

The IP in the HTTP address is the one used by external consumers, which is
different from the client IP seen internally by Nomad (the RPC IP). The RPC
IP is likely to be more useful for correlating metrics data.
parent 2ed16217
Showing with 47 additions and 37 deletions
+47 -37
```release-note:improvement
metrics: add `node_ip` label to host metrics
metrics: add `node_rpc_ip` label to host metrics
```
......@@ -772,9 +772,8 @@ func (c *Client) NodeID() string {
}
// NodeIP returns the IP address the node is listening on.
func (c *Client) NodeIP() string {
addr, _, _ := net.SplitHostPort(c.config.Node.HTTPAddr)
return addr
// NodeRPCIP reports the IP portion of the client's configured RPC address,
// rendered as a string.
func (c *Client) NodeRPCIP() string {
	rpcAddr := c.config.RPCAddr
	return rpcAddr.IP.String()
}
// secretNodeID returns the secret node ID for the given client
......@@ -2890,7 +2889,7 @@ func (c *Client) emitStats() {
// is ready
c.baseLabels = []metrics.Label{
{Name: "node_id", Value: c.NodeID()},
{Name: "node_ip", Value: c.NodeIP()},
{Name: "node_rpc_ip", Value: c.NodeRPCIP()},
{Name: "datacenter", Value: c.Datacenter()},
{Name: "node_class", Value: emittedNodeClass},
}
......
......@@ -73,13 +73,13 @@ func TestClient_BaseLabels(t *testing.T) {
assert.NotEqual(0, len(baseLabels))
nodeID := client.Node().ID
nodeIP, _, _ := net.SplitHostPort(client.Node().HTTPAddr)
nodeRPCIP := client.config.RPCAddr.IP.String()
for _, e := range baseLabels {
if e.Name == "node_id" {
assert.Equal(nodeID, e.Value)
}
if e.Name == "node_ip" {
assert.Equal(nodeIP, e.Value)
if e.Name == "node_rpc_ip" {
assert.Equal(nodeRPCIP, e.Value)
}
}
}
......
......@@ -4,6 +4,7 @@ import (
"errors"
"fmt"
"io"
"net"
"os"
"reflect"
"strconv"
......@@ -124,6 +125,9 @@ type Config struct {
// Servers is a list of known server addresses, each in "host:port" form.
Servers []string
// RPCAddr is the RPC address used by the client.
RPCAddr *net.TCPAddr
// RPCHandler can be provided to avoid network traffic if the
// server is running locally.
RPCHandler RPCHandler
......@@ -736,6 +740,7 @@ func DefaultConfig() *Config {
Attempts: helper.IntToPtr(0), // unlimited
},
},
RPCAddr: &net.TCPAddr{IP: net.ParseIP("127.0.0.1"), Port: 4647},
RPCHoldTimeout: 5 * time.Second,
CNIPath: "/opt/cni/bin",
CNIConfigDir: "/opt/cni/config",
......
......@@ -695,6 +695,12 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) {
conf.ACLPolicyTTL = agentConfig.ACL.PolicyTTL
// Setup networking configuration
rpcAddr, err := net.ResolveTCPAddr("tcp", agentConfig.normalizedAddrs.RPC)
if err != nil {
return nil, fmt.Errorf("Failed to parse client RPC address %q: %v", agentConfig.normalizedAddrs.RPC, err)
}
conf.RPCAddr.IP = rpcAddr.IP
conf.RPCAddr.Port = rpcAddr.Port
conf.CNIPath = agentConfig.Client.CNIPath
conf.CNIConfigDir = agentConfig.Client.CNIConfigDir
conf.BridgeNetworkName = agentConfig.Client.BridgeNetworkName
......
......@@ -150,35 +150,35 @@ parameterized or periodic job respectively. For example, a dispatch job with the
Nomad will emit [tagged metrics][tagged-metrics], in the below format:
| Metric | Description | Unit | Type | Labels |
| --------------------------------------- | ----------------------------------------------------------------------------------- | ---------- | ----- | ---------------------------------------------------------------------------------------------- |
| `nomad.client.allocated.cpu` | Total amount of CPU shares the scheduler has allocated to tasks | Mhz | Gauge | datacenter, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.allocated.memory` | Total amount of memory the scheduler has allocated to tasks | Megabytes | Gauge | datacenter, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.allocated_disk` | Total amount of disk space the scheduler has allocated to tasks | Megabytes | Gauge | datacenter, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.allocations.blocked` | Number of allocations blocked | Integer | Gauge | datacenter, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.allocations.migrating` | Number of allocations migrating | Integer | Gauge | datacenter, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.allocations.pending` | Number of allocations pending | Integer | Gauge | datacenter, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.allocations.running` | Number of allocations running | Integer | Gauge | datacenter, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.allocations.start` | Number of allocations starting | Integer | Gauge | datacenter, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.allocations.terminal` | Number of allocations terminal | Integer | Gauge | datacenter, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.allocs.oom_killed` | Number of allocations OOM killed | Integer | Gauge | datacenter, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.cpu.idle` | CPU utilization in idle state | Percentage | Gauge | cpu, datacenter, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.cpu.system` | CPU utilization in system space | Percentage | Gauge | cpu, datacenter, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.cpu.total` | Total CPU utilization | Percentage | Gauge | cpu, datacenter, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.cpu.user` | CPU utilization in user space | Percentage | Gauge | cpu, datacenter, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.disk.available` | Amount of space which is available | Bytes | Gauge | datacenter, disk, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.disk.inodes_percent` | Disk space consumed by the inodes | Percentage | Gauge | datacenter, disk, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.disk.size` | Total size of the device | Bytes | Gauge | datacenter, disk, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.disk.used_percent` | Percentage of disk space used | Percentage | Gauge | datacenter, disk, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.disk.used` | Amount of space which has been used | Bytes | Gauge | datacenter, disk, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.memory.available` | Total amount of memory available to processes which includes free and cached memory | Bytes | Gauge | datacenter, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.memory.free` | Amount of memory which is free | Bytes | Gauge | datacenter, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.memory.total` | Total amount of physical memory on the node | Bytes | Gauge | datacenter, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.memory.used` | Amount of memory used by processes | Bytes | Gauge | datacenter, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.unallocated.cpu` | Total amount of CPU shares free for the scheduler to allocate to tasks | Mhz | Gauge | datacenter, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.unallocated.disk` | Total amount of disk space free for the scheduler to allocate to tasks | Megabytes | Gauge | datacenter, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.unallocated.memory` | Total amount of memory free for the scheduler to allocate to tasks | Megabytes | Gauge | datacenter, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| `nomad.client.uptime` | Uptime of the host running the Nomad client | Seconds | Gauge | datacenter, host, node_class, node_id, node_ip, node_scheduling_eligibility, node_status |
| Metric | Description | Unit | Type | Labels |
| --------------------------------------- | ----------------------------------------------------------------------------------- | ---------- | ----- | -------------------------------------------------------------------------------------------------- |
| `nomad.client.allocated.cpu` | Total amount of CPU shares the scheduler has allocated to tasks | Mhz | Gauge | datacenter, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.allocated.memory` | Total amount of memory the scheduler has allocated to tasks | Megabytes | Gauge | datacenter, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.allocated_disk` | Total amount of disk space the scheduler has allocated to tasks | Megabytes | Gauge | datacenter, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.allocations.blocked` | Number of allocations blocked | Integer | Gauge | datacenter, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.allocations.migrating` | Number of allocations migrating | Integer | Gauge | datacenter, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.allocations.pending` | Number of allocations pending | Integer | Gauge | datacenter, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.allocations.running` | Number of allocations running | Integer | Gauge | datacenter, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.allocations.start` | Number of allocations starting | Integer | Gauge | datacenter, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.allocations.terminal` | Number of allocations terminal | Integer | Gauge | datacenter, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.allocs.oom_killed` | Number of allocations OOM killed | Integer | Gauge | datacenter, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.cpu.idle` | CPU utilization in idle state | Percentage | Gauge | cpu, datacenter, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.cpu.system` | CPU utilization in system space | Percentage | Gauge | cpu, datacenter, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.cpu.total` | Total CPU utilization | Percentage | Gauge | cpu, datacenter, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.cpu.user` | CPU utilization in user space | Percentage | Gauge | cpu, datacenter, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.disk.available` | Amount of space which is available | Bytes | Gauge | datacenter, disk, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.disk.inodes_percent` | Disk space consumed by the inodes | Percentage | Gauge | datacenter, disk, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.disk.size` | Total size of the device | Bytes | Gauge | datacenter, disk, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.disk.used_percent` | Percentage of disk space used | Percentage | Gauge | datacenter, disk, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.disk.used` | Amount of space which has been used | Bytes | Gauge | datacenter, disk, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.memory.available` | Total amount of memory available to processes which includes free and cached memory | Bytes | Gauge | datacenter, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.memory.free` | Amount of memory which is free | Bytes | Gauge | datacenter, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.memory.total` | Total amount of physical memory on the node | Bytes | Gauge | datacenter, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.host.memory.used` | Amount of memory used by processes | Bytes | Gauge | datacenter, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.unallocated.cpu` | Total amount of CPU shares free for the scheduler to allocate to tasks | Mhz | Gauge | datacenter, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.unallocated.disk` | Total amount of disk space free for the scheduler to allocate to tasks | Megabytes | Gauge | datacenter, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.unallocated.memory` | Total amount of memory free for the scheduler to allocate to tasks | Megabytes | Gauge | datacenter, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
| `nomad.client.uptime` | Uptime of the host running the Nomad client | Seconds | Gauge | datacenter, host, node_class, node_id, node_rpc_ip, node_scheduling_eligibility, node_status |
## Allocation Metrics
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment