Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • gitlab-org/build/omnibus-mirror/node_exporter
1 result
Show changes
Commits on Source (2)
  • Matt Layher's avatar
    f6f9c8d6
  • Karsten Weiss's avatar
    cpu: Support processor-less (memory-only) NUMA nodes (#734) · a8d7d110
    Karsten Weiss authored
    * cpu: Support processor-less (memory-only) NUMA nodes
    
    Processor-less (memory-only) NUMA nodes exist e.g. in systems that use
    Intel Optane drives for RAM expansion using Intel Memory Drive
    Technology (IMDT).
    
    IMDT RAM expansion supports two modes:
    
    * "Unify Remote Memory domains": present a processor-less (memory-only)
      NUMA domain, which is the default
    * "Expand local memory domains": to expand each processor’s memory domain
      with a portion of the memory made available by Optane and IMDT
    
    This commit fixes a crash in the first case (when "cpulist" is empty).
    
    Here's an example of such a system:
    
    $ numastat -m|head -n5
    
    Per-node system memory usage (in MBs):
                              Node 0          Node 1          Node 2           Total
                     --------------- --------------- --------------- ---------------
    MemTotal               118239.56       130816.00       464384.00       713439.56
    
    $ for i in {0..2}; do echo -n "$i: " ; cat /sys/bus/node/devices/node$i/cpulist ; done
    0: 0-7,16-23
    1: 8-15,24-31
    2:
    
    $ /opt/vsmp/bin/vsmpversion -vvv
    Memory Drive Technology: 8.2.1455.74 (Sep 28 2017 13:09:59)
    System configuration:
        Boards:      3
           1 x Proc. + I/O + Memory
           2 x NVM devices (Intel SSDPED1K375GAQ)
        Processors:  2, Cores: 16, Threads: 32
            Intel(R) Xeon(R) CPU E5-2667 v4 @ 3.20GHz Stepping 01
        Memory (MB): 713472 (of 977450), Cache: 251416, Private: 12562
           1 x 249088MB   [262036/   678/12270]
           1 x 232192MB   [357707/125369/  146]  82:00.0#1
           1 x 232192MB   [357707/125369/  146]  83:00.0#1
    
    * cpu: rename some variables (pkg => node)
    
    * cpu: Use %v not %q in log.Debugf() format strings
    a8d7d110
Loading
Loading
@@ -111,7 +111,7 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
_, cpuname := filepath.Split(cpu)
 
if _, err := os.Stat(filepath.Join(cpu, "cpufreq")); os.IsNotExist(err) {
log.Debugf("CPU %q is missing cpufreq", cpu)
log.Debugf("CPU %v is missing cpufreq", cpu)
} else {
// sysfs cpufreq values are kHz, thus multiply by 1000 to export base units (Hz).
// See https://www.kernel.org/doc/Documentation/cpu-freq/user-guide.txt
Loading
Loading
@@ -132,7 +132,7 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
}
 
if _, err := os.Stat(filepath.Join(cpu, "thermal_throttle")); os.IsNotExist(err) {
log.Debugf("CPU %q is missing thermal_throttle", cpu)
log.Debugf("CPU %v is missing thermal_throttle", cpu)
continue
}
if value, err = readUintFromFile(filepath.Join(cpu, "thermal_throttle", "core_throttle_count")); err != nil {
Loading
Loading
@@ -141,36 +141,43 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
ch <- prometheus.MustNewConstMetric(c.cpuCoreThrottle, prometheus.CounterValue, float64(value), cpuname)
}
 
pkgs, err := filepath.Glob(sysFilePath("bus/node/devices/node[0-9]*"))
nodes, err := filepath.Glob(sysFilePath("bus/node/devices/node[0-9]*"))
if err != nil {
return err
}
 
// package/node loop
for _, pkg := range pkgs {
if _, err := os.Stat(filepath.Join(pkg, "cpulist")); os.IsNotExist(err) {
log.Debugf("package %q is missing cpulist", pkg)
// package / NUMA node loop
for _, node := range nodes {
if _, err := os.Stat(filepath.Join(node, "cpulist")); os.IsNotExist(err) {
log.Debugf("NUMA node %v is missing cpulist", node)
continue
}
cpulist, err := ioutil.ReadFile(filepath.Join(pkg, "cpulist"))
cpulist, err := ioutil.ReadFile(filepath.Join(node, "cpulist"))
if err != nil {
log.Debugf("could not read cpulist of package %q", pkg)
log.Debugf("could not read cpulist of NUMA node %v", node)
return err
}
// cpulist example of one package/node with HT: "0-11,24-35"
line := strings.Split(string(cpulist), "\n")[0]
if line == "" {
// Skip processor-less (memory-only) NUMA nodes.
// E.g. RAM expansion with Intel Optane Drive(s) using
// Intel Memory Drive Technology (IMDT).
log.Debugf("skipping processor-less (memory-only) NUMA node %v", node)
continue
}
firstCPU := strings.FieldsFunc(line, func(r rune) bool {
return r == '-' || r == ','
})[0]
if _, err := os.Stat(filepath.Join(pkg, "cpu"+firstCPU, "thermal_throttle", "package_throttle_count")); os.IsNotExist(err) {
log.Debugf("Package %q CPU %q is missing package_throttle", pkg, firstCPU)
if _, err := os.Stat(filepath.Join(node, "cpu"+firstCPU, "thermal_throttle", "package_throttle_count")); os.IsNotExist(err) {
log.Debugf("Node %v CPU %v is missing package_throttle", node, firstCPU)
continue
}
if value, err = readUintFromFile(filepath.Join(pkg, "cpu"+firstCPU, "thermal_throttle", "package_throttle_count")); err != nil {
if value, err = readUintFromFile(filepath.Join(node, "cpu"+firstCPU, "thermal_throttle", "package_throttle_count")); err != nil {
return err
}
pkgno := digitRegexp.FindAllString(pkg, 1)[0]
ch <- prometheus.MustNewConstMetric(c.cpuPackageThrottle, prometheus.CounterValue, float64(value), pkgno)
nodeno := digitRegexp.FindAllString(node, 1)[0]
ch <- prometheus.MustNewConstMetric(c.cpuPackageThrottle, prometheus.CounterValue, float64(value), nodeno)
}
 
return nil
Loading
Loading
Loading
Loading
@@ -150,6 +150,14 @@ Mode: 644
Path: sys/bus/node/devices/node0/cpulist
Lines: 1
0-3
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/bus/node/devices/node1
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/bus/node/devices/node1/cpulist
Lines: 1
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/class
Loading
Loading
Loading
Loading
@@ -24,6 +24,7 @@ import (
"regexp"
"strconv"
"strings"
"syscall"
 
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
Loading
Loading
@@ -61,8 +62,8 @@ func cleanMetricName(name string) string {
}
 
func addValueFile(data map[string]map[string]string, sensor string, prop string, file string) {
raw, e := ioutil.ReadFile(file)
if e != nil {
raw, err := sysReadFile(file)
if err != nil {
return
}
value := strings.Trim(string(raw), "\n")
Loading
Loading
@@ -74,6 +75,28 @@ func addValueFile(data map[string]map[string]string, sensor string, prop string,
data[sensor][prop] = value
}
 
// sysReadFile opens file and performs exactly one raw syscall.Read into a
// 128-byte buffer, returning only the bytes that read produced. It is a
// stripped-down stand-in for ioutil.ReadFile: content past the first 128
// bytes is not returned, and any open or read error is passed back as-is
// with a nil slice.
func sysReadFile(file string) ([]byte, error) {
	// Some machines have broken hwmon drivers that answer reads with EAGAIN,
	// which makes Go's ioutil.ReadFile implementation poll forever.
	//
	// We want either data or an immediate failure, so issue the simplest
	// possible read through the syscall package instead.
	const maxLen = 128

	handle, openErr := os.Open(file)
	if openErr != nil {
		return nil, openErr
	}
	defer handle.Close()

	buf := make([]byte, maxLen)
	fd := int(handle.Fd())

	count, readErr := syscall.Read(fd, buf)
	if readErr != nil {
		return nil, readErr
	}
	return buf[:count], nil
}
// explodeSensorFilename splits a sensor name into <type><num>_<property>.
func explodeSensorFilename(filename string) (ok bool, sensorType string, sensorNum int, sensorProperty string) {
matches := hwmonFilenameFormat.FindStringSubmatch(filename)
Loading
Loading