ACK: [PATCH 08/11] fwts: Use linux perf counters for cpu benchmarking

Colin Ian King colin.king at canonical.com
Tue May 26 09:40:48 UTC 2015


On 21/05/15 10:34, Jeremy Kerr wrote:
> Currently, we use a loop counter to measure cpu performance. There are a
> couple of drawbacks of this:
> 
> 1) the numbers are fairly arbitrary, and don't reflect any external
> values; and
> 
> 2) the results are subject to perturbation from other tasks.
> 
> This change uses the linux perf counters to measure actual CPU cycles,
> where available. This means we get numbers that are meaningful, and
> we'll get the performance value even if our process has been scheduled
> off the CPU.
> 
> Now, the new results should match the actual cpu frequencies:
> 
>   CPU 0: 16 CPU frequency steps supported.
>    Frequency | Relative Speed |  Cycles    | Bogo loops
>   -----------+----------------+------------+-----------
>    1.200 GHz |      36.5 %    | 1197262295 |    114643
>    1.300 GHz |      39.5 %    | 1297048295 |    124214
>    1.400 GHz |      42.6 %    | 1396813066 |    133889
>    1.500 GHz |      45.6 %    | 1496592732 |    143456
>    1.600 GHz |      48.6 %    | 1596393751 |    150124
>    1.700 GHz |      51.7 %    | 1696137977 |    162350
>    1.800 GHz |      54.7 %    | 1795918438 |    172071
>    1.900 GHz |      57.8 %    | 1895685372 |    181249
>    2.000 GHz |      60.8 %    | 1995453204 |    191176
>    2.100 GHz |      63.8 %    | 2095225345 |    200753
>    2.200 GHz |      66.9 %    | 2194993645 |    210282
>    2.300 GHz |      69.9 %    | 2294780535 |    219945
>    2.400 GHz |      73.0 %    | 2394544770 |    229664
>    2.500 GHz |      76.0 %    | 2494315101 |    239121
>    2.600 GHz |      79.0 %    | 2594055116 |    248536
>    2.601 GHz |     100.0 %    | 3281891417 |    314546
> 
>   CPU 0 performance scaling OK
> 
> Because we still want benchmarks to work when perf counters are not
> available, we still measure the loop count. This means we need to
> represent test results as a struct, with cycles and loops members.
> 
> When tests need a scalar performance value,
> fwts_cpu_benchmark_best_result will return the most accurate result
> measured.
> 
> Signed-off-by: Jeremy Kerr <jk at ozlabs.org>
> 
> ---
>  src/acpi/cstates/cstates.c |    4 -
>  src/cpu/cpufreq/cpufreq.c  |   63 +++++++++++++--------
>  src/lib/include/fwts_cpu.h |   11 +++
>  src/lib/src/fwts_cpu.c     |  107 ++++++++++++++++++++++++++++++++++---
>  4 files changed, 153 insertions(+), 32 deletions(-)
> 
> diff --git a/src/acpi/cstates/cstates.c b/src/acpi/cstates/cstates.c
> index b58f15c..42c634c 100644
> --- a/src/acpi/cstates/cstates.c
> +++ b/src/acpi/cstates/cstates.c
> @@ -133,9 +133,9 @@ static void do_cpu(fwts_framework *fw, int nth, int cpus, int cpu, char *path)
>  		if ((i & 7) < 4)
>  			sleep(1);
>  		else {
> -			uint64_t loop_count;
> +			fwts_cpu_benchmark_result result;
>  
> -			if (fwts_cpu_performance(fw, cpu, &loop_count) != FWTS_OK) {
> +			if (fwts_cpu_benchmark(fw, cpu, &result) != FWTS_OK) {
>  				fwts_failed(fw, LOG_LEVEL_HIGH, "CPUFailedPerformance",
>  					"Could not determine the CPU performance, this "
>  					"may be due to not being able to get or set the "
> diff --git a/src/cpu/cpufreq/cpufreq.c b/src/cpu/cpufreq/cpufreq.c
> index 979a9e1..e409375 100644
> --- a/src/cpu/cpufreq/cpufreq.c
> +++ b/src/cpu/cpufreq/cpufreq.c
> @@ -45,7 +45,7 @@
>  
>  typedef struct {
>  	uint64_t	Hz;
> -	uint64_t	speed;
> +	fwts_cpu_benchmark_result perf;
>  } fwts_cpu_freq;
>  
>  struct cpu {
> @@ -149,6 +149,7 @@ static int get_performance_repeat(
>  	const int type,
>  	uint64_t *retval)
>  {
> +	fwts_cpu_benchmark_result result;
>  	int i;
>  
>  	uint64_t max = 0;
> @@ -159,9 +160,10 @@ static int get_performance_repeat(
>  	for (i = 0; i < count; i++) {
>  		uint64_t temp;
>  
> -		if (fwts_cpu_performance(fw, cpu->idx, &temp) != FWTS_OK)
> +		if (fwts_cpu_benchmark(fw, cpu->idx, &result) != FWTS_OK)
>  			return FWTS_ERROR;
>  
> +		temp = fwts_cpu_benchmark_best_result(&result);
>  		if (temp) {
>  			if (temp < min)
>  				min = temp;
> @@ -276,17 +278,19 @@ static int test_one_cpu_performance(fwts_framework *fw, struct cpu *cpu,
>  	int i;
>  
>  	for (i = 0; i < cpu->n_freqs; i++) {
> +		uint64_t perf;
> +
>  		cpu_set_frequency(fw, cpu, cpu->freqs[i].Hz);
>  
> -		if (fwts_cpu_performance(fw, cpu->idx, &cpu->freqs[i].speed)
> +		if (fwts_cpu_benchmark(fw, cpu->idx, &cpu->freqs[i].perf)
>  				!= FWTS_OK) {
>  			fwts_log_error(fw, "Failed to get CPU performance for "
>  				"CPU frequency %" PRId64 " Hz.",
>  				cpu->freqs[i].Hz);
> -			cpu->freqs[i].speed = 0;
>  		}
> -		if (cpu->freqs[i].speed > cpu_top_perf)
> -			cpu_top_perf = cpu->freqs[i].speed;
> +		perf = fwts_cpu_benchmark_best_result(&cpu->freqs[i].perf);
> +		if (perf > cpu_top_perf)
> +			cpu_top_perf = perf;
>  
>  		fwts_progress(fw, (100 * ((cpu_idx * cpu->n_freqs) + i)) /
>  				(n_online_cpus * cpu->n_freqs));
> @@ -294,37 +298,46 @@ static int test_one_cpu_performance(fwts_framework *fw, struct cpu *cpu,
>  
>  	fwts_log_info(fw, "CPU %d: %i CPU frequency steps supported.",
>  			cpu->idx, cpu->n_freqs);
> -	fwts_log_info_verbatum(fw, " Frequency | Relative Speed | Bogo loops");
> -	fwts_log_info_verbatum(fw, "-----------+----------------+-----------");
> +	fwts_log_info_verbatum(fw,
> +		" Frequency | Relative Speed |  Cycles    | Bogo loops");
> +	fwts_log_info_verbatum(fw,
> +		"-----------+----------------+------------+-----------");
>  	for (i = 0; i < cpu->n_freqs; i++) {
>  		char *turbo = "";
>  #ifdef FWTS_ARCH_INTEL
>  		if ((i == 0) && (cpu->n_freqs > 1) &&
> -		    (hz_almost_equal(cpu->freqs[i].Hz, cpu->freqs[i + 1].Hz)))
> +			(hz_almost_equal(cpu->freqs[i].Hz, cpu->freqs[i + 1].Hz)))
>  			turbo = " (Turbo Boost)";
>  #endif
> -		fwts_log_info_verbatum(fw, "%10s |     %5.1f %%    | %9" PRIu64
> -				"%s",
> +		uint64_t perf = fwts_cpu_benchmark_best_result(
> +				&cpu->freqs[i].perf);
> +		fwts_log_info_verbatum(fw,
> +				"%10s |     %5.1f %%    "
> +				"| %10" PRIu64 " | %9" PRIu64 "%s",
>  			hz_to_human(cpu->freqs[i].Hz),
> -			100.0 * cpu->freqs[i].speed / cpu_top_perf,
> -			cpu->freqs[i].speed, turbo);
> +			100.0 * perf / cpu_top_perf,
> +			cpu->freqs[i].perf.cycles,
> +			cpu->freqs[i].perf.loops,
> +			turbo);
>  	}
>  
>  	fwts_log_nl(fw);
>  
>  	/* now check for increasing performance */
>  	for (i = 0; i < cpu->n_freqs - 1; i++) {
> -		if (cpu->freqs[i].speed <= cpu->freqs[i+1].speed)
> +		uint64_t perf, last_perf;
> +
> +		last_perf = fwts_cpu_benchmark_best_result(&cpu->freqs[i].perf);
> +		perf = fwts_cpu_benchmark_best_result(&cpu->freqs[i+1].perf);
> +		if (last_perf <= perf)
>  			continue;
>  
>  		fwts_log_warning(fw,
>  			"Supposedly higher frequency %s is slower (%" PRIu64
> -			" bogo loops) than frequency %s (%" PRIu64
> -			" bogo loops) on CPU %i.",
> -			hz_to_human(cpu->freqs[i+1].Hz),
> -			cpu->freqs[i+1].speed,
> -			hz_to_human(cpu->freqs[i].Hz),
> -			cpu->freqs[i].speed,
> +			") than frequency %s (%" PRIu64
> +			") on CPU %i.",
> +			hz_to_human(cpu->freqs[i+1].Hz), perf,
> +			hz_to_human(cpu->freqs[i].Hz), last_perf,
>  			cpu->idx);
>  		return FWTS_ERROR;
>  	}
> @@ -459,6 +472,7 @@ static int cpufreq_test_sw_any(fwts_framework *fw)
>  {
>  	uint64_t low_perf, high_perf, newhigh_perf;
>  	int i, j, rc, n_tests, performed_tests;
> +	fwts_cpu_benchmark_result result;
>  	bool ok;
>  
>  	rc = sw_tests_possible(fw);
> @@ -478,12 +492,13 @@ static int cpufreq_test_sw_any(fwts_framework *fw)
>  		cpu_set_lowest_frequency(fw, &cpus[i]);
>  
>  	/* assume that all processors have the same low performance */
> -	if (fwts_cpu_performance(fw, cpus[0].idx, &low_perf) != FWTS_OK) {
> +	if (fwts_cpu_benchmark(fw, cpus[0].idx, &result) != FWTS_OK) {
>  		fwts_failed(fw, LOG_LEVEL_MEDIUM,
>  			"CPUFreqCPsSetToSW_ANYGetPerf",
>  			"Cannot get CPU performance.");
>  		return FWTS_ERROR;
>  	}
> +	low_perf = fwts_cpu_benchmark_best_result(&result);
>  
>  	ok = true;
>  
> @@ -497,12 +512,13 @@ static int cpufreq_test_sw_any(fwts_framework *fw)
>  		if (!cpu->online)
>  			continue;
>  
> -		if (fwts_cpu_performance(fw, cpu->idx, &high_perf) != FWTS_OK) {
> +		if (fwts_cpu_benchmark(fw, cpu->idx, &result) != FWTS_OK) {
>  			fwts_failed(fw, LOG_LEVEL_MEDIUM,
>  				"CPUFreqCPsSetToSW_ANYGetPerf",
>  				"Cannot get CPU performance.");
>  			return FWTS_ERROR;
>  		}
> +		high_perf = fwts_cpu_benchmark_best_result(&result);
>  
>  		performed_tests++;
>  		fwts_progress(fw, 100 * performed_tests/n_tests);
> @@ -514,13 +530,14 @@ static int cpufreq_test_sw_any(fwts_framework *fw)
>  		for (j = 0; j < num_cpus; j++)
>  			if (i != j)
>  				cpu_set_lowest_frequency(fw, &cpus[j]);
> -		if (fwts_cpu_performance(fw, cpu->idx, &newhigh_perf)
> +		if (fwts_cpu_benchmark(fw, cpu->idx, &result)
>  				!= FWTS_OK) {
>  			fwts_failed(fw, LOG_LEVEL_MEDIUM,
>  				"CPUFreqCPsSetToSW_ANYGetPerf",
>  				"Cannot get CPU performance.");
>  			return FWTS_ERROR;
>  		}
> +		newhigh_perf = fwts_cpu_benchmark_best_result(&result);
>  		if ((high_perf > newhigh_perf) &&
>  		    (high_perf - newhigh_perf > (high_perf - low_perf)/4) &&
>  		    (high_perf - low_perf > 20)) {
> diff --git a/src/lib/include/fwts_cpu.h b/src/lib/include/fwts_cpu.h
> index b132697..7162316 100644
> --- a/src/lib/include/fwts_cpu.h
> +++ b/src/lib/include/fwts_cpu.h
> @@ -33,6 +33,12 @@ typedef struct cpuinfo_x86 {
>  	char *flags;		/* String containing flags */
>  } fwts_cpuinfo_x86;
>  
> +typedef struct cpu_benchmark_result {
> +	bool		cycles_valid;
> +	uint64_t	loops;
> +	uint64_t	cycles;
> +} fwts_cpu_benchmark_result;
> +
>  int fwts_cpu_readmsr(const int cpu, const uint32_t reg, uint64_t *val);
>  
>  int fwts_cpu_is_Intel(bool *is_intel);
> @@ -46,6 +52,9 @@ int fwts_cpu_enumerate(void);
>  int fwts_cpu_consume(const int seconds);
>  int fwts_cpu_consume_start(void);
>  void fwts_cpu_consume_complete(void);
> -int fwts_cpu_performance(fwts_framework *fw, const int cpu, uint64_t *loop_count);
> +int fwts_cpu_benchmark(fwts_framework *fw, const int cpu,
> +		fwts_cpu_benchmark_result *result);
> +
> +uint64_t fwts_cpu_benchmark_best_result(fwts_cpu_benchmark_result *res);
>  
>  #endif
> diff --git a/src/lib/src/fwts_cpu.c b/src/lib/src/fwts_cpu.c
> index 75b1100..a7cfd3d 100644
> --- a/src/lib/src/fwts_cpu.c
> +++ b/src/lib/src/fwts_cpu.c
> @@ -26,8 +26,10 @@
>  #include <limits.h>
>  #include <string.h>
>  #include <dirent.h>
> +#include <sys/ioctl.h>
>  #include <sys/stat.h>
>  #include <sys/types.h>
> +#include <sys/syscall.h>
>  #include <sys/wait.h>
>  #include <signal.h>
>  #include <fcntl.h>
> @@ -35,6 +37,8 @@
>  #include <sched.h>
>  #include <time.h>
>  
> +#include <linux/perf_event.h>
> +
>  #include "fwts_types.h"
>  #include "fwts_cpu.h"
>  #include "fwts_pipeio.h"
> @@ -312,20 +316,73 @@ static void fwts_cpu_burn_cycles(void)
>  	}
>  }
>  
> +static int perf_setup_counter(int cpu)
> +{
> +	struct perf_event_attr attr;
> +	int fd;
> +
> +	memset(&attr, 0, sizeof(attr));
> +	attr.type = PERF_TYPE_HARDWARE;
> +	attr.config = PERF_COUNT_HW_CPU_CYCLES;
> +	attr.disabled = 1;
> +	attr.size = sizeof(attr);
> +
> +	fd = syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
> +	return fd;
> +}
> +
> +static int perf_start_counter(int fd)
> +{
> +	int rc;
> +
> +	rc = ioctl(fd, PERF_EVENT_IOC_ENABLE);
> +	return rc == 0 ? FWTS_OK : FWTS_ERROR;
> +}
> +
> +static int perf_stop_counter(int fd)
> +{
> +	int rc;
> +
> +	rc = ioctl(fd, PERF_EVENT_IOC_DISABLE);
> +	return rc == 0 ? FWTS_OK : FWTS_ERROR;
> +}
> +
> +static int perf_read_counter(int fd, unsigned long long *result)
> +{
> +	unsigned long long buf;
> +	int rc;
> +
> +	rc = read(fd, &buf, sizeof(buf));
> +	if (rc == sizeof(buf)) {
> +		*result = buf;
> +		rc = FWTS_OK;
> +	} else {
> +		rc = FWTS_ERROR;
> +	}
> +
> +	close(fd);
> +	return rc;
> +}
> +
>  /*
> - *  fwts_cpu_performance()
> + *  fwts_cpu_benchmark()
>   *
>   */
> -int fwts_cpu_performance(
> +int fwts_cpu_benchmark(
>  	fwts_framework *fw,
>  	const int cpu,		/* CPU we want to measure performance */
> -	uint64_t *loop_count)	/* Returned measure of bogo compute power */
> +	fwts_cpu_benchmark_result *result)
>  {
> +	unsigned long long perfctr_result;
> +	fwts_cpu_benchmark_result tmp;
>  	cpu_set_t mask, oldset;
> +	int perfctr, ncpus, rc;
> +	static bool warned;
>  	time_t current;
> -	int ncpus = fwts_cpu_enumerate();
> +	bool perf_ok;
>  
> -	*loop_count = 0;
> +	ncpus = fwts_cpu_enumerate();
> +	memset(&tmp, 0, sizeof(tmp));
>  
>  	if (ncpus == FWTS_ERROR)
>  		return FWTS_ERROR;
> @@ -333,6 +390,20 @@ int fwts_cpu_performance(
>  	if (cpu < 0 || cpu > ncpus)
>  		return FWTS_ERROR;
>  
> +	/* setup perf counter */
> +	perf_ok = true;
> +	perfctr = perf_setup_counter(cpu);
> +	if (perfctr < 0) {
> +		if (!warned) {
> +			fwts_log_warning(fw, "Can't use linux performance "
> +					"counters (perf), falling back to "
> +					"relative measurements");
> +			warned = true;
> +		}
> +		perf_ok = false;
> +	}
> +
> +
>  	/* Pin to the specified CPU */
>  
>  	if (sched_getaffinity(0, sizeof(oldset), &oldset) < 0) {
> @@ -352,6 +423,9 @@ int fwts_cpu_performance(
>  	while (current == time(NULL))
>  		sched_yield();
>  
> +	if (perf_ok)
> +		perf_start_counter(perfctr);
> +
>  	current = time(NULL);
>  
>  	/*
> @@ -360,17 +434,38 @@ int fwts_cpu_performance(
>  	 */
>  	do {
>  		fwts_cpu_burn_cycles();
> -		(*loop_count)++;
> +		tmp.loops++;
>  	} while (current == time(NULL));
>  
> +	if (perf_ok)
> +		perf_stop_counter(perfctr);
> +
>  	if (sched_setaffinity(0, sizeof(oldset), &oldset) < 0) {
>  		fwts_log_error(fw, "Cannot restore old CPU affinity settings.");
>  		return FWTS_ERROR;
>  	}
>  
> +	if (perf_ok) {
> +		rc = perf_read_counter(perfctr, &perfctr_result);
> +		if (rc == FWTS_OK) {
> +			tmp.cycles = perfctr_result;
> +			tmp.cycles_valid = true;
> +		} else {
> +			fwts_log_warning(fw, "failed to read perf counters");
> +		}
> +
> +	}
> +
> +	*result = tmp;
> +
>  	return FWTS_OK;
>  }
>  
> +uint64_t fwts_cpu_benchmark_best_result(fwts_cpu_benchmark_result *res)
> +{
> +	return res->cycles_valid ? res->cycles : res->loops;
> +}
> +
>  /*
>   *  fwts_cpu_consume_cycles()
>   *	eat up CPU cycles
> 

Acked-by: Colin Ian King <colin.king at canonical.com>




More information about the fwts-devel mailing list