Changes in uspace/app/perf/perf.c [b4a4ad94:e131833c] in mainline
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
uspace/app/perf/perf.c
rb4a4ad94 re131833c 1 1 /* 2 2 * Copyright (c) 2018 Jiri Svoboda 3 * Copyright (c) 2018 Vojtech Horky4 3 * All rights reserved. 5 4 * … … 35 34 */ 36 35 37 #include <assert.h>38 #include <math.h>39 36 #include <stdio.h> 40 37 #include <stddef.h> 41 38 #include <stdlib.h> 42 39 #include <str.h> 43 #include <time.h>44 #include <errno.h>45 #include <perf.h>46 #include <types/casting.h>47 40 #include "perf.h" 48 #include "benchlist.h"49 41 50 #define MIN_DURATION_SECS 10 51 #define NUM_SAMPLES 10 52 #define MAX_ERROR_STR_LENGTH 1024 53 54 static void short_report(stopwatch_t *stopwatch, int run_index, 55 benchmark_t *bench, uint64_t workload_size) 56 { 57 usec_t duration_usec = NSEC2USEC(stopwatch_get_nanos(stopwatch)); 58 59 printf("Completed %" PRIu64 " operations in %llu us", 60 workload_size, duration_usec); 61 if (duration_usec > 0) { 62 double nanos = stopwatch_get_nanos(stopwatch); 63 double thruput = (double) workload_size / (nanos / 1000000000.0l); 64 printf(", %.0f ops/s.\n", thruput); 65 } else { 66 printf(".\n"); 67 } 68 } 69 70 /* 71 * This is a temporary solution until we have proper sqrt() implementation 72 * in libmath. 73 * 74 * The algorithm uses Babylonian method [1]. 75 * 76 * [1] https://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Babylonian_method 77 */ 78 static double estimate_square_root(double value, double precision) 79 { 80 double estimate = 1.; 81 double prev_estimate = estimate + 10 * precision; 82 83 while (fabs(estimate - prev_estimate) > precision) { 84 prev_estimate = estimate; 85 estimate = (prev_estimate + value / prev_estimate) / 2.; 86 } 87 88 return estimate; 89 } 90 91 /* 92 * Compute available statistics from given stopwatches. 93 * 94 * We compute normal mean for average duration of the workload and geometric 95 * mean for average thruput. 
Note that geometric mean is necessary to compute 96 * average throughput correctly - consider the following example: 97 * - we run always 60 operations, 98 * - first run executes in 30 s (i.e. 2 ops/s) 99 * - and second one in 10 s (6 ops/s). 100 * Then, naively, average throughput would be (2+6)/2 = 4 [ops/s]. However, we 101 * actually executed 60 + 60 ops in 30 + 10 seconds. So the actual average 102 * throughput is 3 ops/s (which is exactly what geometric mean means). 103 * 104 */ 105 static void compute_stats(stopwatch_t *stopwatch, size_t stopwatch_count, 106 uint64_t workload_size, double precision, double *out_duration_avg, 107 double *out_duration_sigma, double *out_thruput_avg) 108 { 109 double inv_thruput_sum = 0.0; 110 double nanos_sum = 0.0; 111 double nanos_sum2 = 0.0; 112 113 for (size_t i = 0; i < stopwatch_count; i++) { 114 double nanos = stopwatch_get_nanos(&stopwatch[i]); 115 double thruput = (double) workload_size / nanos; 116 117 inv_thruput_sum += 1.0 / thruput; 118 nanos_sum += nanos; 119 nanos_sum2 += nanos * nanos; 120 } 121 *out_duration_avg = nanos_sum / stopwatch_count; 122 double sigma2 = (nanos_sum2 - nanos_sum * (*out_duration_avg)) / 123 ((double) stopwatch_count - 1); 124 // FIXME: implement sqrt properly 125 *out_duration_sigma = estimate_square_root(sigma2, precision); 126 *out_thruput_avg = 1.0 / (inv_thruput_sum / stopwatch_count); 127 } 128 129 static void summary_stats(stopwatch_t *stopwatch, size_t stopwatch_count, 130 benchmark_t *bench, uint64_t workload_size) 131 { 132 double duration_avg, duration_sigma, thruput_avg; 133 compute_stats(stopwatch, stopwatch_count, workload_size, 0.001, 134 &duration_avg, &duration_sigma, &thruput_avg); 135 136 printf("Average: %" PRIu64 " ops in %.0f us (sd %.0f us); " 137 "%.0f ops/s; Samples: %zu\n", 138 workload_size, duration_avg / 1000.0, duration_sigma / 1000.0, 139 thruput_avg * 1000000000.0, stopwatch_count); 140 } 42 benchmark_t benchmarks[] = { 43 #include "ipc/ns_ping.def" 44 
#include "ipc/ping_pong.def" 45 #include "malloc/malloc1.def" 46 #include "malloc/malloc2.def" 47 { NULL, NULL, NULL } 48 }; 141 49 142 50 static bool run_benchmark(benchmark_t *bench) 143 51 { 144 printf("Warm up and determine workload size...\n"); 52 /* Execute the benchmarl */ 53 const char *ret = bench->entry(); 145 54 146 char *error_msg = malloc(MAX_ERROR_STR_LENGTH + 1); 147 if (error_msg == NULL) { 148 printf("Out of memory!\n"); 149 return false; 150 } 151 str_cpy(error_msg, MAX_ERROR_STR_LENGTH, ""); 152 153 bool ret = true; 154 155 if (bench->setup != NULL) { 156 ret = bench->setup(error_msg, MAX_ERROR_STR_LENGTH); 157 if (!ret) { 158 goto leave_error; 159 } 55 if (ret == NULL) { 56 printf("\nBenchmark completed\n"); 57 return true; 160 58 } 161 59 162 /* 163 * Find workload size that is big enough to last few seconds. 164 * We also check that uint64_t is big enough. 165 */ 166 uint64_t workload_size = 0; 167 for (size_t bits = 0; bits <= 64; bits++) { 168 if (bits == 64) { 169 str_cpy(error_msg, MAX_ERROR_STR_LENGTH, "Workload too small even for 1 << 63"); 170 goto leave_error; 171 } 172 workload_size = ((uint64_t) 1) << bits; 173 174 stopwatch_t stopwatch = STOPWATCH_INITIALIZE_STATIC; 175 176 bool ok = bench->entry(&stopwatch, workload_size, 177 error_msg, MAX_ERROR_STR_LENGTH); 178 if (!ok) { 179 goto leave_error; 180 } 181 short_report(&stopwatch, -1, bench, workload_size); 182 183 nsec_t duration = stopwatch_get_nanos(&stopwatch); 184 if (duration > SEC2NSEC(MIN_DURATION_SECS)) { 185 break; 186 } 187 } 188 189 printf("Workload size set to %" PRIu64 ", measuring %d samples.\n", workload_size, NUM_SAMPLES); 190 191 stopwatch_t *stopwatch = calloc(NUM_SAMPLES, sizeof(stopwatch_t)); 192 if (stopwatch == NULL) { 193 snprintf(error_msg, MAX_ERROR_STR_LENGTH, "failed allocating memory"); 194 goto leave_error; 195 } 196 for (int i = 0; i < NUM_SAMPLES; i++) { 197 stopwatch_init(&stopwatch[i]); 198 199 bool ok = bench->entry(&stopwatch[i], workload_size, 
200 error_msg, MAX_ERROR_STR_LENGTH); 201 if (!ok) { 202 free(stopwatch); 203 goto leave_error; 204 } 205 short_report(&stopwatch[i], i, bench, workload_size); 206 } 207 208 summary_stats(stopwatch, NUM_SAMPLES, bench, workload_size); 209 printf("\nBenchmark completed\n"); 210 211 free(stopwatch); 212 213 goto leave; 214 215 leave_error: 216 printf("Error: %s\n", error_msg); 217 ret = false; 218 219 leave: 220 if (bench->teardown != NULL) { 221 bool ok = bench->teardown(error_msg, MAX_ERROR_STR_LENGTH); 222 if (!ok) { 223 printf("Error: %s\n", error_msg); 224 ret = false; 225 } 226 } 227 228 free(error_msg); 229 230 return ret; 60 printf("\n%s\n", ret); 61 return false; 231 62 } 232 63 233 64 static int run_benchmarks(void) 234 65 { 235 unsigned int count_ok = 0; 236 unsigned int count_fail = 0; 66 benchmark_t *bench; 67 unsigned int i = 0; 68 unsigned int n = 0; 237 69 238 70 char *failed_names = NULL; … … 240 72 printf("\n*** Running all benchmarks ***\n\n"); 241 73 242 for ( size_t it = 0; it < benchmark_count; it++) {243 printf("%s (%s)\n", bench marks[it]->name, benchmarks[it]->desc);244 if (run_benchmark(bench marks[it])) {245 count_ok++;74 for (bench = benchmarks; bench->name != NULL; bench++) { 75 printf("%s (%s)\n", bench->name, bench->desc); 76 if (run_benchmark(bench)) { 77 i++; 246 78 continue; 247 79 } 248 80 249 81 if (!failed_names) { 250 failed_names = str_dup(bench marks[it]->name);82 failed_names = str_dup(bench->name); 251 83 } else { 252 84 char *f = NULL; 253 asprintf(&f, "%s, %s", failed_names, bench marks[it]->name);85 asprintf(&f, "%s, %s", failed_names, bench->name); 254 86 if (!f) { 255 87 printf("Out of memory.\n"); … … 259 91 failed_names = f; 260 92 } 261 count_fail++;93 n++; 262 94 } 263 95 264 printf("\nCompleted, %u benchmarks run, %u succeeded.\n", 265 count_ok + count_fail, count_ok); 96 printf("\nCompleted, %u benchmarks run, %u succeeded.\n", i + n, i); 266 97 if (failed_names) 267 98 printf("Failed benchmarks: %s\n", 
failed_names); 268 99 269 return count_fail;100 return n; 270 101 } 271 102 … … 273 104 { 274 105 size_t len = 0; 275 for (size_t i = 0; i < benchmark_count; i++) {276 size_t len_now = str_length(benchmarks[i]->name);277 if ( len_now> len)278 len = len_now;106 benchmark_t *bench; 107 for (bench = benchmarks; bench->name != NULL; bench++) { 108 if (str_length(bench->name) > len) 109 len = str_length(bench->name); 279 110 } 280 111 281 assert(can_cast_size_t_to_int(len) && "benchmark name length overflow"); 112 unsigned int _len = (unsigned int) len; 113 if ((_len != len) || (((int) _len) < 0)) { 114 printf("Command length overflow\n"); 115 return; 116 } 282 117 283 for ( size_t i = 0; i < benchmark_count; i++)284 printf("%-*s %s\n", (int) len, benchmarks[i]->name, benchmarks[i]->desc);118 for (bench = benchmarks; bench->name != NULL; bench++) 119 printf("%-*s %s\n", _len, bench->name, bench->desc); 285 120 286 printf("%-*s Run all benchmarks\n", (int)len, "*");121 printf("%-*s Run all benchmarks\n", _len, "*"); 287 122 } 288 123 … … 300 135 } 301 136 302 for (size_t i = 0; i < benchmark_count; i++) { 303 if (str_cmp(argv[1], benchmarks[i]->name) == 0) { 304 return (run_benchmark(benchmarks[i]) ? 0 : -1); 137 benchmark_t *bench; 138 for (bench = benchmarks; bench->name != NULL; bench++) { 139 if (str_cmp(argv[1], bench->name) == 0) { 140 return (run_benchmark(bench) ? 0 : -1); 305 141 } 306 142 }
Note: See TracChangeset for help on using the changeset viewer.