Changes in uspace/app/perf/perf.c [e131833c:b4a4ad94] in mainline
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
uspace/app/perf/perf.c
re131833c rb4a4ad94 1 1 /* 2 2 * Copyright (c) 2018 Jiri Svoboda 3 * Copyright (c) 2018 Vojtech Horky 3 4 * All rights reserved. 4 5 * … … 34 35 */ 35 36 37 #include <assert.h> 38 #include <math.h> 36 39 #include <stdio.h> 37 40 #include <stddef.h> 38 41 #include <stdlib.h> 39 42 #include <str.h> 43 #include <time.h> 44 #include <errno.h> 45 #include <perf.h> 46 #include <types/casting.h> 40 47 #include "perf.h" 41 42 benchmark_t benchmarks[] = { 43 #include "ipc/ns_ping.def" 44 #include "ipc/ping_pong.def" 45 #include "malloc/malloc1.def" 46 #include "malloc/malloc2.def" 47 { NULL, NULL, NULL } 48 }; 48 #include "benchlist.h" 49 50 #define MIN_DURATION_SECS 10 51 #define NUM_SAMPLES 10 52 #define MAX_ERROR_STR_LENGTH 1024 53 54 static void short_report(stopwatch_t *stopwatch, int run_index, 55 benchmark_t *bench, uint64_t workload_size) 56 { 57 usec_t duration_usec = NSEC2USEC(stopwatch_get_nanos(stopwatch)); 58 59 printf("Completed %" PRIu64 " operations in %llu us", 60 workload_size, duration_usec); 61 if (duration_usec > 0) { 62 double nanos = stopwatch_get_nanos(stopwatch); 63 double thruput = (double) workload_size / (nanos / 1000000000.0l); 64 printf(", %.0f ops/s.\n", thruput); 65 } else { 66 printf(".\n"); 67 } 68 } 69 70 /* 71 * This is a temporary solution until we have proper sqrt() implementation 72 * in libmath. 73 * 74 * The algorithm uses Babylonian method [1]. 75 * 76 * [1] https://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Babylonian_method 77 */ 78 static double estimate_square_root(double value, double precision) 79 { 80 double estimate = 1.; 81 double prev_estimate = estimate + 10 * precision; 82 83 while (fabs(estimate - prev_estimate) > precision) { 84 prev_estimate = estimate; 85 estimate = (prev_estimate + value / prev_estimate) / 2.; 86 } 87 88 return estimate; 89 } 90 91 /* 92 * Compute available statistics from given stopwatches. 
93 * 94 * We compute normal mean for average duration of the workload and harmonic 95 * mean for average thruput. Note that harmonic mean is necessary to compute 96 * average throughput correctly - consider the following example: 97 * - we run always 60 operations, 98 * - first run executes in 30 s (i.e. 2 ops/s) 99 * - and second one in 10 s (6 ops/s). 100 * Then, naively, average throughput would be (2+6)/2 = 4 [ops/s]. However, we 101 * actually executed 60 + 60 ops in 30 + 10 seconds. So the actual average 102 * throughput is 3 ops/s (which is exactly what harmonic mean means). 103 * 104 */ 105 static void compute_stats(stopwatch_t *stopwatch, size_t stopwatch_count, 106 uint64_t workload_size, double precision, double *out_duration_avg, 107 double *out_duration_sigma, double *out_thruput_avg) 108 { 109 double inv_thruput_sum = 0.0; 110 double nanos_sum = 0.0; 111 double nanos_sum2 = 0.0; 112 113 for (size_t i = 0; i < stopwatch_count; i++) { 114 double nanos = stopwatch_get_nanos(&stopwatch[i]); 115 double thruput = (double) workload_size / nanos; 116 117 inv_thruput_sum += 1.0 / thruput; 118 nanos_sum += nanos; 119 nanos_sum2 += nanos * nanos; 120 } 121 *out_duration_avg = nanos_sum / stopwatch_count; 122 double sigma2 = (nanos_sum2 - nanos_sum * (*out_duration_avg)) / 123 ((double) stopwatch_count - 1); 124 // FIXME: implement sqrt properly 125 *out_duration_sigma = estimate_square_root(sigma2, precision); 126 *out_thruput_avg = 1.0 / (inv_thruput_sum / stopwatch_count); 127 } 128 129 static void summary_stats(stopwatch_t *stopwatch, size_t stopwatch_count, 130 benchmark_t *bench, uint64_t workload_size) 131 { 132 double duration_avg, duration_sigma, thruput_avg; 133 compute_stats(stopwatch, stopwatch_count, workload_size, 0.001, 134 &duration_avg, &duration_sigma, &thruput_avg); 135 136 printf("Average: %" PRIu64 " ops in %.0f us (sd %.0f us); " 137 "%.0f ops/s; Samples: %zu\n", 138 workload_size, duration_avg / 1000.0, duration_sigma / 1000.0, 139 
thruput_avg * 1000000000.0, stopwatch_count); 140 } 49 141 50 142 static bool run_benchmark(benchmark_t *bench) 51 143 { 52 /* Execute the benchmarl */ 53 const char *ret = bench->entry(); 54 55 if (ret == NULL) { 56 printf("\nBenchmark completed\n"); 57 return true; 58 } 59 60 printf("\n%s\n", ret); 61 return false; 144 printf("Warm up and determine workload size...\n"); 145 146 char *error_msg = malloc(MAX_ERROR_STR_LENGTH + 1); 147 if (error_msg == NULL) { 148 printf("Out of memory!\n"); 149 return false; 150 } 151 str_cpy(error_msg, MAX_ERROR_STR_LENGTH, ""); 152 153 bool ret = true; 154 155 if (bench->setup != NULL) { 156 ret = bench->setup(error_msg, MAX_ERROR_STR_LENGTH); 157 if (!ret) { 158 goto leave_error; 159 } 160 } 161 162 /* 163 * Find workload size that is big enough to last few seconds. 164 * We also check that uint64_t is big enough. 165 */ 166 uint64_t workload_size = 0; 167 for (size_t bits = 0; bits <= 64; bits++) { 168 if (bits == 64) { 169 str_cpy(error_msg, MAX_ERROR_STR_LENGTH, "Workload too small even for 1 << 63"); 170 goto leave_error; 171 } 172 workload_size = ((uint64_t) 1) << bits; 173 174 stopwatch_t stopwatch = STOPWATCH_INITIALIZE_STATIC; 175 176 bool ok = bench->entry(&stopwatch, workload_size, 177 error_msg, MAX_ERROR_STR_LENGTH); 178 if (!ok) { 179 goto leave_error; 180 } 181 short_report(&stopwatch, -1, bench, workload_size); 182 183 nsec_t duration = stopwatch_get_nanos(&stopwatch); 184 if (duration > SEC2NSEC(MIN_DURATION_SECS)) { 185 break; 186 } 187 } 188 189 printf("Workload size set to %" PRIu64 ", measuring %d samples.\n", workload_size, NUM_SAMPLES); 190 191 stopwatch_t *stopwatch = calloc(NUM_SAMPLES, sizeof(stopwatch_t)); 192 if (stopwatch == NULL) { 193 snprintf(error_msg, MAX_ERROR_STR_LENGTH, "failed allocating memory"); 194 goto leave_error; 195 } 196 for (int i = 0; i < NUM_SAMPLES; i++) { 197 stopwatch_init(&stopwatch[i]); 198 199 bool ok = bench->entry(&stopwatch[i], workload_size, 200 error_msg, 
MAX_ERROR_STR_LENGTH); 201 if (!ok) { 202 free(stopwatch); 203 goto leave_error; 204 } 205 short_report(&stopwatch[i], i, bench, workload_size); 206 } 207 208 summary_stats(stopwatch, NUM_SAMPLES, bench, workload_size); 209 printf("\nBenchmark completed\n"); 210 211 free(stopwatch); 212 213 goto leave; 214 215 leave_error: 216 printf("Error: %s\n", error_msg); 217 ret = false; 218 219 leave: 220 if (bench->teardown != NULL) { 221 bool ok = bench->teardown(error_msg, MAX_ERROR_STR_LENGTH); 222 if (!ok) { 223 printf("Error: %s\n", error_msg); 224 ret = false; 225 } 226 } 227 228 free(error_msg); 229 230 return ret; 62 231 } 63 232 64 233 static int run_benchmarks(void) 65 234 { 66 benchmark_t *bench; 67 unsigned int i = 0; 68 unsigned int n = 0; 235 unsigned int count_ok = 0; 236 unsigned int count_fail = 0; 69 237 70 238 char *failed_names = NULL; … … 72 240 printf("\n*** Running all benchmarks ***\n\n"); 73 241 74 for ( bench = benchmarks; bench->name != NULL; bench++) {75 printf("%s (%s)\n", bench ->name, bench->desc);76 if (run_benchmark(bench )) {77 i++;242 for (size_t it = 0; it < benchmark_count; it++) { 243 printf("%s (%s)\n", benchmarks[it]->name, benchmarks[it]->desc); 244 if (run_benchmark(benchmarks[it])) { 245 count_ok++; 78 246 continue; 79 247 } 80 248 81 249 if (!failed_names) { 82 failed_names = str_dup(bench ->name);250 failed_names = str_dup(benchmarks[it]->name); 83 251 } else { 84 252 char *f = NULL; 85 asprintf(&f, "%s, %s", failed_names, bench ->name);253 asprintf(&f, "%s, %s", failed_names, benchmarks[it]->name); 86 254 if (!f) { 87 255 printf("Out of memory.\n"); … … 91 259 failed_names = f; 92 260 } 93 n++; 94 } 95 96 printf("\nCompleted, %u benchmarks run, %u succeeded.\n", i + n, i); 261 count_fail++; 262 } 263 264 printf("\nCompleted, %u benchmarks run, %u succeeded.\n", 265 count_ok + count_fail, count_ok); 97 266 if (failed_names) 98 267 printf("Failed benchmarks: %s\n", failed_names); 99 268 100 return n;269 return count_fail; 101 270 
} 102 271 … … 104 273 { 105 274 size_t len = 0; 106 benchmark_t *bench; 107 for (bench = benchmarks; bench->name != NULL; bench++) { 108 if (str_length(bench->name) > len) 109 len = str_length(bench->name); 110 } 111 112 unsigned int _len = (unsigned int) len; 113 if ((_len != len) || (((int) _len) < 0)) { 114 printf("Command length overflow\n"); 115 return; 116 } 117 118 for (bench = benchmarks; bench->name != NULL; bench++) 119 printf("%-*s %s\n", _len, bench->name, bench->desc); 120 121 printf("%-*s Run all benchmarks\n", _len, "*"); 275 for (size_t i = 0; i < benchmark_count; i++) { 276 size_t len_now = str_length(benchmarks[i]->name); 277 if (len_now > len) 278 len = len_now; 279 } 280 281 assert(can_cast_size_t_to_int(len) && "benchmark name length overflow"); 282 283 for (size_t i = 0; i < benchmark_count; i++) 284 printf("%-*s %s\n", (int) len, benchmarks[i]->name, benchmarks[i]->desc); 285 286 printf("%-*s Run all benchmarks\n", (int) len, "*"); 122 287 } 123 288 … … 135 300 } 136 301 137 benchmark_t *bench; 138 for (bench = benchmarks; bench->name != NULL; bench++) { 139 if (str_cmp(argv[1], bench->name) == 0) { 140 return (run_benchmark(bench) ? 0 : -1); 302 for (size_t i = 0; i < benchmark_count; i++) { 303 if (str_cmp(argv[1], benchmarks[i]->name) == 0) { 304 return (run_benchmark(benchmarks[i]) ? 0 : -1); 141 305 } 142 306 }
Note:
See TracChangeset
for help on using the changeset viewer.