compare.ts 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. /**
  2. * Copyright (c) Jupyter Development Team.
  3. * Distributed under the terms of the Modified BSD License.
  4. */
  5. import * as quantile from '@stdlib/stats/base/dists/t/quantile';
  6. import * as meanpw from '@stdlib/stats/base/meanpw';
  7. import * as variancepn from '@stdlib/stats/base/variancepn';
  8. import * as neatCSV from 'neat-csv';
  9. import * as fs from 'fs';
  // Paths are configurable through environment variables; an unset or empty
  // variable falls back to the default file name in the working directory.
  const OUTPUT_FILE = process.env.BENCHMARK_OUTPUT || 'diff.csv';
  const OLD_FILE = process.env.BENCHMARK_INPUT_OLD || 'old.csv';
  const NEW_FILE = process.env.BENCHMARK_INPUT_NEW || 'new.csv';

  // Single write stream shared by every `writeLine` call; opened at module load.
  const stream = fs.createWriteStream(OUTPUT_FILE);
  /**
   * Write `line` followed by a newline to the shared output stream.
   *
   * @param line Text to write, without the trailing newline.
   * @returns A promise that resolves when the stream's write callback reports
   * success, and rejects with the error the callback reports otherwise.
   */
  function writeLine(line: string): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      stream.write(`${line}\n`, error => {
        if (error) {
          reject(error);
        } else {
          resolve();
        }
      });
    });
  }
  // Run the built-in self-checks first, then generate the comparison report.
  tests();
  // Handle a failed run explicitly instead of leaving an unhandled rejection:
  // log the error and make the process exit with a non-zero status.
  void main().catch(error => {
    console.error(error);
    process.exitCode = 1;
  });
  /**
   * Compare the old and new benchmark files and write the result as CSV.
   *
   * Writes a header line and then one line per row yielded by `compare`
   * (called with a 0.95 confidence level) to the shared output stream. Each
   * write is awaited before the next one starts. Once the last row is written
   * the stream is ended, and the returned promise resolves only after the
   * stream reports that it has finished.
   */
  async function main(): Promise<void> {
    console.log(`Writing output to ${OUTPUT_FILE}`);
    await writeLine('mode,browser,n,type,mean,confidenceInterval');
    for await (const row of compare(OLD_FILE, NEW_FILE, 0.95)) {
      const { mode, browser, n, type, mean, confidenceInterval } = row;
      // Column order must match the header written above.
      await writeLine(
        [mode, browser, n, type, mean, confidenceInterval].join(',')
      );
    }
    // Close the output file explicitly rather than relying on process exit.
    await new Promise<void>((resolve, reject) => {
      stream.end((error?: Error | null) => (error ? reject(error) : resolve()));
    });
  }
  /**
   * One row of the comparison output, as yielded by `compare`.
   */
  type OutputRow = {
    /** `mode` column copied from the input CSV. */
    mode: string;
    /** `browser` column copied from the input CSV. */
    browser: string;
    /** `n` column of the input CSV, parsed as an integer. */
    n: number;
    /** `type` column copied from the input CSV. */
    type: string;
    /** `mean` returned by `performanceChangeFromData` for this group. */
    mean: number;
    /** `confidenceInterval` returned by `performanceChangeFromData` for this group. */
    confidenceInterval: number;
  };
  /**
   * Read two benchmark CSV files and yield the performance change for every
   * group of measurements they have in common.
   *
   * Both files are parsed with `neat-csv`; the columns `mode`, `browser`, `n`,
   * `type` and `time` are read from each row. Rows are grouped by the
   * combination of `mode`, `browser`, `n` and `type`, and the `time` values
   * are collected separately for the old and the new file. A group whose old
   * and new sample counts differ is skipped with a warning.
   *
   * @param oldCSVPath Path of the CSV file holding the old measurements.
   * @param newCSVPath Path of the CSV file holding the new measurements.
   * @param confidenceInterval Confidence level passed on to
   * `performanceChangeFromData` (defaults to 0.95).
   * @returns An async iterable with one `OutputRow` per comparable group.
   */
  async function* compare(
    oldCSVPath: string,
    newCSVPath: string,
    confidenceInterval: number = 0.95
  ): AsyncIterable<OutputRow> {
    const collected: {
      // turn key into string so we can lookup easily with it
      [key: string]: {
        mode: string;
        browser: string;
        type: string;
        n: number;
        times: { [VERSION in 'old' | 'new_']: number[] };
      };
    } = {};
    // Declared with an explicit type so `version` keeps its literal type
    // without needing type assertions.
    const inputs: { path: string; version: 'old' | 'new_' }[] = [
      { path: oldCSVPath, version: 'old' },
      { path: newCSVPath, version: 'new_' }
    ];
    for (const { path, version } of inputs) {
      console.log('Parsing data', { path, version });
      const text = await fs.promises.readFile(path);
      for (const { mode, browser, n, type, time } of await neatCSV(text)) {
        const key = `${mode}-${browser}-${n}-${type}`;
        // get key if we have it, otherwise create new
        const data =
          collected[key] ||
          (collected[key] = {
            mode,
            browser,
            // Always parse as decimal, whatever prefix the field carries.
            n: parseInt(n, 10),
            type,
            times: { old: [], new_: [] }
          });
        data.times[version].push(parseFloat(time));
      }
    }
    for (const {
      mode,
      browser,
      type,
      n,
      times: { old, new_ }
    } of Object.values(collected)) {
      // performanceChangeFromData requires equally sized samples.
      if (old.length !== new_.length) {
        console.warn('Skipping because different lengths between runs', {
          mode,
          browser,
          type,
          n
        });
        continue;
      }
      yield {
        mode,
        browser,
        type,
        n,
        ...performanceChangeFromData(old, new_, confidenceInterval)
      };
    }
  }
  /**
   * Quantify the performance change between two measured systems, given the
   * mean and variance of `n` independent measurements taken from each.
   *
   * Based on the work by Tomas Kalibera and Richard Jones. See their paper
   * "Quantifying Performance Changes with Effect Size Confidence Intervals",
   * section 6.2, formula "Quantifying Performance Change".
   *
   * This is a simplification that assumes a single level of benchmarks. If
   * you have multiple levels, use the means of the lower levels as your data,
   * like they do in the paper.
   *
   * @param oldSystem Mean and variance measured for the old system
   * (first argument, destructured).
   * @param newSystem Mean and variance measured for the new system
   * (second argument, destructured).
   * @param n The number of samples from each system (must be equal).
   * @param confidenceInterval The confidence level for the result. The
   * default is a 95% confidence interval (95% of the time the true mean will
   * be between the resulting mean +- the resulting CI).
   * @returns The estimated change as `mean`, with `confidenceInterval` as the
   * half-width around it.
   */
  export function performanceChange(
    { mean: y_o, variance: s_o }: { mean: number; variance: number },
    { mean: y_n, variance: s_n }: { mean: number; variance: number },
    n: number,
    confidenceInterval: number = 0.95
  ): { mean: number; confidenceInterval: number } {
    // Two-sided Student's t quantile with n - 1 degrees of freedom.
    const tValue = quantile(1 - (1 - confidenceInterval) / 2, n - 1);

    // Squared mean of each system reduced by its t-scaled variance term.
    const denominator = sq(y_o) - (sq(tValue) * s_o) / n;
    const newTerm = sq(y_n) - (sq(tValue) * s_n) / n;

    const product = y_o * y_n;
    const halfWidth = Math.sqrt(sq(product) - newTerm * denominator);

    // Both results are scaled by the same old-system term.
    return {
      mean: product / denominator,
      confidenceInterval: halfWidth / denominator
    };
  }
  /**
   * Compute the performance change from raw old and new measurements.
   *
   * Takes the mean and variance of each sample and passes them to
   * `performanceChange`.
   *
   * @param old Measurements of the old system.
   * @param new_ Measurements of the new system; must have the same length as `old`.
   * @param confidenceInterval Confidence level forwarded to `performanceChange`.
   * @returns The `mean` and `confidenceInterval` computed by `performanceChange`.
   * @throws Error if the two samples have different lengths.
   */
  export function performanceChangeFromData(
    old: number[],
    new_: number[],
    confidenceInterval: number = 0.95
  ): { mean: number; confidenceInterval: number } {
    const sampleCount = old.length;
    if (sampleCount !== new_.length) {
      throw new Error('Data have different length');
    }
    const oldStats = { mean: mean(...old), variance: variance(...old) };
    const newStats = { mean: mean(...new_), variance: variance(...new_) };
    return performanceChange(oldStats, newStats, sampleCount, confidenceInterval);
  }
  /**
   * Format a performance change as text, e.g. `10.0∓0.3`.
   *
   * Both numbers are rendered with one digit after the decimal point.
   *
   * @param change The `mean` and `confidenceInterval` to render.
   * @returns The mean, the `∓` sign, then the confidence interval.
   */
  function formatChange(change: {
    mean: number;
    confidenceInterval: number;
  }): string {
    const center = change.mean.toFixed(1);
    const halfWidth = change.confidenceInterval.toFixed(1);
    return center + '∓' + halfWidth;
  }
  /**
   * Reproduce examples from paper, and verify we have implemented things correctly.
   *
   * Failures are reported through `console.assert`, so they are logged rather
   * than thrown.
   */
  export function tests(): void {
    assertAboutEqual(quantile(1 - 0.05 / 2, 2), 4.3, 'quantile');
    // performanceChange divides both the mean and the interval half-width by
    // the same old-system term (74.5 here), so both expected values share
    // that denominator.
    const paperResult = {
      mean: 68.3 / 74.5,
      confidenceInterval: 60.2 / 74.5
    };
    assertResultsEqual(
      performanceChange(
        { variance: 5.8, mean: 10.5 },
        { variance: 4.6, mean: 6.5 },
        3,
        0.95
      ),
      paperResult,
      'performanceChange'
    );
    // Data from table V, uses means of top level
    assertResultsEqual(
      performanceChangeFromData(
        [mean(9, 11, 5, 6), mean(16, 13, 12, 8), mean(15, 7, 10, 14)],
        [mean(10, 12, 6, 7), mean(9, 1, 11, 4), mean(8, 5, 3, 2)],
        0.95
      ),
      paperResult,
      'performanceChangeFromData'
    );
  }
  /**
   * Assert that two results agree on both their mean and their confidence
   * interval, within the tolerance of `assertAboutEqual`.
   *
   * @param l The computed result.
   * @param r The expected result.
   * @param message Prefix used in the failure messages.
   */
  function assertResultsEqual(
    l: {
      mean: number;
      confidenceInterval: number;
    },
    r: { mean: number; confidenceInterval: number },
    message: string
  ): void {
    assertAboutEqual(l.mean, r.mean, `${message}: means`);
    // Compare the computed interval with the expected one, not the expected
    // value with itself.
    assertAboutEqual(
      l.confidenceInterval,
      r.confidenceInterval,
      `${message}: confidence interval`
    );
  }
  /**
   * Check that two numbers differ by at most 0.005.
   *
   * Uses `console.assert`, so a failed check is logged and does not throw.
   *
   * @param x First value.
   * @param y Second value.
   * @param msg Prefix for the failure message.
   */
  function assertAboutEqual(x: number, y: number, msg: string): void {
    const tolerance = 0.005;
    const closeEnough = Math.abs(x - y) <= tolerance;
    console.assert(closeEnough, `${msg}: ${x} != ${y}`);
  }
  /**
   * Square a number.
   *
   * @param x Value to square.
   * @returns `x` raised to the power of two.
   */
  function sq(x: number): number {
    return x ** 2;
  }
  /**
   * Mean of the given numbers, computed by `@stdlib/stats/base/meanpw`.
   *
   * @param x Values to average, passed as individual arguments.
   * @returns The value returned by `meanpw` for all of `x` with stride 1.
   */
  function mean(...x: number[]): number {
    const count = x.length;
    const stride = 1;
    return meanpw(count, x, stride);
  }
  /**
   * Variance of the given numbers, computed by
   * `@stdlib/stats/base/variancepn`.
   *
   * @param x Values to measure, passed as individual arguments.
   * @returns The value returned by `variancepn` for all of `x`, called with a
   * degrees-of-freedom correction of 1 and stride 1.
   */
  function variance(...x: number[]): number {
    const count = x.length;
    const correction = 1;
    const stride = 1;
    return variancepn(count, correction, x, stride);
  }