ylproj
/
jupyterlab


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
							/**
 * Copyright (c) Jupyter Development Team.
 * Distributed under the terms of the Modified BSD License.
 */
import * as quantile from '@stdlib/stats/base/dists/t/quantile';
import * as meanpw from '@stdlib/stats/base/meanpw';
import * as variancepn from '@stdlib/stats/base/variancepn';
import * as neatCSV from 'neat-csv';
import * as fs from 'fs';

const OUTPUT_FILE = process.env['BENCHMARK_OUTPUT'] || 'diff.csv';
const OLD_FILE = process.env['BENCHMARK_INPUT_OLD'] || 'old.csv';
const NEW_FILE = process.env['BENCHMARK_INPUT_NEW'] || 'new.csv';

const stream = fs.createWriteStream(OUTPUT_FILE);

function writeLine(line: string): Promise<void> {
  return new Promise(function(resolve, reject) {
    stream.write(line + '\n', error => (error ? reject(error) : resolve()));
  });
}
tests();

void main();

async function main() {
  console.log(`Writing output to ${OUTPUT_FILE}`);
  await writeLine('mode,browser,n,type,mean,confidenceInterval');

  for await (const {
    mode,
    browser,
    n,
    mean,
    type,
    confidenceInterval
  } of compare(OLD_FILE, NEW_FILE, 0.95)) {
    await writeLine(
      [mode, browser, n, type, mean, confidenceInterval].join(',')
    );
  }
}

type OutputRow = {
  mode: string;
  browser: string;
  type: string;
  n: number;
  mean: number;
  confidenceInterval: number;
};

async function* compare(
  oldCSVPath: string,
  newCSVPath: string,
  confidenceInterval: number = 0.95
): AsyncIterable<OutputRow> {
  const collected: {
    // turn key into string so we can lookup easily with it
    [key: string]: {
      mode: string;
      browser: string;
      type: string;
      n: number;
      times: { [VERSION in 'old' | 'new_']: number[] };
    };
  } = {};
  for (const { path, version } of [
    { path: oldCSVPath, version: 'old' as 'old' },
    { path: newCSVPath, version: 'new_' as 'new_' }
  ]) {
    console.log('Parsing data', { path, version });
    const text = await fs.promises.readFile(path);
    for (const { mode, browser, n, type, time } of await neatCSV(text)) {
      const key = `${mode}-${browser}-${n}-${type}`;
      // get key if we have it, otherwise create new
      const data =
        collected[key] ||
        (collected[key] = {
          mode,
          browser,
          n: parseInt(n),
          type,
          times: { old: [], new_: [] }
        });
      data.times[version].push(parseFloat(time));
    }
  }
  for (const {
    mode,
    browser,
    type,
    n,
    times: { old, new_ }
  } of Object.values(collected)) {
    if (old.length != new_.length) {
      console.warn('Skipping because different lengths between runs', {
        mode,
        browser,
        type,
        n
      });
      continue;
    }
    yield {
      mode,
      browser,
      type,
      n,
      ...performanceChangeFromData(old, new_, confidenceInterval)
    };
  }
}

/**
 * Quantifies the performance changes between two measures systems. Assumes we gathered
 * n independent measurement from each, and calculated their means and varience.
 *
 * Based on the work by Tomas Kalibera and Richard Jones. See their paper
 * "Quantifying Performance Changes with Effect Size Confidence Intervals", section 6.2,
 * formula "Quantifying Performance Change".
 *
 * However, it simplifies it to only assume one level of benchmarks, not multiple levels.
 * If you do have multiple levels, simply use the mean of the lower levels as your data,
 * like they do in the paper.
 *
 * @param oldSystem The old system we measured
 * @param newSystem The new system we measured
 * @param n The number of samples from each system (must be equal)
 * @param confidenceInterval The confidence interval for the results.
 *  The default is a 95% confidence interval (95% of the time the true mean will be
 *  between the resulting mean +- the resulting CI)
 */
export function performanceChange(
  { mean: y_o, variance: s_o }: { mean: number; variance: number },
  { mean: y_n, variance: s_n }: { mean: number; variance: number },
  n: number,
  confidenceInterval: number = 0.95
): { mean: number; confidenceInterval: number } {
  const dof = n - 1;
  const t = quantile(1 - (1 - confidenceInterval) / 2, dof);
  const oldFactor = sq(y_o) - (sq(t) * s_o) / n;
  const newFactor = sq(y_n) - (sq(t) * s_n) / n;
  const meanNum = y_o * y_n;
  const ciNum = Math.sqrt(sq(y_o * y_n) - newFactor * oldFactor);
  return {
    mean: meanNum / oldFactor,
    confidenceInterval: ciNum / oldFactor
  };
}

/**
 * Compute the performance change based on a number of old and new measurements.
 */
export function performanceChangeFromData(
  old: number[],
  new_: number[],
  confidenceInterval: number = 0.95
): { mean: number; confidenceInterval: number } {
  const n = old.length;
  if (n !== new_.length) {
    throw new Error('Data have different length');
  }
  return performanceChange(
    { mean: mean(...old), variance: variance(...old) },
    { mean: mean(...new_), variance: variance(...new_) },
    n,
    confidenceInterval
  );
}

/**
 * Format a performance changes like `10∓0.3`
 */
function formatChange({
  mean,
  confidenceInterval
}: {
  mean: number;
  confidenceInterval: number;
}): string {
  return `${mean.toFixed(1)}∓${confidenceInterval.toFixed(1)}`;
}

/**
 * Reproduce examples from paper, and verify we have implemented things correctly.
 */
export function tests() {
  assertAboutEqual(quantile(1 - 0.05 / 2, 2), 4.3, 'quantile');

  const paperResult = {
    mean: 68.3 / 74.5,
    confidenceInterval: 60.2 / 70.2
  };
  assertResultsEqual(
    performanceChange(
      { variance: 5.8, mean: 10.5 },
      { variance: 4.6, mean: 6.5 },
      3,
      0.95
    ),
    paperResult,
    'performanceChange'
  );

  //   Data from table V, uses means of top level
  assertResultsEqual(
    performanceChangeFromData(
      [mean(9, 11, 5, 6), mean(16, 13, 12, 8), mean(15, 7, 10, 14)],
      [mean(10, 12, 6, 7), mean(9, 1, 11, 4), mean(8, 5, 3, 2)],
      0.95
    ),
    paperResult,
    'performanceChangeFromData'
  );
}

function assertResultsEqual(
  l: {
    mean: number;
    confidenceInterval: number;
  },
  r: { mean: number; confidenceInterval: number },
  message: string
) {
  assertAboutEqual(l.mean, r.mean, `${message}: means`);
  assertAboutEqual(
    r.confidenceInterval,
    r.confidenceInterval,
    `${message}: confidence interval`
  );
}

function assertAboutEqual(x: number, y: number, msg: string): void {
  console.assert(Math.abs(x - y) <= 0.005, `${msg}: ${x} != ${y}`);
}

function sq(x: number): number {
  return Math.pow(x, 2);
}

function mean(...x: number[]): number {
  return meanpw(x.length, x, 1);
}

function variance(...x: number[]): number {
  return variancepn(x.length, 1, x, 1);
}