model.spec.ts 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338
  1. // Copyright (c) Jupyter Development Team.
  2. import 'jest';
  3. const fs = require('fs');
  4. import { DSVModel } from '../src';
  /**
   * Read a CSV fixture from disk as UTF-8 text.
   *
   * @param path - Module-style path to the fixture. It is passed through
   *   `require.resolve` before the file is read.
   * @returns The contents of the resolved file, decoded as UTF-8.
   */
  function readCSV(path: string): string {
    // Resolve into a separate local instead of overwriting the parameter, so
    // the caller-supplied value stays visible while debugging.
    const resolvedPath: string = require.resolve(path);
    // Passing an encoding makes `readFileSync` return text, hence the `string`
    // return type rather than `any`.
    return fs.readFileSync(resolvedPath, 'utf8');
  }
  9. /* tslint:disable:no-var-requires */
  /**
   * Fixtures taken from the `csv-spectrum` package, one `[name, csv, json]`
   * triple per case: the case name, the raw CSV text read from
   * `csv-spectrum/csvs/<name>.csv`, and the JSON loaded from
   * `csv-spectrum/json/<name>.json`.
   */
  const CSV_TEST_FILES = [
    'comma_in_quotes',
    'empty',
    'empty_crlf',
    'escaped_quotes',
    'json',
    'newlines',
    'newlines_crlf',
    'quotes_and_newlines',
    'simple',
    'simple_crlf',
    'utf8'
  ].map(name => [
    name,
    readCSV(`csv-spectrum/csvs/${name}.csv`),
    require(`csv-spectrum/json/${name}.json`)
  ]);
  67. /* tslint:enable:no-var-requires */
  68. describe('csvviewer/model', () => {
  69. describe('DSVModel', () => {
  describe('#constructor()', () => {
    // One header line plus one body line, three comma-separated columns.
    it('should instantiate a `DSVModel`', () => {
      const model = new DSVModel({ data: 'a,b,c\nd,e,f\n', delimiter: ',' });
      const columns = [0, 1, 2];

      // Region sizes.
      expect(model.rowCount('column-header')).toBe(1);
      expect(model.rowCount('body')).toBe(1);
      expect(model.columnCount('row-header')).toBe(1);
      expect(model.columnCount('body')).toBe(3);

      // Cell contents.
      const headerCells = columns.map(col =>
        model.data('column-header', 0, col)
      );
      expect(headerCells).toEqual(['a', 'b', 'c']);

      const bodyCells = columns.map(col => model.data('body', 0, col));
      expect(bodyCells).toEqual(['d', 'e', 'f']);
    });
  });
  it('parses a number of test files correctly', () => {
    // For every fixture, rebuild the parsed table as a list of
    // `{ header: cell }` records and compare it with the fixture's JSON.
    for (const [, csvText, expected] of CSV_TEST_FILES) {
      const model = new DSVModel({ data: csvText, delimiter: ',' });

      // Header labels, one per body column.
      const headers = Array.from(
        { length: model.columnCount('body') },
        (_, col) => model.data('column-header', 0, col)
      );

      // One record per body row, keyed by header label.
      const records = Array.from(
        { length: model.rowCount('body') },
        (_, rowIndex) => {
          const record: { [key: string]: string } = {};
          headers.forEach((header, col) => {
            record[header] = model.data('body', rowIndex, col);
          });
          return record;
        }
      );

      expect(records).toEqual(expected);
    }
  });
  it('handles tab-separated data', () => {
    // Same table as the constructor test, with tabs as the field delimiter.
    const tsv = 'a\tb\tc\nd\te\tf\n';
    const model = new DSVModel({ data: tsv, delimiter: '\t' });
    const columns = [0, 1, 2];

    expect(model.rowCount('column-header')).toBe(1);
    expect(model.rowCount('body')).toBe(1);
    expect(model.columnCount('row-header')).toBe(1);
    expect(model.columnCount('body')).toBe(3);

    const headerCells = columns.map(col =>
      model.data('column-header', 0, col)
    );
    expect(headerCells).toEqual(['a', 'b', 'c']);

    const bodyCells = columns.map(col => model.data('body', 0, col));
    expect(bodyCells).toEqual(['d', 'e', 'f']);
  });
  it('handles not having a header', () => {
    const options = {
      data: 'a,b,c\nd,e,f\n',
      delimiter: ',',
      header: false
    };
    const model = new DSVModel(options);
    const columns = [0, 1, 2];
    const bodyRow = (row: number) =>
      columns.map(col => model.data('body', row, col));

    // Both input lines are expected in the body region.
    expect(model.rowCount('column-header')).toBe(1);
    expect(model.rowCount('body')).toBe(2);
    expect(model.columnCount('row-header')).toBe(1);
    expect(model.columnCount('body')).toBe(3);

    // With `header: false`, the column headers are expected to be the
    // 1-based column numbers.
    const headerCells = columns.map(col =>
      model.data('column-header', 0, col)
    );
    expect(headerCells).toEqual(['1', '2', '3']);

    expect(bodyRow(0)).toEqual(['a', 'b', 'c']);
    expect(bodyRow(1)).toEqual(['d', 'e', 'f']);
  });
  it('handles having only a header', () => {
    // A single line with `header: true` should leave the body empty.
    const model = new DSVModel({
      data: 'a,b,c\n',
      delimiter: ',',
      header: true
    });

    expect(model.rowCount('column-header')).toBe(1);
    expect(model.rowCount('body')).toBe(0);
    expect(model.columnCount('row-header')).toBe(1);
    expect(model.columnCount('body')).toBe(3);

    const headerCells = [0, 1, 2].map(col =>
      model.data('column-header', 0, col)
    );
    expect(headerCells).toEqual(['a', 'b', 'c']);
  });
  it('handles single non-header line', () => {
    const options = {
      data: 'a,b,c\n',
      delimiter: ',',
      header: false
    };
    const model = new DSVModel(options);
    const columns = [0, 1, 2];

    // The only input line is expected to be a body row.
    expect(model.rowCount('column-header')).toBe(1);
    expect(model.rowCount('body')).toBe(1);
    expect(model.columnCount('row-header')).toBe(1);
    expect(model.columnCount('body')).toBe(3);

    // Header labels are expected to be the 1-based column numbers.
    const headerCells = columns.map(col =>
      model.data('column-header', 0, col)
    );
    expect(headerCells).toEqual(['1', '2', '3']);

    const bodyCells = columns.map(col => model.data('body', 0, col));
    expect(bodyCells).toEqual(['a', 'b', 'c']);
  });
  it('handles CRLF row delimiter', () => {
    // Rows end with "\r\n" and the delimiter is passed explicitly.
    const options = {
      data: 'a,b,c\r\nd,e,f\r\n',
      delimiter: ',',
      rowDelimiter: '\r\n'
    };
    const model = new DSVModel(options);
    const columns = [0, 1, 2];

    expect(model.rowCount('column-header')).toBe(1);
    expect(model.rowCount('body')).toBe(1);
    expect(model.columnCount('row-header')).toBe(1);
    expect(model.columnCount('body')).toBe(3);

    const headerCells = columns.map(col =>
      model.data('column-header', 0, col)
    );
    expect(headerCells).toEqual(['a', 'b', 'c']);

    const bodyCells = columns.map(col => model.data('body', 0, col));
    expect(bodyCells).toEqual(['d', 'e', 'f']);
  });
  it('handles CR row delimiter', () => {
    // Rows end with a bare "\r" and the delimiter is passed explicitly.
    const options = {
      data: 'a,b,c\rd,e,f\r',
      delimiter: ',',
      rowDelimiter: '\r'
    };
    const model = new DSVModel(options);
    const columns = [0, 1, 2];

    expect(model.rowCount('column-header')).toBe(1);
    expect(model.rowCount('body')).toBe(1);
    expect(model.columnCount('row-header')).toBe(1);
    expect(model.columnCount('body')).toBe(3);

    const headerCells = columns.map(col =>
      model.data('column-header', 0, col)
    );
    expect(headerCells).toEqual(['a', 'b', 'c']);

    const bodyCells = columns.map(col => model.data('body', 0, col));
    expect(bodyCells).toEqual(['d', 'e', 'f']);
  });
  it('can guess the row delimiter', () => {
    // No `rowDelimiter` is given; the "\r"-terminated rows must still be
    // split correctly.
    const model = new DSVModel({ data: 'a,b,c\rd,e,f\r', delimiter: ',' });
    const columns = [0, 1, 2];

    expect(model.rowCount('column-header')).toBe(1);
    expect(model.rowCount('body')).toBe(1);
    expect(model.columnCount('row-header')).toBe(1);
    expect(model.columnCount('body')).toBe(3);

    const headerCells = columns.map(col =>
      model.data('column-header', 0, col)
    );
    expect(headerCells).toEqual(['a', 'b', 'c']);

    const bodyCells = columns.map(col => model.data('body', 0, col));
    expect(bodyCells).toEqual(['d', 'e', 'f']);
  });
  it('handles a given quote character', () => {
    // Some fields are wrapped in single quotes; the expected cells below have
    // the quotes stripped.
    const options = {
      data: `a,'b','c'\r'd',e,'f'\r`,
      delimiter: ',',
      quote: `'`
    };
    const model = new DSVModel(options);
    const columns = [0, 1, 2];

    expect(model.rowCount('column-header')).toBe(1);
    expect(model.rowCount('body')).toBe(1);
    expect(model.columnCount('row-header')).toBe(1);
    expect(model.columnCount('body')).toBe(3);

    const headerCells = columns.map(col =>
      model.data('column-header', 0, col)
    );
    expect(headerCells).toEqual(['a', 'b', 'c']);

    const bodyCells = columns.map(col => model.data('body', 0, col));
    expect(bodyCells).toEqual(['d', 'e', 'f']);
  });
  it('handles delimiters and quotes inside quotes', () => {
    // Quoted fields here contain a row delimiter, a field delimiter and a
    // doubled quote character.
    const options = {
      data: `'a\rx',b,'c''x'\r'd,x',e,'f'\r`,
      delimiter: ',',
      quote: `'`,
      rowDelimiter: '\r'
    };
    const model = new DSVModel(options);
    const columns = [0, 1, 2];

    expect(model.rowCount('column-header')).toBe(1);
    expect(model.rowCount('body')).toBe(1);
    expect(model.columnCount('row-header')).toBe(1);
    expect(model.columnCount('body')).toBe(3);

    // The embedded "\r" stays in the cell and the doubled quote is expected
    // to collapse to a single one.
    const headerCells = columns.map(col =>
      model.data('column-header', 0, col)
    );
    expect(headerCells).toEqual(['a\rx', 'b', `c'x`]);

    // The embedded "," stays in the cell.
    const bodyCells = columns.map(col => model.data('body', 0, col));
    expect(bodyCells).toEqual(['d,x', 'e', 'f']);
  });
  it('handles rows that are too short or too long', () => {
    // Header has three fields; the next row has five and the last has two.
    const model = new DSVModel({ data: `a,b,c\n,c,d,e,f\ng,h`, delimiter: ',' });
    const columns = [0, 1, 2];
    const bodyRow = (row: number) =>
      columns.map(col => model.data('body', row, col));

    expect(model.rowCount('column-header')).toBe(1);
    expect(model.rowCount('body')).toBe(2);
    expect(model.columnCount('row-header')).toBe(1);
    expect(model.columnCount('body')).toBe(3);

    const headerCells = columns.map(col =>
      model.data('column-header', 0, col)
    );
    expect(headerCells).toEqual(['a', 'b', 'c']);

    // Surplus fields are expected to be kept, unsplit, in the last column.
    expect(bodyRow(0)).toEqual(['', 'c', 'd,e,f']);
    // Missing fields are expected to read as empty strings.
    expect(bodyRow(1)).toEqual(['g', 'h', '']);
  });
  it('handles delayed parsing of rows past the initial rows', async () => {
    const options = {
      data: `a,b,c\nc,d,e\nf,g,h\ni,j,k`,
      delimiter: ',',
      initialRows: 2
    };
    const model = new DSVModel(options);
    const columns = [0, 1, 2];
    const headerRow = () =>
      columns.map(col => model.data('column-header', 0, col));
    const bodyRow = (row: number) =>
      columns.map(col => model.data('body', row, col));

    // Before `ready` resolves: one header row and one body row are visible.
    expect(model.rowCount('column-header')).toBe(1);
    expect(model.rowCount('body')).toBe(1);
    expect(model.columnCount('row-header')).toBe(1);
    expect(model.columnCount('body')).toBe(3);
    expect(headerRow()).toEqual(['a', 'b', 'c']);
    // Expected behavior is that all unparsed data is lumped into the final field.
    expect(bodyRow(0)).toEqual(['c', 'd', 'e\nf,g,h\ni,j,k']);

    // Check everything is in order after all the data has been parsed asynchronously.
    await model.ready;
    expect(model.rowCount('column-header')).toBe(1);
    expect(model.rowCount('body')).toBe(3);
    expect(model.columnCount('row-header')).toBe(1);
    expect(model.columnCount('body')).toBe(3);
    expect(headerRow()).toEqual(['a', 'b', 'c']);
    expect(bodyRow(0)).toEqual(['c', 'd', 'e']);
    expect(bodyRow(1)).toEqual(['f', 'g', 'h']);
    expect(bodyRow(2)).toEqual(['i', 'j', 'k']);
  });
  307. });
  308. });