// model.spec.ts (10 KB)
  1. // Copyright (c) Jupyter Development Team.
  2. // Distributed under the terms of the Modified BSD License.
  3. const fs = require('fs');
  4. import { DSVModel } from '../src';
  /**
   * Read a CSV fixture from disk as UTF-8 text.
   *
   * @param path - A module-style specifier or file path. It is passed through
   *   `require.resolve`, so package-relative specifiers such as
   *   `csv-spectrum/csvs/simple.csv` are located by Node's module resolution.
   * @returns The file contents decoded as a UTF-8 string.
   * @throws Whatever `require.resolve` or `fs.readFileSync` throws when the
   *   specifier cannot be resolved or the file cannot be read.
   */
  function readCSV(path: string): string {
    // Resolve into a separate binding instead of overwriting the parameter.
    const resolvedPath: string = require.resolve(path);
    return fs.readFileSync(resolvedPath, 'utf8');
  }
  /* tslint:disable:no-var-requires */
  /**
   * Build one fixture triple `[label, csvText, expectedRows]`.
   *
   * The CSV is loaded as raw text through `readCSV`; the expected result is
   * loaded from the matching JSON file through `require`. The CSV is read
   * first, then the JSON.
   */
  const spectrumCase = (label: string, csvPath: string, jsonPath: string) => [
    label,
    readCSV(csvPath),
    require(jsonPath)
  ];

  /**
   * Fixtures for the "parses a number of test files correctly" test.
   *
   * Each entry is `[label, csvText, expectedRows]`. That test ignores the
   * label, parses `csvText` with a comma delimiter, and compares the rows
   * (as objects keyed by column header) against `expectedRows`.
   */
  const CSV_TEST_FILES = [
    spectrumCase(
      'comma_in_quotes',
      'csv-spectrum/csvs/comma_in_quotes.csv',
      'csv-spectrum/json/comma_in_quotes.json'
    ),
    // The label differs from the file stem for this fixture.
    spectrumCase(
      'empty_values',
      'csv-spectrum/csvs/empty.csv',
      'csv-spectrum/json/empty.json'
    ),
    spectrumCase(
      'empty_crlf',
      'csv-spectrum/csvs/empty_crlf.csv',
      'csv-spectrum/json/empty_crlf.json'
    ),
    // Inline case: empty input text paired with an empty expected row list.
    ['empty_file', '', []],
    spectrumCase(
      'escaped_quotes',
      'csv-spectrum/csvs/escaped_quotes.csv',
      'csv-spectrum/json/escaped_quotes.json'
    ),
    spectrumCase(
      'json',
      'csv-spectrum/csvs/json.csv',
      'csv-spectrum/json/json.json'
    ),
    spectrumCase(
      'newlines',
      'csv-spectrum/csvs/newlines.csv',
      'csv-spectrum/json/newlines.json'
    ),
    spectrumCase(
      'newlines_crlf',
      'csv-spectrum/csvs/newlines_crlf.csv',
      'csv-spectrum/json/newlines_crlf.json'
    ),
    spectrumCase(
      'quotes_and_newlines',
      'csv-spectrum/csvs/quotes_and_newlines.csv',
      'csv-spectrum/json/quotes_and_newlines.json'
    ),
    spectrumCase(
      'simple',
      'csv-spectrum/csvs/simple.csv',
      'csv-spectrum/json/simple.json'
    ),
    spectrumCase(
      'simple_crlf',
      'csv-spectrum/csvs/simple_crlf.csv',
      'csv-spectrum/json/simple_crlf.json'
    ),
    spectrumCase(
      'utf8',
      'csv-spectrum/csvs/utf8.csv',
      'csv-spectrum/json/utf8.json'
    )
  ];
  /* tslint:enable:no-var-requires */
  /**
   * Specs for `DSVModel`: construction, delimiter / row-delimiter / quote
   * handling, header on and off, ragged rows, and delayed parsing of rows
   * beyond `initialRows`.
   */
  describe('csvviewer/model', () => {
    /** The two cell regions whose text these specs read back. */
    type Region = 'column-header' | 'body';

    /** A parsed body row, keyed by column-header text. */
    type RowRecord = { [key: string]: string };

    /**
     * Assert the dimensions every spec here checks: one column-header row,
     * one row-header column, three body columns, and `bodyRows` body rows.
     */
    const expectShape = (model: DSVModel, bodyRows: number): void => {
      expect(model.rowCount('column-header')).toBe(1);
      expect(model.rowCount('body')).toBe(bodyRows);
      expect(model.columnCount('row-header')).toBe(1);
      expect(model.columnCount('body')).toBe(3);
    };

    /** Read the cells in columns 0, 1 and 2 of `row` in `region`. */
    const firstThree = (model: DSVModel, region: Region, row: number): string[] =>
      [0, 1, 2].map(col => model.data(region, row, col));

    /**
     * Convert the model's body into one object per row, using the
     * column-header text as keys.
     */
    const toRecords = (model: DSVModel): RowRecord[] => {
      const headers: string[] = [];
      for (let col = 0; col < model.columnCount('body'); col++) {
        headers.push(model.data('column-header', 0, col));
      }

      const records: RowRecord[] = [];
      for (let rowIndex = 0; rowIndex < model.rowCount('body'); rowIndex++) {
        const record: RowRecord = {};
        for (let col = 0; col < model.columnCount('body'); col++) {
          record[headers[col]] = model.data('body', rowIndex, col);
        }
        records.push(record);
      }
      return records;
    };

    describe('DSVModel', () => {
      describe('#constructor()', () => {
        it('should instantiate a `DSVModel`', () => {
          const model = new DSVModel({ data: 'a,b,c\nd,e,f\n', delimiter: ',' });

          expectShape(model, 1);
          expect(firstThree(model, 'column-header', 0)).toEqual(['a', 'b', 'c']);
          expect(firstThree(model, 'body', 0)).toEqual(['d', 'e', 'f']);
        });
      });

      it('parses a number of test files correctly', () => {
        // The fixture label (first tuple slot) is not used by the assertion.
        for (const [, csv, answer] of CSV_TEST_FILES) {
          const model = new DSVModel({ data: csv, delimiter: ',' });

          expect(toRecords(model)).toEqual(answer);
        }
      });

      it('handles tab-separated data', () => {
        const model = new DSVModel({
          data: 'a\tb\tc\nd\te\tf\n',
          delimiter: '\t'
        });

        expectShape(model, 1);
        expect(firstThree(model, 'column-header', 0)).toEqual(['a', 'b', 'c']);
        expect(firstThree(model, 'body', 0)).toEqual(['d', 'e', 'f']);
      });

      it('handles not having a header', () => {
        const model = new DSVModel({
          data: 'a,b,c\nd,e,f\n',
          delimiter: ',',
          header: false
        });

        expectShape(model, 2);
        // With `header: false` the header cells are expected to be 1-based
        // column numbers rendered as strings.
        expect(firstThree(model, 'column-header', 0)).toEqual(['1', '2', '3']);
        expect(firstThree(model, 'body', 0)).toEqual(['a', 'b', 'c']);
        expect(firstThree(model, 'body', 1)).toEqual(['d', 'e', 'f']);
      });

      it('handles having only a header', () => {
        const model = new DSVModel({
          data: 'a,b,c\n',
          delimiter: ',',
          header: true
        });

        expectShape(model, 0);
        expect(firstThree(model, 'column-header', 0)).toEqual(['a', 'b', 'c']);
      });

      it('handles single non-header line', () => {
        const model = new DSVModel({
          data: 'a,b,c\n',
          delimiter: ',',
          header: false
        });

        expectShape(model, 1);
        expect(firstThree(model, 'column-header', 0)).toEqual(['1', '2', '3']);
        expect(firstThree(model, 'body', 0)).toEqual(['a', 'b', 'c']);
      });

      it('handles CRLF row delimiter', () => {
        const model = new DSVModel({
          data: 'a,b,c\r\nd,e,f\r\n',
          delimiter: ',',
          rowDelimiter: '\r\n'
        });

        expectShape(model, 1);
        expect(firstThree(model, 'column-header', 0)).toEqual(['a', 'b', 'c']);
        expect(firstThree(model, 'body', 0)).toEqual(['d', 'e', 'f']);
      });

      it('handles CR row delimiter', () => {
        const model = new DSVModel({
          data: 'a,b,c\rd,e,f\r',
          delimiter: ',',
          rowDelimiter: '\r'
        });

        expectShape(model, 1);
        expect(firstThree(model, 'column-header', 0)).toEqual(['a', 'b', 'c']);
        expect(firstThree(model, 'body', 0)).toEqual(['d', 'e', 'f']);
      });

      it('can guess the row delimiter', () => {
        // No `rowDelimiter` is passed; the CR-terminated input must still
        // split into a header row and one body row.
        const model = new DSVModel({ data: 'a,b,c\rd,e,f\r', delimiter: ',' });

        expectShape(model, 1);
        expect(firstThree(model, 'column-header', 0)).toEqual(['a', 'b', 'c']);
        expect(firstThree(model, 'body', 0)).toEqual(['d', 'e', 'f']);
      });

      it('handles a given quote character', () => {
        const model = new DSVModel({
          data: `a,'b','c'\r'd',e,'f'\r`,
          delimiter: ',',
          quote: `'`
        });

        expectShape(model, 1);
        expect(firstThree(model, 'column-header', 0)).toEqual(['a', 'b', 'c']);
        expect(firstThree(model, 'body', 0)).toEqual(['d', 'e', 'f']);
      });

      it('handles delimiters and quotes inside quotes', () => {
        const model = new DSVModel({
          data: `'a\rx',b,'c''x'\r'd,x',e,'f'\r`,
          delimiter: ',',
          quote: `'`,
          rowDelimiter: '\r'
        });

        expectShape(model, 1);
        // A quoted row delimiter stays in the field, and a doubled quote
        // is expected to collapse to a single quote character.
        expect(firstThree(model, 'column-header', 0)).toEqual([
          'a\rx',
          'b',
          `c'x`
        ]);
        expect(firstThree(model, 'body', 0)).toEqual(['d,x', 'e', 'f']);
      });

      it('handles rows that are too short or too long', () => {
        const model = new DSVModel({
          data: `a,b,c\n,c,d,e,f\ng,h`,
          delimiter: ','
        });

        expectShape(model, 2);
        expect(firstThree(model, 'column-header', 0)).toEqual(['a', 'b', 'c']);
        // Extra fields on a long row are expected to be folded into the last
        // column; a short row is expected to be padded with empty strings.
        expect(firstThree(model, 'body', 0)).toEqual(['', 'c', 'd,e,f']);
        expect(firstThree(model, 'body', 1)).toEqual(['g', 'h', '']);
      });

      it('handles delayed parsing of rows past the initial rows', async () => {
        const model = new DSVModel({
          data: `a,b,c\nc,d,e\nf,g,h\ni,j,k`,
          delimiter: ',',
          initialRows: 2
        });

        expectShape(model, 1);
        expect(firstThree(model, 'column-header', 0)).toEqual(['a', 'b', 'c']);
        // Before the remaining rows are parsed, all not-yet-parsed text is
        // expected to sit in the final field of the last parsed row.
        expect(firstThree(model, 'body', 0)).toEqual([
          'c',
          'd',
          'e\nf,g,h\ni,j,k'
        ]);

        // Once asynchronous parsing has finished, every row must be split out.
        await model.ready;

        expectShape(model, 3);
        expect(firstThree(model, 'column-header', 0)).toEqual(['a', 'b', 'c']);
        expect(firstThree(model, 'body', 0)).toEqual(['c', 'd', 'e']);
        expect(firstThree(model, 'body', 1)).toEqual(['f', 'g', 'h']);
        expect(firstThree(model, 'body', 2)).toEqual(['i', 'j', 'k']);
      });
    });
  });