parse.spec.ts 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341
  1. // Copyright (c) Jupyter Development Team.
  2. // Distributed under the terms of the Modified BSD License.
  3. import expect = require('expect.js');
  4. import {
  5. parseDSV as parser
  6. } from '@jupyterlab/csvviewer';
  7. describe('csvviewer/parse', () => {
  8. describe('parseDSV', () => {
  it('does basic parsing of csv files', () => {
    // Three 7-character rows joined by two-character CRLF delimiters, so the
    // rows are expected to begin at offsets 0, 9 and 18. No rowDelimiter is
    // passed here.
    const csv = `a,b,c,d\r\n0,1,2,3\r\n4,5,6,7`;
    const base = {data: csv};

    // Row offsets only: one offset per row, and a column count of 0.
    const rowsOnly = parser({...base, columnOffsets: false});
    expect(rowsOnly.nrows).to.eql(3);
    expect(rowsOnly.ncols).to.eql(0);
    expect(rowsOnly.offsets).to.eql([0, 9, 18]);

    // Column offsets requested: one offset per field, listed row by row.
    const withColumns = parser({...base, columnOffsets: true});
    expect(withColumns.nrows).to.eql(3);
    expect(withColumns.ncols).to.eql(4);
    expect(withColumns.offsets).to.eql([0, 2, 4, 6, 9, 11, 13, 15, 18, 20, 22, 24]);
  });
  22. // For simplicity, we'll use \n as a row delimiter below.
  it('handles trailing row delimiter', () => {
    // The input ends with '\n'; only three rows are expected, i.e. the
    // trailing delimiter must not produce a fourth, empty row.
    const csv = `a,b,c,d\n0,1,2,3\n4,5,6,7\n`;
    const base = {data: csv, rowDelimiter: '\n'};

    // Row offsets only.
    const rowsOnly = parser({...base, columnOffsets: false});
    expect(rowsOnly.nrows).to.eql(3);
    expect(rowsOnly.offsets).to.eql([0, 8, 16]);

    // Row and column offsets.
    const withColumns = parser({...base, columnOffsets: true});
    expect(withColumns.nrows).to.eql(3);
    expect(withColumns.ncols).to.eql(4);
    expect(withColumns.offsets).to.eql([0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22]);
  });
  it('handles changing the field delimiter', () => {
    // Same layout as the comma-separated fixture, but with tab-separated
    // fields, so the expected offsets are unchanged.
    const tsv = `a\tb\tc\td\n0\t1\t2\t3\n4\t5\t6\t7\n`;
    const base = {data: tsv, delimiter: '\t', rowDelimiter: '\n'};

    // Row offsets only.
    const rowsOnly = parser({...base, columnOffsets: false});
    expect(rowsOnly.nrows).to.eql(3);
    expect(rowsOnly.offsets).to.eql([0, 8, 16]);

    // Row and column offsets.
    const withColumns = parser({...base, columnOffsets: true});
    expect(withColumns.nrows).to.eql(3);
    expect(withColumns.ncols).to.eql(4);
    expect(withColumns.offsets).to.eql([0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22]);
  });
  it('handles starting on a new row', () => {
    // Index 8 is the first character of the second row, so only the last two
    // rows are expected, with offsets still relative to the whole string.
    const csv = `a,b,c,d\n0,1,2,3\n4,5,6,7\n`;
    const base = {data: csv, rowDelimiter: '\n', startIndex: 8};

    // Row offsets only.
    const rowsOnly = parser({...base, columnOffsets: false});
    expect(rowsOnly.nrows).to.eql(2);
    expect(rowsOnly.offsets).to.eql([8, 16]);

    // Row and column offsets.
    const withColumns = parser({...base, columnOffsets: true});
    expect(withColumns.nrows).to.eql(2);
    expect(withColumns.ncols).to.eql(4);
    expect(withColumns.offsets).to.eql([8, 10, 12, 14, 16, 18, 20, 22]);
  });
  it('handles a max row argument', () => {
    // Three rows are available but maxRows caps the result at the first two.
    const csv = `a,b,c,d\n0,1,2,3\n4,5,6,7\n`;
    const base = {data: csv, rowDelimiter: '\n', maxRows: 2};

    // Row offsets only.
    const rowsOnly = parser({...base, columnOffsets: false});
    expect(rowsOnly.nrows).to.eql(2);
    expect(rowsOnly.offsets).to.eql([0, 8]);

    // Row and column offsets.
    const withColumns = parser({...base, columnOffsets: true});
    expect(withColumns.nrows).to.eql(2);
    expect(withColumns.ncols).to.eql(4);
    expect(withColumns.offsets).to.eql([0, 2, 4, 6, 8, 10, 12, 14]);
  });
  it('handles a start index and max row argument', () => {
    // Start at the second row (index 8) and take a single row: only the
    // middle row of the fixture is expected.
    const csv = `a,b,c,d\n0,1,2,3\n4,5,6,7\n`;
    const base = {data: csv, rowDelimiter: '\n', startIndex: 8, maxRows: 1};

    // Row offsets only.
    const rowsOnly = parser({...base, columnOffsets: false});
    expect(rowsOnly.nrows).to.eql(1);
    expect(rowsOnly.offsets).to.eql([8]);

    // Row and column offsets.
    const withColumns = parser({...base, columnOffsets: true});
    expect(withColumns.nrows).to.eql(1);
    expect(withColumns.ncols).to.eql(4);
    expect(withColumns.offsets).to.eql([8, 10, 12, 14]);
  });
  it('adjusts columns to match first row by default', () => {
    // Rows have 4, 2 and 6 fields. With no ncols option the expected column
    // count is 4, taken from the first row.
    const csv = `a,b,c,d\n0,\n1,2,3,4,5,6`;
    const base = {data: csv, rowDelimiter: '\n'};

    // Row offsets only.
    const rowsOnly = parser({...base, columnOffsets: false});
    expect(rowsOnly.nrows).to.eql(3);
    expect(rowsOnly.offsets).to.eql([0, 8, 11]);

    // The short row repeats offset 10 for its missing fields; the long row
    // contributes only four offsets.
    const withColumns = parser({...base, columnOffsets: true});
    expect(withColumns.nrows).to.eql(3);
    expect(withColumns.ncols).to.eql(4);
    expect(withColumns.offsets).to.eql([0, 2, 4, 6, 8, 10, 10, 10, 11, 13, 15, 17]);
  });
  it('adjusts columns to match first row by default with CRLF row delimiter', () => {
    // Same ragged fixture as the '\n' variant, but each row delimiter is the
    // two-character CRLF, which shifts the later offsets.
    const csv = `a,b,c,d\r\n0,\r\n1,2,3,4,5,6`;
    const base = {data: csv, rowDelimiter: '\r\n'};

    // Row offsets only.
    const rowsOnly = parser({...base, columnOffsets: false});
    expect(rowsOnly.nrows).to.eql(3);
    expect(rowsOnly.offsets).to.eql([0, 9, 13]);

    // The short row repeats offset 11 for its missing fields.
    const withColumns = parser({...base, columnOffsets: true});
    expect(withColumns.nrows).to.eql(3);
    expect(withColumns.ncols).to.eql(4);
    expect(withColumns.offsets).to.eql([ 0, 2, 4, 6, 9, 11, 11, 11, 13, 15, 17, 19 ]);
  });
  it('adjusts columns to match ncols', () => {
    // An explicit ncols of 5 overrides the 4 fields of the first row, so
    // every row is expected to yield exactly five offsets.
    const csv = `a,b,c,d\n0,\n1,2,3,4,5,6`;
    const base = {data: csv, rowDelimiter: '\n', ncols: 5};

    // Row offsets only.
    const rowsOnly = parser({...base, columnOffsets: false});
    expect(rowsOnly.nrows).to.eql(3);
    expect(rowsOnly.offsets).to.eql([0, 8, 11]);

    // The first row gains a fifth offset (7), the second is padded with
    // repeated 10s, and the third contributes five offsets for its six fields.
    const withColumns = parser({...base, columnOffsets: true});
    expect(withColumns.nrows).to.eql(3);
    expect(withColumns.ncols).to.eql(5);
    expect(withColumns.offsets).to.eql([0, 2, 4, 6, 7, 8, 10, 10, 10, 10, 11, 13, 15, 17, 19]);
  });
  it('adjusts columns to match ncols with CRLF row delimiter', () => {
    // The ncols: 5 scenario again, this time with two-character CRLF row
    // delimiters.
    const csv = `a,b,c,d\r\n0,\r\n1,2,3,4,5,6`;
    const base = {data: csv, rowDelimiter: '\r\n', ncols: 5};

    // Row offsets only.
    const rowsOnly = parser({...base, columnOffsets: false});
    expect(rowsOnly.nrows).to.eql(3);
    expect(rowsOnly.offsets).to.eql([0, 9, 13]);

    // Five offsets per row; padding offsets are 7 (row one) and 11 (row two).
    const withColumns = parser({...base, columnOffsets: true});
    expect(withColumns.nrows).to.eql(3);
    expect(withColumns.ncols).to.eql(5);
    expect(withColumns.offsets).to.eql([0, 2, 4, 6, 7, 9, 11, 11, 11, 11, 13, 15, 17, 19, 21]);
  });
  it('adjusts columns to match ncols with one row', () => {
    // A single unterminated 4-field row widened to 7 columns: the three
    // missing fields are expected at offset 7, the length of the input.
    const csv = `a,b,c,d`;
    const base = {data: csv, rowDelimiter: '\n', ncols: 7};

    // Row offsets only.
    const rowsOnly = parser({...base, columnOffsets: false});
    expect(rowsOnly.nrows).to.eql(1);
    expect(rowsOnly.offsets).to.eql([0]);

    // Row and column offsets.
    const withColumns = parser({...base, columnOffsets: true});
    expect(withColumns.nrows).to.eql(1);
    expect(withColumns.ncols).to.eql(7);
    expect(withColumns.offsets).to.eql([0, 2, 4, 6, 7, 7, 7]);
  });
  it('adjusts columns to match ncols with one row and trailing delimiter', () => {
    // As the single-row case, but the row is terminated by '\n'; the expected
    // results are identical, with padding offsets at 7 (the delimiter).
    const csv = `a,b,c,d\n`;
    const base = {data: csv, rowDelimiter: '\n', ncols: 7};

    // Row offsets only.
    const rowsOnly = parser({...base, columnOffsets: false});
    expect(rowsOnly.nrows).to.eql(1);
    expect(rowsOnly.offsets).to.eql([0]);

    // Row and column offsets.
    const withColumns = parser({...base, columnOffsets: true});
    expect(withColumns.nrows).to.eql(1);
    expect(withColumns.ncols).to.eql(7);
    expect(withColumns.offsets).to.eql([0, 2, 4, 6, 7, 7, 7]);
  });
  it('handles a single row delimiter', () => {
    // Input consisting of nothing but one row delimiter: expected to parse as
    // one row holding a single field at offset 0.
    const onlyNewline = `\n`;
    const base = {data: onlyNewline, rowDelimiter: '\n'};

    // Row offsets only.
    const rowsOnly = parser({...base, columnOffsets: false});
    expect(rowsOnly.nrows).to.eql(1);
    expect(rowsOnly.offsets).to.eql([0]);

    // Row and column offsets.
    const withColumns = parser({...base, columnOffsets: true});
    expect(withColumns.nrows).to.eql(1);
    expect(withColumns.ncols).to.eql(1);
    expect(withColumns.offsets).to.eql([0]);
  });
  it('handles adding columns or merging columns as necessary', () => {
    // Rows have 3, 5 and 2 fields; the expected column count is 3, so the
    // second row has surplus fields and the third row is one field short.
    const csv = `a,b,c\n,c,d,e,f\ng,h`;
    const base = {data: csv, rowDelimiter: '\n'};

    // Row offsets only.
    const rowsOnly = parser({...base, columnOffsets: false});
    expect(rowsOnly.nrows).to.eql(3);
    expect(rowsOnly.offsets).to.eql([0, 6, 15]);

    // Row two contributes just three offsets (6, 7, 9); row three gets a
    // third offset at 18, the length of the input.
    const withColumns = parser({...base, columnOffsets: true});
    expect(withColumns.nrows).to.eql(3);
    expect(withColumns.ncols).to.eql(3);
    expect(withColumns.offsets).to.eql([0, 2, 4, 6, 7, 9, 15, 17, 18]);
  });
  179. });
  180. describe('parseDSV quotes', () => {
  it('does basic parsing of quoted csv files', () => {
    // One row whose second field is wrapped in double quotes. The quote
    // characters count toward the offsets (the third field starts at 13).
    const csv = `first,"last",address,city,zip`;
    const base = {data: csv, rowDelimiter: '\n'};

    // Row offsets only.
    const rowsOnly = parser({...base, columnOffsets: false});
    expect(rowsOnly.nrows).to.eql(1);
    expect(rowsOnly.offsets).to.eql([0]);

    // Row and column offsets.
    const withColumns = parser({...base, columnOffsets: true});
    expect(withColumns.nrows).to.eql(1);
    expect(withColumns.ncols).to.eql(5);
    expect(withColumns.offsets).to.eql([0, 6, 13, 21, 26]);
  });
  it('handles quotes with field delimiters', () => {
    // The comma inside "b,c" must not split the field: the first row is
    // expected to have three columns, not four.
    const csv = `a,"b,c",d\n"e","f"`;
    const base = {data: csv, rowDelimiter: '\n'};

    // Row offsets only.
    const rowsOnly = parser({...base, columnOffsets: false});
    expect(rowsOnly.nrows).to.eql(2);
    expect(rowsOnly.offsets).to.eql([0, 10]);

    // The two-field second row gets a third offset at 17, the input length.
    const withColumns = parser({...base, columnOffsets: true});
    expect(withColumns.nrows).to.eql(2);
    expect(withColumns.ncols).to.eql(3);
    expect(withColumns.offsets).to.eql([0, 2, 8, 10, 14, 17]);
  });
  it('handles quotes with row delimiters', () => {
    // The '\n' inside the quoted field "b\nc" must not end the row: two rows
    // are expected, not three.
    const csv = `a,"b\nc",d\ne,f`;
    const base = {data: csv, rowDelimiter: '\n'};

    // Row offsets only.
    const rowsOnly = parser({...base, columnOffsets: false});
    expect(rowsOnly.nrows).to.eql(2);
    expect(rowsOnly.offsets).to.eql([0, 10]);

    // The two-field second row gets a third offset at 13, the input length.
    const withColumns = parser({...base, columnOffsets: true});
    expect(withColumns.nrows).to.eql(2);
    expect(withColumns.ncols).to.eql(3);
    expect(withColumns.offsets).to.eql([0, 2, 8, 10, 12, 13]);
  });
  it('handles quotes with escaped quotes', () => {
    // A doubled quote ("") inside a quoted field must not close the field:
    // "b""c" is expected to be a single field spanning offsets 2 through 8.
    const csv = `a,"b""c",d\ne,f`;
    const base = {data: csv, rowDelimiter: '\n'};

    // Row offsets only.
    const rowsOnly = parser({...base, columnOffsets: false});
    expect(rowsOnly.nrows).to.eql(2);
    expect(rowsOnly.offsets).to.eql([0, 11]);

    // Row and column offsets.
    const withColumns = parser({...base, columnOffsets: true});
    expect(withColumns.nrows).to.eql(2);
    expect(withColumns.ncols).to.eql(3);
    expect(withColumns.offsets).to.eql([0, 2, 9, 11, 13, 14]);
  });
  it('handles setting the quote character', () => {
    // Single quote as the quote character. The quoted field holds a doubled
    // quote, a comma and a newline, none of which should end the field.
    const csv = `a,'b'',\nc',d\ne,f`;
    const base = {data: csv, rowDelimiter: '\n', quote: `'`};

    // Row offsets only.
    const rowsOnly = parser({...base, columnOffsets: false});
    expect(rowsOnly.nrows).to.eql(2);
    expect(rowsOnly.offsets).to.eql([0, 13]);

    // Row and column offsets.
    const withColumns = parser({...base, columnOffsets: true});
    expect(withColumns.nrows).to.eql(2);
    expect(withColumns.ncols).to.eql(3);
    expect(withColumns.offsets).to.eql([0, 2, 11, 13, 15, 16]);
  });
  it('handles single quoted field', () => {
    // The entire input is one quoted field with no delimiters of any kind.
    const csv = `"a"`;
    const base = {data: csv, rowDelimiter: '\n'};

    // Row offsets only.
    const rowsOnly = parser({...base, columnOffsets: false});
    expect(rowsOnly.nrows).to.eql(1);
    expect(rowsOnly.offsets).to.eql([0]);

    // Row and column offsets.
    const withColumns = parser({...base, columnOffsets: true});
    expect(withColumns.nrows).to.eql(1);
    expect(withColumns.ncols).to.eql(1);
    expect(withColumns.offsets).to.eql([0]);
  });
  it('handles empty quoted field', () => {
    // The middle field is an empty quoted string (""); it is still expected
    // to count as a column, starting at offset 2.
    const csv = `a,"",b`;
    const base = {data: csv, rowDelimiter: '\n'};

    // Row offsets only.
    const rowsOnly = parser({...base, columnOffsets: false});
    expect(rowsOnly.nrows).to.eql(1);
    expect(rowsOnly.offsets).to.eql([0]);

    // Row and column offsets.
    const withColumns = parser({...base, columnOffsets: true});
    expect(withColumns.nrows).to.eql(1);
    expect(withColumns.ncols).to.eql(3);
    expect(withColumns.offsets).to.eql([0, 2, 5]);
  });
  265. });
  266. });
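  // Debugging aid: paste the two lines below into a test to print the offsets
  // and the slice of `data` between each pair of consecutive offsets.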
  268. // console.log(Array.from(results.offsets));
  269. // console.log(Array.from(results.offsets).map((i, ind, arr) => data.slice(i, arr[ind + 1])));