text.ts 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
// Copyright (c) Jupyter Development Team.
// Distributed under the terms of the Modified BSD License.
  3. /**
  4. * The namespace for text-related functions.
  5. */
  6. export
  7. namespace Text {
  8. // javascript stores text as utf16 and string indices use "code units",
  9. // which stores high-codepoint characters as "surrogate pairs",
  10. // which occupy two indices in the javascript string.
  11. // We need to translate cursor_pos in the Jupyter protocol (in characters)
  12. // to js offset (with surrogate pairs taking two spots).
  13. const HAS_SURROGATES: boolean = ('𝐚'.length > 1);
  14. /**
  15. * Convert a javascript string index into a unicode character offset
  16. *
  17. * @param jsIdx - The javascript string index (counting surrogate pairs)
  18. *
  19. * @param text - The text in which the offset is calculated
  20. *
  21. * @returns The unicode character offset
  22. */
  23. export
  24. function jsIndexToCharIndex (jsIdx: number, text: string): number {
  25. if (HAS_SURROGATES) {
  26. // not using surrogates, nothing to do
  27. return jsIdx;
  28. }
  29. let charIdx = jsIdx;
  30. for (let i = 0; i + 1 < text.length && i < jsIdx; i++) {
  31. let charCode = text.charCodeAt(i);
  32. // check for surrogate pair
  33. if (charCode >= 0xD800 && charCode <= 0xDBFF) {
  34. let nextCharCode = text.charCodeAt(i + 1);
  35. if (nextCharCode >= 0xDC00 && nextCharCode <= 0xDFFF) {
  36. charIdx--;
  37. i++;
  38. }
  39. }
  40. }
  41. return charIdx;
  42. }
  43. /**
  44. * Convert a unicode character offset to a javascript string index.
  45. *
  46. * @param charIdx - The index in unicode characters
  47. *
  48. * @param text - The text in which the offset is calculated
  49. *
  50. * @returns The js-native index
  51. */
  52. export
  53. function charIndexToJsIndex (charIdx: number, text: string): number {
  54. if (HAS_SURROGATES) {
  55. // not using surrogates, nothing to do
  56. return charIdx;
  57. }
  58. let jsIdx = charIdx;
  59. for (let i = 0; i + 1 < text.length && i < jsIdx; i++) {
  60. let charCode = text.charCodeAt(i);
  61. // check for surrogate pair
  62. if (charCode >= 0xD800 && charCode <= 0xDBFF) {
  63. let nextCharCode = text.charCodeAt(i + 1);
  64. if (nextCharCode >= 0xDC00 && nextCharCode <= 0xDFFF) {
  65. jsIdx++;
  66. i++;
  67. }
  68. }
  69. }
  70. return jsIdx;
  71. }
  72. }