diff --git a/README.md b/README.md index b069d40..30759ee 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ [![Join the chat at https://gitter.im/hexenq/kuroshiro](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/hexenq/kuroshiro) [![License](https://img.shields.io/github/license/lassjs/lass.svg)](LICENSE) -kuroshiro is a Japanese language library for converting Japanese sentence to Hiragana, Katakana or Romaji with furigana and okurigana modes supported. +kuroshiro is a Japanese language library for converting Japanese sentences to Hiragana, Katakana or Romaji with furigana, okurigana, and raw modes supported. *Read this in other languages: [English](README.md), [日本語](README.jp.md), [简体中文](README.zh-cn.md), [繁體中文](README.zh-tw.md).* @@ -17,7 +17,7 @@ You can check the demo [here](https://kuroshiro.org/#demo). ## Feature - Japanese Sentence => Hiragana, Katakana or Romaji -- Furigana and okurigana supported +- Furigana, okurigana, and raw supported - 🆕Multiple morphological analyzers supported - 🆕Multiple romanization systems supported - Useful Japanese utils @@ -132,7 +132,7 @@ __Arguments__ | Options | Type | Default | Description | |---|---|---|---| | to | String | "hiragana" | Target syllabary [`hiragana`, `katakana`, `romaji`] | -| mode | String | "normal" | Convert mode [`normal`, `spaced`, `okurigana`, `furigana`] | +| mode | String | "normal" | Convert mode [`normal`, `spaced`, `okurigana`, `furigana`, `raw`] | | romajiSystem* | String | "hepburn" | Romanization system [`nippon`, `passport`, `hepburn`] | | delimiter_start | String | "(" | Delimiter(Start) | | delimiter_end | String | ")" | Delimiter(End) | @@ -159,6 +159,28 @@ await kuroshiro.convert("感じ取れたら手を繋ごう、重なるのは人 // result: 感(かん)じ取(と)れたら手(て)を繋(つな)ごう、重(かさ)なるのは人生(じんせい)のライン and レミリア最高(さいこう)! 
``` +```js +// raw +await kuroshiro.convert("感じ取れたら手を繋ごう、重なるのは人生のライン and レミリア最高!", {mode:"raw", to:"hiragana"}); +// result: +// [ +// { text: '感', reading: 'かん' }, +// { text: 'じ' }, +// { text: '取', reading: 'と' }, +// { text: 'れたら' }, +// { text: '手', reading: 'て' }, +// { text: 'を' }, +// { text: '繋', reading: 'つな' }, +// { text: 'ごう、' }, +// { text: '重', reading: 'かさ' }, +// { text: 'なるのは' }, +// { text: '人生', reading: 'じんせい' }, +// { text: 'のライン and レミリア' }, +// { text: '最高', reading: 'さいこう' }, +// { text: '!' } +// ] +``` +
 // furigana
 await kuroshiro.convert("感じ取れたら手を繋ごう、重なるのは人生のライン and レミリア最高!", {mode:"furigana", to:"hiragana"});
diff --git a/src/core.js b/src/core.js
index 74d1222..400f488 100644
--- a/src/core.js
+++ b/src/core.js
@@ -63,13 +63,19 @@ class Kuroshiro {
      * @param {string} str Given String
      * @param {Object} [options] Settings Object
      * @param {string} [options.to="hiragana"] Target syllabary ["hiragana"|"katakana"|"romaji"]
-     * @param {string} [options.mode="normal"] Convert mode ["normal"|"spaced"|"okurigana"|"furigana"]
+     * @param {string} [options.mode="normal"] Convert mode ["normal"|"spaced"|"okurigana"|"furigana"|"raw"]
      * @param {string} [options.romajiSystem="hepburn"] Romanization System ["nippon"|"passport"|"hepburn"]
      * @param {string} [options.delimiter_start="("] Delimiter(Start)
      * @param {string} [options.delimiter_end=")"] Delimiter(End)
      * @returns {Promise} Promise object represents the result of conversion
      */
     async convert(str, options) {
+        function flushPendingText(rawResult, pendingText) { // raw-mode helper: emit the buffered text as one segment that has no `reading`
+            if (pendingText) { // an empty buffer is falsy, so no { text: "" } segment is ever pushed
+                rawResult.push({ text: pendingText }); // appends to the caller's array in place; nothing is returned
+            }
+        }
+
         options = options || {};
         options.to = options.to || "hiragana";
         options.mode = options.mode || "normal";
@@ -82,7 +88,7 @@ class Kuroshiro {
             throw new Error("Invalid Target Syllabary.");
         }
 
-        if (["normal", "spaced", "okurigana", "furigana"].indexOf(options.mode) === -1) {
+        if (["normal", "spaced", "okurigana", "furigana", "raw"].indexOf(options.mode) === -1) {
             throw new Error("Invalid Conversion Mode.");
         }
 
@@ -164,7 +170,7 @@ class Kuroshiro {
                     throw new Error("Unknown option.to param");
             }
         }
-        else if (options.mode === "okurigana" || options.mode === "furigana") {
+        else if (options.mode === "okurigana" || options.mode === "furigana" || options.mode === "raw") {
             const notations = []; // [basic, basic_type[1=kanji,2=kana,3=others], notation, pronunciation]
             for (let i = 0; i < tokens.length; i++) {
                 const strType = getStrType(tokens[i].surface_form);
@@ -225,71 +231,123 @@ class Kuroshiro {
                         throw new Error("Unknown strType");
                 }
             }
-            let result = "";
+            const rawResult = [];
+            let stringResult = "";
             switch (options.to) {
                 case "katakana":
                     if (options.mode === "okurigana") {
                         for (let n0 = 0; n0 < notations.length; n0++) {
                             if (notations[n0][1] !== 1) {
-                                result += notations[n0][0];
+                                stringResult += notations[n0][0];
                             }
                             else {
-                                result += notations[n0][0] + options.delimiter_start + toRawKatakana(notations[n0][2]) + options.delimiter_end;
+                                stringResult += notations[n0][0] + options.delimiter_start + toRawKatakana(notations[n0][2]) + options.delimiter_end;
                             }
                         }
                     }
-                    else { // furigana
+                    else if (options.mode === "furigana") {
                         for (let n1 = 0; n1 < notations.length; n1++) {
                             if (notations[n1][1] !== 1) {
-                                result += notations[n1][0];
+                                stringResult += notations[n1][0];
                             }
                             else {
-                                result += `${notations[n1][0]}${options.delimiter_start}${toRawKatakana(notations[n1][2])}${options.delimiter_end}`;
+                                stringResult += `${notations[n1][0]}${options.delimiter_start}${toRawKatakana(notations[n1][2])}${options.delimiter_end}`;
                             }
                         }
                     }
-                    return result;
+                    else { // raw
+                        let pendingText = "";
+                        for (let n6 = 0; n6 < notations.length; n6++) {
+                            if (notations[n6][1] !== 1) {
+                                pendingText += notations[n6][0];
+                            }
+                            else {
+                                flushPendingText(rawResult, pendingText);
+                                pendingText = "";
+                                rawResult.push({
+                                    text: notations[n6][0],
+                                    reading: toRawKatakana(notations[n6][2])
+                                });
+                            }
+                        }
+                        flushPendingText(rawResult, pendingText);
+                        return rawResult;
+                    }
+                    return stringResult;
                 case "romaji":
                     if (options.mode === "okurigana") {
                         for (let n2 = 0; n2 < notations.length; n2++) {
                             if (notations[n2][1] !== 1) {
-                                result += notations[n2][0];
+                                stringResult += notations[n2][0];
                             }
                             else {
-                                result += notations[n2][0] + options.delimiter_start + toRawRomaji(notations[n2][3], options.romajiSystem) + options.delimiter_end;
+                                stringResult += notations[n2][0] + options.delimiter_start + toRawRomaji(notations[n2][3], options.romajiSystem) + options.delimiter_end;
                             }
                         }
                     }
-                    else { // furigana
-                        result += "";
+                    else if (options.mode === "furigana") {
+                        stringResult += "";
                         for (let n3 = 0; n3 < notations.length; n3++) {
-                            result += `${notations[n3][0]}${options.delimiter_start}${toRawRomaji(notations[n3][3], options.romajiSystem)}${options.delimiter_end}`;
+                            stringResult += `${notations[n3][0]}${options.delimiter_start}${toRawRomaji(notations[n3][3], options.romajiSystem)}${options.delimiter_end}`;
+                        }
+                        stringResult += "";
+                    }
+                    else { // raw
+                        let pendingText = "";
+                        for (let n7 = 0; n7 < notations.length; n7++) {
+                            if (notations[n7][1] !== 1) {
+                                pendingText += notations[n7][0];
+                            }
+                            else {
+                                flushPendingText(rawResult, pendingText);
+                                pendingText = "";
+                                rawResult.push({
+                                    text: notations[n7][0],
+                                    reading: toRawRomaji(notations[n7][3], options.romajiSystem)
+                                });
+                            }
                         }
-                        result += "";
+                        flushPendingText(rawResult, pendingText);
+                        return rawResult;
                     }
-                    return result;
+                    return stringResult;
                 case "hiragana":
                     if (options.mode === "okurigana") {
                         for (let n4 = 0; n4 < notations.length; n4++) {
                             if (notations[n4][1] !== 1) {
-                                result += notations[n4][0];
+                                stringResult += notations[n4][0];
                             }
                             else {
-                                result += notations[n4][0] + options.delimiter_start + notations[n4][2] + options.delimiter_end;
+                                stringResult += notations[n4][0] + options.delimiter_start + notations[n4][2] + options.delimiter_end;
                             }
                         }
                     }
-                    else { // furigana
+                    else if (options.mode === "furigana") {
                         for (let n5 = 0; n5 < notations.length; n5++) {
                             if (notations[n5][1] !== 1) {
-                                result += notations[n5][0];
+                                stringResult += notations[n5][0];
+                            }
+                            else {
+                                stringResult += `${notations[n5][0]}${options.delimiter_start}${notations[n5][2]}${options.delimiter_end}`;
+                            }
+                        }
+                    }
+                    else { // raw
+                        let pendingText = "";
+                        for (let n8 = 0; n8 < notations.length; n8++) {
+                            if (notations[n8][1] !== 1) {
+                                pendingText += notations[n8][0];
                             }
                             else {
-                                result += `${notations[n5][0]}${options.delimiter_start}${notations[n5][2]}${options.delimiter_end}`;
+                                flushPendingText(rawResult, pendingText);
+                                pendingText = "";
+                                rawResult.push({ text: notations[n8][0], reading: notations[n8][2] });
                             }
                         }
+                        flushPendingText(rawResult, pendingText);
+                        return rawResult;
                     }
-                    return result;
+                    return stringResult;
                 default:
                     throw new Error("Invalid Target Syllabary.");
             }
diff --git a/test/node.spec.js b/test/node.spec.js
index d08bb04..5668d38 100644
--- a/test/node.spec.js
+++ b/test/node.spec.js
@@ -290,4 +290,19 @@ describe("Kuroshiro Node Funtional Test", () => {
         const result = await kuroshiro.convert(ori, { mode: "furigana", to: "romaji" });
         expect(result).toEqual("(kan)(ji)(to)(re)(ta)(ra)(te)(o)(tsuna)(go)(u)(,)(kasa)(na)(ru)(no)(wa)人生(jinsei)(no)(ra)(i)(n) ( )a(a)n(n)d(d) ( )(re)(mi)(ri)(a)最高(saikō)(!)");
     });
+    it("Kanji to Hiragana with raw", async () => {
+        const ori = EXAMPLE_TEXT;
+        const result = await kuroshiro.convert(ori, { mode: "raw", to: "hiragana" });
+        expect(result).toEqual(JSON.parse("[{\"text\":\"感\",\"reading\":\"かん\"},{\"text\":\"じ\"},{\"text\":\"取\",\"reading\":\"と\"},{\"text\":\"れたら\"},{\"text\":\"手\",\"reading\":\"て\"},{\"text\":\"を\"},{\"text\":\"繋\",\"reading\":\"つな\"},{\"text\":\"ごう、\"},{\"text\":\"重\",\"reading\":\"かさ\"},{\"text\":\"なるのは\"},{\"text\":\"人生\",\"reading\":\"じんせい\"},{\"text\":\"のライン and レミリア\"},{\"text\":\"最高\",\"reading\":\"さいこう\"},{\"text\":\"!\"}]"));
+    });
+    it("Kanji to Katakana with raw", async () => {
+        const ori = EXAMPLE_TEXT;
+        const result = await kuroshiro.convert(ori, { mode: "raw", to: "katakana" });
+        expect(result).toEqual(JSON.parse("[{\"text\":\"感\",\"reading\":\"カン\"},{\"text\":\"じ\"},{\"text\":\"取\",\"reading\":\"ト\"},{\"text\":\"れたら\"},{\"text\":\"手\",\"reading\":\"テ\"},{\"text\":\"を\"},{\"text\":\"繋\",\"reading\":\"ツナ\"},{\"text\":\"ごう、\"},{\"text\":\"重\",\"reading\":\"カサ\"},{\"text\":\"なるのは\"},{\"text\":\"人生\",\"reading\":\"ジンセイ\"},{\"text\":\"のライン and レミリア\"},{\"text\":\"最高\",\"reading\":\"サイコウ\"},{\"text\":\"!\"}]"));
+    });
+    it("Kanji to Romaji with raw", async () => {
+        const ori = EXAMPLE_TEXT;
+        const result = await kuroshiro.convert(ori, { mode: "raw", to: "romaji" });
+        expect(result).toEqual(JSON.parse("[{\"text\":\"感\",\"reading\":\"kan\"},{\"text\":\"じ\"},{\"text\":\"取\",\"reading\":\"to\"},{\"text\":\"れたら\"},{\"text\":\"手\",\"reading\":\"te\"},{\"text\":\"を\"},{\"text\":\"繋\",\"reading\":\"tsuna\"},{\"text\":\"ごう、\"},{\"text\":\"重\",\"reading\":\"kasa\"},{\"text\":\"なるのは\"},{\"text\":\"人生\",\"reading\":\"jinsei\"},{\"text\":\"のライン and レミリア\"},{\"text\":\"最高\",\"reading\":\"saikō\"},{\"text\":\"!\"}]"));
+    });
 });