Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add raw output mode #72

Open
wants to merge 1 commit into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 25 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
[![Join the chat at https://gitter.im/hexenq/kuroshiro](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/hexenq/kuroshiro)
[![License](https://img.shields.io/github/license/lassjs/lass.svg)](LICENSE)

kuroshiro is a Japanese language library for converting Japanese sentence to Hiragana, Katakana or Romaji with furigana and okurigana modes supported.
kuroshiro is a Japanese language library for converting Japanese sentences to Hiragana, Katakana or Romaji with furigana, okurigana, and raw modes supported.

*Read this in other languages: [English](README.md), [日本語](README.jp.md), [简体中文](README.zh-cn.md), [繁體中文](README.zh-tw.md).*

Expand All @@ -17,7 +17,7 @@ You can check the demo [here](https://kuroshiro.org/#demo).

## Feature
- Japanese Sentence => Hiragana, Katakana or Romaji
- Furigana and okurigana supported
- Furigana, okurigana, and raw modes supported
- 🆕Multiple morphological analyzers supported
- 🆕Multiple romanization systems supported
- Useful Japanese utils
Expand Down Expand Up @@ -132,7 +132,7 @@ __Arguments__
| Options | Type | Default | Description |
|---|---|---|---|
| to | String | "hiragana" | Target syllabary [`hiragana`, `katakana`, `romaji`] |
| mode | String | "normal" | Convert mode [`normal`, `spaced`, `okurigana`, `furigana`] |
| mode | String | "normal" | Convert mode [`normal`, `spaced`, `okurigana`, `furigana`, `raw`] |
| romajiSystem<sup>*</sup> | String | "hepburn" | Romanization system [`nippon`, `passport`, `hepburn`] |
| delimiter_start | String | "(" | Delimiter(Start) |
| delimiter_end | String | ")" | Delimiter(End) |
Expand All @@ -159,6 +159,28 @@ await kuroshiro.convert("感じ取れたら手を繋ごう、重なるのは人
// result: 感(かん)じ取(と)れたら手(て)を繋(つな)ごう、重(かさ)なるのは人生(じんせい)のライン and レミリア最高(さいこう)!
```

```js
// raw
await kuroshiro.convert("感じ取れたら手を繋ごう、重なるのは人生のライン and レミリア最高!", {mode:"raw", to:"hiragana"});
// result:
// [
// { text: '感', reading: 'かん' },
// { text: 'じ' },
// { text: '取', reading: 'と' },
// { text: 'れたら' },
// { text: '手', reading: 'て' },
// { text: 'を' },
// { text: '繋', reading: 'つな' },
// { text: 'ごう、' },
// { text: '重', reading: 'かさ' },
// { text: 'なるのは' },
// { text: '人生', reading: 'じんせい' },
// { text: 'のライン and レミリア' },
// { text: '最高', reading: 'さいこう' },
// { text: '!' }
// ]
```

<pre>
// furigana
await kuroshiro.convert("感じ取れたら手を繋ごう、重なるのは人生のライン and レミリア最高!", {mode:"furigana", to:"hiragana"});
Expand Down
104 changes: 81 additions & 23 deletions src/core.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,19 @@ class Kuroshiro {
* @param {string} str Given String
* @param {Object} [options] Settings Object
* @param {string} [options.to="hiragana"] Target syllabary ["hiragana"|"katakana"|"romaji"]
* @param {string} [options.mode="normal"] Convert mode ["normal"|"spaced"|"okurigana"|"furigana"]
* @param {string} [options.mode="normal"] Convert mode ["normal"|"spaced"|"okurigana"|"furigana"|"raw"]
* @param {string} [options.romajiSystem="hepburn"] Romanization System ["nippon"|"passport"|"hepburn"]
* @param {string} [options.delimiter_start="("] Delimiter(Start)
* @param {string} [options.delimiter_end=")"] Delimiter(End)
* @returns {Promise} Promise object represents the result of conversion
*/
async convert(str, options) {
/**
 * Append buffered plain text to the raw-mode result as a segment without a reading.
 * Does nothing when the buffer is empty (or otherwise falsy), so no empty
 * `{ text: "" }` entries are pushed.
 *
 * @param {Array<Object>} rawResult Segment list being built; mutated in place
 * @param {string} pendingText Text accumulated since the last segment was pushed
 */
function flushPendingText(rawResult, pendingText) {
    if (!pendingText) {
        return;
    }
    const segment = { text: pendingText };
    rawResult.push(segment);
}

options = options || {};
options.to = options.to || "hiragana";
options.mode = options.mode || "normal";
Expand All @@ -82,7 +88,7 @@ class Kuroshiro {
throw new Error("Invalid Target Syllabary.");
}

if (["normal", "spaced", "okurigana", "furigana"].indexOf(options.mode) === -1) {
if (["normal", "spaced", "okurigana", "furigana", "raw"].indexOf(options.mode) === -1) {
throw new Error("Invalid Conversion Mode.");
}

Expand Down Expand Up @@ -164,7 +170,7 @@ class Kuroshiro {
throw new Error("Unknown option.to param");
}
}
else if (options.mode === "okurigana" || options.mode === "furigana") {
else if (options.mode === "okurigana" || options.mode === "furigana" || options.mode === "raw") {
const notations = []; // [basic, basic_type[1=kanji,2=kana,3=others], notation, pronunciation]
for (let i = 0; i < tokens.length; i++) {
const strType = getStrType(tokens[i].surface_form);
Expand Down Expand Up @@ -225,71 +231,123 @@ class Kuroshiro {
throw new Error("Unknown strType");
}
}
let result = "";
const rawResult = [];
let stringResult = "";
switch (options.to) {
case "katakana":
if (options.mode === "okurigana") {
for (let n0 = 0; n0 < notations.length; n0++) {
if (notations[n0][1] !== 1) {
result += notations[n0][0];
stringResult += notations[n0][0];
}
else {
result += notations[n0][0] + options.delimiter_start + toRawKatakana(notations[n0][2]) + options.delimiter_end;
stringResult += notations[n0][0] + options.delimiter_start + toRawKatakana(notations[n0][2]) + options.delimiter_end;
}
}
}
else { // furigana
else if (options.mode === "furigana") {
for (let n1 = 0; n1 < notations.length; n1++) {
if (notations[n1][1] !== 1) {
result += notations[n1][0];
stringResult += notations[n1][0];
}
else {
result += `<ruby>${notations[n1][0]}<rp>${options.delimiter_start}</rp><rt>${toRawKatakana(notations[n1][2])}</rt><rp>${options.delimiter_end}</rp></ruby>`;
stringResult += `<ruby>${notations[n1][0]}<rp>${options.delimiter_start}</rp><rt>${toRawKatakana(notations[n1][2])}</rt><rp>${options.delimiter_end}</rp></ruby>`;
}
}
}
return result;
else { // raw
let pendingText = "";
for (let n6 = 0; n6 < notations.length; n6++) {
if (notations[n6][1] !== 1) {
pendingText += notations[n6][0];
}
else {
flushPendingText(rawResult, pendingText);
pendingText = "";
rawResult.push({
text: notations[n6][0],
reading: toRawKatakana(notations[n6][2])
});
}
}
flushPendingText(rawResult, pendingText);
return rawResult;
}
return stringResult;
case "romaji":
if (options.mode === "okurigana") {
for (let n2 = 0; n2 < notations.length; n2++) {
if (notations[n2][1] !== 1) {
result += notations[n2][0];
stringResult += notations[n2][0];
}
else {
result += notations[n2][0] + options.delimiter_start + toRawRomaji(notations[n2][3], options.romajiSystem) + options.delimiter_end;
stringResult += notations[n2][0] + options.delimiter_start + toRawRomaji(notations[n2][3], options.romajiSystem) + options.delimiter_end;
}
}
}
else { // furigana
result += "<ruby>";
else if (options.mode === "furigana") {
stringResult += "<ruby>";
for (let n3 = 0; n3 < notations.length; n3++) {
result += `${notations[n3][0]}<rp>${options.delimiter_start}</rp><rt>${toRawRomaji(notations[n3][3], options.romajiSystem)}</rt><rp>${options.delimiter_end}</rp>`;
stringResult += `${notations[n3][0]}<rp>${options.delimiter_start}</rp><rt>${toRawRomaji(notations[n3][3], options.romajiSystem)}</rt><rp>${options.delimiter_end}</rp>`;
}
stringResult += "</ruby>";
}
else { // raw
let pendingText = "";
for (let n7 = 0; n7 < notations.length; n7++) {
if (notations[n7][1] !== 1) {
pendingText += notations[n7][0];
}
else {
flushPendingText(rawResult, pendingText);
pendingText = "";
rawResult.push({
text: notations[n7][0],
reading: toRawRomaji(notations[n7][3], options.romajiSystem)
});
}
}
result += "</ruby>";
flushPendingText(rawResult, pendingText);
return rawResult;
}
return result;
return stringResult;
case "hiragana":
if (options.mode === "okurigana") {
for (let n4 = 0; n4 < notations.length; n4++) {
if (notations[n4][1] !== 1) {
result += notations[n4][0];
stringResult += notations[n4][0];
}
else {
result += notations[n4][0] + options.delimiter_start + notations[n4][2] + options.delimiter_end;
stringResult += notations[n4][0] + options.delimiter_start + notations[n4][2] + options.delimiter_end;
}
}
}
else { // furigana
else if (options.mode === "furigana") {
for (let n5 = 0; n5 < notations.length; n5++) {
if (notations[n5][1] !== 1) {
result += notations[n5][0];
stringResult += notations[n5][0];
}
else {
stringResult += `<ruby>${notations[n5][0]}<rp>${options.delimiter_start}</rp><rt>${notations[n5][2]}</rt><rp>${options.delimiter_end}</rp></ruby>`;
}
}
}
else { // raw
let pendingText = "";
for (let n8 = 0; n8 < notations.length; n8++) {
if (notations[n8][1] !== 1) {
pendingText += notations[n8][0];
}
else {
result += `<ruby>${notations[n5][0]}<rp>${options.delimiter_start}</rp><rt>${notations[n5][2]}</rt><rp>${options.delimiter_end}</rp></ruby>`;
flushPendingText(rawResult, pendingText);
pendingText = "";
rawResult.push({ text: notations[n8][0], reading: notations[n8][2] });
}
}
flushPendingText(rawResult, pendingText);
return rawResult;
}
return result;
return stringResult;
default:
throw new Error("Invalid Target Syllabary.");
}
Expand Down
15 changes: 15 additions & 0 deletions test/node.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -290,4 +290,19 @@ describe("Kuroshiro Node Funtional Test", () => {
const result = await kuroshiro.convert(ori, { mode: "furigana", to: "romaji" });
expect(result).toEqual("<ruby>感<rp>(</rp><rt>kan</rt><rp>)</rp>じ<rp>(</rp><rt>ji</rt><rp>)</rp>取<rp>(</rp><rt>to</rt><rp>)</rp>れ<rp>(</rp><rt>re</rt><rp>)</rp>た<rp>(</rp><rt>ta</rt><rp>)</rp>ら<rp>(</rp><rt>ra</rt><rp>)</rp>手<rp>(</rp><rt>te</rt><rp>)</rp>を<rp>(</rp><rt>o</rt><rp>)</rp>繋<rp>(</rp><rt>tsuna</rt><rp>)</rp>ご<rp>(</rp><rt>go</rt><rp>)</rp>う<rp>(</rp><rt>u</rt><rp>)</rp>、<rp>(</rp><rt>,</rt><rp>)</rp>重<rp>(</rp><rt>kasa</rt><rp>)</rp>な<rp>(</rp><rt>na</rt><rp>)</rp>る<rp>(</rp><rt>ru</rt><rp>)</rp>の<rp>(</rp><rt>no</rt><rp>)</rp>は<rp>(</rp><rt>wa</rt><rp>)</rp>人生<rp>(</rp><rt>jinsei</rt><rp>)</rp>の<rp>(</rp><rt>no</rt><rp>)</rp>ラ<rp>(</rp><rt>ra</rt><rp>)</rp>イ<rp>(</rp><rt>i</rt><rp>)</rp>ン<rp>(</rp><rt>n</rt><rp>)</rp> <rp>(</rp><rt> </rt><rp>)</rp>a<rp>(</rp><rt>a</rt><rp>)</rp>n<rp>(</rp><rt>n</rt><rp>)</rp>d<rp>(</rp><rt>d</rt><rp>)</rp> <rp>(</rp><rt> </rt><rp>)</rp>レ<rp>(</rp><rt>re</rt><rp>)</rp>ミ<rp>(</rp><rt>mi</rt><rp>)</rp>リ<rp>(</rp><rt>ri</rt><rp>)</rp>ア<rp>(</rp><rt>a</rt><rp>)</rp>最高<rp>(</rp><rt>saikō</rt><rp>)</rp>!<rp>(</rp><rt>!</rt><rp>)</rp></ruby>");
});
it("Kanji to Hiragana with raw", async () => {
const ori = EXAMPLE_TEXT;
const result = await kuroshiro.convert(ori, { mode: "raw", to: "hiragana" });
expect(result).toEqual(JSON.parse("[{\"text\":\"感\",\"reading\":\"かん\"},{\"text\":\"じ\"},{\"text\":\"取\",\"reading\":\"と\"},{\"text\":\"れたら\"},{\"text\":\"手\",\"reading\":\"て\"},{\"text\":\"を\"},{\"text\":\"繋\",\"reading\":\"つな\"},{\"text\":\"ごう、\"},{\"text\":\"重\",\"reading\":\"かさ\"},{\"text\":\"なるのは\"},{\"text\":\"人生\",\"reading\":\"じんせい\"},{\"text\":\"のライン and レミリア\"},{\"text\":\"最高\",\"reading\":\"さいこう\"},{\"text\":\"!\"}]"));
});
it("Kanji to Katakana with raw", async () => {
const ori = EXAMPLE_TEXT;
const result = await kuroshiro.convert(ori, { mode: "raw", to: "katakana" });
expect(result).toEqual(JSON.parse("[{\"text\":\"感\",\"reading\":\"カン\"},{\"text\":\"じ\"},{\"text\":\"取\",\"reading\":\"ト\"},{\"text\":\"れたら\"},{\"text\":\"手\",\"reading\":\"テ\"},{\"text\":\"を\"},{\"text\":\"繋\",\"reading\":\"ツナ\"},{\"text\":\"ごう、\"},{\"text\":\"重\",\"reading\":\"カサ\"},{\"text\":\"なるのは\"},{\"text\":\"人生\",\"reading\":\"ジンセイ\"},{\"text\":\"のライン and レミリア\"},{\"text\":\"最高\",\"reading\":\"サイコウ\"},{\"text\":\"!\"}]"));
});
it("Kanji to Romaji with raw", async () => {
const ori = EXAMPLE_TEXT;
const result = await kuroshiro.convert(ori, { mode: "raw", to: "romaji" });
expect(result).toEqual(JSON.parse("[{\"text\":\"感\",\"reading\":\"kan\"},{\"text\":\"じ\"},{\"text\":\"取\",\"reading\":\"to\"},{\"text\":\"れたら\"},{\"text\":\"手\",\"reading\":\"te\"},{\"text\":\"を\"},{\"text\":\"繋\",\"reading\":\"tsuna\"},{\"text\":\"ごう、\"},{\"text\":\"重\",\"reading\":\"kasa\"},{\"text\":\"なるのは\"},{\"text\":\"人生\",\"reading\":\"jinsei\"},{\"text\":\"のライン and レミリア\"},{\"text\":\"最高\",\"reading\":\"saikō\"},{\"text\":\"!\"}]"));
});
});