From bc4cc870948f449a7e552172f968c0c6a052b27b Mon Sep 17 00:00:00 2001
From: Marley Mulvin Broome
Date: Wed, 30 Aug 2023 13:55:28 +0900
Subject: [PATCH] Fixed bug where the JSON export wouldn't pass the combine
 argument. Fixed bug where WordSlot wouldn't add the full frequency of a word
 with the same surface (it only incremented the count by one)

---
 src/jpfreq/exporters/iexporter.py |   4 +-
 src/jpfreq/exporters/json.py      |  15 +++-
 src/jpfreq/word_slot.py           |  18 ++++-
 test.sh                           |   9 ++-
 tests/exporters/test_json.py      | 117 +++++++++++++++++++++++++-----
 tests/test_json.py                |   0
 tests/test_word.py                |  28 +++++++
 tests/test_wordslots.py           |  80 +++++++++++++++++---
 8 files changed, 231 insertions(+), 40 deletions(-)
 delete mode 100644 tests/test_json.py
 create mode 100644 tests/test_word.py

diff --git a/src/jpfreq/exporters/iexporter.py b/src/jpfreq/exporters/iexporter.py
index 142cb3b..94d1684 100644
--- a/src/jpfreq/exporters/iexporter.py
+++ b/src/jpfreq/exporters/iexporter.py
@@ -10,9 +10,9 @@ class IExporter(metaclass=abc.ABCMeta):
 
     @abc.abstractmethod
-    def export(self, frequency_list: JapaneseFrequencyList, limit: int=100, combine: bool=True) -> str:
+    def export(self, frequency_list: JapaneseFrequencyList, limit: int = 100, combine: bool = True) -> str:
         raise NotImplementedError
 
     @abc.abstractmethod
-    def export_lazy(self, frequency_list: JapaneseFrequencyList, limit: int=100, combine: bool=True) -> Generator[str, None, None]:
+    def export_lazy(self, frequency_list: JapaneseFrequencyList, limit: int = 100, combine: bool = True) -> Generator[str, None, None]:
         raise NotImplementedError
diff --git a/src/jpfreq/exporters/json.py b/src/jpfreq/exporters/json.py
index 0c59e9d..ea1d2ca 100644
--- a/src/jpfreq/exporters/json.py
+++ b/src/jpfreq/exporters/json.py
@@ -39,24 +39,31 @@ def _create_export_dictionary(frequency_list: JapaneseFrequencyList, limit: int
 
         return dictionary
 
-    def export(self, frequency_list: JapaneseFrequencyList, limit: int = 100, combine: bool = False) -> str:
+    def export(self, frequency_list: JapaneseFrequencyList, limit: int = 100, combine: bool = False, as_dict: bool = False) -> str | dict:
         """
         Exports the frequency list to JSON.
         Parameters
         ----------
         frequency_list : JapaneseFrequencyList
             The frequency list to export
-        limit
+        limit : int
             The MAX number of words to export
-        combine
+        combine : bool
             Whether to combine the word slots or not
+        as_dict : bool
+            Whether to return the dictionary or the JSON string
 
         Returns
         -------
         str
             The JSON string
         """
-        return dumps(self._create_export_dictionary(frequency_list, limit=limit), ensure_ascii=False, indent=4)
+        result = self._create_export_dictionary(frequency_list, limit=limit, combine=combine)
+
+        if as_dict:
+            return result
+
+        return dumps(result, ensure_ascii=False, indent=4)
 
     def export_lazy(self, frequency_list: JapaneseFrequencyList, limit: int = 100, combine: bool = False) -> Generator[str, None, None]:
         return super().export_lazy(frequency_list, limit)
\ No newline at end of file
diff --git a/src/jpfreq/word_slot.py b/src/jpfreq/word_slot.py
index 87e80a9..eba3ae5 100644
--- a/src/jpfreq/word_slot.py
+++ b/src/jpfreq/word_slot.py
@@ -8,10 +8,22 @@ from .word import Word
 
 
-@dataclass
+@dataclass(init=False)
 class WordSlot:
     words: list[Word]
 
+    def __init__(self, words: Iterable[Word]):
+        """
+        Creates a WordSlot from a list of words.
+        Parameters
+        ----------
+        words : Iterable[Word]
+            The words to create the word slot from.
+        """
+        self.words = []
+
+        [self.add_word(word) for word in words]
+
     def __contains__(self, item: [Word | str]):
         """
         Checks if the word slot contains the item.
 
@@ -34,6 +46,8 @@ def __contains__(self, item: [Word | str]):
         if isinstance(item, str):
             return item in [word.surface for word in self.words]
 
+        return False
+
     def __len__(self):
         """
         Returns the frequency of the word slot.
@@ -104,7 +118,7 @@ def add_word(self, word: Word) -> None:
         """
         for old_word in self.words:
             if old_word.surface == word.surface:
-                old_word.frequency += 1
+                old_word.frequency += word.frequency
                 return
 
         self.words.append(word)
diff --git a/test.sh b/test.sh
index 1465ee6..39d5410 100755
--- a/test.sh
+++ b/test.sh
@@ -9,14 +9,16 @@ open_report=false
 generate_report=false
 report=""
 save_report=false
+exit_first=""
 
-while getopts "r:c:hogs" opt; do
+while getopts "r:c:hogsx" opt; do
   case $opt in
     h)
       echo "Usage: [-ogs] test.sh [-r ] [-c ]"
       echo "-o opens the coverage report in a browser after running tests. Requires -g"
       echo "-g generates a coverage report"
       echo "-s saves test coverage report to 'pytest-coverage.txt and pytest.xml"
+      echo "-x exits on first failure"
       exit 0
       ;;
     r)
@@ -34,6 +36,9 @@ while getopts "r:c:hogs" opt; do
     s)
       save_report=true
       ;;
+    x)
+      exit_first="-x"
+      ;;
    \?)
       echo "Invalid option: -$OPTARG" >&2
       ;;
@@ -55,7 +60,7 @@ exit_code=0
 
 # runs coverage with a html report
 # fails if cov < required_coverage
-result=$(pytest -n auto $report tests/)
+result=$(pytest -n auto $exit_first $report tests/)
 exit_code=$?
 
 if [ "$save_report" = true ] ; then
diff --git a/tests/exporters/test_json.py b/tests/exporters/test_json.py
index 07b0fc9..46cc5fb 100644
--- a/tests/exporters/test_json.py
+++ b/tests/exporters/test_json.py
@@ -1,6 +1,6 @@
 from jpfreq.exporters.json import JsonExporter
 from jpfreq.jp_frequency_list import JapaneseFrequencyList
-from jpfreq.word import WordType
+from json import dumps
 
 import pytest
 
@@ -15,20 +15,101 @@ def freq_list():
     return JapaneseFrequencyList()
 
 
-# test_to_dict_data = [
-#     ("", {}),
-#     ("あ", {"words": [{"surface": "あ", "types": [WordType.VERB], "frequency": 1}]}),
-# ]
-#
-#
-# @pytest.mark.parametrize("text, expected", test_to_dict_data)
-# def test_to_dict(exporter, freq_list: JapaneseFrequencyList, text, expected):
-#     freq_list.process_text(text)
-#     assert exporter._create_export_dictionary(freq_list) == expected
-#
-
-#
-# @pytest.mark.parametrize("text, expected", test_export_data)
-# def test_export(exporter, freq_list: JapaneseFrequencyList, text, expected):
-#     freq_list.process_text(text)
-#     assert exporter.export(freq_list).strip() == expected.strip()
+test_to_dict_keys_data = [
+    ("", ["text_info", "word_slots"], False, 100),
+    ("あ", ["text_info", "word_slots"], False, 100),
+    ("あ", ["text_info", "word_slots"], False, 1),
+    ("あ", ["text_info", "word_slots"], False, -1),
+    ("", ["text_info", "word_slots"], False, 100),
+    ("あ", ["text_info", "word_slots"], True, 100),
+    ("あ", ["text_info", "word_slots"], True, 1),
+    ("あ", ["text_info", "word_slots"], True, -1),
+]
+
+
+@pytest.mark.parametrize("text, expected_keys, combine, limit", test_to_dict_keys_data)
+def test_to_dict_keys(
+    exporter, freq_list: JapaneseFrequencyList, text, expected_keys, combine, limit
+):
+    freq_list.process_text(text)
+    result = exporter._create_export_dictionary(freq_list, combine=combine, limit=limit)
+
+    for key in expected_keys:
+        assert key in result.keys()
+
+
+test_to_dict_word_slots_count_data = [
+    ("", 0, False, 100),
+    ("あ", 1, False, 100),
+    ("あ", 1, False, 1),
+    ("あ", 1, False, -1),
+    ("猫が好き", 2, False, 100),
+    ("猫が好き", 1, False, 1),
+    ("猫が好き", 2, False, -1),
+    ("猫が猫", 1, False, 100),
+    ("猫が猫", 1, False, 1),
+    ("猫が猫", 1, False, -1),
+    ("あ", 1, True, 100),
+    ("あ", 1, True, 1),
+    ("あ", 1, True, -1),
+    ("猫が好き", 2, True, 100),
+    ("猫が好き", 1, True, 1),
+    ("猫が好き", 2, True, -1),
+    ("猫が猫", 1, True, 100),
+    ("猫が猫", 1, True, 1),
+    ("猫が猫", 1, True, -1),
+]
+
+
+@pytest.mark.parametrize(
+    "text, expected_count, combine, limit", test_to_dict_word_slots_count_data
+)
+def test_to_dict_word_slots_count(
+    exporter, freq_list: JapaneseFrequencyList, text, expected_count, combine, limit
+):
+    freq_list.process_text(text)
+    result = exporter._create_export_dictionary(freq_list, combine=combine, limit=limit)
+
+    assert len(result["word_slots"]) == expected_count
+
+
+test_export_data = [
+    ("", False, 100),
+    ("あ", False, 100),
+    ("あ", False, 1),
+    ("あ", False, -1),
+    ("猫が好き", False, 100),
+    ("猫が好き", False, 1),
+    ("猫が好き", False, -1),
+    ("", True, 100),
+    ("あ", True, 100),
+    ("あ", True, 1),
+    ("あ", True, -1),
+    ("猫が好き", True, 100),
+    ("猫が好き", True, 1),
+    ("猫が好き", True, -1),
+]
+
+
+@pytest.mark.parametrize("text, combine, limit", test_export_data)
+def test_export_string(exporter, freq_list, text, combine, limit):
+    freq_list.process_text(text)
+    result = exporter.export(freq_list, combine=combine, limit=limit)
+
+    assert isinstance(result, str)
+    assert result == dumps(
+        exporter._create_export_dictionary(freq_list, combine=combine, limit=limit),
+        ensure_ascii=False,
+        indent=4,
+    )
+
+
+@pytest.mark.parametrize("text, combine, limit", test_export_data)
+def test_export_dict(exporter, freq_list, text, combine, limit):
+    freq_list.process_text(text)
+    result = exporter.export(freq_list, combine=combine, limit=limit, as_dict=True)
+
+    assert isinstance(result, dict)
+    assert result == exporter._create_export_dictionary(
+        freq_list, combine=combine, limit=limit
+    )
diff --git a/tests/test_json.py b/tests/test_json.py
deleted file mode 100644
index e69de29..0000000
diff --git a/tests/test_word.py b/tests/test_word.py
new file mode 100644
index 0000000..2d5f807
--- /dev/null
+++ b/tests/test_word.py
@@ -0,0 +1,28 @@
+from jpfreq.word import Word, WordType
+
+import pytest
+
+to_dict_data = [
+    (
+        Word("test", "test", [], 1),
+        {"representation": "test", "surface": "test", "types": [], "frequency": 1},
+    ),
+    (
+        Word("test", "test", [WordType.NOUN, WordType.GENERAL], 1),
+        {
+            "representation": "test",
+            "surface": "test",
+            "types": ["名詞", "一般"],
+            "frequency": 1,
+        },
+    ),
+    (
+        Word("test", "test", [WordType.VERB], 2),
+        {"representation": "test", "surface": "test", "types": ["動詞"], "frequency": 2},
+    ),
+]
+
+
+@pytest.mark.parametrize("word, expected_dict", to_dict_data)
+def test_to_dict(word, expected_dict):
+    assert word.to_dict() == expected_dict
diff --git a/tests/test_wordslots.py b/tests/test_wordslots.py
index 1e37c15..4e3a1c5 100644
--- a/tests/test_wordslots.py
+++ b/tests/test_wordslots.py
@@ -45,6 +45,26 @@ def test_wordslot_contains():
     assert "test2" not in word_slot
 
 
+def test_wordslot_contains_string():
+    word_slot = WordSlot([Word("test", "test", [], 1)])
+
+    assert "test" in word_slot
+    assert "test2" not in word_slot
+
+
+def test_wordslot_contains_string_surface():
+    word_slot = WordSlot([Word("test", "test100", [], 1)])
+
+    assert "test100" in word_slot
+    assert "test" not in word_slot
+
+
+def test_wordslot_contains_none():
+    word_slot = WordSlot([Word("test", "test", [], 1)])
+
+    assert None not in word_slot
+
+
 def test_wordslot_len():
     word_slot = WordSlot([Word("test", "test", [], 1)])
 
@@ -53,6 +73,14 @@ def test_wordslot_len():
     assert word_slot.frequency == len(word_slot)
 
 
+def wordslot_idfn(val):
+    if isinstance(val, WordSlot):
+        return f"WordSlot({val.words})"
+    if isinstance(val, bool):
+        return f"combined={val}"
+    return val
+
+
 wordslot_to_dict_data = [
     (
         WordSlot([Word("test", "test", [], 1)]),
@@ -101,7 +129,7 @@ def test_wordslot_len():
         WordSlot(
             [
                 Word("test", "test", [WordType.NOUN, WordType.GENERAL], 1),
-                Word("test2", "test", [WordType.NOUN, WordType.GENERAL], 1),
+                Word("test", "test2", [WordType.NOUN, WordType.GENERAL], 1),
             ]
         ),
         False,
@@ -114,8 +142,8 @@ def test_wordslot_len():
                     "frequency": 1,
                 },
                 {
-                    "representation": "test2",
-                    "surface": "test",
+                    "representation": "test",
+                    "surface": "test2",
                     "types": ["名詞", "一般"],
                     "frequency": 1,
                 },
@@ -125,7 +153,7 @@ def test_wordslot_len():
     (
         WordSlot(
             [
-                Word("test2", "test", [WordType.NOUN, WordType.GENERAL], 1),
+                Word("test", "test2", [WordType.NOUN, WordType.GENERAL], 1),
                 Word("test", "test", [WordType.NOUN, WordType.GENERAL], 1),
             ]
         ),
@@ -190,14 +218,6 @@ def test_wordslot_len():
 ]
 
 
-def wordslot_idfn(val):
-    if isinstance(val, WordSlot):
-        return f"WordSlot({val.words})"
-    if isinstance(val, bool):
-        return f"combined={val}"
-    return val
-
-
 @pytest.mark.parametrize(
     "word_slot,combined,expected", wordslot_to_dict_data, ids=wordslot_idfn
 )
@@ -210,3 +230,39 @@ def test_wordslot_len():
 
     assert word_slot.to_dict() == {}
     assert word_slot.to_dict(combine=True) == {}
+
+
+wordslot_add_word_data = [
+    ([], [], 0, 0),
+    ([], [Word("test", "test", [], 1)], 1, 1),
+    ([Word("test", "test", [], 1)], [Word("test", "test", [], 2)], 3, 1),
+    ([Word("test", "test", [], 1)], [Word("test", "test", [], 1)], 2, 1),
+    ([Word("repr", "surface", [], 1)], [Word("test", "test", [], 1)], 2, 2),
+]
+
+
+def wordslot_add_word_idfn(val):
+    if isinstance(val, WordSlot):
+        return f"WordSlot({val.words})"
+    if isinstance(val, Word):
+        return f"Word({val.representation})"
+    return val
+
+
+@pytest.mark.parametrize(
+    "starting_words, added_words, expected_frequency, expected_len",
+    wordslot_add_word_data,
+    ids=wordslot_add_word_idfn,
+)
+def test_wordslot_add_word(
+    starting_words, added_words, expected_frequency, expected_len
+):
+    word_slot = WordSlot(starting_words)
+
+    [word_slot.add_word(word) for word in added_words]
+
+    assert word_slot.frequency == expected_frequency
+
+    assert len(word_slot.words) == expected_len
+
+    assert word_slot == WordSlot(starting_words + added_words)