Skip to content

Commit

Permalink
Code improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
richardpaulhudson committed Jul 2, 2020
1 parent 8a867aa commit ba17bd2
Show file tree
Hide file tree
Showing 31 changed files with 5,174 additions and 4,173 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1776,8 +1776,8 @@ cv_threshold -- the minimum coefficient of variation with which a word or relati
mlp_* -- see https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html.
oneshot -- whether the same word or relationship matched multiple times within a single
document should be counted once only (value 'True') or multiple times (value 'False')
overlap_memory_size -- how many non-word phraselet matches to the left should be
checked for words in common with a current match.
overlap_memory_size -- No longer has any effect - the value defined in __init__()
is used instead. Retained for backwards compatibility.
hidden_layer_sizes -- a list where each entry is the size of a hidden layer, or 'None'
if the topology should be determined automatically.
```
Expand Down
6 changes: 3 additions & 3 deletions holmes_extractor/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
logging.getLogger("rdflib").setLevel(logging.WARNING) # avoid INFO console message on startup
from holmes_extractor.manager import Manager as Manager
from holmes_extractor.manager import MultiprocessingManager as MultiprocessingManager
from holmes_extractor.ontology import Ontology as Ontology
from holmes_extractor.manager import Manager
from holmes_extractor.manager import MultiprocessingManager
from holmes_extractor.ontology import Ontology
96 changes: 56 additions & 40 deletions holmes_extractor/consoles.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,16 @@ def _match_description(self, match_dict):
if match_dict['negated']:
match_description_to_return = '; negated'
if match_dict['uncertain']:
match_description_to_return = ''.join((match_description_to_return, '; uncertain'))
match_description_to_return = ''.join((
match_description_to_return, '; uncertain'))
if match_dict['involves_coreference']:
match_description_to_return = ''.join((match_description_to_return,
'; involves coreference'))
match_description_to_return = ''.join((
match_description_to_return, '; involves coreference'))
overall_similarity_measure = float(match_dict['overall_similarity_measure'])
if overall_similarity_measure < 1.0:
match_description_to_return = ''.join((match_description_to_return,
'; overall similarity measure=', str(overall_similarity_measure)))
match_description_to_return = ''.join((
match_description_to_return, '; overall similarity measure=',
str(overall_similarity_measure)))
return match_description_to_return

def _string_representation_of_word_match(self, word_match):
Expand All @@ -30,8 +32,9 @@ def _string_representation_of_word_match(self, word_match):
extracted_word = ''.join(("(refers to '", word_match['extracted_word'], "')"))
else:
extracted_word = ''
string = ''.join(("'", word_match['document_phrase'], "'", extracted_word, "->'",
word_match['search_phrase_word'], "' (", word_match['match_type']))
string = ''.join((
"'", word_match['document_phrase'], "'", extracted_word, "->'",
word_match['search_phrase_word'], "' (", word_match['match_type']))
if float(word_match['similarity_measure']) < 1.0:
string = ''.join((string, ': ', word_match['similarity_measure']))
string = ''.join((string, ")"))
Expand All @@ -42,7 +45,7 @@ def _common(self):
print("Holmes version 2.2 written by [email protected]")
print("Language is", self._semantic_analyzer.language_name)
print("Model is", self._semantic_analyzer.model)
if self._structural_matcher.ontology == None:
if self._structural_matcher.ontology is None:
print("No ontology is being used")
else:
print("Ontology is", self._structural_matcher.ontology.path)
Expand All @@ -58,7 +61,8 @@ def _common(self):
print("Derivational morphology analysis is ON")
else:
print("Derivational morphology analysis is OFF")
print("Overall similarity threshold is", str(
print(
"Overall similarity threshold is", str(
self._structural_matcher.overall_similarity_threshold))
if self._structural_matcher.overall_similarity_threshold < 1.0:
if self._structural_matcher.embedding_based_matching_on_root_words:
Expand All @@ -81,11 +85,11 @@ def start_chatbot_mode(self):
print(''.join(("Search phrase '", search_phrase.doc.text, "'")))
# only has an effect when debug==True
self._semantic_analyzer.debug_structures(search_phrase.doc)
if self._structural_matcher.ontology != None:
if self._structural_matcher.ontology is not None:
for token in search_phrase.matchable_tokens:
lemma = token._.holmes.lemma
matching_terms = self._structural_matcher.ontology.get_words_matching(
lemma)
lemma)
if len(matching_terms) > 0:
print(lemma, 'also matches', matching_terms)
print()
Expand All @@ -101,11 +105,12 @@ def start_chatbot_mode(self):
match_dicts = self._holmes.match_search_phrases_against(entry=search_sentence)
for match_dict in match_dicts:
print()
print(''.join(("Matched search phrase '",
match_dict['search_phrase'], "'", self._match_description(match_dict),
":")))
word_matches_string = '; '.join(
map(self._string_representation_of_word_match, match_dict['word_matches']))
print(''.join((
"Matched search phrase '",
match_dict['search_phrase'], "'", self._match_description(match_dict),
":")))
word_matches_string = '; '.join(map(
self._string_representation_of_word_match, match_dict['word_matches']))
print(word_matches_string)

def start_structural_search_mode(self):
Expand All @@ -131,40 +136,52 @@ def start_structural_search_mode(self):
if search_phrase in ('exit', 'exit()', 'bye'):
break
print()
match_dicts=[]
match_dicts = []
try:
match_dicts = self._holmes.match_documents_against(search_phrase_text=search_phrase)
if len(match_dicts) == 0:
print('No structural matching results were returned.')
else:
print('Structural matching results:')
except SearchPhraseContainsNegationError:
print('Structural matching was not attempted because the search phrase contained negation (not, never).')
print(
'Structural matching was not attempted because the search phrase contained '\
'negation (not, never).')
print()
except SearchPhraseContainsConjunctionError:
print('Structural matching was not attempted because the search phrase contained conjunction (and, or).')
print(
'Structural matching was not attempted because the search phrase contained '\
'conjunction (and, or).')
print()
except SearchPhraseContainsCoreferringPronounError:
print('Structural matching was not attempted because the search phrase contained a pronoun that referred back to a noun.')
print(
'Structural matching was not attempted because the search phrase contained a '\
'pronoun that referred back to a noun.')
print()
except SearchPhraseWithoutMatchableWordsError:
print('Structural matching was not attempted because the search phrase did not contain any words that could be matched.')
print(
'Structural matching was not attempted because the search phrase did not '\
' contain any words that could be matched.')
print()
except SearchPhraseContainsMultipleClausesError:
print('Structural matching was not attempted because the search phrase contained multiple clauses.')
print(
'Structural matching was not attempted because the search phrase contained '\
'multiple clauses.')
print()
print()
for match_dict in match_dicts:
print()
print(''.join(("Matched document '", match_dict['document'],
"' at index ", str(match_dict['index_within_document']),
self._match_description(match_dict), ":")))
print(''.join((
"Matched document '", match_dict['document'],
"' at index ", str(match_dict['index_within_document']),
self._match_description(match_dict), ":")))
print(''.join(('"', match_dict['sentences_within_document'], '"')))
word_matches_string = '; '.join(map(self._string_representation_of_word_match,
match_dict['word_matches']))
word_matches_string = '; '.join(
map(self._string_representation_of_word_match, match_dict['word_matches']))
print(word_matches_string)

def start_topic_matching_search_mode(self, only_one_result_per_document,
def start_topic_matching_search_mode(
self, only_one_result_per_document,
maximum_number_of_single_word_matches_for_relation_matching,
maximum_number_of_single_word_matches_for_embedding_matching):
"""Starts a topic matching search mode console enabling the matching of pre-registered
Expand Down Expand Up @@ -195,21 +212,20 @@ def start_topic_matching_search_mode(self, only_one_result_per_document,
break
print()
print('Performing topic matching ...')
topic_matches = {}
try:
print()
topic_match_dicts = \
self._holmes.topic_match_documents_returning_dictionaries_against(
search_text,
number_of_results = 5,
only_one_result_per_document=only_one_result_per_document,
maximum_number_of_single_word_matches_for_relation_matching =
maximum_number_of_single_word_matches_for_relation_matching,
maximum_number_of_single_word_matches_for_embedding_matching =
maximum_number_of_single_word_matches_for_embedding_matching)
search_text,
number_of_results=5,
only_one_result_per_document=only_one_result_per_document,
maximum_number_of_single_word_matches_for_relation_matching=
maximum_number_of_single_word_matches_for_relation_matching,
maximum_number_of_single_word_matches_for_embedding_matching=
maximum_number_of_single_word_matches_for_embedding_matching)
except NoSearchPhraseError:
pass
if topic_match_dicts == None or len(topic_match_dicts) == 0:
if topic_match_dicts is None or len(topic_match_dicts) == 0:
print('No topic match results were returned.')
print()
continue
Expand All @@ -218,7 +234,7 @@ def start_topic_matching_search_mode(self, only_one_result_per_document,
else:
print('Topic matching results:')
print()
for index, topic_match_dict in enumerate(topic_match_dicts):
for topic_match_dict in topic_match_dicts:
output = ''.join((
topic_match_dict['rank'],
'. Document ',
Expand All @@ -231,8 +247,8 @@ def start_topic_matching_search_mode(self, only_one_result_per_document,
str(topic_match_dict['score']),
':'
))
print (output)
print(output)
print()
print (topic_match_dict['text'])
print(topic_match_dict['text'])
print()
print()
6 changes: 3 additions & 3 deletions holmes_extractor/examples/example_chatbot_DE_insurance.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import holmes_extractor as holmes
import os
import holmes_extractor as holmes

script_directory = os.path.dirname(os.path.realpath(__file__))
ontology = holmes.Ontology(os.sep.join((script_directory,
'example_chatbot_DE_insurance_ontology.owl')))
ontology = holmes.Ontology(os.sep.join((
script_directory, 'example_chatbot_DE_insurance_ontology.owl')))
holmes_manager = holmes.Manager(model='de_core_news_md', ontology=ontology)
holmes_manager.register_search_phrase('Jemand benötigt eine Versicherung')
holmes_manager.register_search_phrase('Ein ENTITYPER schließt eine Versicherung ab')
Expand Down
8 changes: 4 additions & 4 deletions holmes_extractor/examples/example_chatbot_EN_insurance.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import holmes_extractor as holmes
import os
import holmes_extractor as holmes

script_directory = os.path.dirname(os.path.realpath(__file__))
ontology = holmes.Ontology(os.sep.join((
script_directory,'example_chatbot_EN_insurance_ontology.owl')))
holmes_manager = holmes.Manager(model='en_core_web_lg', ontology=ontology,
perform_coreference_resolution = True)
script_directory, 'example_chatbot_EN_insurance_ontology.owl')))
holmes_manager = holmes.Manager(
model='en_core_web_lg', ontology=ontology, perform_coreference_resolution=True)
holmes_manager.register_search_phrase('Somebody requires insurance')
holmes_manager.register_search_phrase('An ENTITYPERSON takes out insurance')
holmes_manager.register_search_phrase('A company buys payment insurance')
Expand Down
Loading

0 comments on commit ba17bd2

Please sign in to comment.