-
Notifications
You must be signed in to change notification settings - Fork 0
/
source1.py
51 lines (37 loc) · 1.64 KB
/
source1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
from fuzzywuzzy import process
from fuzzywuzzy import fuzz
import json
# Load the procedures dataset once at import time. The parsed JSON document
# is kept in the module-level name `data`, which get_matched_procedure reads.
with open("DatasetGovProc.json", encoding="UTF-8") as dataset_file:
    data = json.load(dataset_file)
def get_matched_procedure(procedure: str, *, min_ratio: int = 90):
    """Return the procedure title in `data` that best matches `procedure`.

    `data` is the whole JSON file, loaded globally. The title found at
    ``data["data"][i]["subThematics"][0]["govprocedure"][0]["title"]`` of
    every entry is compared with `procedure` using
    ``fuzz.token_sort_ratio``, and the highest-scoring title is selected.

    Args:
        procedure (str): The name of the procedure to look up. ``None`` is
            accepted and yields ``None``.
        min_ratio (int): Minimum matching ratio the best candidate must
            reach to be returned. Keyword-only; defaults to 90.

    Returns:
        ``None`` if `procedure` is ``None``, if there are no titles to
        compare against, or if the best matching ratio is less than
        `min_ratio`. Otherwise, a ``(title, matching_ratio)`` tuple for the
        title with the maximum matching ratio.
    """
    if procedure is None:
        return None

    # Storing all the names of the procedures in a list. Only the first
    # sub-thematic and its first procedure are read from each entry.
    all_procedure_names = [
        entry["subThematics"][0]["govprocedure"][0]["title"]
        for entry in data["data"]
    ]

    # Finding the single string with the maximum matching ratio among all
    # available strings. extractOne returns None when there is nothing to
    # score, so the result must not be unpacked unconditionally.
    best_match = process.extractOne(
        procedure, all_procedure_names, scorer=fuzz.token_sort_ratio
    )
    if best_match is None:
        return None

    sentence, matching_ratio = best_match
    if matching_ratio >= min_ratio:
        return sentence, matching_ratio
    return None
# print(get_matched_procedure("Medical certificate dr for license"))
# print(get_matched_procedure("passport"))
# str_list = ['Joe Biden', 'Joseph Biden', 'Joseph R Biden']
# match_ratios = process.extract('joe r biden', str_list, scorer=fuzz.token_sort_ratio)
# print(match_ratios)
# best_match = process.extractOne('', str_list, scorer=fuzz.token_sort_ratio)
# print(best_match)