-
Notifications
You must be signed in to change notification settings - Fork 0
/
ptree_parsing.py
96 lines (71 loc) · 3.08 KB
/
ptree_parsing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import re
from itertools import zip_longest
import os
# helper method
def grouper(iterable, n, fillvalue=None):
    """Collect the items of ``iterable`` into consecutive tuples of length ``n``.

    When the input runs out in the middle of a group, the missing positions
    of that last tuple are filled with ``fillvalue``.  The result is the lazy
    iterator returned by ``zip_longest``.
    """
    # Every "column" handed to zip_longest is the *same* iterator object, so
    # pulling one item per column walks the input n items at a time.
    shared = iter(iterable)
    columns = (shared for _ in range(n))
    return zip_longest(*columns, fillvalue=fillvalue)
# Classes that represent the nodes of the tree
class Node:
    """Common base class of the tree nodes; it defines no behaviour itself."""
class Leaf(Node):
    """Terminal node of the tree, holding the two numbers ``n_pos`` and ``n_neg``.

    NOTE(review): presumably these are the counts of positive and negative
    examples that reached this leaf -- confirm against the tree producer.
    """

    def __init__(self, n_pos, n_neg):
        self.n_pos: float = n_pos
        self.n_neg: float = n_neg

    def to_rule(self, head="positive", body=None):
        """Return a single rule line ``"<head> :- <c1>, <c2>.\\n"``.

        Args:
            head: text placed before ``" :- "``.
            body: iterable of condition strings, joined with ``", "``.
                ``None`` (the default) means no conditions.

        Returns:
            The rule as one newline-terminated string.  The stored numbers
            are not part of the rule text.
        """
        # A None sentinel instead of a mutable ``[]`` default: the default
        # object is never shared between calls.
        conditions = () if body is None else body
        return head + " :- " + ", ".join(conditions) + ".\n"

    def __str__(self):
        """Return the dict-style text ``{'+': n_pos, '-': n_neg}``."""
        return str({'+': self.n_pos, '-': self.n_neg})

    def __repr__(self):
        return str(self)
class InternalNode(Node):
    """Decision node: a test ``condition`` with a "yes" and a "no" subtree."""

    def __init__(self, condition, child_yes, child_no):
        self.condition: str = condition
        self.child_yes: Node = child_yes
        self.child_no: Node = child_no

    def to_rule(self, head="positive", body=None):
        """Return the rules of both subtrees, "yes" rules first.

        The "yes" subtree is asked for its rules with this node's condition
        appended to ``body``; the "no" subtree with the negated condition
        appended.  The two results are concatenated.

        Args:
            head: rule head passed down unchanged to the children.
            body: iterable of conditions collected on the path from the
                root to this node; ``None`` (the default) means none yet.

        Returns:
            One string containing every rule produced by the two subtrees.
        """
        # A None sentinel instead of a mutable ``[]`` default; copying into a
        # fresh list also means the caller's sequence is never modified and
        # any iterable (list, tuple, ...) is accepted.
        path = [] if body is None else list(body)
        yes_rules = self.child_yes.to_rule(head, path + [self.condition])
        # NOTE(review): "\+" is prefixed to the raw condition text.  If a
        # condition can be a conjunction such as "a, b", a Prolog-style reader
        # would presumably negate only "a" -- confirm with the rule consumer.
        no_rules = self.child_no.to_rule(head, path + ["\\+" + self.condition])
        return yes_rules + no_rules

    def __str__(self):
        """Return the dict-style text with keys 'condition', 'yes' and 'no'."""
        return str({'condition': self.condition, 'yes': self.child_yes, 'no': self.child_no})

    def __repr__(self):
        return str(self)
# Parser class
class PTreeParser:
    """Parse the textual form of a tree into ``Leaf`` / ``InternalNode`` objects.

    The format recognised by this class, line by line (after stripping
    surrounding whitespace):

    * a line ending in ``?`` opens an internal node; the text before the
      ``?`` is its condition;
    * the two branches of an internal node start with ``+--yes:`` and
      ``+--no:``; continuation lines of the "yes" branch carry a leading
      ``|`` which is removed one level at a time;
    * a line starting with ``[`` is a leaf and must contain a class
      distribution of the form ``[[name:number,name:number,...]]``;
    * any other line is skipped.

    ``pos`` and ``neg`` are the class names whose numbers are read from each
    leaf distribution and stored as ``Leaf.n_pos`` / ``Leaf.n_neg``.
    """

    # "[...] ... [[<distribution>]]" anchored at the start of a leaf line.
    _LEAF_RE = re.compile(r"\[[^\[]*\[\[([^\[\]]*)\]\]")
    # One "name:number" entry of the distribution (optional sign/exponent).
    _PAIR_RE = re.compile(r"\s*(\w+)\s*:\s*([-+]?[\d.]+(?:[eE][-+]?\d+)?)\s*")

    def __init__(self, pos: str, neg: str):
        self.pos = pos
        self.neg = neg

    def parse(self, text: str) -> "Node":
        """Parse ``text`` and return the root node.

        If ``text`` is the path of an existing file, the file content is
        parsed instead.  Returns ``None`` when no node line is found.
        """
        if os.path.isfile(text):
            with open(text) as f:
                text = f.read()
        lines = [line.strip() for line in text.split('\n')]
        return self._parse(lines)

    def _parse(self, lines) -> "Node":
        """Parse the first node found in ``lines``; ``None`` if there is none.

        Lines that are neither an internal-node line nor a leaf line are
        skipped.  This is a loop rather than one recursive call per skipped
        line, so a long run of unrelated lines cannot exhaust the recursion
        limit.
        """
        for start, line in enumerate(lines):
            if line.endswith('?'):
                return self._parse_internal_node(lines[start:])
            if line.startswith('['):
                return self._parse_leaf(lines[start:])
        return None

    @classmethod
    def _leaf_counts(cls, line):
        """Return ``{class_name: number}`` read from one leaf line.

        Every ``name:number`` entry is kept, however many there are (a
        single repeated regex group would only retain the last repetition).

        Raises:
            ValueError: if ``line`` is not a well-formed leaf line.
        """
        match = cls._LEAF_RE.match(line)
        if match is None:
            raise ValueError(f"not a valid leaf line: {line!r}")
        counts = {}
        for entry in match.group(1).split(','):
            pair = cls._PAIR_RE.fullmatch(entry)
            if pair is None:
                raise ValueError(f"bad class distribution in leaf line: {line!r}")
            counts[pair.group(1)] = float(pair.group(2))
        return counts

    def _parse_leaf(self, lines) -> "Leaf":
        """Build a ``Leaf`` from ``lines[0]``.

        Raises:
            ValueError: if the line is not a well-formed leaf line.
            KeyError: if ``self.pos`` or ``self.neg`` is not among the class
                names listed in the leaf.
        """
        counts = self._leaf_counts(lines[0])
        return Leaf(counts[self.pos], counts[self.neg])

    def _parse_internal_node(self, lines) -> "InternalNode":
        """Build an ``InternalNode`` from ``lines``; ``lines[0]`` is its condition line.

        Raises:
            ValueError: if the "+--no:" branch or the "+--yes:" branch is missing.
        """
        # The first line starting with '+--no:' belongs to this node: nested
        # "no" lines inside the "yes" branch still carry their '|' prefix here.
        index_no = next(
            (i for i, line in enumerate(lines) if line.startswith('+--no:')),
            None,
        )
        if index_no is None:
            raise ValueError(f"no '+--no:' branch for condition line: {lines[0]!r}")
        if index_no < 2:
            raise ValueError(f"no '+--yes:' branch for condition line: {lines[0]!r}")

        condition = lines[0].strip().strip('?').strip()

        # Slices are copies, so rewriting their first element leaves `lines` intact.
        yes_lines = lines[1:index_no]
        yes_lines[0] = yes_lines[0][len('+--yes:'):]
        # Drop one level of '|' continuation markers from the "yes" branch.
        yes_lines = [line.strip().strip('|').strip() for line in yes_lines]

        no_lines = lines[index_no:]
        no_lines[0] = no_lines[0][len('+--no:'):]
        no_lines = [line.strip() for line in no_lines]

        return InternalNode(condition, self._parse(yes_lines), self._parse(no_lines))
# p = PTreeParser("la", "un").parse("./mutagenesis_mutagenic_yes_probabilisticgap_0.ptree")
# print(p.to_rule(head="t(_)::get(_)"))