-
Notifications
You must be signed in to change notification settings - Fork 0
/
evaluation.py
178 lines (137 loc) · 7.77 KB
/
evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
#Use your sentiment classifier to evaluate the reviews in the friendships.reviews.txt file, which can be
#found in the Moodle resource folder. (Not all people in this file have given reviews!). Record the scores;
#It may be helpful to treat the scores of 1-2 as negative or 4-5 as positive! It may (or may not) be helpful
#to remember that there is one community where scores are particularly positive, because Amazon has
#run a very convincing mass-advertising campaign in that particular community.
from loguru import logger
from communities import create_communities
from sentiment import naive_bayes
from sklearn.metrics import precision_recall_fscore_support
score_classes = {
'0': None,
'1': 0,
'2': 0,
'3': None,
'4': 1,
'5': 1,
}
number_of_communities = 4
class User:
def __init__(self, name, friends, review):
self.name = name
self.friends = friends
self.review = review
self.score = 0
class Result:
def __init__(self, name, cluster, purchase, score):
self.name = name
self.cluster = int(cluster)
self.would_purchase = purchase
self.score = score
def load_data(filename):
users_with_reviews = []
users_without_reviews = []
with open(filename, "r") as file:
for line in file.readlines():
split = [x.strip() for x in line.split(':')]
if split[0] == 'user':
name = split[1]
elif split[0] == 'friends':
friends = split[1].split('\t')
elif split[0] == 'review':
review = split[1]
if review == '*':
users_without_reviews.append(User(name,friends,review))
else:
users_with_reviews.append(User(name,friends,review))
return users_without_reviews, users_with_reviews
def load_results(filename):
results = []
with open(filename, "r") as file:
for line in file.readlines():
split = [x.strip() for x in line.split(':')]
if split[0] == 'user':
name = split[1]
if split[0] == 'cluster':
cluster = split[1]
if split[0] == 'score':
score = score_classes[split[1]]
elif split[0] == 'purchase':
purchase = 1 if split[1] == 'yes' else 0
results.append(Result(name,cluster,purchase,score))
return results
def will_they_buy(community, users_without_reviews, users_with_reviews):
model = naive_bayes()
for user in users_with_reviews:
user.score = model.predict(user.review)
logger.info('Computed predictions')
for user in users_without_reviews:
user.score = infer_score_from_friends(user, users_with_reviews, community)
logger.info('Infered predictions')
return users_without_reviews, users_with_reviews
def infer_score_from_friends(user, users_with_reviews, community):
sum_score = 0
# All the user's friends that have given a review.
friends_with_reviews = [friend for friend in users_with_reviews if friend.name in user.friends]
num_friends_with_reviews = len(friends_with_reviews)
# Determines the community of the user
user_community = [com for com, names in community.items() if user.name in names]
for friend in friends_with_reviews:
friend_score = friend.score
# If the friend is from another community the score counts 10 times as much
# If the friend is from Kyle the score counts 10 times as much
friend_community = [com for com, names in community.items() if friend.name in names]
if(friend.name == 'kyle' or user_community != friend_community):
num_friends_with_reviews += 10
friend_score *= 10
sum_score += friend_score
# Score is the average score of all friends scores
score = sum_score if num_friends_with_reviews == 0 else round(sum_score / num_friends_with_reviews)
return score
def compute_measures(actual, users_without_reviews, users_with_reviews, community, scipy_community):
'''
To compute accuracy measures we still differentiate between users who
gave a review a review and those who didn't. For those who did we compare
on the score given. For those who did not we compare on wheter they
purchased the product or not.
'''
# Sort the lists according to the name of the user, s.t. all lists
# are in the same order.
actual.sort(key=lambda x: x.name)
users_with_reviews.sort(key=lambda x: x.name)
users_without_reviews.sort(key=lambda x: x.name)
compute_review_scores(actual, users_with_reviews)
compute_review_purchaces(actual, users_without_reviews)
compute_cluster_accuracy(actual, community, scipy_community)
def compute_review_scores(actual, users_with_reviews):
results = [act for act in actual if act.score != None]
user_scores = [user.score for user in users_with_reviews if user.name in [act.name for act in results]]
results = [act.score for act in results]
accuracy = len([1 for pred, res in zip(user_scores, results) if pred == res]) / len(results)
precision, recall, fscore, support = precision_recall_fscore_support(results, user_scores)
write_results_to_file("SCORE ACCURACY", "Accuracy: " + str(accuracy), "Precision: " + str(precision), "Recall: " + str(recall), "Fscore: " + str(fscore), "Support: " + str(support))
def compute_review_purchaces(actual, users_without_reviews):
results = [act.would_purchase for act in actual if act.name in [user.name for user in users_without_reviews]]
user_scores = [user.score for user in users_without_reviews]
accuracy = len([1 for pred, res in zip(user_scores, results) if pred == res]) / len(results)
precision, recall, fscore, support = precision_recall_fscore_support(results, user_scores)
write_results_to_file("PURCHASE ACCURACY", "Accuracy: " + str(accuracy), "Precision: " + str(precision), "Recall: " + str(recall), "Fscore: " + str(fscore), "Support: " + str(support))
def compute_cluster_accuracy(actual, community, scipy_community):
my_communities = f"My communities 1: {len(community[0])}, 2: {len(community[1])}, 3: {len(community[2])}, 4: {len(community[3])}, 5: {len(community[4])}, 6: {len(community[5])}, 7: {len(community[6])}, 8: {len(community[7])}, 9: {len(community[8])}, 10: {len(community[8])}"
scipy_communities = f"My communities 1: {len(scipy_community[0])}, 2: {len(scipy_community[1])}, 3: {len(scipy_community[2])}, 4: {len(scipy_community[3])}, 5: {len(scipy_community[4])}, 6: {len(scipy_community[5])}, 7: {len(scipy_community[6])}, 8: {len(scipy_community[7])}, 9: {len(scipy_community[8])}, 10: {len(scipy_community[9])}"
actual_communities = f"Actual community 1: {sum(map(lambda x : x.cluster == 1, actual))}, 2: {sum(map(lambda x : x.cluster == 2, actual))}, 3: {sum(map(lambda x : x.cluster == 3, actual))}, 4: {sum(map(lambda x : x.cluster == 4, actual))} "
write_results_to_file("COMMUNITY ACCURACY", my_communities, scipy_communities, actual_communities)
def write_results_to_file(*args):
with open('results2.txt', "a") as file:
for arg in args:
file.write(arg + '\n')
file.write('\n')
if __name__ == "__main__":
users_without_reviews, users_with_reviews = load_data('data/friendships.reviews.txt')
actual = load_results('data/friendships.reviews.results.txt')
logger.info('Loaded friendships data')
community = create_communities(number_of_communities, type='spectral')
community_scipy = create_communities(number_of_communities, type='scipy')
logger.info('Created communities')
users_without_reviews, users_with_reviews = will_they_buy(community,users_without_reviews, users_with_reviews)
compute_measures(actual, users_without_reviews, users_with_reviews, community, community_scipy)