-
Notifications
You must be signed in to change notification settings - Fork 0
/
ib.py
executable file
·128 lines (119 loc) · 3.6 KB
/
ib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import csv
import random
import string
import numpy as np
# Build each person's weighted average score from the movies they are credited on.
def indexbuilder(weight, movie_file, index_file):
    """Build a per-person index of weighted average movie scores.

    Each row of ``movie_file`` must provide the columns ``score``,
    ``directors``, ``writers`` and ``actors``.  The three people columns are
    assumed to hold a stringified list such as ``['A', 'B']``: the first and
    last characters of the field are dropped, the rest is split on ``", "``
    and the first and last characters of every stripped entry are dropped.

    Args:
        weight: Indexable with five numeric weights, in this order:
            directors, writers, first two actors, third and fourth actors,
            all remaining actors.
        movie_file: Path of the CSV file to read.
        index_file: Not used by this function; kept so existing callers
            keep working.

    Returns:
        dict mapping a person's name to ``[mean_score, total_weight]`` where
        ``mean_score`` is the weight-averaged score of every credit seen and
        ``total_weight`` is the sum of the weights of those credits.

    Raises:
        KeyError: If a required column is missing.
        ValueError: If a ``score`` cell is not a number.
    """
    index = {}
    # Text mode with newline='' is what the csv module expects on Python 3;
    # the context manager closes the file even if a row fails to parse.
    with open(movie_file, newline='') as f:
        reader = csv.DictReader(f, delimiter=',', quotechar='"')
        for line in reader:
            mark = float(line['score'])
            people = []

            for person in line['directors'][1:-1].split(", "):
                people.append((person.strip()[1:-1], weight[0]))

            # A writer listed several times on one movie counts only once.
            for person in set(line['writers'][1:-1].split(", ")):
                people.append((person.strip()[1:-1], weight[1]))

            # The billing position decides the weight.  Placeholder entries
            # still occupy a position here; they are filtered out below.
            for position, person in enumerate(line['actors'][1:-1].split(", ")):
                if position < 2:
                    role_weight = weight[2]
                elif position < 4:
                    role_weight = weight[3]
                else:
                    role_weight = weight[4]
                people.append((person.strip()[1:-1], role_weight))

            for key, role_weight in people:
                # Skip placeholder credits instead of indexing them as people.
                if 'more credit' in key or 'unknown' in key:
                    continue
                if key not in index:
                    index[key] = [mark, role_weight]
                    continue
                old_mark, old_weight = index[key][0], index[key][1]
                new_weight = old_weight + role_weight
                if new_weight == 0:
                    # A weighted mean over zero total weight is undefined;
                    # keep the existing entry rather than dividing by zero.
                    continue
                new_mark = (old_mark * old_weight + role_weight * mark) / new_weight
                index[key] = [new_mark, new_weight]
    return index
# Transform each movie's features from people's names into the mean score of each group of people
# (directors, writers, main actors, secondary actors, remaining actors).
def _people_scores(field, ppl, unknown_mean, unknown_std):
    """Return one score per credited name found in a stringified-list field."""
    scores = []
    for raw in field[1:-1].split(", "):
        # Strip surrounding whitespace before dropping the quote characters so
        # the lookup key has the same form as a stripped index key.
        name = raw.strip()[1:-1]
        if 'more credit' in name:
            continue  # placeholder entry, not a person
        if name in ppl:
            # Known person: use the stored score (may be a number or a string).
            scores.append(float(ppl[name][0]))
        else:
            # Unknown person: draw a score from a normal distribution.
            scores.append(random.gauss(unknown_mean, unknown_std))
    return scores


def vectorbuilder(test_file, ppl, unknown_mean=5.9, unknown_std=0.764323):
    """Turn every movie in a CSV file into a numeric feature vector.

    Each row of ``test_file`` must provide the columns ``imdb_id``, ``score``,
    ``directors``, ``writers`` and ``actors``.  The people columns are assumed
    to hold a stringified list such as ``['A', 'B']``.  Every name is replaced
    by ``ppl[name][0]`` when the name is a key of ``ppl``; otherwise a random
    value drawn from ``random.gauss(unknown_mean, unknown_std)`` is used.
    Entries containing ``'more credit'`` are ignored.

    Args:
        test_file: Path of the CSV file to read.
        ppl: Mapping from a person's name to a sequence whose first item is
            that person's score (a number or a numeric string).
        unknown_mean: Mean of the distribution used for unknown people.
        unknown_std: Standard deviation of that distribution.

    Returns:
        dict mapping ``imdb_id`` to
        ``[director, writer, actor12, actor34, ao, score]`` where ``director``
        and ``writer`` are ``numpy.mean`` results over the respective scores
        (NaN if a list ends up empty), ``actor12`` / ``actor34`` / ``ao`` are
        the means of the first two, the next two and the remaining actor
        scores (``0`` when a group is empty), and ``score`` is the row's own
        score as a float.

    Raises:
        KeyError: If a required column is missing.
        ValueError: If a score cannot be converted to a float.
    """
    movie = {}
    # Text mode with newline='' is what the csv module expects on Python 3;
    # the context manager guarantees the file is closed.
    with open(test_file, newline='') as f:
        reader = csv.DictReader(f, delimiter=',', quotechar='"')
        for line in reader:
            directors = _people_scores(line['directors'], ppl, unknown_mean, unknown_std)
            writers = _people_scores(line['writers'], ppl, unknown_mean, unknown_std)
            actors = _people_scores(line['actors'], ppl, unknown_mean, unknown_std)

            director = np.mean(np.array(directors))
            writer = np.mean(np.array(writers))

            # Group actor scores by billing position: 0-1, 2-3, everything else.
            main, secondary, others = [], [], []
            for position, score in enumerate(actors):
                if score == 0:
                    continue  # zero scores are left out of the averages
                if position < 2:
                    main.append(score)
                elif position < 4:
                    secondary.append(score)
                else:
                    others.append(score)

            actor12 = sum(main) / len(main) if main else 0
            actor34 = sum(secondary) / len(secondary) if secondary else 0
            ao = sum(others) / len(others) if others else 0

            movie[line['imdb_id']] = [
                director, writer, actor12, actor34, ao, float(line['score']),
            ]
    return movie
def write_index(index, index_file="index.csv"):
    """Write a people index to a CSV file with a ``Name,Score,Weight`` header.

    Args:
        index: Mapping from a person's name to a sequence whose first item is
            the score and whose second item is the weight.
        index_file: Path of the file to create or overwrite.

    Rows are written through ``csv.writer`` so that a name containing a comma
    or a double quote is quoted instead of silently producing extra columns.
    """
    # The file is opened exactly as before (plain text mode) and '\n' is used
    # as the row terminator, so output for ordinary names is unchanged; the
    # context manager closes the file even if a write fails.
    with open(index_file, 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(['Name', 'Score', 'Weight'])
        for key in index:
            entry = index[key]
            writer.writerow([key, str(entry[0]), str(entry[1])])
#def write_movie(movie,movie_file="movie.csv"):
# f = open(movie_file,'w')
# f.write('id,director,writer,actor\n')
# for key in movie.keys():
# f.write(key+','+str(movie[key][0])+','+str(movie[key][1])+','+str(movie[key][2])+'\n')
# f.close()