-
Notifications
You must be signed in to change notification settings - Fork 1
/
mf.py
109 lines (62 loc) · 2.4 KB
/
mf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import pandas as pd
from pymongo import MongoClient
import numpy as np
import warnings
from sklearn.decomposition import TruncatedSVD
# NOTE(review): the credentials / first-host portion of this URI reads
# "[email protected]", which looks like e-mail-obfuscation damage from a web
# export rather than a usable connection string. It is kept verbatim here;
# restore the real value (preferably from configuration rather than source
# control) before running against the cluster.
_MONGO_URI = 'mongodb://db_username:[email protected]:27017,moviers-shard-00-01-3hnlg.mongodb.net:27017,moviers-shard-00-02-3hnlg.mongodb.net:27017/moviers?ssl=true&replicaSet=moviers-shard-0&authSource=admin&retryWrites=true'

_SVD_COMPONENTS = 12        # number of latent factors kept per movie
_SVD_SEED = 17              # fixed seed so repeated calls give the same factors
_MIN_CORRELATION = 0.9      # minimum factor correlation to count as "similar"
_MAX_RECOMMENDATIONS = 10   # length cap of the returned list


def _load_ratings():
    """Read the whole ``user_ratings_4570_new`` collection into a DataFrame.

    The client is closed before returning so that each call does not leave
    a connection pool (and its monitor threads) behind.
    """
    client = MongoClient(_MONGO_URI)
    try:
        collection = client['moviers'].user_ratings_4570_new
        # Materialise the cursor while the client is still open.
        return pd.DataFrame(list(collection.find()))
    finally:
        client.close()


def _rank_candidates(corr, rated_idx):
    """Return column positions similar to the rated ones, best match first.

    Args:
        corr: square correlation matrix between all rating columns.
        rated_idx: integer array of column positions the user has rated.

    Returns:
        Integer array of candidate column positions ordered by descending
        correlation; ties keep (rated position, candidate position) order.
        A position may appear more than once.
    """
    rated_rows = corr[rated_idx]
    # NaN correlations (zero-variance rows) compare False and drop out here.
    row_pos, col_pos = np.nonzero(rated_rows >= _MIN_CORRELATION)
    # Exclude each rated column's match with itself by position: a computed
    # self-correlation is not guaranteed to be exactly 1.0, and a different
    # column that happens to correlate at exactly 1.0 is a valid candidate.
    not_self = col_pos != rated_idx[row_pos]
    row_pos, col_pos = row_pos[not_self], col_pos[not_self]
    # Stable sort on the negated scores == descending order, ties unchanged.
    order = np.argsort(-rated_rows[row_pos, col_pos], kind="stable")
    return col_pos[order]


def mf(username):
    """Recommend up to ten rating columns (movies) similar to those a user rated.

    Every document of the ratings collection is one row; after dropping
    ``_id`` and ``userId`` each remaining column is treated as one movie.
    The columns are projected onto 12 truncated-SVD factors, the factors are
    correlated pairwise, and columns whose correlation with any column rated
    by the user is at least 0.9 are returned, strongest correlation first.

    Args:
        username: value matched against the ``userId`` field. If several
            rows match, the last one is used.

    Returns:
        A list of at most ten distinct column names. The list is empty when
        the collection is empty, when ``username`` has no row, or when the
        user has no non-zero rating.
    """
    ratings = _load_ratings()
    if ratings.empty or 'userId' not in ratings.columns:
        return []

    # Resolve the user's row locally. Keeping this in a module-level global
    # would let an unknown user silently reuse the previous caller's row.
    matches = np.flatnonzero((ratings['userId'] == username).to_numpy())
    if matches.size == 0:
        return []
    person = int(matches[-1])

    # Missing ratings become 0, which is also what "not rated" means below.
    ratings_only = (
        ratings.drop(columns=['_id', 'userId'])
        .apply(pd.to_numeric)
        .fillna(0)
    )
    titles = list(ratings_only.columns)

    rated_idx = np.flatnonzero(ratings_only.iloc[person].to_numpy() != 0)
    if rated_idx.size == 0:
        return []

    # Rows of the transposed matrix are movies, so the factors are per movie.
    svd = TruncatedSVD(n_components=_SVD_COMPONENTS, random_state=_SVD_SEED)
    item_factors = svd.fit_transform(ratings_only.to_numpy().T)

    # corrcoef emits RuntimeWarning for zero-variance rows; silence it only
    # for this computation instead of changing the process-wide filter.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        corr = np.corrcoef(item_factors)
        ranked = _rank_candidates(corr, rated_idx)

    # NOTE(review): columns the user has already rated are not filtered out
    # (apart from the self-match) — confirm whether that is intended.
    recommendations = []
    seen = set()
    for col in ranked:
        title = titles[col]
        if title in seen:
            # The same movie can be similar to several rated movies; keep
            # only its best-scoring occurrence.
            continue
        seen.add(title)
        recommendations.append(title)
        if len(recommendations) == _MAX_RECOMMENDATIONS:
            break
    return recommendations