-
Notifications
You must be signed in to change notification settings - Fork 0
/
testBookAvg.py
74 lines (50 loc) · 2.13 KB
/
testBookAvg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import pandas as pd
import numpy as np
from surprise import NormalPredictor
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import cross_validate
from surprise import KNNBasic , KNNWithMeans , KNNWithZScore , KNNBaseline
from surprise import accuracy
from surprise.model_selection import train_test_split
import scipy.sparse
from random import randint
# ---------------------------------------------------------------------------
# Load the inputs: precomputed similarity data, book metadata, the users'
# "to read" lists and the explicit ratings.
# ---------------------------------------------------------------------------
print("Should work now")

# NOTE(review): `similarity` and `userDict` are not referenced again anywhere
# in this script; the loads are kept so the script still fails fast when the
# files are missing, but they (and the dense conversion, which can be large)
# look removable -- confirm before deleting.
sparseMatrix = scipy.sparse.load_npz('Data/simCosine.npz')
similarity = sparseMatrix.todense()
# `.item()` implies a 0-d object array (presumably a pickled dict). NumPy
# refuses to unpickle object arrays unless allow_pickle=True is passed.
# Unpickling executes arbitrary code: only do this for files you produced.
userDict = np.load('Data/userDict.npy', allow_pickle=True).item()

# Book metadata, restricted to the listed columns.
b_cols = ['book_id', 'books_count', 'isbn', 'isbn13', 'authors',
          'original_publication_year', 'original_title', 'average_rating',
          'ratings_count', 'image_url']
books = pd.read_csv('Data/books.csv', sep=',', usecols=b_cols,
                    encoding='latin-1', low_memory=False)

# (user_id, book_id) pairs of books users marked as "to read".
tr_cols = ['user_id', 'book_id']
to_read = pd.read_csv('Data/to_read.csv', sep=',', usecols=tr_cols,
                      encoding='latin-1', low_memory=False)

# Explicit ratings: the file's header row is skipped and the columns are
# named from br_cols, then reordered to (user_id, book_id, rating).
br_cols = ['book_id', 'user_id', 'rating']
bookRatings = pd.read_csv('Data/ratings.csv', sep=',', names=br_cols,
                          encoding='latin-1', low_memory=False, skiprows=[0])
bookRatings = bookRatings[['user_id', 'book_id', 'rating']]

# Keep only the first rating per (user, book) pair. `keep` must be passed by
# keyword: it is keyword-only in pandas >= 2.0.
bookRatings = bookRatings.drop_duplicates(['user_id', 'book_id'], keep='first')

# Keep only users with at least 4 ratings. The original discarded the result
# of this call, so the filter never took effect.
bookRatings = bookRatings.groupby('user_id').filter(lambda x: len(x) >= 4)

print(to_read.head())
print(bookRatings.shape)
# ---------------------------------------------------------------------------
# Turn "to read" entries into implicit ratings: for every (user, book) pair
# whose book has at least 100000 ratings, record the book's average rating as
# that user's rating. The collected rows end up in `rowlist`.
# ---------------------------------------------------------------------------
rowlist = []
print("Saving dataframe bro pls")
# itertuples with named attributes avoids depending on the CSV column order
# and on integer-key positional Series access (deprecated in pandas 2.x).
for entry in to_read.itertuples(index=False):
    user_id = entry.user_id
    book_id = entry.book_id
    # Ids of 10000 and above are ignored (bound kept from the original).
    if book_id < 10000:
        # NOTE(review): this is a lookup by index *label*, not by the
        # `book_id` column. `books` is read without an index_col, so with a
        # default 0-based index this may be off by one relative to the
        # intended book -- confirm against the CSV schema.
        userCount = books.at[book_id, 'ratings_count']
        if userCount >= 100000:
            calcRating = books.at[book_id, 'average_rating']
            print(calcRating)
            rowlist.append({'user_id': user_id,
                            'book_id': book_id,
                            'rating': calcRating})
# ---------------------------------------------------------------------------
# Append the implicit ratings collected in `rowlist` to the explicit ratings
# and write the combined table to disk.
# ---------------------------------------------------------------------------
print("Saving now")
df = pd.DataFrame(rowlist)
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# ignore_index=True renumbers the rows 0..n-1, as the original append did.
bookRatings = pd.concat([bookRatings, df], ignore_index=True)
print(bookRatings.shape)
# The row index is written as the first CSV column (pandas default), exactly
# as before.
bookRatings.to_csv('Data/implicitRatingsCosine.csv', sep=',')