.Rhistory

# Coerce `m` (defined earlier, outside this excerpt) to a realRatingMatrix,
# which stores the ratings in sparse format.
r <- as(m, "realRatingMatrix")
r
# The same ratings viewed as a list and as a data frame.
as(r, "list")
as(r, "data.frame")
# Normalize rating matrix entries,
# e.g. remove rating bias by subtracting the row mean from
# all ratings in the row.
# NOTE(review): normalize() is called with its default method here; confirm
# that the default is the row-centering described above.
r_m <- normalize(r)
# Visually inspect small portions of the rating matrix, raw vs. normalized.
image(r, main = "Raw_Ratings")
image(r_m, main = "Normalized_Ratings")
# Binarize the ratings against the minRating = 4 threshold and show the
# result as an ordinary matrix.
r_b <- binarize(r, minRating = 4)
as(r_b, "matrix")
#####################
#Data Understanding and Preparation
#We will use 100k ratings from 943 users on 1664 movies
#collected from sept 1997 - april 1998 through the
#movie lens website
# Load the data and print its summary.
data("MovieLense")
MovieLense

# Visualize a sample of the raw rating matrix.
image(MovieLense, main = "Raw_ratings")
summary(getRatings(MovieLense))
# It appears some movies were not rated at all by the first few users.

# Distribution of the raw ratings.
qplot(
  getRatings(MovieLense),
  binwidth = 1,
  main = "Histogram_of_ratings",
  xlab = "Rating"
)
# The rating plot looks skewed to the right.

# Same histogram after Z-score normalization.
qplot(
  getRatings(normalize(MovieLense, method = "Z-score")),
  main = "Histogram_of_normalized_raings",
  xlab = "Rating"
)
summary(getRatings(normalize(MovieLense, method = "Z-score")))
# Seems better; normalization will take care of user bias.

# How many movies did people rate on average?
# rowCounts() gives the number of ratings per row (user), so the x axis is
# "movies rated" and the histogram height is the number of users. The
# original labels had these two swapped.
qplot(
  rowCounts(MovieLense),
  binwidth = 10,
  main = "Movies_rated_on_average",
  xlab = "#_of_movies_rated",
  ylab = "#_of_users"
)
# Some people get tired of rating movies at a logarithmic pace.

# What is the mean rating of each movie?
qplot(
  colMeans(MovieLense),
  binwidth = .1,
  main = "Mean_rating_of_movies",
  xlab = "Rating",
  ylab = "#_of_movies"
)
#The big spike at 1 suggests that the ratings could be interpreted as binary.
#In other words, some people don't want to see certain movies at all.
#The same applies to the spikes at 5 and 3.
##################
#Modeling
# List the recommender methods registered for real-valued rating matrices.
recommenderRegistry$get_entries(dataType = "realRatingMatrix")

# Evaluation scheme 1: a single 90/10 train/test split (k = 1) with
# given = 10 and goodRating = 4.
scheme <- evaluationScheme(
  MovieLense,
  method = "split", train = .9, k = 1, given = 10, goodRating = 4
)
scheme

# Algorithms to compare; every one normalizes ratings with Z-scores.
algorithms <- list(
  "random_items" = list(name = "RANDOM", param = list(normalize = "Z-score")),
  "popular_items" = list(name = "POPULAR", param = list(normalize = "Z-score")),
  "user_based_CF" = list(
    name = "UBCF",
    param = list(normalize = "Z-score", method = "Cosine", nn = 50, minRating = 3)
  ),
  "item_based_CF" = list(name = "IBCF", param = list(normalize = "Z-score"))
)

# Evaluate top-N lists for several list lengths and plot the results.
# (An earlier mistyped `plot(results, annotat 1:4, ...)` call was a syntax
# error and has been dropped in favour of the corrected call.)
results <- evaluate(scheme, algorithms, n = c(1, 3, 5, 10, 15, 20))
plot(results, annotate = 1:4, legend = "topleft")
plot(results, "prec/rec", annotate = 3)

# Evaluation scheme 2: 4-fold cross-validation, given = 3, goodRating = 5,
# evaluated for the POPULAR method only.
scheme2 <- evaluationScheme(
  MovieLense,
  method = "cross", k = 4, given = 3, goodRating = 5
)
scheme2
results2 <- evaluate(scheme2, method = "POPULAR", n = c(1, 3, 5, 10, 15, 20))
results2

# Confusion matrix of the first fold, then avg() applied to that same matrix.
getConfusionMatrix(results2)[[1]]
avg(getConfusionMatrix(results2)[[1]])
library(rstudioapi)
# Read the crime data set from the current working directory.
crime.data <- read.csv("crime.data.csv")
# Query the memory limit, then request a new one with size = 16000.
# NOTE(review): memory.limit() is Windows-specific and reportedly a no-op stub
# in recent R releases - confirm against the R version in use.
memory.limit()
memory.limit(size=16000)
# Print `m` (defined earlier in the session, outside this excerpt).
m
# Loop-based arithmetic mean: adds x[i] / length(x) one element at a time.
# It is timed against its byte-compiled version and the built-in mean().
# Returns 0 for a zero-length input because the loop body never runs.
mean_r <- function(x) {
  count <- length(x)
  running <- 0
  for (idx in seq_along(x)) {
    running <- running + x[idx] / count
  }
  running
}
# `compiler` ships with base R, so it is only attached. The original
# install.packages("compiler") call could not install anything.
library(compiler)

# Byte-compiled copy of the loop-based mean, plus sample data to time it on.
cmp_mean_r <- cmpfun(mean_r)
x <- rnorm(1000)

# Install microbenchmark only when it is missing, then attach it.
if (!requireNamespace("microbenchmark", quietly = TRUE)) {
  install.packages("microbenchmark")
}
library(microbenchmark)

# Time the interpreted loop, the compiled loop and the built-in mean():
# 10 runs each, reported in milliseconds.
microbenchmark(
  mean_r(x), cmp_mean_r(x), mean(x),
  times = 10, unit = "ms"
)
# Install bigmemory / sqldf only when they are missing, then attach them.
for (pkg in c("bigmemory", "sqldf")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(bigmemory)
library(sqldf)

# Dump iris to CSV without quoting so the SQL filter can match bare values.
write.csv(iris, "iris.csv", quote = FALSE, row.names = FALSE)

# Read back only the setosa rows by filtering while the file is loaded.
# Two earlier attempts ("selec_*_..." and an underscore-separated query) were
# malformed SQL and have been dropped; this is the working statement.
iris2 <- read.csv.sql(
  "iris.csv",
  sql = "select * from file where Species = 'setosa' "
)
library(compiler)
# Install hflights only when it is missing, then attach it.
if (!requireNamespace("hflights", quietly = TRUE)) {
  install.packages("hflights")
}
library(hflights)

# Dump the flights data to CSV. Unlike the iris export, quote = FALSE is not
# passed, so character fields are written wrapped in double quotes.
write.csv(hflights, "hflights.csv", row.names = FALSE)

# Load only December flights to JFK. The filter compares Dest against the
# value including its double quotes ('"JFK"'), presumably because
# read.csv.sql does not strip the quotes written above - TODO confirm.
# Two earlier attempts had unterminated or mismatched string literals (syntax
# errors) and have been dropped.
DecJFKflights <- read.csv.sql(
  "hflights.csv",
  sql = "select * from file where Dest='\"JFK\"' and Month = 12"
)
View(DecJFKflights)
memory.limit(size=24000)
memory.limit(size=16000)
library(compiler)
# Install RMySQL only when it is missing, then attach it.
if (!requireNamespace("RMySQL", quietly = TRUE)) {
  install.packages("RMySQL")
}
library(RMySQL)

# Open one connection to the local `cities` database.
# NOTE(review): connects as root with an empty password - acceptable only for
# a throwaway local instance.
conn <- dbConnect(
  MySQL(),
  dbname = "cities", username = "root", password = ""
)

# Inspect the schema.
dbListTables(conn)
dbListFields(conn, "villes_france_free")

# Pull the whole table. The table name must not be wrapped in single quotes:
# the earlier attempts that did so (plus a "selet" typo and a stray quote that
# broke parsing) have been dropped in favour of this working query.
cities <- dbGetQuery(conn, "SELECT * FROM villes_france_free")

# Release the connection once the data is in memory; the original session
# opened it three times and never closed it.
dbDisconnect(conn)

View(cities)
summary(cities)
head(cities)
# TensorFlow-for-R setup, condensed from many trial-and-error attempts into the
# sequence that the session finally converged on. Dropped along the way:
#   * use_condaenv(tensorflow, ...) / remove.packages(tensorflow) with the
#     name unquoted (looked up as an object instead of a string);
#   * install.packages('RTools') / library("RTools") - Rtools is a separate
#     Windows toolchain, not an installable R package;
#   * library(tensorflowR) - not the package being installed here;
#   * tf$Session() calls made before library(tensorflow) was attached;
#   * dev_mode() toggling, which the final working run did not use.

# Report the running R version.
R.Version()

# One-off: update R itself through installr. Install it only when missing.
if (!requireNamespace("installr", quietly = TRUE)) {
  install.packages("installr")
}
library(installr)
# Spelled-out TRUE/FALSE instead of the reassignable T/F shorthands.
updateR(FALSE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE) # install, move, update.package, quit R.

# Helper packages: install whichever are missing, then attach them.
for (pkg in c("rstudioapi", "devtools", "reticulate")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(rstudioapi)
library(devtools)
library(reticulate)

# Point the R bindings at the Anaconda environment that holds TensorFlow.
Sys.setenv(TENSORFLOW_PYTHON = "C:/Users/schwi/Anaconda3/envs/tensorflow")

# Install the R package from GitHub, bind reticulate to the conda env
# (required = TRUE makes a missing env an error), then attach the package.
devtools::install_github("rstudio/tensorflow")
use_condaenv("tensorflow", conda = "auto", required = TRUE)
library(tensorflow)

# Smoke test: build a string constant and evaluate it in a session.
# NOTE(review): tf$Session() is the session-style API used throughout this
# history; confirm it exists in the TensorFlow version inside the conda env.
sess <- tf$Session()
hello <- tf$constant("Hello, TensorFlow!")
sess$run(hello)
library(rstudioapi)
# Set the working directory to the folder of the script open in RStudio.
# NOTE(review): this only works inside RStudio with a saved document.
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

# Load our datasets.
features   <- read.csv("dengue_features_train.csv")
labels     <- read.csv("dengue_labels_train.csv")
submission <- read.csv("dengue_features_test.csv")

# The labels are attached by row position, so both files must line up.
stopifnot(nrow(features) == nrow(labels))
features$total_cases <- labels$total_cases

# Keep untouched copies of the labelled features before rows are dropped.
w2_features <- features
features2   <- features

# Remove rows with missing data and renumber the remaining rows.
features           <- features[complete.cases(features), ]
rownames(features) <- NULL
# Looks like we lose about 115 observations in the original feature set.

# Break into 2 datasets by location.
sj     <- features[features$city == "sj", ]
iq     <- features[features$city == "iq", ]
sj_sub <- submission[submission$city == "sj", ]
iq_sub <- submission[submission$city == "iq", ]

# Build the list only now that all four frames exist. The original history
# also built it before `sj`, `iq`, `sj_sub` and `iq_sub` were defined.
cities <- list(sj, iq, sj_sub, iq_sub)
# Number runs of consecutive weekly observations.
#
# Adds (or overwrites) a numeric `batch` column. Row 1 is batch 1; a row starts
# a new batch when its city differs from the previous row's or when its
# week_start_date is `max_gap_days` or more days after the previous row's.
#
# df           data frame with `city` and `week_start_date` columns.
# max_gap_days gap, in days, at which a new batch starts (default 10).
#
# Returns `df` with the `batch` column set. Frames with zero or one row are
# handled, where the original `2:nrow(df)` loop failed. It stops with an error
# in the cases where the original `if` would have received an NA condition.
add_batch_ids <- function(df, max_gap_days = 10) {
  n_rows <- nrow(df)
  if (n_rows == 0L) {
    df$batch <- numeric(0)
    return(df)
  }

  # Convert once instead of twice per loop iteration.
  dates <- as.Date(df$week_start_date)
  city <- as.character(df$city)

  # Compare each row (2..n) with its predecessor (1..n-1).
  city_changed <- city[-1L] != city[-n_rows]
  gap_too_long <- as.numeric(diff(dates), units = "days") >= max_gap_days
  starts_new_batch <- c(FALSE, city_changed | gap_too_long)

  if (anyNA(starts_new_batch)) {
    stop(
      "cannot assign batches: missing city or week_start_date values",
      call. = FALSE
    )
  }

  # Every TRUE bumps the running batch number by one.
  df$batch <- 1 + cumsum(starts_new_batch)
  df
}

# First pass: batch every frame in `cities` and refresh the four named copies.
cities <- lapply(cities, add_batch_ids)
sj     <- cities[[1]]
iq     <- cities[[2]]
sj_sub <- cities[[3]]
iq_sub <- cities[[4]]
View(sj_sub)
View(features)

# Second pass on the freshly re-read training features.
# NOTE(review): re-reading the CSV discards the total_cases column and the
# complete-cases filtering applied earlier - confirm that is intended.
features <- read.csv("dengue_features_train.csv")
View(features)
sj <- features[features$city == "sj", ]
iq <- features[features$city == "iq", ]

# Rebuild the list before batching. One earlier re-run skipped this and
# looped over the stale list, overwriting the fresh `sj` / `iq`.
cities <- list(sj, iq, sj_sub, iq_sub)
cities <- lapply(cities, add_batch_ids)
sj     <- cities[[1]]
iq     <- cities[[2]]
sj_sub <- cities[[3]]
iq_sub <- cities[[4]]
View(sj)
View(sj_sub)
View(features2)

# Split the unfiltered training features and the submission set by city.
# (An earlier attempt assigned to a mistyped `j_nb`, leaving `sj_nb`
# undefined, and an experiment overwrote tot_obs[1] with 'a'; both dropped.)
sj_nb  <- features2[features2$city == "sj", ]
iq_nb  <- features2[features2$city == "iq", ]
sj_sub <- submission[submission$city == "sj", ]
iq_sub <- submission[submission$city == "iq", ]

# Frames to process and the file stem each one is written under.
tot_obs   <- list(sj_nb, iq_nb, sj_sub, iq_sub)
tot_names <- list('sj_nb', 'iq_nb', 'sj_sub', 'iq_sub')

# Replace the NA values of each column with that column's mean.
#
# Only numeric and logical columns are touched, because those are the types
# mean() accepts. The original loop also called mean() on text columns, which
# only produced warnings and left them unchanged.
#
# df  data frame to impute.
# Returns `df` with the NAs in those columns replaced by the column mean.
impute_column_means <- function(df) {
  for (col in seq_along(df)) {
    values <- df[[col]]
    if (!(is.numeric(values) || is.logical(values))) {
      next
    }
    missing <- is.na(values)
    if (any(missing)) {
      df[missing, col] <- mean(values, na.rm = TRUE)
    }
  }
  df
}

# Make sure the output folder exists; write.csv() does not create it.
dir.create("Data", showWarnings = FALSE)

# Replace NA vals with col means, store the result back in the list and
# write each frame to Data/<name>.csv to read into the Python model.
# Earlier iterations of this loop wrote the same files to the working
# directory before the Data/ destination was settled on.
for (j in seq_along(tot_obs)) {
  tot_obs[[j]] <- impute_column_means(tot_obs[[j]])
  write.csv(tot_obs[[j]], file.path("Data", paste0(tot_names[[j]], ".csv")))
}