-
Notifications
You must be signed in to change notification settings - Fork 0
/
R_ranking.R
44 lines (30 loc) · 1.93 KB
/
R_ranking.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# TPO R script for HR_Project - For Ranking Attrition Risk
# By: Jeff Schwartzentruber
# Date: Oct 11th 2017
#Laptop
#PW <- "C:\\Users\\Jeff\\Google Drive\\Tailored Process Optimization\\TPO\\HR_Project\\HR_Sales\\"
#Desktop
# Root folder of the HR_Sales project. It must end with a path separator,
# because every file path below is built by appending directly to it.
PW <- "D:\\Users\\Jeff\\Google Drive\\Tailored Process Optimization\\TPO\\HR_Project\\HR_Sales\\"

library(h2o) # Import h2o library (must be installed prior)
library(readr)

# Initialize the local h2o instance.
# *** nthreads has to be set to 4, or we get an error from H2O about unhealthy
# nodes after several iterations (I think it has something to do with virtual
# cores and their cluster computing infrastructure).
localH2O <- h2o.init(
  ip = "localhost", port = 54321, startH2O = TRUE,
  min_mem_size = "3g", nthreads = 4
)

# NOTE: the original script contained `setwd = PW`, which only created a
# variable called `setwd` and never changed the working directory. Every path
# in this script is absolute (prefixed with PW), so no working-directory
# change is needed and the statement has been dropped.

# Employee records to score, one employee per row.
HR_Sales <- read_csv(paste0(PW, "HR_Sales.csv"))

## Attrition Model - GBM
# Location of the model exported from Flow. The model does not change between
# rows, so it is loaded once here instead of once per loop iteration.
modelPath_A <- paste0(
  PW,
  "Model_Exports\\Attrition_GBM\\gbm-dc422bcf-62bd-40a3-8adc-5f8c9fc1b2f0"
)
model_A <- h2o.loadModel(modelPath_A)

# Scratch CSV used to hand a single row over to h2o.
inputPath <- paste0(PW, "input.csv")

# One prediction data frame per input row, bound together after the loop
# (avoids re-copying the whole result on every iteration).
predictions <- vector("list", nrow(HR_Sales))

# Loop through HR data to generate ranks.
# seq_len() yields an empty sequence for a zero-row input, whereas 1:nrow()
# would iterate over c(1, 0).
for (row in seq_len(nrow(HR_Sales))) {
  # Create input.csv holding only the current row.
  inputFile <- HR_Sales[row, ]
  write.table(as.matrix(inputFile), file = inputPath, row.names = FALSE, sep = ",")

  # The prediction input must be an h2o frame, so re-import the generated file.
  input <- h2o.importFile(path = inputPath, destination_frame = "input")

  # Predict with the loaded model and convert the h2o frame to an R data frame.
  pred_A_h2o <- h2o.predict(model_A, input)
  pred_A <- as.data.frame(pred_A_h2o)
  predictions[[row]] <- pred_A

  print(row) # progress indicator
}

# Ranking data frame: predictions stacked in input-row order.
Risk <- if (length(predictions) > 0) {
  do.call(rbind, predictions)
} else {
  data.frame() # nothing to score
}

# Write ranking to file.
write.table(
  as.matrix(Risk),
  file = paste0(PW, "Predictions\\Risk.csv"),
  row.names = FALSE, sep = ","
)

# h2o.shutdown(prompt = FALSE) # Turn to TRUE for parallel computing