Skip to content

Commit

Permalink
Rstudio Support
Browse files Browse the repository at this point in the history
  • Loading branch information
gibchikafa committed Aug 19, 2021
1 parent 019cf29 commit 59ca67f
Show file tree
Hide file tree
Showing 16 changed files with 660 additions and 46 deletions.
47 changes: 47 additions & 0 deletions attributes/default.rb
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@

default['hopsworks']['jupyter_dir'] = node['hopsworks']['dir'] + "/jupyter"

default['hopsworks']['rstudio_dir'] = node['hopsworks']['dir'] + "/rstudio"

default['hopsworks']['max_mem'] = "3000"
default['glassfish']['max_mem'] = node['hopsworks']['max_mem'].to_i
default['hopsworks']['min_mem'] = "1024"
Expand Down Expand Up @@ -371,6 +373,51 @@
default['rstudio']['rpm'] = "rstudio-server-rhel-1.1.447-x86_64.rpm"
default['rstudio']['enabled'] = "false"


# RStudio settings kept under the 'hopsworks' attribute namespace.
# Host name and URL scheme for the RStudio server.
default['hopsworks']['rstudio_host'] = 'localhost'
default['hopsworks']['rstudio_origin_scheme'] = 'https'
# Network address the RStudio server listens on for incoming connections.
default['hopsworks']['rstudio_www_address'] = '127.0.0.1'
# Minutes before a session times out.
default['hopsworks']['rstudio_session_timeout_minutes'] = 360
# Logging: minimum level, logger type and maximum log file size
# (size is presumably in MB -- confirm against the logging config template).
default['hopsworks']['rstudio_logging_level'] = 'info'
default['hopsworks']['rstudio_logger_type'] = 'file'
default['hopsworks']['rstudio_log_file_max_size'] = 512
# CRAN repository URL used by default.
default['hopsworks']['rstudio_default_cran_repo'] = 'https://cloud.r-project.org/'

# RStudio base directory: "<install dir>/rstudio" when an install dir is configured,
# otherwise "<hopsworks dir>/rstudio".
default['rstudio']['base_dir'] = (node['install']['dir'].empty? ? node['hopsworks']['dir'] : node['install']['dir']) + '/rstudio'
# Interval setting for shutting down expired RStudio servers.
default['rstudio']['shutdown_timer_interval'] = '30m'

# CRAN mirror URL.
default['rstudio']['cran']['mirror'] = 'http://cran.rstudio.com/'

# APT repository settings (signing key id, keyserver, repository URI).
# They are only assigned on Ubuntu and Debian; on any other platform the
# 'apt' attributes are left unset.
rstudio_platform = node['platform'].downcase
if rstudio_platform == 'ubuntu'
  default['rstudio']['apt']['key'] = 'E084DAB9'
  default['rstudio']['apt']['keyserver'] = 'keyserver.ubuntu.com'
  default['rstudio']['apt']['uri'] = 'http://cran.stat.ucla.edu/bin/linux/ubuntu'
elsif rstudio_platform == 'debian'
  default['rstudio']['apt']['key'] = '381BA480'
  default['rstudio']['apt']['keyserver'] = 'subkeys.pgp.net'
  default['rstudio']['apt']['uri'] = 'http://cran.stat.ucla.edu/bin/linux/debian'
end

# List of CRAN packages to install. Defaults to none; override it with a plain
# array of package names in a role/environment/node and the CRAN recipe will
# install them.
default['rstudio']['cran']['packages'] = []

# RStudio Server
# Listen port and address. The empty-string settings below are left blank by
# default (presumably meaning "use the RStudio built-in default" -- confirm
# against the template that consumes them).
default['rstudio']['server']['www_port'] = '8787'
default['rstudio']['server']['www_address'] = '127.0.0.1'
default['rstudio']['server']['ld_library_path'] = ''
default['rstudio']['server']['r_binary_path'] = ''
default['rstudio']['server']['user_group'] = ''

# RStudio Session
# NOTE(review): the timeout unit is not stated here (presumably minutes), and
# this value differs from default['hopsworks']['rstudio_session_timeout_minutes']
# -- confirm which of the two settings is actually consumed.
default['rstudio']['session']['timeout'] = '30'
default['rstudio']['session']['package_path'] = ''
default['rstudio']['session']['cran_repo'] = 'http://cran.case.edu/'

default['hopsworks']['kafka_max_num_topics'] = '100'

default['hopsworks']['audit_log_dump_enabled'] = "false"
Expand Down
10 changes: 10 additions & 0 deletions files/default/hopsworks_templates/rstudio_logging_config_template
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[*]
log-level=warn
logger-type=syslog

[@rserver]
log-level=${conf.logLevel}
logger-type=${conf.loggerType}
max-size-mb=${conf.maxSizeMb}
log-dir=${conf.logDir}
log-file-include-pid=${conf.includePid}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
rsession-which-r=${conf.versionPath}
www-address=${conf.ipAddress}
www-port=${conf.port}
www-root-path=${conf.rootPath}
server-user=${conf.serverUser}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
session-timeout-minutes=${conf.sessionTimeoutMinutes}
r-cran-repos=${conf.cranRepo}
9 changes: 9 additions & 0 deletions files/default/hopsworks_templates/sparklyr_config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
default:
livy.driverCores: ${conf.driverCores}
livy.driverMemory: "${conf.driverMemory}"
livy.numExecutors: ${conf.numExecutors}
livy.executorCores: ${conf.executorCores}
livy.executorMemory: "${conf.executorMemory}"
livy.proxyUser: "${conf.proxyUser}"
livy.queue: "${conf.yarnQueue}"
${conf.sparkConfiguration}
77 changes: 32 additions & 45 deletions files/default/sql/ddl/3.0.0__initial_tables.sql
Original file line number Diff line number Diff line change
Expand Up @@ -1056,8 +1056,9 @@ CREATE TABLE `rstudio_interpreter` (
`name` varchar(255) COLLATE latin1_general_cs NOT NULL,
`created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`last_accessed` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`port`,`name`),
CONSTRAINT `FK_575_582` FOREIGN KEY (`port`) REFERENCES `rstudio_project` (`port`) ON DELETE CASCADE ON UPDATE NO ACTION
PRIMARY KEY (`port`,`name`),
CONSTRAINT `FK_575_582` FOREIGN KEY (`port`) REFERENCES `rstudio_project` (`port`) ON DELETE CASCADE ON UPDATE NO
ACTION
) ENGINE=ndbcluster DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs;
/*!40101 SET character_set_client = @saved_cs_client */;

Expand All @@ -1068,20 +1069,21 @@ CREATE TABLE `rstudio_interpreter` (
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `rstudio_project` (
`port` int(11) NOT NULL,
`hdfs_user_id` int(11) NOT NULL,
`created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`last_accessed` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`host_ip` varchar(255) COLLATE latin1_general_cs NOT NULL,
`token` varchar(255) COLLATE latin1_general_cs NOT NULL,
`secret` varchar(64) COLLATE latin1_general_cs NOT NULL,
`pid` bigint(20) NOT NULL,
`project_id` int(11) NOT NULL,
PRIMARY KEY (`port`),
KEY `hdfs_user_idx` (`hdfs_user_id`),
KEY `project_id` (`project_id`),
CONSTRAINT `FK_103_577` FOREIGN KEY (`hdfs_user_id`) REFERENCES `hops`.`hdfs_users` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION,
CONSTRAINT `FK_284_578` FOREIGN KEY (`project_id`) REFERENCES `project` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION
`port` int NOT NULL,
`hdfs_user_id` int NOT NULL,
`created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`expires` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`last_accessed` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`secret` varchar(64) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL,
`pid` varchar(255) CHARACTER SET latin1 COLLATE latin1_general_cs NOT NULL,
`project_id` int NOT NULL,
`login_password` varchar(255) CHARACTER SET latin1 COLLATE latin1_general_cs DEFAULT NULL,
`login_username` varchar(255) CHARACTER SET latin1 COLLATE latin1_general_cs DEFAULT NULL,
PRIMARY KEY (`port`),
KEY `hdfs_user_idx` (`hdfs_user_id`),
KEY `project_id` (`project_id`),
CONSTRAINT `FK_103_577` FOREIGN KEY (`hdfs_user_id`) REFERENCES `hops`.`hdfs_users` (`id`) ON DELETE CASCADE,
CONSTRAINT `FK_284_578` FOREIGN KEY (`project_id`) REFERENCES `project` (`id`) ON DELETE CASCADE
) ENGINE=ndbcluster DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs;
/*!40101 SET character_set_client = @saved_cs_client */;

Expand All @@ -1092,35 +1094,20 @@ CREATE TABLE `rstudio_project` (
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `rstudio_settings` (
`project_id` int(11) NOT NULL,
`team_member` varchar(150) COLLATE latin1_general_cs NOT NULL,
`num_tf_ps` int(11) DEFAULT '1',
`num_tf_gpus` int(11) DEFAULT '0',
`num_mpi_np` int(11) DEFAULT '1',
`appmaster_cores` int(11) DEFAULT '1',
`appmaster_memory` int(11) DEFAULT '1024',
`num_executors` int(11) DEFAULT '1',
`num_executor_cores` int(11) DEFAULT '1',
`executor_memory` int(11) DEFAULT '1024',
`dynamic_initial_executors` int(11) DEFAULT '1',
`dynamic_min_executors` int(11) DEFAULT '1',
`dynamic_max_executors` int(11) DEFAULT '1',
`secret` varchar(255) COLLATE latin1_general_cs NOT NULL,
`log_level` varchar(32) COLLATE latin1_general_cs DEFAULT 'INFO',
`mode` varchar(32) COLLATE latin1_general_cs NOT NULL,
`umask` varchar(32) COLLATE latin1_general_cs DEFAULT '022',
`advanced` tinyint(1) DEFAULT '0',
`archives` varchar(1500) COLLATE latin1_general_cs DEFAULT '',
`jars` varchar(1500) COLLATE latin1_general_cs DEFAULT '',
`files` varchar(1500) COLLATE latin1_general_cs DEFAULT '',
`py_files` varchar(1500) COLLATE latin1_general_cs DEFAULT '',
`spark_params` varchar(6500) COLLATE latin1_general_cs DEFAULT '',
`shutdown_level` int(11) NOT NULL DEFAULT '6',
PRIMARY KEY (`project_id`,`team_member`),
KEY `team_member` (`team_member`),
KEY `secret_idx` (`secret`),
CONSTRAINT `RS_FK_USERS` FOREIGN KEY (`team_member`) REFERENCES `users` (`email`) ON DELETE CASCADE ON UPDATE NO ACTION,
CONSTRAINT `RS_FK_PROJS` FOREIGN KEY (`project_id`) REFERENCES `project` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION
`project_id` int(11) NOT NULL,
`team_member` varchar(150) COLLATE latin1_general_cs NOT NULL,
`secret` varchar(255) COLLATE latin1_general_cs NOT NULL,
`advanced` tinyint(1) DEFAULT '0',
`shutdown_level` int(11) NOT NULL DEFAULT '6',
`base_dir` varchar(255) COLLATE latin1_general_cs DEFAULT NULL,
`job_config` varchar(11000) COLLATE latin1_general_cs DEFAULT NULL,
`docker_config` varchar(1000) COLLATE latin1_general_cs DEFAULT NULL,
PRIMARY KEY (`project_id`,`team_member`),
KEY `team_member` (`team_member`),
KEY `secret_idx` (`secret`),
CONSTRAINT `RS_FK_USERS` FOREIGN KEY (`team_member`) REFERENCES `users` (`email`) ON DELETE CASCADE ON UPDATE NO
ACTION,
CONSTRAINT `RS_FK_PROJS` FOREIGN KEY (`project_id`) REFERENCES `project` (`id`) ON DELETE CASCADE ON UPDATE NO ACTION
) ENGINE=ndbcluster DEFAULT CHARSET=latin1 COLLATE=latin1_general_cs;
/*!40101 SET character_set_client = @saved_cs_client */;

Expand Down
23 changes: 23 additions & 0 deletions files/default/sql/ddl/updates/3.0.0.sql
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,26 @@ ALTER TABLE `hopsworks`.`dataset_shared_with` ADD COLUMN `accepted_by` INT(11) D

ALTER TABLE `hopsworks`.`dataset_shared_with` ADD CONSTRAINT `fk_shared_by` FOREIGN KEY (`shared_by`) REFERENCES `users` (`uid`) ON DELETE NO ACTION ON UPDATE NO ACTION;
ALTER TABLE `hopsworks`.`dataset_shared_with` ADD CONSTRAINT `fk_accepted_by` FOREIGN KEY (`accepted_by`) REFERENCES `users` (`uid`) ON DELETE NO ACTION ON UPDATE NO ACTION;

-- RStudio schema migration (forward).

-- rstudio_settings: drop the individual Spark/TensorFlow tuning columns.
-- `advanced`, `secret` and `shutdown_level` are intentionally kept.
ALTER TABLE `hopsworks`.`rstudio_settings`
  DROP `num_tf_ps`, DROP `num_tf_gpus`, DROP `num_mpi_np`,
  DROP `appmaster_cores`, DROP `appmaster_memory`, DROP `num_executors`, DROP `num_executor_cores`,
  DROP `executor_memory`, DROP `dynamic_initial_executors`, DROP `dynamic_min_executors`, DROP `dynamic_max_executors`,
  DROP `log_level`, DROP `mode`, DROP `umask`, DROP `archives`, DROP `jars`, DROP `files`, DROP `py_files`, DROP `spark_params`;

-- rstudio_project: remove host/token tracking, add expiry and login credentials.
ALTER TABLE `hopsworks`.`rstudio_project` DROP `host_ip`, DROP `token`;

ALTER TABLE `hopsworks`.`rstudio_project` ADD COLUMN `expires` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP;

ALTER TABLE `hopsworks`.`rstudio_project` ADD COLUMN `login_username` varchar(255) COLLATE latin1_general_cs DEFAULT NULL;

ALTER TABLE `hopsworks`.`rstudio_project` ADD COLUMN `login_password` varchar(255) COLLATE latin1_general_cs DEFAULT NULL;

-- `pid` changes from bigint to a string identifier.
ALTER TABLE `hopsworks`.`rstudio_project` MODIFY COLUMN `pid` varchar(255) COLLATE latin1_general_cs NOT NULL;

-- rstudio_settings: add the configuration columns that replace the dropped ones.
ALTER TABLE `hopsworks`.`rstudio_settings` ADD COLUMN `job_config` varchar(11000) COLLATE latin1_general_cs DEFAULT NULL;

-- Width is 1000 to match the fresh-install definition of rstudio_settings.docker_config,
-- so that upgraded and newly created databases end up with the same schema.
ALTER TABLE `hopsworks`.`rstudio_settings` ADD COLUMN `docker_config` varchar(1000) COLLATE latin1_general_cs DEFAULT NULL;

-- NOTE(review): the fresh-install rstudio_settings table also declares a `base_dir` varchar(255)
-- column that no statement in this migration adds -- confirm whether upgraded databases already
-- have it, and if not add it here together with a matching DROP in the undo script.
36 changes: 36 additions & 0 deletions files/default/sql/ddl/updates/undo/3.0.0__undo.sql
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,39 @@ ALTER TABLE `hopsworks`.`dataset_shared_with` DROP COLUMN `shared_by`;
ALTER TABLE `hopsworks`.`dataset_shared_with` DROP COLUMN `accepted_by`;

DROP TABLE IF EXISTS `feature_store_code`;

-- Undo for rstudio_settings: restore every column that updates/3.0.0.sql drops.
-- `advanced` is deliberately NOT listed: the forward migration never drops it, so re-adding it
-- would abort this statement with a duplicate-column error.
-- `log_level` IS listed: the forward migration drops it, so it has to be restored here.
ALTER TABLE `hopsworks`.`rstudio_settings`
  ADD COLUMN `num_tf_ps` int(11) DEFAULT '1',
  ADD COLUMN `num_tf_gpus` int(11) DEFAULT '0',
  ADD COLUMN `num_mpi_np` int(11) DEFAULT '1',
  ADD COLUMN `appmaster_cores` int(11) DEFAULT '1',
  ADD COLUMN `appmaster_memory` int(11) DEFAULT '1024',
  ADD COLUMN `num_executors` int(11) DEFAULT '1',
  ADD COLUMN `num_executor_cores` int(11) DEFAULT '1',
  ADD COLUMN `executor_memory` int(11) DEFAULT '1024',
  ADD COLUMN `dynamic_initial_executors` int(11) DEFAULT '1',
  ADD COLUMN `dynamic_min_executors` int(11) DEFAULT '1',
  ADD COLUMN `dynamic_max_executors` int(11) DEFAULT '1',
  ADD COLUMN `log_level` varchar(32) COLLATE latin1_general_cs DEFAULT 'INFO',
  ADD COLUMN `mode` varchar(32) COLLATE latin1_general_cs NOT NULL,
  ADD COLUMN `umask` varchar(32) COLLATE latin1_general_cs DEFAULT '022',
  ADD COLUMN `archives` varchar(1500) COLLATE latin1_general_cs DEFAULT '',
  ADD COLUMN `jars` varchar(1500) COLLATE latin1_general_cs DEFAULT '',
  ADD COLUMN `files` varchar(1500) COLLATE latin1_general_cs DEFAULT '',
  ADD COLUMN `py_files` varchar(1500) COLLATE latin1_general_cs DEFAULT '',
  ADD COLUMN `spark_params` varchar(6500) COLLATE latin1_general_cs DEFAULT '';

-- Undo for rstudio_project: bring back the host/token columns ...
ALTER TABLE `hopsworks`.`rstudio_project`
  ADD COLUMN `host_ip` varchar(255) COLLATE latin1_general_cs NOT NULL,
  ADD COLUMN `token` varchar(255) COLLATE latin1_general_cs NOT NULL;

-- ... remove the expiry and login credential columns ...
ALTER TABLE `hopsworks`.`rstudio_project`
  DROP COLUMN `expires`,
  DROP COLUMN `login_username`,
  DROP COLUMN `login_password`;

-- ... and turn `pid` back into a numeric column.
ALTER TABLE `hopsworks`.`rstudio_project`
  MODIFY COLUMN `pid` bigint(20) NOT NULL;

-- Undo for rstudio_settings: remove the configuration columns added by the forward migration.
ALTER TABLE `hopsworks`.`rstudio_settings`
  DROP COLUMN `job_config`;

ALTER TABLE `hopsworks`.`rstudio_settings`
  DROP COLUMN `docker_config`;
35 changes: 35 additions & 0 deletions metadata.rb
Original file line number Diff line number Diff line change
Expand Up @@ -830,6 +830,41 @@
:description => "Set to 'true' to enable RStudio in Hopsworks. Default 'false'.",
:type => 'string'

### RStudio

attribute "hopsworks/rstudio_host",
          :description => "Host for the RStudio server e.g localhost",
          :type => "string"

attribute "hopsworks/rstudio_origin_scheme",
          :description => "The origin scheme for the RStudio server e.g https",
          :type => "string"

attribute "hopsworks/rstudio_www_address",
          :description => "The network address that RStudio Server will listen on for incoming connections.",
          :type => "string"

attribute "hopsworks/rstudio_session_timeout_minutes",
          :description => "The amount of minutes before a session times out, at which point the session will either suspend or exit.",
          :type => "string"

attribute "hopsworks/rstudio_logging_level",
          :description => "The minimum log level to capture. Can be one of debug, info, warn, or error",
          :type => "string"

attribute "hopsworks/rstudio_logger_type",
          :description => "The type of logger to use. Can be one of stderr, syslog, or file.",
          :type => "string"

attribute "hopsworks/rstudio_log_file_max_size",
          :description => "Maximum allowable size of the file before it is rotated. Only applicable if rotate is enabled.",
          :type => "string"

# Declared in attributes/default.rb next to the settings above; documented here
# so that every hopsworks/rstudio_* default has a metadata entry.
attribute "hopsworks/rstudio_default_cran_repo",
          :description => "Default CRAN repository URL for RStudio e.g https://cloud.r-project.org/",
          :type => "string"

attribute "hopsworks/rstudio_dir",
          :description => "Default installation directory for rstudio server",
          :type => "string"

attribute "rstudio/shutdown_timer_interval",
          :description => "rstudio interval for shutting down expired rstudio servers",
          :type => "string"

### PyPi

attribute "hopsworks/pypi_rest_endpoint",
Expand Down
36 changes: 35 additions & 1 deletion recipes/install.rb
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,14 @@
action :create
end

# Make sure the RStudio base directory exists with mode 770 (full access for
# owner and group, none for others), owned by the YARN application user and
# the Hops group.
directory node['rstudio']['base_dir'] do
  action :create
  mode "770"
  owner node['hops']['yarnapp']['user']
  group node['hops']['group']
end

directory node['hopsworks']['dir'] do
owner node['hopsworks']['user']
group node['hopsworks']['group']
Expand Down Expand Up @@ -646,6 +654,14 @@
not_if { node['install']['kubernetes'].casecmp("true") == 0 }
end

# Sudoers entry that lets the Glassfish user run rstudio.sh, rendered from
# rstudio.sh.erb.
kagent_sudoers "rstudio" do
  script_name "rstudio.sh"
  template "rstudio.sh.erb"
  user node['glassfish']['user']
  group "root"
  run_as "ALL" # run this as root - inside we change to different users
end

kagent_sudoers "convert-ipython-notebook" do
user node['glassfish']['user']
group "root"
Expand Down Expand Up @@ -689,6 +705,15 @@
not_if { node['install']['kubernetes'].casecmp("true") == 0 }
end

# Sudoers entry that lets the Glassfish user run rstudio-project-cleanup.sh,
# rendered from rstudio-project-cleanup.sh.erb. Skipped when the 'kubernetes'
# install attribute is "true" (compared case-insensitively).
kagent_sudoers "rstudio-project-cleanup" do
  script_name "rstudio-project-cleanup.sh"
  template "rstudio-project-cleanup.sh.erb"
  user node['glassfish']['user']
  group "root"
  run_as "ALL"
  not_if { node['install']['kubernetes'].casecmp("true") == 0 }
end

kagent_sudoers "global-ca-sign-csr" do
user node['glassfish']['user']
group "root"
Expand Down Expand Up @@ -740,7 +765,8 @@

["zip-hdfs-files.sh", "zip-background.sh", "unzip-background.sh", "tensorboard-launch.sh",
"tensorboard-cleanup.sh", "condasearch.sh", "list_environment.sh", "jupyter-kill.sh",
"jupyter-launch.sh", "tfserving-kill.sh", "sklearn_serving-launch.sh", "sklearn_serving-kill.sh"].each do |script|
"jupyter-launch.sh", "tfserving-kill.sh", "sklearn_serving-launch.sh", "sklearn_serving-kill.sh", "rstudio-kill.sh",
"rstudio-launch.sh"].each do |script|
template "#{theDomain}/bin/#{script}" do
source "#{script}.erb"
owner node['glassfish']['user']
Expand Down Expand Up @@ -812,6 +838,14 @@
action :create
end

# Make sure the RStudio base directory exists with mode 770, owned by the
# RStudio user and group.
# NOTE(review): this recipe declares a directory resource for the same
# node['rstudio']['base_dir'] path earlier, with the YARN application user and
# Hops group as owner/group. Confirm which ownership is intended; if the two
# differ, each converge will change the owner twice.
directory node["rstudio"]["base_dir"] do
  action :create
  mode "770"
  owner node["rstudio"]["user"]
  group node["rstudio"]["group"]
end

directory node["hopssite"]["certs_dir"] do
owner node["glassfish"]["user"]
group node['kagent']['certs_group']
Expand Down
Loading

0 comments on commit 59ca67f

Please sign in to comment.