# ray1.1.yaml
# A unique identifier for this cluster.
cluster_name: hyperopt
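#
# Typical workflow with the standard Ray autoscaler CLI (a convenience sketch;
# these commands are not part of the original file):
#   ray up ray1.1.yaml      # launch the cluster, or update it in place
#   ray attach ray1.1.yaml  # open an ssh session to the head node
#   ray down ray1.1.yaml    # terminate the cluster
#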
# The initial number of worker nodes to launch in addition to the head
# node. When the cluster is first brought up (or when it is refreshed with a
# subsequent `ray up`) this number of nodes will be started.
initial_workers: 31
# The minimum number of worker nodes to launch in addition to the head
# node. This number should be >= 0.
min_workers: 31
# The maximum number of worker nodes to launch in addition to the head
# node.
max_workers: 31
# The autoscaler will scale up the cluster to this target fraction of resource
# usage. For example, if a cluster of 10 nodes is 100% busy and
# target_utilization_fraction is 0.8, it would resize the cluster to 13. This
# fraction can be decreased to increase the aggressiveness of upscaling.
# The max value allowed is 1.0, which is the most conservative setting.
target_utilization_fraction: 0.75
# If a node is idle for this many minutes, it will be removed.
idle_timeout_minutes: 5
# Cloud-provider specific configuration.
provider:
    type: aws
    region: us-east-1
    # Availability zone(s), comma-separated, that nodes may be launched in.
    # Nodes are currently spread between zones by a round-robin approach;
    # however, this implementation detail should not be relied upon.
    availability_zone: us-east-1a,us-east-1b,us-east-1c,us-east-1d,us-east-1e,us-east-1f
    # Whether to allow node reuse. If set to False, nodes will be terminated
    # instead of stopped.
    cache_stopped_nodes: False  # If not present, the default is True.
# How Ray will authenticate with newly launched nodes.
auth:
    ssh_user: ubuntu
# Provider-specific config for the head node, e.g. instance type. By default
# Ray will auto-configure unspecified fields such as SubnetId and KeyName.
head_node:
    InstanceType: m5.large
    ImageId: ami-01aad86525617098d  # base DL AMI 35
    # ImageId: ami-0b206e33498f6acbe  # my own Deep Learning AMI 18.04 + ray 1.0 etc.
    # SubnetId: subnet-78d4bd53  # my subnet, if the default doesn't work
    # You can provision additional disk space with a config like the following:
    BlockDeviceMappings:
        - DeviceName: /dev/sda1
          Ebs:
              VolumeSize: 100
    # Additional options in the boto docs.
# Provider-specific config for worker nodes, e.g. instance type. By default
# Ray will auto-configure unspecified fields such as SubnetId and KeyName.
# For more documentation on available fields, see:
# http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances
worker_nodes:
    InstanceType: m5.large
    ImageId: ami-01aad86525617098d  # base DL AMI 35
    # ImageId: ami-0b206e33498f6acbe  # Deep Learning AMI 18.04 + ray 1.0 etc.
    # SubnetId: subnet-78d4bd53
    # Run workers on spot by default. Comment this out to use on-demand.
    InstanceMarketOptions:
        MarketType: spot
        # Additional options can be found in the boto docs, e.g.
        #   SpotOptions:
        #       MaxPrice: MAX_HOURLY_PRICE
file_mounts: {
# "/path1/on/remote/machine": "/path1/on/local/machine",
# "/path2/on/remote/machine": "/path2/on/local/machine",
}
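# A concrete (hypothetical) example: the remote path on each node goes on the
# left, the path on the machine running `ray up` on the right, e.g.
#   file_mounts: {
#       "/home/ubuntu/data": "/Users/me/projects/iowa/data",
#   }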
# Files or directories to copy from the head node to the worker nodes.
cluster_synced_files: []
# List of shell commands to run on all nodes to set them up.
setup_commands:
- echo "setup"
- python --version
# - pip install ray[all]
- wget https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-1.1.0.dev0-cp37-cp37m-manylinux1_x86_64.whl
- pip install -U ray-1.1.0.dev0-cp37-cp37m-manylinux1_x86_64.whl
- git clone https://github.com/druce/iowa
- pip install -r ~/iowa/requirements.txt
# The following line demonstrate that you can specify arbitrary
# startup scripts on the cluster.
- touch /tmp/hyperopt.txt
# Custom commands that will be run on the head node after common setup.
head_setup_commands:
- echo "head setup"
# - pip install boto3>=1.4.8 # 1.4.8 adds InstanceMarketOptions
# Custom commands that will be run on worker nodes after common setup.
worker_setup_commands:
- echo "worker setup"