# run_evolved_agent.py
import random
import copy
import time
import numpy as np
import scipy.misc
import matplotlib.pyplot as plt
import torch
import torch.multiprocessing as mp
import retro
from baselines.common.atari_wrappers import WarpFrame, FrameStack
import sonic_util
from agent_network import Evolution_Agent_Simple
def _format_obs(obs, frame_stack, use_cuda):
    """Convert a frame-stacked observation into the network's input tensor.

    The observation is treated as ``frame_stack`` RGB frames concatenated on
    the last axis.  Each 3-channel frame is resized to 64x64, the frames are
    re-concatenated, and the result is returned as a float tensor laid out as
    (1, 3 * frame_stack, 64, 64).

    Args:
        obs: Stacked observation from the environment (anything
            ``np.array`` can turn into an H x W x (3 * frame_stack) array).
        frame_stack: Number of stacked frames contained in ``obs``.
        use_cuda: When true, the tensor is moved to the GPU before returning.

    Returns:
        A ``torch`` float tensor with a leading batch dimension of 1.
    """
    obs = np.array(obs)
    # NOTE(review): scipy.misc.imresize was removed in SciPy 1.3, so this
    # script needs an older SciPy (with Pillow).  It is kept on purpose:
    # swapping in a different resampler would change the pixels the evolved
    # network was selected on.
    frames = [
        scipy.misc.imresize(obs[:, :, k * 3:(k + 1) * 3], (64, 64))
        for k in range(frame_stack)
    ]
    obs = np.concatenate(frames, axis=2)
    obs = torch.tensor(obs).float()
    # channels-last -> channels-first, then add the batch dimension
    obs = obs.permute([2, 0, 1]).unsqueeze(0)
    if use_cuda:
        obs = obs.cuda()
    return obs


def env_proc(net, pipe, max_steps=15*60, game_states=None, frame_stack=4, use_cuda=True):
    """Play one rendered episode with ``net`` and report the total reward.

    A game/state pair is picked at random from ``game_states``, the
    environment is wrapped with ``SonicDiscretizer`` and ``FrameStack``, and
    the network is queried once per agent step.  Each chosen action is
    repeated for up to ``frame_stack`` emulator steps.  The reward is 0.02 per
    unit of *new* maximum horizontal position (``info['x'] +
    info['screen_x']``); the environment's own reward is ignored.

    The episode ends when the environment reports ``done``, when a life is
    lost, or once ``max_steps`` agent steps have been taken.

    Args:
        net: Callable taking ``{'obs': tensor}`` and returning the action
            passed to ``env.step`` (presumably a discrete action index for
            ``SonicDiscretizer`` -- confirm against the agent network).
        pipe: Connection on which ``{"reward": total_reward}`` is sent when
            the episode finishes.
        max_steps: Maximum number of agent steps before the episode is cut.
        game_states: List of ``{'game': ..., 'state': ...}`` dicts to choose
            from.  Defaults to Green Hill Zone Act 1 of Sonic the Hedgehog.
        frame_stack: Number of stacked frames, also used as the action-repeat
            count.
        use_cuda: Whether observations are moved to the GPU.

    Returns:
        None.  The result is delivered through ``pipe``.
    """
    if game_states is None:
        game_states = [{'game': 'SonicTheHedgehog-Genesis', 'state': 'GreenHillZone.Act1'}]

    chosen = game_states[random.randrange(len(game_states))]
    env = retro.make(game=chosen['game'], state=chosen['state'])
    try:
        env = sonic_util.SonicDiscretizer(env)
        env = FrameStack(env, frame_stack)

        obs = _format_obs(env.reset(), frame_stack, use_cuda)
        prev_lives = None
        t_reward = 0
        i = 0
        max_x = 0

        # Pure inference: no autograd graph is needed for playback.
        with torch.no_grad():
            while True:
                action = net({'obs': obs})

                # Repeat the chosen action, stopping early if the episode ends.
                for _ in range(frame_stack):
                    obs, rew, done, info = env.step(action)
                    env.render()
                    if done:
                        break
                obs = _format_obs(obs, frame_stack, use_cuda)

                # Reward only progress beyond the furthest point reached so far.
                x_loc = info['x'] + info['screen_x']
                if x_loc > max_x:
                    rew = 0.02 * (x_loc - max_x)
                    max_x = x_loc
                else:
                    rew = 0

                # Compare against the previous step (not only the first one) so
                # that a life lost after gaining an extra life still ends the
                # episode.  Only the count is stored, in case the environment
                # reuses the info dict between steps.
                if prev_lives is not None and info['lives'] < prev_lives:
                    done = True
                prev_lives = info['lives']

                if i >= max_steps:
                    done = True

                t_reward += rew
                if done:
                    break

                if i % 50 == 0:
                    print(i, t_reward)
                i += 1
                # Pause about 1/60 s per agent step so the rendering is watchable.
                time.sleep(0.016)
    finally:
        # Always release the emulator and its render window, even on error.
        env.close()

    pipe.send({"reward": t_reward})
    return
if __name__ == '__main__':
    # Run configuration.
    p = {}
    p['load_file_name'] = "evolved_agent_16.pth"

    # PyTorch requires the 'spawn' (or 'forkserver') start method for a child
    # process to use CUDA, so this must be set before the process is created.
    mp.set_start_method('spawn')

    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    net = torch.load(p['load_file_name']).cuda()

    pipe_main, pipe_sub = mp.Pipe()
    proc = mp.Process(target=env_proc, args=(net, pipe_sub, 15*60*3))#, [{'game':'SpaceInvaders-v0'}]))
    proc.start()

    # Wait for the episode to finish.  Joining before reading is safe here
    # because the worker sends a single small dict, which fits in the pipe
    # buffer; it also keeps `net` alive for as long as the child uses it.
    proc.join()

    # The worker only sends on success, so poll instead of blocking on recv().
    if pipe_main.poll():
        result = pipe_main.recv()
        print("total reward:", result["reward"])
    else:
        print("env_proc exited without reporting a reward; exit code:", proc.exitcode)
    pipe_main.close()