-
Notifications
You must be signed in to change notification settings - Fork 0
/
Genentic_Algorithm.py
264 lines (210 loc) · 11.5 KB
/
Genentic_Algorithm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
import retro
import numpy as np
import sys
import random
import os
relu = lambda X: np.maximum(0, X) # 단층, Hidden_layer
sigmoid = lambda X: 1.0 / (1.0 + np.exp(-X)) # Output_layer
class Chromosome: # 염색체
def __init__(self):
# 4개의 유전자가 모여 하나의 염색체가 됨
self.w1 = np.random.uniform(low=-1, high=1, size=(80, 9))
self.b1 = np.random.uniform(low=-1, high=1, size=(9,))
self.w2 = np.random.uniform(low=-1, high=1, size=(9, 6))
self.b2 = np.random.uniform(low=-1, high=1, size=(6,))
# L1 = data X W1 + b1
# [a, b] * [b, c] = [a, c]
self.l1 = None
self.distance = 0 # 거리
self.max_distance = 0 #최대 거리
self.frames = 0 # 프레임
self.stop_frames = 0 # 정지 프레임
self.win = 0 # 승리
def predict(self, data):
# data = Input_layer;
self.l1 = relu(np.matmul(data, self.w1) + self.b1) # 행렬곱
# sigmoid = Output_layer
output = sigmoid(np.matmul(self.l1, self.w2) + self.b2) # 행렬곱
# output = [-0 ~ 0, -0 ~ 0, -0 ~ 0, -0 ~ 0, -0 ~ 0, -0 ~ 0]
# -0 ~ 0 사이의 값을 가진 6개의 output 중 0.5보다 크면 1로 바꾸고 아니면 0으로 출력
result = (output > 0.5).astype(np.int)
return result
def fitness(self): # 적합도
'''
d^0.8 - f^0.5 +{ 2500 x d (d > 50) | + { 100,000 (win or clear)
{ 0 (d <= 50)| + { 0 (x)
# 적합도 기준
# 0. 많은 거리를 이동했다면 높은 적합도
# 2. 같은 거리라도 더 짧은 시간에 도달했다면 높은 적합도
# 3. 클리어한 AI는 가장 높은 적합도
'''
return int(max(self.distance ** 1.8 - self.frames ** 1.5 + min(max(self.distance - 50, 0), 1) * 2500 + self.win * 1000000, 1))
# 반환, 정수(최대(거리^0.8 - 프레임^0.5 + 최저(최고(거리 - 50, 0), 0) * 2500 + 승리 * 1000000, 0))
class GeneticAlgorithm: # 유전_알고리즘
def __init__(self):
self.generation = 0 # 세대
self.chromosomes = [Chromosome() for _ in range(10)]
# 염색체 = [Chromosome(), ------ Chromosome()] -> 10개
self.current_chromosome_index = 0 # 현재 염색체 인덱스
def elitist_preserve_selection(self): # 엘리트 보존 선택
# 염색체 정렬(내림 차순) -> 정렬(염색체_배열, 적합도 함수 호출, )
sort_chromosomes = sorted(self.chromosomes, key=lambda x: x.fitness(), reverse=True)
return sort_chromosomes[:2] # 가장큰 2개의 숫자 출력
def roulette_wheel_selection(self): # 룰렛 휠 선택? 적합도 ->
#
result = [] # 결과 배열
fitness_sum = sum(c.fitness() for c in self.chromosomes) #
# 10개의 염색체 중 하나 선택
# 적합도를 뽑아서 리스트에 넣고 그것을 더함
for _ in range(2): # 2번 돌려 선택
pick = random.uniform(0, fitness_sum) # 0 ~ 10개의 염색체의 적합도의 합(랜덤 선택)
# 0 ~ sum -0
# uniform = 균등확률분포 값을 생성해주는 함수
current = 0 # 현재
for chromosome in self.chromosomes:
current += chromosome.fitness()
if current > pick:
result.append(chromosome)
break
return result
def SBX(self, p1, p2): # Simulated binary crossover // 일점 교차
# 숫자를 비율로 쪼갬
rand = np.random.random(p1.shape)
gamma = np.empty(p1.shape)
gamma[rand <= 0.5] = (2 * rand[rand <= 0.5]) ** (1.0 / (100 + 1))
gamma[rand > 0.5] = (1.0 / (2.0 * (1.0 - rand[rand > 0.5]))) ** (1.0 / (100 + 1))
c1 = 0.5 * ((1 + gamma) * p1 + (1 - gamma) * p2)
c2 = 0.5 * ((1 - gamma) * p1 + (1 + gamma) * p2)
return c1, c2
def crossover(self, chromosome1, chromosome2): # 교배 과정
child1 = Chromosome()
child2 = Chromosome()
# 선택된 2개의 높은 염색체 2개의 자식 만듭
child1.w1, child2.w1 = self.SBX(chromosome1.w1, chromosome2.w1)
child1.b1, child2.b1 = self.SBX(chromosome1.b1, chromosome2.b1)
child1.w2, child2.w2 = self.SBX(chromosome1.w2, chromosome2.w2)
child1.b2, child2.b2 = self.SBX(chromosome1.b2, chromosome2.b2)
# 어떤 문제를 하냐에 따라 결과가 달라짐 -> 실험// 문제마다 선택 교배 변이 내가 고르는 것
return child1, child2
def static_mutation(self, data): # 정적 돌연 변이
mutation_array = np.random.random(data.shape) < 0.05
# 가우시안 돌연 변이
gaussian_mutation = np.random.normal(size=data.shape) # 정규 분포
data[mutation_array] += gaussian_mutation[mutation_array]
def mutation(self, chromosome): # 변이
# 2번 호출됨
self.static_mutation(chromosome.w1)
self.static_mutation(chromosome.b1)
self.static_mutation(chromosome.w2)
self.static_mutation(chromosome.b2)
def next_generation(self): # 다음 세대
if not os.path.exists('../data'):
os.mkdir('../data')
if not os.path.exists('../data/' + str(self.generation)):
os.mkdir('../data/' + str(self.generation))
for i in range(10):
if not os.path.exists('../data/' + str(self.generation) + '/' + str(i)):
os.mkdir('../data/' + str(self.generation) + '/' + str(i))
np.save('../data/' + str(self.generation) + '/' + str(i) + '/w1.npy', self.chromosomes[i].w1)
np.save('../data/' + str(self.generation) + '/' + str(i) + '/w2.npy', self.chromosomes[i].w2)
np.save('../data/' + str(self.generation) + '/' + str(i) + '/b1.npy', self.chromosomes[i].b1)
np.save('../data/' + str(self.generation) + '/' + str(i) + '/b2.npy', self.chromosomes[i].b2)
np.save('../data/' + str(self.generation) + '/' + str(i) + '/fitness.npy', np.array([self.chromosomes[i].fitness()]))
print(f'{self.generation}세대 시뮬레이션 완료.')
next_chromosomes = []
next_chromosomes.extend(self.elitist_preserve_selection())
print(f'엘리트 적합도: {next_chromosomes[0].fitness()}')
for i in range(4):
selected_chromosome = self.roulette_wheel_selection()
child_chromosome1, child_chromosome2 = self.crossover(selected_chromosome[0], selected_chromosome[1])
self.mutation(child_chromosome1)
self.mutation(child_chromosome2)
next_chromosomes.append(child_chromosome1)
next_chromosomes.append(child_chromosome2)
self.chromosomes = next_chromosomes
for c in self.chromosomes:
c.distance = 0
c.max_distance = 0
c.frames = 0
c.stop_frames = 0
c.win = 0
self.generation += 1
self.current_chromosome_index = 0
class Mario:
def __init__(self):
super().__init__()
self.env = retro.make(game='SuperMarioBros-Nes', state=f'Level1-0')
self.env.reset()
self.ga = GeneticAlgorithm()
self.nogui()
def nogui(self):
while True:
ram = self.env.get_ram()
full_screen_tiles = ram[0x0500:0x069F + 1]
full_screen_tile_count = full_screen_tiles.shape[0]
full_screen_page1_tiles = full_screen_tiles[:full_screen_tile_count // 2].reshape((-1, 16))
full_screen_page2_tiles = full_screen_tiles[full_screen_tile_count // 2:].reshape((-1, 16))
full_screen_tiles = np.concatenate((full_screen_page1_tiles, full_screen_page2_tiles), axis=1).astype(
np.int)
# 한 번에 최대 5 명의 적.
enemy_drawn = ram[0x000F:0x0014]
enemy_horizontal_position_in_level = ram[0x006E:0x0072 + 1]
enemy_x_position_on_screen = ram[0x0087:0x008B + 1]
enemy_y_position_on_screen = ram[0x00CF:0x00D3 + 1]
# 5명의 적
for i in range(5):
if enemy_drawn[i] == 1:
ex = (((enemy_horizontal_position_in_level[i] * 256) + enemy_x_position_on_screen[
i]) % 512 + 8) // 16
ey = (enemy_y_position_on_screen[i] - 8) // 16 - 1
if 0 <= ex < full_screen_tiles.shape[1] and 0 <= ey < full_screen_tiles.shape[0]:
# 적의 위치
full_screen_tiles[ey][ex] = -1
# 현재 화면을 구현하는 단계
current_screen_in_level = ram[0x071A] # 어디에 존재하냐(나의 화면)
screen_x_position_in_level = ram[0x071C] # 몇 페이지의 몇번쨰 줄이냐..
screen_x_position_offset = (256 * current_screen_in_level + screen_x_position_in_level) % 512 # 기준점
sx = screen_x_position_offset // 16 # 타일 좌표 변환
# 두 페이지를 합쳐서 자르기
screen_tiles = np.concatenate((full_screen_tiles, full_screen_tiles), axis=1)[:, sx:sx + 16]
# 최종 가공
for i in range(screen_tiles.shape[0]):
for j in range(screen_tiles.shape[1]):
if screen_tiles[i][j] > 0: # 장애물(통합)
screen_tiles[i][j] = 1
if screen_tiles[i][j] == -1: # 적
screen_tiles[i][j] = 2
player_x_position_current_screen_offset = ram[0x03AD]
player_y_position_current_screen_offset = ram[0x03B8]
px = (player_x_position_current_screen_offset + 8) // 16
py = (player_y_position_current_screen_offset + 8) // 16 - 1
ix = px
iy = 2
input_data = screen_tiles[iy:iy + 10, ix:ix + 8]
if 2 <= py <= 11:
input_data[py - 2][0] = 2 # 플레이어
input_data = input_data.flatten() # 13 X 16 -> 0 X 208
current_chromosome = self.ga.chromosomes[self.ga.current_chromosome_index]
current_chromosome.frames += 1
current_chromosome.distance = ram[0x006D] * 256 + ram[0x0086]
if current_chromosome.max_distance < current_chromosome.distance:
current_chromosome.max_distance = current_chromosome.distance
current_chromosome.stop_frame = 0
else:
current_chromosome.stop_frame += 1
if ram[0x001D] == 3 or ram[0x0E] in (0x0B, 0x06) or ram[0xB5] == 2 or current_chromosome.stop_frame > 180:
if ram[0x001D] == 3:
current_chromosome.win = 1
print(f'{self.ga.current_chromosome_index + 1}번 마리오: {current_chromosome.fitness()}')
self.ga.current_chromosome_index += 1
if self.ga.current_chromosome_index == 10:
self.ga.next_generation()
print(f'== {self.ga.generation}세대 ==')
self.env.reset()
else:
predict = current_chromosome.predict(input_data) # U, D, L, R, A, B
press_buttons = np.array(
[predict[5], 0, 0, 0, predict[0], predict[1], predict[2], predict[3], predict[4]])
self.env.step(press_buttons)
if __name__ == '__main__':
mario = Mario()