对比 LSTM 与 MLP 结合的 A2C 在 CartPole-v1 环境下解决部分可观测问题的效果

背景

参考

https://github.com/HaiyinPiao/pytorch-a2clstm-DRQN/tree/master

https://blog.csdn.net/hhy_csdn/article/details/106560875

实验 lstm 与 a2c 结合的构造过程

并且删去一维状态(速度),将原问题转换为一个部分可观测问题

实验结果表明,在 CartPole-v1 环境中,在当前参数设置下

使用 mlp 的 a2c 相较于 使用 lstm 的 a2c 能够更快收敛到最佳

代码的部分

导包


import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import matplotlib.pyplot as plt
import numpy as np
import math
import random
import os
import gym

构造包含有 LSTM 的 actor 和 critic 网络

class ActorNetwork(nn.Module):
  """Policy network: an LSTM followed by a linear layer and a log-softmax.

  The LSTM is built with ``batch_first=True``; the log-softmax is taken over
  dimension 2 of the linear layer's output.
  """

  def __init__(self,in_size,hidden_size,out_size):
    """Build the recurrent layer and the linear output head.

    Args:
      in_size: size of each input vector fed to the LSTM.
      hidden_size: size of the LSTM hidden state.
      out_size: number of outputs produced by the linear head.
    """
    super().__init__()
    self.lstm=nn.LSTM(input_size=in_size,hidden_size=hidden_size,batch_first=True)
    self.fc=nn.Linear(in_features=hidden_size,out_features=out_size)

  def forward(self,x,hidden):
    """Return ``(log_probabilities, next_hidden)`` for input ``x``.

    Args:
      x: input tensor passed to the LSTM.
      hidden: ``(h, c)`` tuple passed to the LSTM as its initial state.
    """
    features,next_hidden=self.lstm(x,hidden)
    logits=self.fc(features)
    log_probs=F.log_softmax(logits,dim=2)
    return log_probs,next_hidden

/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.
  and should_run_async(code)
class ValueNetwork(nn.Module):
  """Critic network: an LSTM (``batch_first=True``) followed by a linear head."""

  def __init__(self,in_size,hidden_size,out_size):
    """Build the recurrent layer and the linear output head.

    Args:
      in_size: size of each input vector fed to the LSTM.
      hidden_size: size of the LSTM hidden state.
      out_size: number of outputs produced by the linear head.
    """
    super().__init__()
    self.lstm=nn.LSTM(input_size=in_size,hidden_size=hidden_size,batch_first=True)
    self.fc=nn.Linear(in_features=hidden_size,out_features=out_size)

  def forward(self,x,hidden):
    """Return ``(values, next_hidden)`` for input ``x`` and initial state ``hidden``."""
    features,next_hidden=self.lstm(x,hidden)
    return self.fc(features),next_hidden
class A2CLSTM():
  """Advantage actor-critic (A2C) agent whose actor and critic are LSTM networks.

  Every observation coming from the environment has its component at index 1
  removed with ``np.delete`` before the agent sees it, which makes the task
  partially observable.

  NOTE: the code uses the classic gym API, i.e. ``env.reset()`` returns the
  observation and ``env.step()`` returns a 4-tuple.
  """

  def __init__(self,env_name,ValueNetwork=ValueNetwork,ActorNetwork=ActorNetwork,STATE_DIM=4-1,ACTION_DIM=2,NUM_EPISODE=10000,EPISODE_LEN=500,A_HIDDEN=40,C_HIDDEN=40):
    """Create the actor/critic networks and their Adam optimisers.

    Args:
      env_name: id passed to ``gym.make``.
      ValueNetwork: critic class, called as ``(in_size, hidden_size, out_size)``.
      ActorNetwork: actor class, called as ``(in_size, hidden_size, out_size)``.
      STATE_DIM: length of the observation after one component was removed.
      ACTION_DIM: number of discrete actions.
      NUM_EPISODE: number of rollouts performed by ``train``.
      EPISODE_LEN: maximum number of environment steps per rollout.
      A_HIDDEN: hidden size of the actor LSTM.
      C_HIDDEN: hidden size of the critic LSTM.
    """
    self.env_name=env_name
    # Number of consecutive rollouts that lasted exactly EPISODE_LEN steps.
    self.count=0
    # Episode indices and mean scores recorded by the periodic evaluation.
    self.steps=[]
    self.task_episodes=[]
    self.test_results=[]

    self.STATE_DIM=STATE_DIM
    self.ACTION_DIM=ACTION_DIM
    self.NUM_EPISODE=NUM_EPISODE
    self.EPISODE_LEN=EPISODE_LEN
    self.A_HIDDEN=A_HIDDEN
    self.C_HIDDEN=C_HIDDEN

    self.value_network=ValueNetwork(in_size=self.STATE_DIM,hidden_size=self.C_HIDDEN,out_size=1)
    self.value_network_optim=torch.optim.Adam(self.value_network.parameters(),lr=0.005)

    self.actor_network=ActorNetwork(in_size=self.STATE_DIM,hidden_size=self.A_HIDDEN,out_size=self.ACTION_DIM)
    self.actor_network_optim=torch.optim.Adam(self.actor_network.parameters(),lr=0.001)

  def _zero_hidden(self,size):
    """Return a fresh all-zero ``(h, c)`` LSTM state of shape ``(1, 1, size)``."""
    return torch.zeros(1,1,size),torch.zeros(1,1,size)

  def _as_batch(self,rows,width):
    """Convert one vector or a list of vectors to a float tensor ``(1, T, width)``."""
    return torch.as_tensor(np.asarray(rows),dtype=torch.float32).reshape(1,-1,width)

  def discount_reward(self,r,gamma,final_r):
    """Return the discounted return of every step as a float array.

    Args:
      r: sequence of per-step rewards.
      gamma: discount factor.
      final_r: value used to bootstrap the return after the last step;
        a scalar or a one-element array.
    """
    # Always accumulate in float: np.zeros_like(r) would silently truncate
    # the returns if the rewards happened to be integers.
    discounted_r=np.zeros(len(r),dtype=np.float64)
    running_add=float(np.asarray(final_r).item())
    for t in reversed(range(len(r))):
      running_add=running_add*gamma+r[t]
      discounted_r[t]=running_add

    return discounted_r

  def roll_out(self,actor_network,env,episode_len,value_network,init_state):
    """Run the policy for at most ``episode_len`` steps starting at ``init_state``.

    Returns:
      ``(states, actions, rewards, final_r, state)`` where ``actions`` are
      one-hot lists, ``final_r`` is 0 if the episode terminated and the
      critic's value of the last reached state otherwise, and ``state`` is
      the state the next rollout has to start from.
    """
    states=[]
    actions=[]
    rewards=[]
    is_done=False
    final_r=0
    state=init_state
    a_hidden=self._zero_hidden(self.A_HIDDEN)

    for j in range(episode_len):
      states.append(state)
      # The rollout graph is never back-propagated (train recomputes the
      # forward pass), so gradients are not needed here.
      with torch.no_grad():
        log_softmax_action,a_hidden=actor_network(self._as_batch(state,self.STATE_DIM),a_hidden)
      softmax_action=torch.exp(log_softmax_action)
      action=int(np.random.choice(self.ACTION_DIM,p=softmax_action.numpy()[0][0]))

      one_hot_action=[int(k==action) for k in range(self.ACTION_DIM)]

      next_state,reward,done,_=env.step(action)
      # Drop one observation component to make the task partially observable.
      next_state=np.delete(next_state,1)

      actions.append(one_hot_action)
      rewards.append(reward)
      state=next_state
      if done:
        is_done=True
        state=np.delete(env.reset(),1)
        # Count consecutive rollouts that survived the full length.
        if episode_len==j+1:
          self.count+=1
        else:
          self.count=0
        break

    if not is_done and states:
      # Rollout was cut off before termination: bootstrap with the critic.
      # The critic is recurrent, so it is run over the whole visited
      # sequence and the value predicted for the last state is used.
      with torch.no_grad():
        sequence=self._as_batch(states+[state],self.STATE_DIM)
        c_out,_=value_network(sequence,self._zero_hidden(self.C_HIDDEN))
      final_r=c_out[0,-1,0].item()

    return states,actions,rewards,final_r,state

  def _update(self,states,actions,rewards,final_r):
    """Perform one actor step and one critic step on a finished rollout."""
    actions_var=self._as_batch(actions,self.ACTION_DIM)
    states_var=self._as_batch(states,self.STATE_DIM)
    qs=torch.as_tensor(self.discount_reward(rewards,0.99,final_r),dtype=torch.float32).view(1,-1,1)

    # ---- actor ----
    self.actor_network_optim.zero_grad()
    log_softmax_actions,_=self.actor_network(states_var,self._zero_hidden(self.A_HIDDEN))
    # The advantage is a constant for the policy gradient, so the critic is
    # evaluated without building a graph.
    with torch.no_grad():
      vs,_=self.value_network(states_var,self._zero_hidden(self.C_HIDDEN))
    advantages=qs-vs

    # Tensors are (1, T, ACTION_DIM): reduce over the action dimension (2) so
    # that each step's log-probability is weighted by its own advantage.
    chosen_log_probs=torch.sum(log_softmax_actions*actions_var,2,keepdim=True)
    actor_network_loss=-torch.mean(chosen_log_probs*advantages)
    actor_network_loss.backward()
    torch.nn.utils.clip_grad_norm_(self.actor_network.parameters(),0.5)
    self.actor_network_optim.step()

    # ---- critic ----
    self.value_network_optim.zero_grad()
    values,_=self.value_network(states_var,self._zero_hidden(self.C_HIDDEN))
    criterion=nn.MSELoss()
    value_network_loss=criterion(values,qs)
    value_network_loss.backward()
    torch.nn.utils.clip_grad_norm_(self.value_network.parameters(),0.5)
    self.value_network_optim.step()

  def train(self):
    """Run ``NUM_EPISODE`` rollouts, updating the networks after each one.

    Once more than 50 consecutive rollouts lasted the full ``EPISODE_LEN``
    steps, both networks are switched to eval mode and no further updates
    are made. Every 50 rollouts the policy is evaluated with ``test`` and the
    result appended to ``self.steps`` / ``self.test_results``.
    """
    env=gym.make(self.env_name)
    try:
      init_state=np.delete(env.reset(),1)

      for episode in range(self.NUM_EPISODE):
        if self.count > 50 and self.actor_network.training and self.value_network.training :
          self.actor_network.eval()
          self.value_network.eval()
          print('evals')
        states,actions,rewards,final_r,current_state=self.roll_out(self.actor_network,env,self.EPISODE_LEN,self.value_network, init_state)

        init_state=current_state

        if self.actor_network.training and self.value_network.training :
          self._update(states,actions,rewards,final_r)

        if (episode+1)%50==0:
          step,result=self.test(episode)
          self.steps.append(step)
          self.test_results.append(result)
    finally:
      env.close()

  def test(self,episode):
    """Evaluate the greedy policy on 10 fresh episodes.

    Prints and returns ``(episode+1, mean_total_reward)``.
    """
    result=0
    test_task=gym.make(self.env_name)
    try:
      for test_epi in range(10):
        state=np.delete(test_task.reset(),1)
        a_hidden=self._zero_hidden(self.A_HIDDEN)

        for test_step in range(self.EPISODE_LEN):
          with torch.no_grad():
            log_softmax_actions,a_hidden=self.actor_network(self._as_batch(state,self.STATE_DIM),a_hidden)
          # Greedy action; argmax of log-probabilities equals argmax of probabilities.
          action=int(torch.argmax(log_softmax_actions[0,0]))
          next_state,reward,done,_=test_task.step(action)
          next_state=np.delete(next_state,1)

          result+=reward
          state=next_state
          if done:
            break
    finally:
      test_task.close()

    print("episode:",episode+1,"test result:",result/10.)
    return episode+1,result/10.


构造 MLP 的 actor 和 critic 网络


class ActorNetworkMLP(nn.Module):
  """Policy network: three linear layers with ReLU in between and a log-softmax.

  All three weight matrices are initialised with ``kaiming_normal_``; the
  log-softmax is taken over dimension 2 of the output.
  """

  def __init__(self,in_size,hidden_size,out_size):
    """Build and initialise the three linear layers.

    Args:
      in_size: size of each input vector.
      hidden_size: width of both hidden layers.
      out_size: number of outputs.
    """
    super().__init__()
    self.inlayer=nn.Linear(in_features=in_size,out_features=hidden_size)
    self.hidden=nn.Linear(in_features=hidden_size,out_features=hidden_size)
    self.out=nn.Linear(in_features=hidden_size,out_features=out_size)
    # Same initialisation order as the layers were created in.
    for layer in (self.inlayer,self.hidden,self.out):
      nn.init.kaiming_normal_(layer.weight.data)

  def forward(self,x):
    """Return the log-probabilities computed from input ``x``."""
    first=F.relu(self.inlayer(x))
    second=F.relu(self.hidden(first))
    logits=self.out(second)
    return F.log_softmax(logits,dim=2)

class ValueNetworkMLP(nn.Module):
  """Critic network: three linear layers with ReLU after the first two.

  All three weight matrices are initialised with ``kaiming_normal_``.
  """

  def __init__(self,in_size,hidden_size,out_size):
    """Build and initialise the three linear layers.

    Args:
      in_size: size of each input vector.
      hidden_size: width of both hidden layers.
      out_size: number of outputs.
    """
    super().__init__()
    self.inlayer=nn.Linear(in_features=in_size,out_features=hidden_size)
    self.hidden=nn.Linear(in_features=hidden_size,out_features=hidden_size)
    self.out=nn.Linear(in_features=hidden_size,out_features=out_size)
    # Same initialisation order as the layers were created in.
    for layer in (self.inlayer,self.hidden,self.out):
      nn.init.kaiming_normal_(layer.weight.data)

  def forward(self,x):
    """Return the value estimate computed from input ``x``."""
    first=F.relu(self.inlayer(x))
    second=F.relu(self.hidden(first))
    return self.out(second)
class A2CMLP():
  """Advantage actor-critic (A2C) agent whose actor and critic are MLPs.

  Every observation coming from the environment has its component at index 1
  removed with ``np.delete`` before the agent sees it, which makes the task
  partially observable.

  NOTE: the code uses the classic gym API, i.e. ``env.reset()`` returns the
  observation and ``env.step()`` returns a 4-tuple.
  """

  def __init__(self,env_name,ValueNetwork=ValueNetworkMLP,ActorNetwork=ActorNetworkMLP,STATE_DIM=4-1,ACTION_DIM=2,NUM_EPISODE=10000,EPISODE_LEN=500,A_HIDDEN=40,C_HIDDEN=40):
    """Create the actor/critic networks and their Adam optimisers.

    Args:
      env_name: id passed to ``gym.make``.
      ValueNetwork: critic class, called as ``(in_size, hidden_size, out_size)``.
      ActorNetwork: actor class, called as ``(in_size, hidden_size, out_size)``.
      STATE_DIM: length of the observation after one component was removed.
      ACTION_DIM: number of discrete actions.
      NUM_EPISODE: number of rollouts performed by ``train``.
      EPISODE_LEN: maximum number of environment steps per rollout.
      A_HIDDEN: hidden width of the actor.
      C_HIDDEN: hidden width of the critic.
    """
    self.env_name=env_name
    # Number of consecutive rollouts that lasted exactly EPISODE_LEN steps.
    self.count=0
    # Episode indices and mean scores recorded by the periodic evaluation.
    self.steps=[]
    self.task_episodes=[]
    self.test_results=[]

    self.STATE_DIM=STATE_DIM
    self.ACTION_DIM=ACTION_DIM
    self.NUM_EPISODE=NUM_EPISODE
    self.EPISODE_LEN=EPISODE_LEN
    self.A_HIDDEN=A_HIDDEN
    self.C_HIDDEN=C_HIDDEN

    self.value_network=ValueNetwork(in_size=self.STATE_DIM,hidden_size=self.C_HIDDEN,out_size=1)
    self.value_network_optim=torch.optim.Adam(self.value_network.parameters(),lr=0.005)

    self.actor_network=ActorNetwork(in_size=self.STATE_DIM,hidden_size=self.A_HIDDEN,out_size=self.ACTION_DIM)
    self.actor_network_optim=torch.optim.Adam(self.actor_network.parameters(),lr=0.001)

  def _as_batch(self,rows,width):
    """Convert one vector or a list of vectors to a float tensor ``(1, T, width)``."""
    return torch.as_tensor(np.asarray(rows),dtype=torch.float32).reshape(1,-1,width)

  def discount_reward(self,r,gamma,final_r):
    """Return the discounted return of every step as a float array.

    Args:
      r: sequence of per-step rewards.
      gamma: discount factor.
      final_r: value used to bootstrap the return after the last step;
        a scalar or a one-element array.
    """
    # Always accumulate in float: np.zeros_like(r) would silently truncate
    # the returns if the rewards happened to be integers.
    discounted_r=np.zeros(len(r),dtype=np.float64)
    running_add=float(np.asarray(final_r).item())
    for t in reversed(range(len(r))):
      running_add=running_add*gamma+r[t]
      discounted_r[t]=running_add

    return discounted_r

  def roll_out(self,actor_network,env,episode_len,value_network,init_state):
    """Run the policy for at most ``episode_len`` steps starting at ``init_state``.

    Returns:
      ``(states, actions, rewards, final_r, state)`` where ``actions`` are
      one-hot lists, ``final_r`` is 0 if the episode terminated and the
      critic's value of the last reached state otherwise, and ``state`` is
      the state the next rollout has to start from.
    """
    states=[]
    actions=[]
    rewards=[]
    is_done=False
    final_r=0
    state=init_state

    for j in range(episode_len):
      states.append(state)
      # The rollout graph is never back-propagated (train recomputes the
      # forward pass), so gradients are not needed here.
      with torch.no_grad():
        log_softmax_action=actor_network(self._as_batch(state,self.STATE_DIM))
      softmax_action=torch.exp(log_softmax_action)
      action=torch.distributions.Categorical(softmax_action).sample().item()

      one_hot_action=[int(k==action) for k in range(self.ACTION_DIM)]

      next_state,reward,done,_=env.step(action)
      # Drop one observation component to make the task partially observable.
      next_state=np.delete(next_state,1)

      actions.append(one_hot_action)
      rewards.append(reward)
      state=next_state
      if done:
        is_done=True
        state=np.delete(env.reset(),1)
        # Count consecutive rollouts that survived the full length.
        if episode_len==j+1:
          self.count+=1
        else:
          self.count=0
        break

    if not is_done and states:
      # Rollout was cut off before termination: bootstrap with the critic's
      # value of the last reached state.
      with torch.no_grad():
        c_out=value_network(self._as_batch(state,self.STATE_DIM))
      final_r=c_out.reshape(-1)[0].item()

    return states,actions,rewards,final_r,state

  def _update(self,states,actions,rewards,final_r):
    """Perform one actor step and one critic step on a finished rollout."""
    actions_var=self._as_batch(actions,self.ACTION_DIM)
    states_var=self._as_batch(states,self.STATE_DIM)
    qs=torch.as_tensor(self.discount_reward(rewards,0.99,final_r),dtype=torch.float32).view(1,-1,1)

    # ---- actor ----
    self.actor_network_optim.zero_grad()
    log_softmax_actions=self.actor_network(states_var)
    # The advantage is a constant for the policy gradient, so the critic is
    # evaluated without building a graph.
    with torch.no_grad():
      vs=self.value_network(states_var)
    advantages=qs-vs

    # Tensors are (1, T, ACTION_DIM): reduce over the action dimension (2) so
    # that each step's log-probability is weighted by its own advantage.
    chosen_log_probs=torch.sum(log_softmax_actions*actions_var,2,keepdim=True)
    actor_network_loss=-torch.mean(chosen_log_probs*advantages)
    actor_network_loss.backward()
    torch.nn.utils.clip_grad_norm_(self.actor_network.parameters(),0.5)
    self.actor_network_optim.step()

    # ---- critic ----
    self.value_network_optim.zero_grad()
    values=self.value_network(states_var)
    criterion=nn.MSELoss()
    value_network_loss=criterion(values,qs)
    value_network_loss.backward()
    torch.nn.utils.clip_grad_norm_(self.value_network.parameters(),0.5)
    self.value_network_optim.step()

  def train(self):
    """Run ``NUM_EPISODE`` rollouts, updating the networks after each one.

    Once more than 50 consecutive rollouts lasted the full ``EPISODE_LEN``
    steps, both networks are switched to eval mode and no further updates
    are made. Every 50 rollouts the policy is evaluated with ``test`` and the
    result appended to ``self.steps`` / ``self.test_results``.
    """
    env=gym.make(self.env_name)
    try:
      init_state=np.delete(env.reset(),1)

      for episode in range(self.NUM_EPISODE):
        if self.count > 50 and self.actor_network.training and self.value_network.training :
          self.actor_network.eval()
          self.value_network.eval()
          print('evals')
        states,actions,rewards,final_r,current_state=self.roll_out(self.actor_network,env,self.EPISODE_LEN,self.value_network, init_state)

        init_state=current_state

        if self.actor_network.training and self.value_network.training :
          self._update(states,actions,rewards,final_r)

        if (episode+1)%50==0:
          step,result=self.test(episode)
          self.steps.append(step)
          self.test_results.append(result)
    finally:
      env.close()

  def test(self,episode):
    """Evaluate the greedy policy on 10 fresh episodes.

    Prints and returns ``(episode+1, mean_total_reward)``.
    """
    result=0
    test_task=gym.make(self.env_name)
    try:
      for test_epi in range(10):
        state=np.delete(test_task.reset(),1)

        for test_step in range(self.EPISODE_LEN):
          with torch.no_grad():
            log_softmax_actions=self.actor_network(self._as_batch(state,self.STATE_DIM))
          # Greedy action; argmax of log-probabilities equals argmax of probabilities.
          action=int(torch.argmax(log_softmax_actions[0,0]))
          next_state,reward,done,_=test_task.step(action)
          next_state=np.delete(next_state,1)

          result+=reward
          state=next_state
          if done:
            break
    finally:
      test_task.close()

    print("episode:",episode+1,"test result:",result/10.)
    return episode+1,result/10.

# ac2mlp = A2CMLP('CartPole-v1')
# ac2mlp.train()
# plt.plot(ac2mlp.steps,ac2mlp.test_results)


# plt.show()

对比两种不同构造方法的效果

多次实验表明 mlp 相较于 lstm 构造的网络在本次实验中能够更快取得较好的效果

且收敛后继续训练会有较大的波动

# Train the LSTM-based agent with its default settings and plot the mean
# test score recorded at every evaluation point.
ac2lstmagent = A2CLSTM('CartPole-v1')
ac2lstmagent.train()
plt.plot(ac2lstmagent.steps,ac2lstmagent.test_results,label='rl+lstm')

# Train the MLP-based agent on the same environment id and overlay its curve.
ac2mlp = A2CMLP('CartPole-v1')
ac2mlp.train()
plt.plot(ac2mlp.steps,ac2mlp.test_results,label='rl+mlp')

# Show both learning curves in one labelled figure.
plt.legend()

plt.show()
12
12
11
35


<ipython-input-120-83055f2fe68b>:119: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
  torch.nn.utils.clip_grad_norm(self.actor_network.parameters(),0.5)
<ipython-input-120-83055f2fe68b>:132: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
  torch.nn.utils.clip_grad_norm(self.value_network.parameters(),0.5)


Streaming output truncated to the last 5000 lines.
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
467
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
episode: 5150 test result: 500.0
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
374
500
500
500
500
500
500
500
500
500
500
500
500
499
500
366
480
500
500
159
500
193
370
500
500
500
387
446
178
500
219
387
episode: 5200 test result: 500.0
196
393
203
500
500
500
500
360
479
485
500
500
489
425
500
493
464
500
398
500
500
500
500
500
500
500
500
493
500
500
500
500
500
500
357
500
500
408
337
384
500
500
500
500
498
380
500
475
464
340
episode: 5250 test result: 498.8
392
496
384
392
500
484
500
500
378
500
500
397
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
467
435
500
episode: 5300 test result: 500.0
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
332
500
500
500
500
499
500
445
390
500
500
500
500
248
500
500
382
189
219
204
184
210
195
153
189
150
135
167
136
153
132
146
episode: 5350 test result: 180.9
145
170
153
116
152
136
143
155
146
159
145
173
190
187
154
167
159
186
180
190
187
227
178
153
152
159
500
200
500
352
165
500
377
500
425
469
389
193
500
323
500
500
481
500
415
500
500
393
481
356
episode: 5400 test result: 500.0
371
337
403
500
500
500
381
445
500
500
378
500
500
347
388
500
500
261
500
365
500
485
500
422
464
421
500
473
500
392
414
466
291
359
440
312
431
499
276
378
393
437
294
415
500
470
422
481
438
500
episode: 5450 test result: 492.4
356
483
500
500
500
461
482
440
500
500
500
500
432
500
500
473
500
460
399
298
434
245
484
500
457
194
481
393
458
460
454
390
500
436
473
498
389
500
362
417
176
432
467
371
220
403
234
146
324
500
episode: 5500 test result: 380.0
269
131
136
108
187
195
500
371
500
347
257
242
288
46
156
228
296
272
331
352
314
139
370
500
401
173
289
365
315
500
500
56
361
309
315
287
500
500
327
419
183
500
320
329
148
332
138
415
315
309
episode: 5550 test result: 358.1
168
448
321
182
190
135
185
191
186
167
183
170
150
167
196
307
171
217
167
212
182
125
301
193
139
149
163
155
154
145
158
185
136
166
156
122
154
163
150
145
133
149
137
164
147
148
141
128
141
144
episode: 5600 test result: 146.9
144
133
135
138
140
126
134
128
149
127
144
146
148
159
131
163
130
172
131
121
207
179
170
308
338
338
145
170
141
152
136
180
328
331
313
310
337
304
321
195
181
231
339
385
184
362
298
306
177
338
episode: 5650 test result: 442.2
312
358
440
358
327
385
319
350
326
463
389
334
408
335
436
445
381
476
351
500
500
345
495
239
443
332
444
379
316
443
341
447
351
348
437
463
375
234
388
319
329
169
360
332
359
333
481
447
365
368
episode: 5700 test result: 460.1
376
322
498
328
359
448
335
445
148
314
467
352
336
351
192
376
441
315
356
325
330
359
184
232
226
349
205
382
157
459
346
338
348
322
320
397
349
500
378
339
341
387
345
340
314
380
156
360
195
492
episode: 5750 test result: 490.9
351
345
464
305
481
401
332
313
216
361
478
321
469
367
499
335
164
483
354
404
182
151
206
356
462
226
399
150
194
233
484
147
205
175
181
198
174
132
412
387
499
208
500
144
433
426
394
213
387
424
episode: 5800 test result: 500.0
435
382
190
362
500
402
500
359
139
231
500
500
364
413
409
357
441
500
500
500
500
248
194
500
209
500
385
172
500
391
467
186
500
199
379
498
500
485
500
500
500
401
417
500
399
500
413
500
465
500
episode: 5850 test result: 500.0
500
206
500
500
231
239
500
184
48
500
500
500
201
211
174
153
204
95
163
132
153
149
166
174
118
144
142
142
158
180
156
196
200
191
146
154
180
174
174
211
227
206
186
500
190
397
500
436
500
174
episode: 5900 test result: 500.0
500
500
500
500
500
500
500
500
500
494
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
441
500
500
245
500
500
500
245
271
500
210
148
500
272
226
201
223
episode: 5950 test result: 363.1
190
168
216
197
228
158
195
49
179
156
147
169
180
170
233
160
139
263
260
103
240
250
163
500
225
71
298
500
276
500
500
500
500
500
500
211
232
252
302
223
161
500
211
216
500
281
258
229
170
182
episode: 6000 test result: 371.4
138
180
173
198
167
181
178
186
173
159
162
159
151
161
154
149
162
168
158
154
188
166
156
162
198
215
242
205
262
164
500
500
244
500
244
500
500
226
500
227
248
500
500
500
500
500
500
500
500
500
episode: 6050 test result: 500.0
500
500
500
500
500
500
500
206
500
500
500
500
197
500
500
438
500
500
500
465
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
274
500
500
episode: 6100 test result: 500.0
500
500
500
500
500
500
500
500
500
500
500
500
245
500
500
500
500
500
500
500
500
500
500
244
500
500
193
246
195
198
197
190
196
201
199
188
192
176
162
184
162
158
176
157
147
179
163
160
134
135
episode: 6150 test result: 172.0
144
153
160
147
158
151
160
149
161
136
161
162
155
133
173
162
155
168
166
173
156
177
162
170
177
164
146
180
149
152
143
140
174
139
176
159
145
156
158
166
156
185
169
146
151
162
156
133
163
145
episode: 6200 test result: 176.1
146
142
152
176
163
154
153
161
140
178
172
190
179
204
152
232
500
500
500
227
500
500
213
500
500
500
500
186
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
441
500
500
500
episode: 6250 test result: 500.0
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
episode: 6300 test result: 500.0
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
episode: 6350 test result: 500.0
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
episode: 6400 test result: 500.0
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
episode: 6450 test result: 500.0
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
228
214
500
500
211
254
213
500
500
episode: 6500 test result: 500.0
211
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
468
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
213
500
500
500
500
500
500
500
500
500
500
500
500
500
500
episode: 6550 test result: 500.0
419
500
500
500
500
500
500
500
398
500
500
500
500
500
407
500
500
449
500
500
500
500
415
448
200
388
404
438
437
406
204
172
500
218
165
200
422
200
188
192
182
176
163
193
165
404
172
167
188
413
episode: 6600 test result: 492.1
189
499
415
355
365
420
500
420
497
471
500
500
500
500
380
339
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
428
500
495
500
500
500
500
500
500
500
500
500
500
500
500
500
episode: 6650 test result: 500.0
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
222
500
500
500
500
500
405
500
424
500
500
500
500
500
500
477
500
500
500
500
500
500
500
500
500
500
500
500
433
500
episode: 6700 test result: 500.0
500
430
500
500
500
500
500
500
500
408
500
500
500
500
500
163
500
166
179
179
177
173
186
184
500
192
175
164
173
148
160
159
155
156
144
146
142
157
153
155
160
163
160
160
157
179
161
159
129
155
episode: 6750 test result: 191.3
133
162
133
161
148
129
134
149
132
125
39
137
134
140
135
140
126
120
135
139
131
129
143
140
133
154
152
158
134
146
149
138
152
136
142
150
136
140
145
148
61
145
145
134
44
136
152
132
140
143
episode: 6800 test result: 163.9
144
143
142
136
138
147
157
147
154
160
155
157
152
152
146
155
142
186
137
173
153
157
166
173
175
164
209
141
152
204
195
173
163
168
200
186
500
160
500
195
500
500
500
415
500
500
500
498
383
500
episode: 6850 test result: 500.0
500
497
500
485
500
500
500
500
500
500
500
488
500
500
500
500
498
500
500
500
500
500
500
500
500
500
470
500
500
500
500
500
386
500
500
500
385
400
500
396
496
438
500
465
202
227
137
175
192
162
episode: 6900 test result: 500.0
169
164
217
154
168
138
173
166
149
167
170
149
128
170
155
146
164
161
168
142
179
39
146
156
132
155
35
144
149
138
146
146
162
173
180
161
54
135
147
195
176
153
157
142
169
145
165
156
157
162
episode: 6950 test result: 363.1
148
182
17
141
165
161
182
183
194
190
211
231
189
186
170
221
176
430
500
500
500
500
500
500
500
500
500
500
500
500
496
500
500
481
500
497
458
500
500
468
450
499
466
500
500
500
500
479
500
500
episode: 7000 test result: 500.0
500
500
500
482
500
500
500
500
500
500
500
496
500
500
500
500
500
500
500
500
485
416
500
500
500
500
399
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
246
484
403
500
210
500
183
235
episode: 7050 test result: 500.0
198
139
136
170
152
170
144
163
152
172
155
154
159
177
150
166
134
159
149
137
158
168
149
160
135
147
161
174
163
188
171
158
171
163
160
163
122
177
163
154
154
193
166
170
171
166
196
216
176
164
episode: 7100 test result: 500.0
171
220
208
181
500
500
500
500
211
500
216
440
500
500
261
500
500
500
500
500
231
221
236
152
212
200
209
208
184
211
193
196
226
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
237
episode: 7150 test result: 500.0
500
481
225
229
500
191
208
186
197
147
177
194
168
168
180
178
174
165
142
152
146
151
143
77
144
171
79
137
150
145
135
148
159
178
151
159
162
161
154
169
174
170
166
162
172
160
164
148
140
159
episode: 7200 test result: 214.0
158
182
164
181
185
152
176
158
171
189
152
145
154
171
141
146
154
170
138
178
159
159
159
161
152
153
180
156
149
175
159
173
140
158
152
150
150
176
150
143
166
140
147
151
155
134
136
144
150
146
episode: 7250 test result: 178.7
90
137
149
132
155
146
154
142
127
143
122
150
129
135
132
130
151
80
105
150
120
40
161
147
147
155
132
137
133
161
126
158
152
200
97
134
168
126
132
176
126
131
122
143
153
135
114
150
144
144
episode: 7300 test result: 201.3
167
137
142
43
134
159
123
143
132
143
156
132
145
134
148
48
139
139
64
148
134
141
142
148
84
159
165
151
144
140
147
177
156
146
134
143
154
168
145
89
152
146
171
210
208
166
220
226
204
284
episode: 7350 test result: 500.0
314
206
188
500
257
283
201
189
500
239
500
211
500
500
500
500
500
260
500
266
354
215
316
306
278
211
203
500
500
274
318
264
500
232
242
284
178
500
271
231
500
500
252
202
500
500
250
282
208
325
episode: 7400 test result: 500.0
500
500
500
240
500
325
500
500
500
500
265
500
500
500
245
500
500
442
500
172
323
304
189
241
500
180
201
345
156
188
140
176
178
43
171
173
181
222
164
161
197
162
158
189
200
247
171
183
176
240
episode: 7450 test result: 500.0
198
500
242
178
196
216
249
195
199
500
492
500
254
221
221
500
194
500
170
500
330
264
155
174
195
167
182
175
174
188
236
178
185
152
181
195
163
205
347
261
500
251
225
290
287
500
269
500
500
500
episode: 7500 test result: 500.0
500
500
500
500
500
500
500
500
500
500
500
500
240
276
182
216
215
227
236
227
170
176
173
182
168
160
151
158
178
159
169
158
160
169
159
146
149
157
168
153
148
211
161
159
147
172
174
170
181
158
episode: 7550 test result: 235.9
184
165
145
166
191
171
182
160
177
179
181
173
174
170
170
174
187
214
167
178
164
203
181
189
191
147
187
172
163
187
176
217
190
186
248
193
215
252
193
500
213
225
500
214
500
239
500
197
500
213
episode: 7600 test result: 500.0
235
198
211
279
500
500
500
500
500
500
500
500
500
500
500
500
204
500
218
500
500
247
500
500
208
500
500
223
500
243
219
204
193
224
194
229
217
500
206
224
214
185
170
155
157
184
179
201
186
190
episode: 7650 test result: 306.5
181
150
184
179
145
173
172
147
184
173
170
162
141
166
172
156
171
159
177
167
185
165
151
159
202
188
196
158
203
191
189
200
162
194
194
179
500
199
210
191
500
395
378
500
500
495
366
495
500
500
episode: 7700 test result: 500.0
412
484
350
358
358
411
474
500
500
355
500
389
416
500
481
398
408
500
500
485
500
500
425
500
428
500
500
500
500
500
500
500
500
500
500
498
500
189
500
500
500
500
500
222
500
200
240
166
167
152
episode: 7750 test result: 500.0
214
157
182
168
148
176
147
166
137
141
169
144
156
166
163
142
133
145
170
143
141
166
167
182
177
188
204
500
173
500
500
500
500
500
245
500
212
500
500
500
500
500
500
500
500
500
500
500
500
500
episode: 7800 test result: 500.0
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
241
500
230
500
500
500
500
205
256
500
episode: 7850 test result: 500.0
500
500
500
207
153
307
500
192
191
188
500
500
500
218
500
500
500
500
500
500
500
500
500
500
500
500
463
500
500
500
500
187
225
260
500
500
196
417
145
202
175
193
162
154
209
169
167
139
153
190
episode: 7900 test result: 474.8
187
152
147
82
151
143
168
182
165
166
197
174
231
189
177
500
222
500
215
198
247
154
168
186
203
53
195
500
88
500
203
177
500
227
168
196
202
196
500
180
154
159
187
281
153
194
500
200
186
193
episode: 7950 test result: 500.0
225
151
160
164
181
168
170
166
184
161
190
148
191
161
184
180
176
194
208
210
183
166
144
148
500
149
182
159
177
214
163
191
197
173
191
500
187
500
180
467
500
500
500
500
179
239
500
500
500
500
episode: 8000 test result: 500.0
500
500
500
500
208
500
500
500
500
500
187
190
500
500
500
500
232
196
180
500
161
201
197
197
500
500
187
500
219
164
225
177
186
500
500
500
209
188
213
500
171
194
228
230
500
192
170
190
201
187
episode: 8050 test result: 500.0
420
186
500
500
166
500
185
203
190
500
198
184
500
150
230
197
166
231
166
500
191
179
198
500
500
183
167
500
192
500
500
195
190
201
158
177
180
167
191
161
166
155
155
172
192
166
155
186
205
500
episode: 8100 test result: 500.0
500
212
500
500
472
213
215
500
500
161
500
500
214
500
500
500
500
499
428
500
195
227
500
500
500
500
500
444
500
478
405
500
500
500
437
500
482
500
443
500
500
500
500
381
500
500
500
497
500
467
episode: 8150 test result: 500.0
494
369
500
477
486
480
490
500
473
500
500
380
500
500
333
471
481
500
456
478
500
472
500
468
492
405
481
487
475
497
500
347
500
388
455
483
500
460
450
443
467
480
489
443
468
445
334
447
463
429
episode: 8200 test result: 495.7
487
467
382
370
408
418
500
374
211
207
181
184
187
185
170
146
164
133
168
180
136
153
154
174
163
137
139
154
165
140
160
152
141
149
151
152
148
132
141
164
149
150
158
182
161
176
194
175
230
500
episode: 8250 test result: 500.0
225
206
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
495
500
500
500
500
500
500
500
500
499
500
500
500
500
500
500
500
500
500
477
500
500
500
500
500
500
500
episode: 8300 test result: 500.0
500
500
500
500
500
177
500
500
500
183
165
190
183
208
180
171
190
206
166
167
176
191
166
160
147
162
155
163
154
165
169
144
140
158
150
155
131
179
170
150
171
164
163
166
165
147
138
173
149
173
episode: 8350 test result: 190.1
148
149
163
148
145
150
166
153
142
132
150
142
170
151
161
154
163
169
175
145
171
143
151
141
153
158
174
171
147
164
165
162
179
180
175
153
160
179
184
158
195
145
179
188
217
203
204
234
500
233
episode: 8400 test result: 500.0
197
220
500
205
500
230
203
213
186
173
186
177
207
178
213
209
190
181
204
181
169
165
168
174
175
155
160
174
156
170
168
167
168
169
152
145
173
144
162
156
165
149
136
144
150
158
145
151
173
136
episode: 8450 test result: 194.4
162
142
156
159
145
136
178
167
165
166
147
154
174
172
169
192
160
160
173
153
182
152
165
190
172
137
188
155
163
180
174
170
158
180
155
185
165
156
175
186
181
174
187
207
193
193
200
182
174
197
episode: 8500 test result: 431.6
221
198
212
199
178
244
500
500
247
207
233
220
241
201
225
202
227
216
222
209
210
230
241
206
213
256
215
243
208
220
234
247
242
219
261
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
episode: 8550 test result: 500.0
500
279
234
255
232
190
181
207
201
180
171
183
192
168
190
169
162
143
170
140
167
157
168
152
177
149
148
157
175
168
168
168
157
152
144
169
144
162
146
165
164
179
149
182
165
179
170
166
151
165
episode: 8600 test result: 167.6
161
165
164
142
168
162
155
168
159
154
159
148
176
178
189
145
161
179
157
166
167
203
139
162
176
168
165
173
168
176
159
160
187
173
164
146
168
149
151
157
153
154
172
168
172
153
143
151
152
158
episode: 8650 test result: 176.4
164
158
183
169
150
166
165
178
193
174
181
182
177
164
188
194
194
217
174
157
162
182
201
186
161
197
180
206
186
168
220
168
196
198
185
147
213
158
206
180
178
175
183
167
192
171
178
204
208
160
episode: 8700 test result: 184.9
186
191
151
148
194
148
157
161
181
172
184
184
169
183
185
157
174
193
155
177
178
184
234
207
144
150
172
172
139
167
188
183
146
207
202
191
198
173
180
207
205
173
188
176
172
188
186
190
228
162
episode: 8750 test result: 212.4
165
218
174
171
180
214
178
227
171
220
232
167
173
208
236
264
224
246
210
242
211
339
239
208
295
197
286
295
278
199
305
207
246
300
313
306
235
221
279
329
500
265
500
500
299
437
412
500
294
297
episode: 8800 test result: 492.0
500
500
477
441
500
372
500
402
500
500
426
340
428
409
459
500
328
500
298
307
422
335
223
385
229
210
214
275
247
368
234
308
387
262
294
273
304
209
500
390
360
352
367
314
314
282
281
489
383
341
episode: 8850 test result: 490.0
192
260
215
245
349
258
394
331
338
406
248
301
450
417
483
462
327
445
500
500
306
500
347
335
278
310
380
279
326
247
251
355
263
245
378
238
251
194
241
203
227
223
208
184
172
217
227
258
241
177
episode: 8900 test result: 286.9
248
231
191
168
220
235
192
238
251
199
223
290
193
211
250
243
269
259
316
276
244
359
237
296
266
288
389
254
299
456
358
373
380
491
291
448
388
451
449
361
500
479
500
500
500
500
500
500
500
500
episode: 8950 test result: 500.0
500
400
361
500
445
500
371
397
478
446
462
500
452
433
351
392
403
313
223
339
289
399
482
282
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
431
500
500
500
500
500
500
episode: 9000 test result: 500.0
500
500
500
500
500
500
500
500
500
500
500
500
500
339
500
396
500
500
500
357
318
371
317
347
500
285
214
287
426
284
267
500
377
266
500
345
500
500
500
286
500
500
500
500
500
500
500
500
500
500
episode: 9050 test result: 500.0
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
459
500
500
500
500
500
469
471
451
500
380
408
500
500
478
402
393
500
349
471
500
410
270
500
500
382
403
269
258
288
489
394
248
156
episode: 9100 test result: 469.2
343
399
389
271
253
232
295
369
265
249
369
322
297
493
461
409
470
401
385
391
446
411
301
249
206
278
293
288
336
256
279
192
314
159
408
270
186
206
158
263
166
133
346
155
275
142
204
155
220
235
episode: 9150 test result: 299.9
351
179
164
318
371
304
315
283
300
271
362
500
315
490
395
357
363
500
298
373
500
358
442
500
500
439
500
413
467
490
407
500
345
262
362
500
493
351
238
355
500
385
337
308
357
336
386
490
334
358
episode: 9200 test result: 493.4
492
390
399
395
500
370
359
350
500
500
312
472
469
351
253
500
233
500
406
500
500
500
475
500
500
500
440
500
473
500
500
500
500
500
500
500
500
500
500
500
500
500
500
465
458
500
368
500
473
500
episode: 9250 test result: 500.0
500
500
500
500
500
500
500
500
498
500
471
500
500
411
500
383
474
500
472
414
488
500
354
343
333
489
393
488
500
364
497
488
500
481
500
500
500
500
499
500
500
500
450
404
500
500
500
500
500
448
episode: 9300 test result: 498.7
177
494
500
500
500
500
500
500
200
163
176
164
138
151
145
140
149
157
138
152
130
135
132
144
138
147
140
153
157
163
143
163
146
141
150
169
150
168
145
175
155
167
196
182
184
186
185
205
222
500
episode: 9350 test result: 500.0
205
207
219
192
158
222
207
189
186
207
500
500
180
500
500
500
500
500
500
500
227
215
500
500
217
500
500
500
500
231
191
500
500
235
219
500
500
244
500
216
245
238
164
222
238
181
500
210
208
500
episode: 9400 test result: 500.0
231
213
193
225
189
221
175
196
191
196
174
195
199
164
178
201
184
193
167
161
200
205
187
209
187
181
180
204
211
191
205
179
192
201
185
204
182
212
188
195
181
184
170
179
161
189
183
164
166
166
episode: 9450 test result: 183.4
162
153
164
162
172
169
159
157
159
155
159
148
138
142
160
159
156
167
148
147
143
157
154
157
156
146
153
138
144
146
163
163
149
160
159
169
164
165
159
141
160
140
147
158
145
127
148
139
136
152
episode: 9500 test result: 158.0
162
156
148
165
170
154
150
165
165
148
147
144
130
150
159
168
180
137
165
182
161
160
169
160
143
143
143
153
151
160
168
156
178
167
148
159
165
159
158
161
172
166
175
142
168
165
154
168
149
179
episode: 9550 test result: 171.2
166
149
172
155
164
172
186
175
182
149
178
175
157
164
174
147
196
157
148
183
182
176
162
178
176
178
189
175
180
205
184
186
183
202
204
185
190
205
221
195
206
231
226
203
196
185
201
192
196
186
episode: 9600 test result: 225.9
192
175
202
218
190
208
182
213
230
219
208
194
214
203
198
191
204
178
191
184
172
197
174
170
178
183
185
162
182
184
163
170
174
177
162
159
140
169
157
163
153
154
148
154
148
164
143
158
147
154
episode: 9650 test result: 162.3
150
166
143
161
157
142
141
155
135
148
147
138
128
139
123
127
143
138
137
124
129
133
112
118
117
124
122
112
102
122
96
111
95
94
101
115
113
120
127
125
120
116
120
126
120
113
112
117
133
124
episode: 9700 test result: 129.3
120
116
129
125
104
125
129
104
122
123
101
96
95
89
112
73
100
104
95
98
114
105
95
99
94
117
107
83
97
79
63
72
79
78
78
79
94
86
87
89
99
92
102
81
110
119
100
101
106
99
episode: 9750 test result: 126.4
98
130
120
125
125
108
117
126
110
118
120
124
122
140
128
142
128
136
137
150
165
134
166
148
166
157
184
167
158
166
188
177
162
175
162
168
158
203
158
151
150
169
177
184
185
160
187
194
165
168
episode: 9800 test result: 201.7
189
162
218
200
178
178
209
257
172
232
186
187
230
214
205
202
274
202
229
232
195
190
185
188
225
164
235
184
213
207
176
198
175
181
172
219
166
196
166
184
155
177
176
163
198
193
159
156
157
177
episode: 9850 test result: 190.5
157
195
174
149
148
170
167
190
178
159
150
181
193
173
192
213
194
169
201
168
183
196
224
175
162
175
177
180
172
193
183
195
197
165
182
176
175
186
210
172
206
188
184
186
248
214
251
219
203
205
episode: 9900 test result: 243.1
213
285
207
219
192
196
223
210
258
229
232
232
254
244
305
337
347
325
258
382
371
331
469
390
307
308
298
332
428
378
457
440
418
425
465
500
473
375
364
325
342
392
500
500
500
352
500
500
500
500
episode: 9950 test result: 500.0
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
370
500
500
500
500
500
500
500
500
500
500
492
500
500
500
500
338
448
500
500
427
500
500
500
500
500
258
500
500
500
472
500
388
episode: 10000 test result: 500.0
10
11
9
24
13
16
19
11
10
11
9
9
15


<ipython-input-121-d33184fd7dfc>:142: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
  torch.nn.utils.clip_grad_norm(self.actor_network.parameters(),0.5)
<ipython-input-121-d33184fd7dfc>:152: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
  torch.nn.utils.clip_grad_norm(self.value_network.parameters(),0.5)


Streaming output truncated to the last 5000 lines.
500
episode: 5100 test result: 485.0
455
441
467
500
402
500
500
500
397
500
500
452
311
357
347
500
289
357
500
500
465
500
429
500
301
500
407
500
368
353
500
500
408
374
500
500
372
356
500
500
500
500
500
500
312
424
298
500
500
500
episode: 5150 test result: 491.7
428
500
500
500
287
500
301
305
475
500
399
447
500
474
349
461
500
445
453
500
500
500
479
500
500
473
479
500
456
185
500
500
416
164
500
214
218
500
500
500
484
500
500
500
500
475
500
251
500
500
episode: 5200 test result: 220.3
500
500
182
163
170
206
216
500
500
235
500
500
178
476
500
468
492
500
500
500
473
408
475
485
481
342
467
426
500
346
500
494
375
389
500
398
380
400
500
419
500
388
497
368
349
441
330
500
500
461
episode: 5250 test result: 482.7
500
320
500
346
500
449
400
500
401
415
500
500
478
446
467
500
500
450
455
428
492
431
199
500
441
500
450
389
391
500
406
396
500
500
441
356
500
458
500
404
356
346
342
500
368
360
368
374
500
463
episode: 5300 test result: 416.7
456
326
429
396
347
500
500
353
333
365
497
386
370
500
311
314
500
365
381
500
397
398
335
350
361
392
380
346
344
367
500
500
500
427
375
371
379
349
500
500
408
326
353
376
378
403
394
424
325
375
episode: 5350 test result: 486.0
389
315
390
331
288
500
350
350
421
361
500
392
500
337
457
366
479
290
485
326
387
402
421
266
500
318
406
365
404
363
500
367
347
371
500
360
500
500
383
500
500
302
500
430
312
378
500
327
500
378
episode: 5400 test result: 494.7
445
500
500
333
399
500
411
390
500
500
448
334
500
302
381
306
382
442
500
416
340
500
375
500
351
455
468
417
352
500
500
343
491
500
500
500
412
318
475
349
492
500
439
480
470
414
500
344
428
290
episode: 5450 test result: 457.8
417
500
358
500
364
447
356
392
500
389
500
310
283
270
388
431
500
293
363
346
500
454
500
475
500
354
500
406
367
436
398
258
500
465
399
500
396
495
401
500
343
394
500
298
318
327
396
369
384
402
episode: 5500 test result: 455.7
402
370
375
500
414
355
371
386
331
500
500
389
370
500
500
500
400
349
441
500
408
291
406
500
500
453
500
368
284
302
500
316
466
396
326
500
203
209
500
500
318
430
254
279
493
323
272
500
341
358
episode: 5550 test result: 431.6
218
348
242
500
371
220
364
433
220
257
316
284
343
298
331
500
164
215
345
362
366
165
309
188
500
350
352
335
219
304
289
390
293
363
246
500
500
500
313
262
346
308
500
431
285
500
201
238
193
293
episode: 5600 test result: 471.8
415
209
297
366
500
198
237
245
249
206
215
233
392
216
274
253
230
239
311
264
238
500
249
237
351
383
365
256
229
297
489
339
344
263
328
328
315
339
284
373
388
439
332
306
393
329
221
418
366
320
episode: 5650 test result: 487.4
303
500
493
275
248
500
298
262
262
500
368
398
225
289
301
281
216
304
265
264
214
272
500
192
500
244
500
500
259
445
255
295
221
387
267
222
334
500
372
263
208
275
244
216
229
252
269
188
297
255
episode: 5700 test result: 454.2
269
232
286
300
500
261
274
287
335
265
273
273
251
281
182
274
313
327
246
223
231
243
289
219
264
234
257
174
177
269
165
200
295
238
207
213
223
251
288
275
225
219
274
295
275
294
292
213
229
297
episode: 5750 test result: 426.3
217
203
500
500
389
221
252
340
290
311
384
500
500
500
500
452
404
404
314
500
500
426
500
328
462
500
500
500
500
414
389
500
500
475
500
500
433
500
500
449
500
500
500
306
348
422
423
368
500
327
episode: 5800 test result: 481.2
447
369
500
500
388
500
500
372
430
381
344
500
302
500
314
432
351
500
423
339
425
391
496
500
420
500
330
348
394
349
500
500
500
345
345
500
500
276
354
327
394
449
345
500
370
500
318
500
314
411
episode: 5850 test result: 483.3
335
500
430
500
363
440
342
500
332
500
478
427
469
297
447
469
411
423
469
485
407
397
413
365
450
352
413
423
363
283
371
392
500
448
413
500
326
500
474
349
500
500
371
288
339
396
497
433
400
334
episode: 5900 test result: 461.9
295
500
500
401
418
432
500
500
446
334
373
331
384
441
492
500
396
465
500
388
328
459
500
432
345
500
343
424
293
339
500
500
352
500
500
352
294
500
363
363
500
423
497
346
358
392
499
500
500
500
episode: 5950 test result: 487.7
492
421
405
449
484
500
405
500
500
462
374
500
440
399
481
500
500
438
406
500
500
472
488
382
440
500
494
500
475
500
467
500
500
492
465
500
499
418
409
500
384
500
485
500
384
187
466
500
500
421
episode: 6000 test result: 229.4
500
500
486
178
500
199
500
500
500
500
500
500
500
379
500
500
378
413
500
412
500
500
172
500
500
500
500
500
500
336
427
420
469
461
500
339
326
372
500
394
368
500
500
500
383
500
188
500
176
500
episode: 6050 test result: 469.2
500
500
388
500
442
488
419
500
452
500
500
500
162
500
500
500
500
401
500
420
329
500
387
389
500
376
500
331
374
487
489
500
381
413
500
407
500
326
500
500
500
391
444
388
310
500
478
358
326
348
episode: 6100 test result: 468.9
500
461
411
500
500
473
419
500
378
167
368
350
385
170
500
425
500
156
449
437
465
500
439
500
500
500
433
500
500
485
500
500
500
369
377
500
387
409
180
396
500
500
490
475
500
403
191
392
500
500
episode: 6150 test result: 434.3
417
500
429
500
500
187
188
500
500
173
184
500
500
169
177
179
162
155
500
186
174
208
500
401
172
500
500
371
500
500
418
500
484
482
500
375
500
401
500
500
349
371
353
500
500
500
500
500
480
500
episode: 6200 test result: 483.2
363
414
500
357
500
500
367
500
500
500
376
393
500
500
470
471
496
500
500
500
489
500
500
349
391
470
453
476
500
373
500
500
500
481
442
500
500
378
500
351
500
475
475
186
500
165
500
500
173
190
episode: 6250 test result: 438.5
500
500
406
184
500
500
500
212
176
162
162
193
500
170
154
500
216
500
500
445
500
500
185
500
500
500
500
500
500
500
500
479
392
500
417
500
406
500
500
500
403
500
500
500
500
500
500
456
479
415
episode: 6300 test result: 499.8
500
389
360
380
500
472
500
500
500
500
438
470
500
484
433
342
500
500
500
500
353
500
500
500
500
500
348
500
485
500
500
442
360
412
475
480
454
456
486
500
500
500
500
446
481
483
500
500
492
438
episode: 6350 test result: 484.2
482
500
491
433
446
500
433
453
499
472
447
500
428
500
444
473
500
461
413
468
500
455
484
361
452
459
481
493
455
500
475
486
466
465
480
493
500
184
165
198
147
156
172
169
500
165
167
149
147
164
episode: 6400 test result: 166.7
145
140
148
500
500
500
494
176
500
159
500
500
500
500
500
497
500
499
500
485
464
500
470
500
470
498
160
496
500
500
500
181
194
179
164
172
146
157
146
145
136
146
135
154
147
141
152
128
127
138
episode: 6450 test result: 147.4
134
144
132
137
145
138
144
131
130
124
126
129
128
133
123
123
136
121
129
136
125
115
105
125
133
123
122
113
124
110
115
112
115
114
108
110
115
124
117
112
105
117
110
111
114
109
110
114
112
106
episode: 6500 test result: 108.8
99
117
71
106
103
99
108
112
110
101
98
101
97
103
115
108
95
98
97
86
106
48
38
82
56
58
107
55
104
104
103
63
96
60
107
102
109
108
105
103
103
105
101
108
98
101
48
106
114
113
episode: 6550 test result: 107.7
107
107
103
56
112
104
105
101
111
105
114
112
111
113
103
108
114
105
120
104
98
115
116
110
120
114
111
118
117
114
118
108
124
116
111
116
113
117
117
105
111
116
111
123
115
129
119
124
110
120
episode: 6600 test result: 119.1
115
120
112
112
113
123
127
109
117
123
126
115
112
105
113
113
126
108
128
112
117
120
115
122
113
115
128
116
130
122
122
129
125
114
128
128
115
128
121
124
131
123
132
110
129
113
118
111
116
111
episode: 6650 test result: 125.3
122
133
145
114
119
129
128
118
133
124
123
133
121
129
120
139
139
142
136
148
125
128
136
123
131
122
144
129
138
136
140
138
115
137
122
127
135
132
123
136
133
137
122
139
143
154
153
149
138
159
episode: 6700 test result: 143.6
121
131
125
126
136
158
137
147
159
175
164
165
153
150
153
155
156
163
140
165
166
156
152
164
142
166
152
150
181
169
165
171
173
151
172
147
152
152
179
156
164
169
147
156
160
172
191
158
195
194
episode: 6750 test result: 188.3
156
166
196
182
178
171
182
172
188
203
178
217
205
175
186
227
212
200
212
185
284
210
318
233
255
361
320
500
500
248
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
episode: 6800 test result: 500.0
500
500
500
500
500
500
474
500
443
500
451
500
500
500
500
500
500
500
479
500
500
338
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
471
500
500
500
500
500
418
500
306
500
258
278
episode: 6850 test result: 466.9
300
192
276
253
268
291
265
286
251
292
351
254
259
281
283
237
270
500
397
306
500
500
319
403
500
500
386
339
500
334
283
271
288
389
339
500
311
277
251
230
376
500
270
328
310
328
234
318
313
500
episode: 6900 test result: 474.2
330
312
251
238
272
244
295
368
320
420
309
374
310
251
408
242
249
221
361
253
271
337
314
312
323
303
475
275
300
374
284
440
345
302
311
268
312
296
261
232
285
314
263
214
224
202
368
335
304
214
episode: 6950 test result: 436.7
373
215
303
237
295
225
280
225
369
331
275
307
362
276
295
379
256
500
421
500
500
298
500
500
500
313
500
500
460
343
310
500
500
500
250
276
307
425
500
283
359
500
405
195
255
324
319
331
216
496
episode: 7000 test result: 447.0
342
322
296
500
333
500
303
281
399
500
500
330
500
441
215
264
321
331
500
500
402
264
407
271
339
296
223
228
346
356
286
500
500
224
500
500
500
308
241
345
500
248
253
264
500
245
289
269
289
316
episode: 7050 test result: 468.2
424
448
287
296
338
394
500
471
500
500
500
500
500
305
500
500
500
354
500
500
500
382
500
263
500
500
448
500
343
500
308
500
204
325
500
219
437
500
324
500
432
303
203
291
306
193
279
258
222
499
episode: 7100 test result: 494.9
291
337
447
318
349
266
318
251
500
500
369
454
377
284
339
500
500
351
500
381
500
478
500
500
500
500
500
500
500
500
289
326
397
275
346
328
391
328
280
422
262
239
306
342
240
338
283
229
216
218
episode: 7150 test result: 422.9
225
213
237
234
281
287
239
338
238
234
212
284
235
336
253
306
369
238
337
304
500
236
500
500
368
367
334
368
429
232
500
500
500
330
396
329
500
473
267
249
359
227
335
500
500
500
450
276
500
500
episode: 7200 test result: 487.9
500
274
500
500
500
500
470
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
500
461
500
483
471
500
500
500
466
500
500
450
500
500
500
294
410
326
500
438
437
409
379
500
451
500
416
500
episode: 7250 test result: 500.0
429
376
500
447
433
483
419
500
381
500
497
469
483
410
432
459
500
436
500
408
500
311
472
455
391
435
500
500
500
500
456
444
463
490
471
498
474
500
500
410
500
500
420
500
411
224
500
170
500
500
episode: 7300 test result: 224.3
210
231
500
500
147
197
184
206
255
500
268
177
500
500
500
500
500
500
500
500
488
500
500
484
500
390
500
448
186
494
495
486
476
464
500
500
500
500
500
500
500
498
495
483
500
495
489
467
494
489
episode: 7350 test result: 500.0
500
492
445
460
500
495
481
462
479
481
500
440
336
320
315
443
465
485
388
459
475
460
485
490
500
485
500
484
500
500
166
188
500
500
173
184
253
180
149
173
216
167
154
161
162
172
140
172
169
179
episode: 7400 test result: 196.0
155
161
177
165
188
158
169
184
194
175
187
186
174
208
179
207
176
159
206
197
500
222
500
500
500
500
500
500
500
500
500
466
500
431
490
500
500
486
490
494
455
407
478
500
499
499
495
459
460
500
episode: 7450 test result: 391.4
397
474
335
478
390
477
464
466
374
456
500
388
476
392
191
479
500
355
161
432
462
497
369
477
432
475
377
480
481
201
491
465
474
380
500
462
363
492
343
481
448
323
474
429
345
500
410
454
494
457
episode: 7500 test result: 474.0
380
493
443
437
500
489
476
495
500
470
417
477
462
494
500
469
500
500
481
500
500
491
500
500
163
500
238
174
186
190
198
500
179
210
211
500
222
232
500
500
500
183
500
500
500
500
500
500
500
500
episode: 7550 test result: 454.4
500
500
497
500
473
500
500
496
500
479
482
500
461
471
492
496
454
500
500
497
471
468
487
500
462
464
476
355
500
334
500
466
466
441
445
497
479
467
463
482
360
349
160
173
500
182
487
190
368
184
episode: 7600 test result: 265.1
500
500
500
342
486
475
484
486
494
460
481
495
475
480
492
341
430
443
439
421
450
473
317
462
445
475
431
463
426
480
446
336
443
475
461
463
433
438
330
438
438
493
459
460
310
469
480
488
457
466
episode: 7650 test result: 388.9
468
464
340
479
341
447
405
453
302
169
317
314
434
439
442
455
425
425
306
423
322
305
321
336
392
277
311
317
278
327
288
269
266
298
289
292
293
280
280
308
287
152
412
292
292
331
269
276
184
295
episode: 7700 test result: 257.9
169
339
148
155
146
439
153
315
155
284
162
163
305
146
149
328
340
148
312
145
162
337
312
155
435
326
176
313
158
162
351
365
332
444
278
302
450
354
316
315
319
303
433
327
378
335
419
441
434
433
episode: 7750 test result: 447.8
419
429
449
416
423
422
307
427
441
349
418
426
435
437
421
370
392
411
425
405
411
426
437
442
418
437
427
395
468
434
455
464
416
474
463
459
453
446
424
445
474
408
466
442
487
500
500
473
500
443
episode: 7800 test result: 489.7
473
468
483
322
476
493
472
500
500
318
499
500
480
500
500
365
174
500
500
500
500
179
500
185
423
500
500
500
500
500
500
181
500
500
500
500
151
190
204
192
215
188
227
500
500
223
188
200
198
229
episode: 7850 test result: 252.8
500
500
500
500
500
209
222
500
500
500
500
190
500
500
500
500
500
500
500
500
500
500
500
495
500
500
449
500
452
488
500
495
478
500
500
487
500
500
499
500
461
500
364
500
453
468
494
450
420
480
episode: 7900 test result: 474.4
500
500
500
485
477
456
472
474
481
500
490
500
160
199
163
500
169
500
500
202
500
500
159
500
191
500
500
493
214
500
500
500
500
167
200
500
183
500
184
500
190
500
500
435
500
500
500
500
207
500
episode: 7950 test result: 195.3
164
193
500
243
180
500
179
183
157
173
172
195
500
500
500
500
500
203
500
453
394
500
500
450
500
500
500
500
500
500
500
500
500
467
500
497
470
478
500
500
500
357
320
493
486
456
332
500
500
169
episode: 8000 test result: 313.8
195
182
472
171
319
488
360
470
500
482
344
500
487
500
486
360
500
350
495
500
500
500
500
347
460
482
500
329
346
331
500
500
500
361
500
339
493
500
313
356
499
477
500
487
299
476
500
345
492
441
episode: 8050 test result: 475.8
475
329
475
492
293
432
320
306
446
451
435
445
314
329
302
477
470
436
442
420
333
454
474
437
443
466
286
500
460
338
315
419
308
466
450
440
441
458
327
331
162
158
453
465
452
479
154
184
177
164
episode: 8100 test result: 329.3
148
461
460
495
498
485
470
496
481
485
489
458
365
167
450
392
469
368
459
352
320
333
449
412
318
434
424
270
449
403
288
421
415
399
408
408
413
421
410
411
426
410
399
406
409
426
385
398
390
383
episode: 8150 test result: 409.2
384
399
394
243
368
365
373
416
392
385
360
367
397
393
385
371
386
243
397
256
374
362
247
277
272
279
280
373
347
257
242
274
295
357
377
264
370
389
381
363
345
270
368
377
265
370
372
376
356
383
episode: 8200 test result: 396.0
351
373
373
355
362
380
371
354
369
364
370
265
255
386
253
267
393
386
379
403
396
311
294
203
313
429
415
309
420
293
416
301
431
412
394
313
426
444
413
425
430
414
296
406
359
424
422
283
408
414
episode: 8250 test result: 423.8
435
423
401
412
389
426
433
288
421
400
424
433
435
404
434
417
404
414
393
403
317
428
179
305
422
290
331
197
424
187
182
425
294
161
448
136
158
139
317
447
322
472
469
476
163
474
153
186
158
160
episode: 8300 test result: 198.7
154
159
154
172
153
186
154
160
148
158
163
148
184
159
173
166
157
492
163
163
500
160
467
182
464
161
461
343
475
478
464
452
353
411
460
328
315
171
314
319
455
463
460
336
439
499
460
461
467
457
episode: 8350 test result: 462.7
444
326
339
500
442
171
350
329
440
500
351
491
499
465
485
476
475
484
448
485
342
458
431
500
458
485
314
491
483
500
478
500
500
295
491
466
500
500
500
490
340
316
344
500
496
321
493
321
385
320
episode: 8400 test result: 429.8
493
392
483
472
316
500
338
492
313
330
181
328
323
323
471
333
465
478
470
452
456
478
159
463
158
371
162
476
496
165
467
160
165
477
176
154
473
470
463
461
445
467
168
167
466
457
418
342
461
437
episode: 8450 test result: 448.5
338
369
306
433
157
424
405
442
419
304
419
408
437
195
391
407
321
398
382
400
332
418
417
403
406
392
378
415
377
416
379
402
401
398
385
417
421
403
409
394
416
388
402
276
381
401
405
397
399
373
episode: 8500 test result: 404.2
412
420
409
388
383
389
391
285
281
402
262
148
156
386
279
394
166
388
381
393
269
409
393
159
167
395
414
389
397
154
395
400
383
402
409
430
392
400
276
411
281
261
421
418
259
428
408
308
427
410
episode: 8550 test result: 422.3
402
408
399
408
402
409
432
424
381
415
414
403
428
309
398
150
374
388
391
390
399
376
313
416
436
281
387
397
415
168
410
413
171
397
429
420
397
415
407
413
406
308
423
434
437
423
420
425
438
283
episode: 8600 test result: 438.2
415
436
427
304
429
257
286
405
428
426
293
277
413
308
414
288
414
443
416
414
376
405
402
411
420
403
430
426
281
412
299
321
334
164
290
396
406
278
291
299
377
396
382
271
282
311
396
311
270
416
episode: 8650 test result: 409.9
286
405
293
297
293
291
313
433
407
277
303
428
422
432
430
444
453
421
429
438
451
428
443
448
458
437
458
461
448
440
460
461
434
440
462
450
458
474
474
494
447
468
489
480
141
160
187
182
172
160
episode: 8700 test result: 241.0
486
165
490
488
480
488
459
466
473
463
475
484
346
453
478
467
474
469
471
460
461
462
162
472
453
479
474
188
479
165
178
454
163
476
477
481
478
480
345
349
487
344
463
462
439
436
466
438
473
474
episode: 8750 test result: 470.2
447
458
328
421
428
307
467
438
476
460
447
455
445
461
437
469
479
298
491
446
450
432
434
457
465
455
424
439
472
461
445
457
465
446
442
460
486
468
453
452
425
471
467
471
471
451
456
469
460
326
episode: 8800 test result: 453.2
433
484
477
449
453
463
338
468
197
432
446
459
302
157
325
467
450
467
476
335
484
457
466
150
493
169
478
159
152
169
176
154
151
153
166
153
147
138
142
139
142
146
139
136
145
140
136
120
139
140
episode: 8850 test result: 134.3
144
139
122
127
130
133
132
137
122
129
124
123
118
131
125
123
118
118
116
118
122
122
117
122
114
130
125
122
127
115
107
104
112
111
107
109
115
109
105
103
106
103
104
110
105
103
101
102
108
101
episode: 8900 test result: 105.2
111
102
113
104
105
102
103
101
102
101
99
102
99
99
105
100
100
101
103
98
101
97
102
97
106
47
101
97
106
98
37
95
96
103
93
97
98
101
96
40
100
45
43
93
99
96
35
101
100
101
episode: 8950 test result: 99.2
31
95
33
99
94
97
96
98
96
97
100
94
93
99
40
96
95
96
100
96
96
92
94
94
35
92
99
99
96
93
36
88
31
100
36
49
86
96
84
93
30
97
100
95
96
97
100
95
95
97
episode: 9000 test result: 99.7
95
95
96
94
98
96
95
96
92
93
92
31
92
18
96
99
34
98
95
98
99
97
99
100
99
90
93
94
95
98
100
92
93
93
96
93
92
97
28
92
94
90
25
43
89
92
90
28
98
36
episode: 9050 test result: 97.5
86
36
37
43
98
93
106
95
19
96
100
96
103
106
98
93
99
100
99
97
99
98
102
99
103
98
98
100
107
104
105
101
102
98
106
101
101
98
93
102
95
98
96
103
99
103
102
93
32
93
episode: 9100 test result: 102.7
100
94
104
95
97
102
95
96
99
108
102
101
98
103
106
98
103
105
99
100
104
105
92
96
91
98
94
98
95
101
99
98
104
101
96
94
101
98
96
97
101
97
100
105
103
104
98
99
97
104
episode: 9150 test result: 100.4
106
104
97
97
100
100
100
97
102
94
100
103
96
93
92
94
99
100
97
96
101
100
95
94
101
96
103
97
102
95
95
96
95
94
105
97
99
100
100
99
97
95
104
95
101
94
101
94
95
97
episode: 9200 test result: 98.2
94
101
97
97
96
96
100
98
101
98
98
99
99
92
96
101
98
100
100
31
34
97
98
94
90
98
50
87
92
96
94
94
95
100
43
96
47
98
96
94
96
94
102
94
97
100
98
97
102
100
episode: 9250 test result: 99.3
93
96
96
97
32
101
98
97
76
95
32
46
82
97
92
97
90
79
88
87
94
96
99
97
102
100
95
92
94
95
94
93
99
95
95
96
97
101
99
97
101
94
87
95
102
99
96
98
100
95
episode: 9300 test result: 98.4
96
98
102
42
98
95
98
91
102
102
97
102
40
97
92
102
98
92
99
92
27
100
39
41
97
83
53
87
98
55
40
102
97
92
98
97
102
100
93
92
101
93
98
107
94
94
96
98
99
104
episode: 9350 test result: 99.4
98
96
100
96
94
30
42
44
98
94
94
100
97
95
97
99
96
99
94
21
97
100
98
98
100
103
96
101
92
105
100
103
101
103
99
107
46
106
103
98
106
102
101
105
109
99
100
106
102
99
episode: 9400 test result: 100.4
96
103
99
108
93
103
108
101
102
109
103
105
98
101
101
112
107
112
116
116
108
97
110
102
110
105
100
100
99
104
99
101
100
109
105
101
97
102
103
109
108
103
107
107
102
105
107
101
101
107
episode: 9450 test result: 104.6
102
108
105
105
103
100
100
98
102
96
101
98
97
95
98
100
105
99
102
105
97
105
103
102
101
98
104
97
97
96
107
104
103
98
99
97
99
108
95
100
98
99
93
101
101
98
93
49
102
92
episode: 9500 test result: 97.3
97
92
96
99
96
96
99
98
97
36
95
96
98
96
99
91
37
92
95
33
37
35
40
99
96
92
98
99
50
48
95
96
103
97
98
96
99
102
105
100
103
96
42
103
107
99
94
105
98
105
episode: 9550 test result: 100.8
100
96
92
98
100
108
96
101
102
100
100
98
95
100
99
93
104
96
96
101
98
93
100
99
101
101
89
96
102
104
102
102
104
98
99
99
99
93
98
97
93
97
107
100
107
95
101
94
103
97
episode: 9600 test result: 102.6
103
98
105
113
99
105
98
107
100
108
106
99
102
110
102
97
105
106
106
101
100
94
100
100
98
104
99
103
103
107
99
101
99
102
112
96
95
103
103
106
100
105
98
100
97
107
102
102
104
96
episode: 9650 test result: 103.4
105
108
104
103
98
101
102
106
100
103
104
100
97
99
95
94
97
90
98
94
90
100
95
71
98
100
101
94
92
86
97
100
95
44
103
99
93
98
98
103
104
101
97
100
99
100
97
97
102
103
episode: 9700 test result: 98.9
102
105
97
104
97
104
97
96
95
101
98
99
101
103
106
101
103
106
100
101
91
97
95
99
101
97
105
106
103
100
97
104
103
104
103
99
107
102
103
101
98
99
103
97
101
100
99
102
98
95
episode: 9750 test result: 99.5
103
97
92
100
97
100
102
100
103
38
93
94
102
97
94
99
97
96
91
39
97
99
96
101
96
96
94
86
96
95
99
94
94
60
98
103
92
98
95
95
94
91
92
102
93
90
101
97
42
95
episode: 9800 test result: 97.0
93
47
97
96
54
95
95
46
65
91
97
55
94
95
95
90
96
100
96
103
92
98
94
100
97
98
92
103
102
97
99
104
95
98
98
98
99
102
105
96
103
104
108
104
104
99
103
101
107
97
episode: 9850 test result: 103.5
104
104
101
104
111
110
101
102
114
102
106
108
108
110
115
105
112
106
111
109
101
116
115
109
100
112
113
106
110
107
118
114
108
106
117
112
109
111
106
116
123
113
110
109
115
108
111
115
109
106
episode: 9900 test result: 116.4
116
117
122
124
120
113
112
123
107
116
115
119
110
123
106
114
113
133
113
113
117
114
113
121
119
114
112
118
124
116
117
116
119
117
126
127
133
118
123
127
123
135
138
135
129
129
130
129
129
131
episode: 9950 test result: 126.2
112
115
116
115
117
122
142
126
108
117
117
122
129
117
126
115
123
117
131
125
117
130
124
126
119
120
121
130
124
127
121
126
127
121
137
128
130
123
135
135
123
129
143
141
140
124
142
145
139
119
episode: 10000 test result: 142.2

png

# Comparison experiment: train the LSTM-based and the MLP-based A2C agents on
# CartPole-v1, one after the other, and draw both test curves on one figure.
# LSTM agent. A2CLSTM defaults to STATE_DIM=4-1, i.e. one state dimension is
# dropped from the observation.
ac2lstmagent = A2CLSTM('CartPole-v1')
ac2lstmagent.train()
# x = agent.steps, y = agent.test_results. Both lists start empty in
# A2CLSTM.__init__; presumably train() appends to them at every periodic
# evaluation -- TODO confirm against train().
plt.plot(ac2lstmagent.steps,ac2lstmagent.test_results,label='rl+lstm')

# MLP agent (A2CMLP is defined elsewhere in this file), trained on the same
# environment id and plotted from the same two attributes.
ac2mlp = A2CMLP('CartPole-v1')
ac2mlp.train()
plt.plot(ac2mlp.steps,ac2mlp.test_results,label='rl+mlp')

# The legend picks up the label= strings passed to the two plot calls above.
plt.legend()
plt.show()
/usr/local/lib/python3.10/dist-packages/gym/core.py:317: DeprecationWarning: WARN: Initializing wrapper in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.
  deprecation(
/usr/local/lib/python3.10/dist-packages/gym/wrappers/step_api_compatibility.py:39: DeprecationWarning: WARN: Initializing environment in old step API which returns one bool instead of two. It is recommended to set `new_step_api=True` to use new step API. This will be the default behaviour in future.
  deprecation(
<ipython-input-4-b08b303cb0d5>:119: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
  torch.nn.utils.clip_grad_norm(self.actor_network.parameters(),0.5)
<ipython-input-4-b08b303cb0d5>:132: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
  torch.nn.utils.clip_grad_norm(self.value_network.parameters(),0.5)


episode: 50 test result: 23.6
episode: 100 test result: 29.0
episode: 150 test result: 28.6
episode: 200 test result: 56.2
episode: 250 test result: 110.4
episode: 300 test result: 111.5
episode: 350 test result: 123.4
episode: 400 test result: 171.3
episode: 450 test result: 132.5
episode: 500 test result: 88.6
episode: 550 test result: 154.1
episode: 600 test result: 173.6
episode: 650 test result: 125.3
episode: 700 test result: 44.1
episode: 750 test result: 20.9
episode: 800 test result: 31.6
episode: 850 test result: 91.5
episode: 900 test result: 55.7
episode: 950 test result: 72.1
episode: 1000 test result: 176.0
episode: 1050 test result: 137.7
episode: 1100 test result: 124.2
episode: 1150 test result: 128.5
episode: 1200 test result: 80.8
episode: 1250 test result: 143.4
episode: 1300 test result: 157.6
episode: 1350 test result: 116.0
episode: 1400 test result: 34.1
episode: 1450 test result: 50.2
episode: 1500 test result: 197.5
episode: 1550 test result: 137.0
episode: 1600 test result: 256.0
episode: 1650 test result: 134.8
episode: 1700 test result: 132.3
episode: 1750 test result: 153.3
episode: 1800 test result: 141.6
episode: 1850 test result: 235.8
episode: 1900 test result: 168.0
episode: 1950 test result: 421.2
episode: 2000 test result: 381.3
episode: 2050 test result: 337.3
episode: 2100 test result: 360.8
episode: 2150 test result: 144.5
episode: 2200 test result: 422.2
episode: 2250 test result: 377.6
episode: 2300 test result: 366.9
episode: 2350 test result: 212.4
episode: 2400 test result: 183.5
episode: 2450 test result: 408.1
episode: 2500 test result: 215.6
episode: 2550 test result: 115.3
episode: 2600 test result: 290.7
episode: 2650 test result: 117.1
episode: 2700 test result: 266.1
episode: 2750 test result: 390.0
episode: 2800 test result: 272.0
episode: 2850 test result: 266.5
episode: 2900 test result: 335.7
episode: 2950 test result: 383.1
episode: 3000 test result: 417.9
episode: 3050 test result: 500.0
episode: 3100 test result: 255.5
episode: 3150 test result: 182.7
episode: 3200 test result: 317.0
episode: 3250 test result: 145.1
episode: 3300 test result: 150.8
episode: 3350 test result: 138.8
episode: 3400 test result: 401.5
episode: 3450 test result: 171.2
episode: 3500 test result: 126.0
episode: 3550 test result: 40.8
episode: 3600 test result: 22.4
episode: 3650 test result: 67.2
episode: 3700 test result: 180.9
episode: 3750 test result: 182.4
episode: 3800 test result: 285.2
episode: 3850 test result: 330.6
episode: 3900 test result: 318.7
episode: 3950 test result: 283.1
episode: 4000 test result: 497.5
episode: 4050 test result: 500.0
episode: 4100 test result: 500.0
episode: 4150 test result: 237.4
episode: 4200 test result: 213.0
episode: 4250 test result: 378.3
episode: 4300 test result: 431.5
episode: 4350 test result: 460.3
episode: 4400 test result: 170.9
episode: 4450 test result: 415.5
episode: 4500 test result: 167.6
episode: 4550 test result: 488.7
episode: 4600 test result: 487.7
episode: 4650 test result: 472.1
episode: 4700 test result: 472.0
episode: 4750 test result: 491.2
episode: 4800 test result: 456.6
episode: 4850 test result: 500.0
episode: 4900 test result: 500.0
episode: 4950 test result: 500.0
episode: 5000 test result: 500.0
episode: 5050 test result: 494.0
episode: 5100 test result: 360.9
episode: 5150 test result: 347.7
episode: 5200 test result: 450.3
episode: 5250 test result: 488.9
episode: 5300 test result: 434.0
episode: 5350 test result: 417.4
episode: 5400 test result: 484.8
episode: 5450 test result: 197.9
episode: 5500 test result: 393.6
episode: 5550 test result: 407.5
episode: 5600 test result: 355.2
episode: 5650 test result: 401.7
episode: 5700 test result: 324.4
episode: 5750 test result: 177.0
episode: 5800 test result: 415.1
episode: 5850 test result: 362.5
episode: 5900 test result: 318.9
episode: 5950 test result: 261.7
episode: 6000 test result: 218.1
episode: 6050 test result: 209.0
episode: 6100 test result: 163.6
episode: 6150 test result: 303.0
episode: 6200 test result: 307.1
episode: 6250 test result: 299.7
episode: 6300 test result: 274.3
episode: 6350 test result: 284.0
episode: 6400 test result: 270.9
episode: 6450 test result: 299.7
episode: 6500 test result: 378.1
episode: 6550 test result: 269.3
episode: 6600 test result: 349.9
episode: 6650 test result: 351.2
episode: 6700 test result: 242.3
episode: 6750 test result: 290.3
episode: 6800 test result: 257.1
episode: 6850 test result: 119.8
episode: 6900 test result: 260.7
episode: 6950 test result: 268.9
episode: 7000 test result: 273.1
episode: 7050 test result: 263.8
episode: 7100 test result: 330.0
episode: 7150 test result: 339.1
episode: 7200 test result: 479.6
episode: 7250 test result: 414.9
episode: 7300 test result: 438.7
episode: 7350 test result: 381.3
episode: 7400 test result: 415.4
episode: 7450 test result: 286.5
episode: 7500 test result: 415.3
episode: 7550 test result: 363.1
episode: 7600 test result: 341.3
episode: 7650 test result: 324.5
episode: 7700 test result: 330.3
episode: 7750 test result: 397.7
episode: 7800 test result: 358.9
episode: 7850 test result: 500.0
episode: 7900 test result: 500.0
episode: 7950 test result: 500.0
episode: 8000 test result: 498.9
episode: 8050 test result: 307.9
episode: 8100 test result: 307.2
episode: 8150 test result: 500.0
episode: 8200 test result: 472.3
episode: 8250 test result: 463.0
episode: 8300 test result: 497.1
episode: 8350 test result: 368.9
episode: 8400 test result: 319.0
episode: 8450 test result: 332.2
episode: 8500 test result: 486.2
episode: 8550 test result: 409.5
episode: 8600 test result: 457.2
episode: 8650 test result: 487.8
episode: 8700 test result: 378.6
episode: 8750 test result: 380.0
episode: 8800 test result: 308.3
episode: 8850 test result: 149.5
episode: 8900 test result: 169.4
episode: 8950 test result: 500.0
episode: 9000 test result: 500.0
episode: 9050 test result: 500.0
episode: 9100 test result: 496.0
episode: 9150 test result: 500.0
episode: 9200 test result: 500.0
episode: 9250 test result: 500.0
episode: 9300 test result: 500.0
episode: 9350 test result: 250.9
episode: 9400 test result: 500.0
episode: 9450 test result: 500.0
episode: 9500 test result: 500.0
episode: 9550 test result: 500.0
episode: 9600 test result: 500.0
episode: 9650 test result: 493.5
episode: 9700 test result: 391.9
episode: 9750 test result: 393.2
episode: 9800 test result: 416.6
episode: 9850 test result: 448.1
episode: 9900 test result: 494.3
episode: 9950 test result: 478.1
episode: 10000 test result: 486.9


<ipython-input-5-25fc2feeea79>:142: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
  torch.nn.utils.clip_grad_norm(self.actor_network.parameters(),0.5)
<ipython-input-5-25fc2feeea79>:152: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
  torch.nn.utils.clip_grad_norm(self.value_network.parameters(),0.5)


episode: 50 test result: 9.3
episode: 100 test result: 9.0
episode: 150 test result: 9.5
episode: 200 test result: 9.2
episode: 250 test result: 12.0
episode: 300 test result: 72.6
episode: 350 test result: 88.8
episode: 400 test result: 152.1
episode: 450 test result: 134.7
episode: 500 test result: 160.6
episode: 550 test result: 205.4
episode: 600 test result: 218.0
episode: 650 test result: 171.9
episode: 700 test result: 183.3
episode: 750 test result: 319.7
episode: 800 test result: 466.3
episode: 850 test result: 369.9
episode: 900 test result: 435.8
episode: 950 test result: 321.6
episode: 1000 test result: 303.9
episode: 1050 test result: 237.7
episode: 1100 test result: 377.2
episode: 1150 test result: 377.8
episode: 1200 test result: 219.9
episode: 1250 test result: 425.1
episode: 1300 test result: 443.1
episode: 1350 test result: 421.2
episode: 1400 test result: 409.2
episode: 1450 test result: 406.4
episode: 1500 test result: 479.8
episode: 1550 test result: 500.0
episode: 1600 test result: 389.8
episode: 1650 test result: 225.7
episode: 1700 test result: 500.0
episode: 1750 test result: 500.0
episode: 1800 test result: 482.5
episode: 1850 test result: 494.1
episode: 1900 test result: 180.9
episode: 1950 test result: 500.0
episode: 2000 test result: 500.0
episode: 2050 test result: 500.0
episode: 2100 test result: 500.0
episode: 2150 test result: 500.0
episode: 2200 test result: 500.0
episode: 2250 test result: 500.0
episode: 2300 test result: 482.7
episode: 2350 test result: 500.0
episode: 2400 test result: 500.0
episode: 2450 test result: 500.0
episode: 2500 test result: 500.0
episode: 2550 test result: 500.0
episode: 2600 test result: 500.0
episode: 2650 test result: 500.0
episode: 2700 test result: 500.0
episode: 2750 test result: 284.1
episode: 2800 test result: 500.0
episode: 2850 test result: 500.0
episode: 2900 test result: 500.0
episode: 2950 test result: 500.0
episode: 3000 test result: 481.2
episode: 3050 test result: 500.0
episode: 3100 test result: 500.0
episode: 3150 test result: 500.0
episode: 3200 test result: 500.0
episode: 3250 test result: 454.4
episode: 3300 test result: 500.0
episode: 3350 test result: 500.0
episode: 3400 test result: 498.3
episode: 3450 test result: 500.0
episode: 3500 test result: 499.1
episode: 3550 test result: 457.0
episode: 3600 test result: 435.8
episode: 3650 test result: 463.1
episode: 3700 test result: 498.1
episode: 3750 test result: 495.8
episode: 3800 test result: 409.6
episode: 3850 test result: 452.9
episode: 3900 test result: 498.9
episode: 3950 test result: 474.2
episode: 4000 test result: 198.8
episode: 4050 test result: 372.0
episode: 4100 test result: 365.5
episode: 4150 test result: 464.5
episode: 4200 test result: 465.6
episode: 4250 test result: 121.4
episode: 4300 test result: 115.8
episode: 4350 test result: 121.7
episode: 4400 test result: 153.8
episode: 4450 test result: 373.1
episode: 4500 test result: 423.8
episode: 4550 test result: 313.6
episode: 4600 test result: 467.7
episode: 4650 test result: 383.4
episode: 4700 test result: 388.5
episode: 4750 test result: 339.6
episode: 4800 test result: 500.0
episode: 4850 test result: 500.0
episode: 4900 test result: 500.0
episode: 4950 test result: 500.0
episode: 5000 test result: 500.0
episode: 5050 test result: 500.0
episode: 5100 test result: 500.0
episode: 5150 test result: 500.0
episode: 5200 test result: 499.3
episode: 5250 test result: 500.0
episode: 5300 test result: 500.0
episode: 5350 test result: 500.0
episode: 5400 test result: 408.6
episode: 5450 test result: 500.0
episode: 5500 test result: 500.0
episode: 5550 test result: 500.0
episode: 5600 test result: 500.0
episode: 5650 test result: 500.0
episode: 5700 test result: 500.0
episode: 5750 test result: 500.0
episode: 5800 test result: 500.0
episode: 5850 test result: 419.4
episode: 5900 test result: 500.0
episode: 5950 test result: 500.0
episode: 6000 test result: 500.0
episode: 6050 test result: 500.0
episode: 6100 test result: 500.0
episode: 6150 test result: 152.7
episode: 6200 test result: 500.0
episode: 6250 test result: 500.0
episode: 6300 test result: 446.4
episode: 6350 test result: 500.0
episode: 6400 test result: 218.4
episode: 6450 test result: 500.0
episode: 6500 test result: 500.0
episode: 6550 test result: 472.5
episode: 6600 test result: 500.0
episode: 6650 test result: 500.0
episode: 6700 test result: 268.4
episode: 6750 test result: 463.1
episode: 6800 test result: 456.6
episode: 6850 test result: 440.8
episode: 6900 test result: 487.9
episode: 6950 test result: 500.0
episode: 7000 test result: 500.0
episode: 7050 test result: 500.0
episode: 7100 test result: 500.0
episode: 7150 test result: 489.1
episode: 7200 test result: 500.0
episode: 7250 test result: 435.8
episode: 7300 test result: 471.8
episode: 7350 test result: 500.0
episode: 7400 test result: 489.5
episode: 7450 test result: 500.0
episode: 7500 test result: 500.0
episode: 7550 test result: 430.6
episode: 7600 test result: 471.0
episode: 7650 test result: 439.8
episode: 7700 test result: 495.6
episode: 7750 test result: 302.7
episode: 7800 test result: 147.5
episode: 7850 test result: 197.7
episode: 7900 test result: 254.5
episode: 7950 test result: 500.0
episode: 8000 test result: 500.0
episode: 8050 test result: 500.0
episode: 8100 test result: 498.8
episode: 8150 test result: 500.0
episode: 8200 test result: 287.3
episode: 8250 test result: 500.0
episode: 8300 test result: 500.0
episode: 8350 test result: 500.0
episode: 8400 test result: 498.9
episode: 8450 test result: 500.0
episode: 8500 test result: 500.0
episode: 8550 test result: 488.8
episode: 8600 test result: 476.0
episode: 8650 test result: 490.9
episode: 8700 test result: 160.9
episode: 8750 test result: 500.0
episode: 8800 test result: 500.0
episode: 8850 test result: 214.8
episode: 8900 test result: 323.2
episode: 8950 test result: 183.6
episode: 9000 test result: 500.0
episode: 9050 test result: 500.0
episode: 9100 test result: 500.0
episode: 9150 test result: 500.0
episode: 9200 test result: 500.0
episode: 9250 test result: 500.0
episode: 9300 test result: 500.0
episode: 9350 test result: 500.0
episode: 9400 test result: 500.0
episode: 9450 test result: 470.9
episode: 9500 test result: 498.7
episode: 9550 test result: 437.7
episode: 9600 test result: 500.0
episode: 9650 test result: 500.0
episode: 9700 test result: 489.8
episode: 9750 test result: 463.0
episode: 9800 test result: 304.3
episode: 9850 test result: 141.5
episode: 9900 test result: 210.8
episode: 9950 test result: 463.3
episode: 10000 test result: 413.5

png

def _train_and_plot(agent, label):
    """Train `agent`, then draw its recorded test curve under `label`.

    The curve plots the agent's `test_results` against its `steps`,
    both of which are read only after `train()` has returned.
    """
    agent.train()
    plt.plot(agent.steps, agent.test_results, label=label)


# Longer comparison run: both agents get the same environment name and the
# same 20000-episode budget. Each agent is bound to its global name before
# training starts, so it remains inspectable if the run is interrupted.
ac2lstmagent = A2CLSTM('CartPole-v1', NUM_EPISODE=20000)
_train_and_plot(ac2lstmagent, 'rl+lstm')

ac2mlp = A2CMLP('CartPole-v1', NUM_EPISODE=20000)
_train_and_plot(ac2mlp, 'rl+mlp')

# Both curves share one figure; the legend tells them apart.
plt.legend()
plt.show()
<ipython-input-11-05e50d5a6aa9>:119: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
  torch.nn.utils.clip_grad_norm(self.actor_network.parameters(),0.5)
<ipython-input-11-05e50d5a6aa9>:132: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
  torch.nn.utils.clip_grad_norm(self.value_network.parameters(),0.5)


episode: 50 test result: 9.9
episode: 100 test result: 9.8
episode: 150 test result: 17.4
episode: 200 test result: 57.1
episode: 250 test result: 87.5
episode: 300 test result: 140.4
episode: 350 test result: 127.0
episode: 400 test result: 74.1
episode: 450 test result: 147.7
episode: 500 test result: 134.4
episode: 550 test result: 162.6
episode: 600 test result: 181.2
episode: 650 test result: 184.4
episode: 700 test result: 207.9
episode: 750 test result: 160.8
episode: 800 test result: 287.0
episode: 850 test result: 118.0
episode: 900 test result: 174.0
episode: 950 test result: 83.4
episode: 1000 test result: 201.3
episode: 1050 test result: 184.6
episode: 1100 test result: 377.6
episode: 1150 test result: 365.7
episode: 1200 test result: 347.6
episode: 1250 test result: 457.2
episode: 1300 test result: 209.9
episode: 1350 test result: 166.2
episode: 1400 test result: 431.5
episode: 1450 test result: 379.9
episode: 1500 test result: 230.8
episode: 1550 test result: 296.6
episode: 1600 test result: 279.1
episode: 1650 test result: 331.5
episode: 1700 test result: 139.7
episode: 1750 test result: 442.3
episode: 1800 test result: 154.8
episode: 1850 test result: 190.3
episode: 1900 test result: 436.5
episode: 1950 test result: 350.8
episode: 2000 test result: 187.2
episode: 2050 test result: 331.4
episode: 2100 test result: 375.0
episode: 2150 test result: 351.4
episode: 2200 test result: 434.2
episode: 2250 test result: 466.3
episode: 2300 test result: 467.8
episode: 2350 test result: 500.0
episode: 2400 test result: 500.0
episode: 2450 test result: 252.2
episode: 2500 test result: 82.0
episode: 2550 test result: 305.1
episode: 2600 test result: 336.4
episode: 2650 test result: 482.1
episode: 2700 test result: 266.2
episode: 2750 test result: 376.5
episode: 2800 test result: 286.4
episode: 2850 test result: 356.9
episode: 2900 test result: 323.1
episode: 2950 test result: 276.9
episode: 3000 test result: 325.8
episode: 3050 test result: 261.4
episode: 3100 test result: 355.3
episode: 3150 test result: 442.9
episode: 3200 test result: 216.3
episode: 3250 test result: 353.8
episode: 3300 test result: 308.0
episode: 3350 test result: 320.2
episode: 3400 test result: 194.2
episode: 3450 test result: 114.6
episode: 3500 test result: 466.8
episode: 3550 test result: 426.4
episode: 3600 test result: 152.4
episode: 3650 test result: 134.4
episode: 3700 test result: 175.6
episode: 3750 test result: 236.4
episode: 3800 test result: 500.0
episode: 3850 test result: 500.0
episode: 3900 test result: 500.0
episode: 3950 test result: 454.5
episode: 4000 test result: 157.1
episode: 4050 test result: 192.0
episode: 4100 test result: 175.7
episode: 4150 test result: 246.7
episode: 4200 test result: 500.0
episode: 4250 test result: 160.9
episode: 4300 test result: 500.0
episode: 4350 test result: 500.0
episode: 4400 test result: 500.0
episode: 4450 test result: 500.0
episode: 4500 test result: 500.0
episode: 4550 test result: 500.0
episode: 4600 test result: 171.7
episode: 4650 test result: 324.4
episode: 4700 test result: 500.0
episode: 4750 test result: 500.0
episode: 4800 test result: 500.0
episode: 4850 test result: 500.0
episode: 4900 test result: 500.0
episode: 4950 test result: 500.0
episode: 5000 test result: 500.0
episode: 5050 test result: 500.0
episode: 5100 test result: 500.0
episode: 5150 test result: 500.0
episode: 5200 test result: 500.0
episode: 5250 test result: 486.6
episode: 5300 test result: 500.0
episode: 5350 test result: 202.6
episode: 5400 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 5450 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 5500 test result: 500.0
episode: 5550 test result: 468.3
episode: 5600 test result: 462.0
episode: 5650 test result: 500.0
episode: 5700 test result: 500.0
episode: 5750 test result: 500.0
episode: 5800 test result: 500.0
episode: 5850 test result: 500.0
episode: 5900 test result: 500.0
episode: 5950 test result: 500.0
evals
evals
evals
evals
episode: 6000 test result: 500.0
episode: 6050 test result: 466.5
episode: 6100 test result: 500.0
episode: 6150 test result: 500.0
episode: 6200 test result: 500.0
episode: 6250 test result: 500.0
episode: 6300 test result: 500.0
episode: 6350 test result: 500.0
episode: 6400 test result: 500.0
episode: 6450 test result: 500.0
episode: 6500 test result: 500.0
episode: 6550 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 6600 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 6650 test result: 500.0
episode: 6700 test result: 500.0
evals
evals
episode: 6750 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 6800 test result: 500.0
episode: 6850 test result: 500.0
episode: 6900 test result: 500.0
episode: 6950 test result: 500.0
episode: 7000 test result: 500.0
episode: 7050 test result: 198.7
episode: 7100 test result: 500.0
episode: 7150 test result: 500.0
episode: 7200 test result: 500.0
episode: 7250 test result: 172.3
episode: 7300 test result: 173.0
episode: 7350 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 7400 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 7450 test result: 210.6
episode: 7500 test result: 500.0
episode: 7550 test result: 500.0
episode: 7600 test result: 172.7
episode: 7650 test result: 162.5
episode: 7700 test result: 194.1
episode: 7750 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 7800 test result: 500.0
episode: 7850 test result: 188.4
episode: 7900 test result: 154.6
episode: 7950 test result: 155.5
episode: 8000 test result: 124.8
episode: 8050 test result: 142.6
episode: 8100 test result: 323.6
episode: 8150 test result: 172.6
episode: 8200 test result: 187.2
episode: 8250 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 8300 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 8350 test result: 162.4
episode: 8400 test result: 151.5
episode: 8450 test result: 139.4
episode: 8500 test result: 151.0
episode: 8550 test result: 167.0
episode: 8600 test result: 197.0
episode: 8650 test result: 198.7
episode: 8700 test result: 152.9
episode: 8750 test result: 154.0
episode: 8800 test result: 186.7
episode: 8850 test result: 134.3
episode: 8900 test result: 100.3
episode: 8950 test result: 98.2
episode: 9000 test result: 103.9
episode: 9050 test result: 104.4
episode: 9100 test result: 110.3
episode: 9150 test result: 130.7
episode: 9200 test result: 141.6
episode: 9250 test result: 130.6
episode: 9300 test result: 500.0
episode: 9350 test result: 500.0
episode: 9400 test result: 30.9
episode: 9450 test result: 194.6
episode: 9500 test result: 500.0
episode: 9550 test result: 147.1
episode: 9600 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 9650 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 9700 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 9750 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
episode: 9800 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 9850 test result: 160.4
episode: 9900 test result: 167.3
episode: 9950 test result: 500.0
episode: 10000 test result: 500.0
evals
evals
evals
episode: 10050 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 10100 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 10150 test result: 500.0
episode: 10200 test result: 500.0
episode: 10250 test result: 500.0
episode: 10300 test result: 500.0
episode: 10350 test result: 500.0
episode: 10400 test result: 500.0
episode: 10450 test result: 500.0
episode: 10500 test result: 500.0
episode: 10550 test result: 498.8
episode: 10600 test result: 500.0
episode: 10650 test result: 500.0
episode: 10700 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 10750 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 10800 test result: 500.0
episode: 10850 test result: 500.0
episode: 10900 test result: 500.0
episode: 10950 test result: 467.9
episode: 11000 test result: 500.0
episode: 11050 test result: 500.0
episode: 11100 test result: 500.0
episode: 11150 test result: 500.0
episode: 11200 test result: 500.0
episode: 11250 test result: 500.0
episode: 11300 test result: 500.0
episode: 11350 test result: 499.1
episode: 11400 test result: 118.0
episode: 11450 test result: 200.7
episode: 11500 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 11550 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 11600 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 11650 test result: 500.0
episode: 11700 test result: 500.0
episode: 11750 test result: 500.0
episode: 11800 test result: 433.4
episode: 11850 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 11900 test result: 492.9
episode: 11950 test result: 383.1
episode: 12000 test result: 328.9
episode: 12050 test result: 500.0
episode: 12100 test result: 500.0
evals
evals
evals
evals
episode: 12150 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 12200 test result: 500.0
episode: 12250 test result: 499.9
episode: 12300 test result: 494.0
episode: 12350 test result: 500.0
episode: 12400 test result: 500.0
episode: 12450 test result: 500.0
episode: 12500 test result: 500.0
episode: 12550 test result: 499.4
episode: 12600 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 12650 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 12700 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 12750 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 12800 test result: 500.0
episode: 12850 test result: 500.0
episode: 12900 test result: 500.0
episode: 12950 test result: 500.0
episode: 13000 test result: 500.0
episode: 13050 test result: 447.8
episode: 13100 test result: 500.0
episode: 13150 test result: 500.0
episode: 13200 test result: 500.0
evals
evals
evals
evals
evals
episode: 13250 test result: 500.0
episode: 13300 test result: 449.1
episode: 13350 test result: 457.9
episode: 13400 test result: 477.3
episode: 13450 test result: 142.7
episode: 13500 test result: 466.8
episode: 13550 test result: 500.0
episode: 13600 test result: 500.0
episode: 13650 test result: 500.0
episode: 13700 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 13750 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 13800 test result: 500.0
episode: 13850 test result: 500.0
episode: 13900 test result: 500.0
episode: 13950 test result: 500.0
episode: 14000 test result: 500.0
episode: 14050 test result: 500.0
episode: 14100 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
episode: 14150 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 14200 test result: 500.0
episode: 14250 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 14300 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 14350 test result: 500.0
episode: 14400 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 14450 test result: 500.0
episode: 14500 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
episode: 14550 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 14600 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 14650 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
episode: 14700 test result: 500.0
episode: 14750 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 14800 test result: 500.0
evals
evals
evals
evals
evals
episode: 14850 test result: 500.0
episode: 14900 test result: 500.0
episode: 14950 test result: 500.0
episode: 15000 test result: 500.0
episode: 15050 test result: 500.0
episode: 15100 test result: 500.0
episode: 15150 test result: 500.0
episode: 15200 test result: 500.0
episode: 15250 test result: 500.0
episode: 15300 test result: 434.5
episode: 15350 test result: 421.1
episode: 15400 test result: 450.5
episode: 15450 test result: 500.0
evals
evals
evals
evals
episode: 15500 test result: 500.0
episode: 15550 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 15600 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 15650 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 15700 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 15750 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 15800 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 15850 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 15900 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 15950 test result: 500.0
evals
episode: 16000 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 16050 test result: 500.0
episode: 16100 test result: 500.0
episode: 16150 test result: 500.0
episode: 16200 test result: 500.0
episode: 16250 test result: 500.0
episode: 16300 test result: 500.0
episode: 16350 test result: 500.0
episode: 16400 test result: 500.0
episode: 16450 test result: 495.9
episode: 16500 test result: 498.7
episode: 16550 test result: 496.9
episode: 16600 test result: 500.0
episode: 16650 test result: 500.0
episode: 16700 test result: 500.0
episode: 16750 test result: 500.0
evals
evals
evals
evals
evals
evals
episode: 16800 test result: 500.0
episode: 16850 test result: 500.0
episode: 16900 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 16950 test result: 500.0
episode: 17000 test result: 500.0
episode: 17050 test result: 500.0
episode: 17100 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 17150 test result: 500.0
episode: 17200 test result: 500.0
episode: 17250 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 17300 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 17350 test result: 500.0
evals
episode: 17400 test result: 500.0
episode: 17450 test result: 500.0
episode: 17500 test result: 500.0
episode: 17550 test result: 500.0
episode: 17600 test result: 500.0
episode: 17650 test result: 500.0
episode: 17700 test result: 468.1
episode: 17750 test result: 500.0
episode: 17800 test result: 500.0
episode: 17850 test result: 142.9
episode: 17900 test result: 342.4
episode: 17950 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 18000 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 18050 test result: 500.0
episode: 18100 test result: 496.1
episode: 18150 test result: 500.0
episode: 18200 test result: 494.6
episode: 18250 test result: 494.4
episode: 18300 test result: 163.2
episode: 18350 test result: 498.6
episode: 18400 test result: 491.2
episode: 18450 test result: 497.5
episode: 18500 test result: 479.2
episode: 18550 test result: 500.0
episode: 18600 test result: 500.0
episode: 18650 test result: 500.0
episode: 18700 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 18750 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 18800 test result: 500.0
evals
evals
episode: 18850 test result: 500.0
episode: 18900 test result: 500.0
episode: 18950 test result: 500.0
episode: 19000 test result: 500.0
episode: 19050 test result: 500.0
episode: 19100 test result: 500.0
episode: 19150 test result: 500.0
episode: 19200 test result: 500.0
episode: 19250 test result: 500.0
episode: 19300 test result: 500.0
episode: 19350 test result: 500.0
episode: 19400 test result: 500.0
episode: 19450 test result: 500.0
episode: 19500 test result: 500.0
episode: 19550 test result: 500.0
episode: 19600 test result: 500.0
episode: 19650 test result: 500.0
episode: 19700 test result: 500.0
episode: 19750 test result: 500.0
episode: 19800 test result: 500.0
episode: 19850 test result: 500.0
episode: 19900 test result: 500.0
episode: 19950 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 20000 test result: 500.0


<ipython-input-12-fb39473e2cbf>:142: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
  torch.nn.utils.clip_grad_norm(self.actor_network.parameters(),0.5)
<ipython-input-12-fb39473e2cbf>:152: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
  torch.nn.utils.clip_grad_norm(self.value_network.parameters(),0.5)


episode: 50 test result: 9.1
episode: 100 test result: 9.5
episode: 150 test result: 8.9
episode: 200 test result: 122.2
episode: 250 test result: 106.9
episode: 300 test result: 122.2
episode: 350 test result: 341.2
episode: 400 test result: 346.1
episode: 450 test result: 214.6
episode: 500 test result: 474.2
episode: 550 test result: 467.6
episode: 600 test result: 404.6
episode: 650 test result: 478.0
episode: 700 test result: 498.1
episode: 750 test result: 473.7
episode: 800 test result: 500.0
episode: 850 test result: 478.7
episode: 900 test result: 465.4
episode: 950 test result: 302.2
episode: 1000 test result: 500.0
episode: 1050 test result: 293.7
episode: 1100 test result: 444.4
episode: 1150 test result: 327.9
episode: 1200 test result: 500.0
episode: 1250 test result: 500.0
episode: 1300 test result: 211.2
episode: 1350 test result: 500.0
episode: 1400 test result: 500.0
episode: 1450 test result: 499.0
episode: 1500 test result: 500.0
episode: 1550 test result: 500.0
episode: 1600 test result: 495.8
episode: 1650 test result: 500.0
episode: 1700 test result: 464.4
episode: 1750 test result: 495.9
episode: 1800 test result: 262.7
episode: 1850 test result: 482.4
episode: 1900 test result: 500.0
episode: 1950 test result: 500.0
episode: 2000 test result: 395.0
episode: 2050 test result: 454.8
episode: 2100 test result: 210.0
episode: 2150 test result: 255.8
episode: 2200 test result: 500.0
episode: 2250 test result: 498.3
episode: 2300 test result: 500.0
episode: 2350 test result: 500.0
episode: 2400 test result: 500.0
episode: 2450 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 2500 test result: 480.2
episode: 2550 test result: 500.0
episode: 2600 test result: 482.9
episode: 2650 test result: 202.8
episode: 2700 test result: 500.0
episode: 2750 test result: 375.4
episode: 2800 test result: 500.0
episode: 2850 test result: 391.3
episode: 2900 test result: 497.2
episode: 2950 test result: 226.4
episode: 3000 test result: 294.8
episode: 3050 test result: 190.2
episode: 3100 test result: 500.0
episode: 3150 test result: 206.3
episode: 3200 test result: 500.0
episode: 3250 test result: 149.5
episode: 3300 test result: 500.0
episode: 3350 test result: 443.1
episode: 3400 test result: 500.0
episode: 3450 test result: 500.0
episode: 3500 test result: 160.8
episode: 3550 test result: 163.3
episode: 3600 test result: 500.0
episode: 3650 test result: 160.6
episode: 3700 test result: 218.6
episode: 3750 test result: 469.8
episode: 3800 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 3850 test result: 500.0
episode: 3900 test result: 500.0
episode: 3950 test result: 481.8
episode: 4000 test result: 499.1
episode: 4050 test result: 407.6
episode: 4100 test result: 500.0
episode: 4150 test result: 442.9
episode: 4200 test result: 188.2
episode: 4250 test result: 176.5
episode: 4300 test result: 481.8
episode: 4350 test result: 500.0
episode: 4400 test result: 183.5
episode: 4450 test result: 261.8
episode: 4500 test result: 230.5
episode: 4550 test result: 500.0
episode: 4600 test result: 199.9
episode: 4650 test result: 500.0
episode: 4700 test result: 201.3
episode: 4750 test result: 500.0
episode: 4800 test result: 493.9
episode: 4850 test result: 491.5
episode: 4900 test result: 293.3
episode: 4950 test result: 187.0
episode: 5000 test result: 444.5
episode: 5050 test result: 494.8
episode: 5100 test result: 392.9
episode: 5150 test result: 500.0
episode: 5200 test result: 500.0
episode: 5250 test result: 500.0
episode: 5300 test result: 497.1
episode: 5350 test result: 403.4
episode: 5400 test result: 166.1
episode: 5450 test result: 500.0
episode: 5500 test result: 168.5
episode: 5550 test result: 123.5
episode: 5600 test result: 105.3
episode: 5650 test result: 99.2
episode: 5700 test result: 102.8
episode: 5750 test result: 102.5
episode: 5800 test result: 102.5
episode: 5850 test result: 101.7
episode: 5900 test result: 102.8
episode: 5950 test result: 106.9
episode: 6000 test result: 107.9
episode: 6050 test result: 111.6
episode: 6100 test result: 125.3
episode: 6150 test result: 155.2
episode: 6200 test result: 500.0
episode: 6250 test result: 303.0
episode: 6300 test result: 500.0
evals
evals
evals
evals
evals
evals
evals
evals
evals
episode: 6350 test result: 500.0
episode: 6400 test result: 500.0
episode: 6450 test result: 500.0
episode: 6500 test result: 494.6
episode: 6550 test result: 500.0
episode: 6600 test result: 488.4
episode: 6650 test result: 500.0
episode: 6700 test result: 495.8
episode: 6750 test result: 500.0
episode: 6800 test result: 495.4
episode: 6850 test result: 496.8
episode: 6900 test result: 500.0
episode: 6950 test result: 465.0
episode: 7000 test result: 194.0
episode: 7050 test result: 465.2
episode: 7100 test result: 500.0
episode: 7150 test result: 494.7
episode: 7200 test result: 167.7
episode: 7250 test result: 495.0
episode: 7300 test result: 500.0
episode: 7350 test result: 174.2
episode: 7400 test result: 500.0
episode: 7450 test result: 164.0
episode: 7500 test result: 166.0
episode: 7550 test result: 500.0
episode: 7600 test result: 500.0
episode: 7650 test result: 499.9
episode: 7700 test result: 500.0
episode: 7750 test result: 500.0
episode: 7800 test result: 500.0
episode: 7850 test result: 472.5
episode: 7900 test result: 497.1
episode: 7950 test result: 181.9
episode: 8000 test result: 490.2
episode: 8050 test result: 476.3
episode: 8100 test result: 471.3
episode: 8150 test result: 500.0
episode: 8200 test result: 500.0
episode: 8250 test result: 496.8
episode: 8300 test result: 500.0
episode: 8350 test result: 452.8
episode: 8400 test result: 491.1
episode: 8450 test result: 486.1
episode: 8500 test result: 498.0
episode: 8550 test result: 486.9
episode: 8600 test result: 482.1
episode: 8650 test result: 486.2
episode: 8700 test result: 500.0
episode: 8750 test result: 495.1
episode: 8800 test result: 447.5
episode: 8850 test result: 485.9
episode: 8900 test result: 490.5
episode: 8950 test result: 500.0
episode: 9000 test result: 500.0
episode: 9050 test result: 498.9
episode: 9100 test result: 476.1
episode: 9150 test result: 436.6
episode: 9200 test result: 477.9
episode: 9250 test result: 483.7
episode: 9300 test result: 500.0
episode: 9350 test result: 500.0
episode: 9400 test result: 487.6
episode: 9450 test result: 497.2
episode: 9500 test result: 500.0
episode: 9550 test result: 500.0
episode: 9600 test result: 179.3
episode: 9650 test result: 500.0
episode: 9700 test result: 500.0
episode: 9750 test result: 500.0
episode: 9800 test result: 500.0
episode: 9850 test result: 453.0
episode: 9900 test result: 479.1
episode: 9950 test result: 496.8
episode: 10000 test result: 500.0
episode: 10050 test result: 428.0
episode: 10100 test result: 439.5
episode: 10150 test result: 131.8
episode: 10200 test result: 106.0
episode: 10250 test result: 99.5
episode: 10300 test result: 101.8
episode: 10350 test result: 107.6
episode: 10400 test result: 107.3
episode: 10450 test result: 115.7
episode: 10500 test result: 131.9
episode: 10550 test result: 145.7
episode: 10600 test result: 193.3
episode: 10650 test result: 481.5
episode: 10700 test result: 500.0
episode: 10750 test result: 499.0
episode: 10800 test result: 406.4
episode: 10850 test result: 332.7
episode: 10900 test result: 500.0
episode: 10950 test result: 497.8
episode: 11000 test result: 478.7
episode: 11050 test result: 436.3
episode: 11100 test result: 422.5
episode: 11150 test result: 442.3
episode: 11200 test result: 410.8
episode: 11250 test result: 421.0
episode: 11300 test result: 467.3
episode: 11350 test result: 125.3
episode: 11400 test result: 116.6
episode: 11450 test result: 122.5
episode: 11500 test result: 473.7
episode: 11550 test result: 499.2
episode: 11600 test result: 360.8
episode: 11650 test result: 497.4
episode: 11700 test result: 496.8
episode: 11750 test result: 500.0
episode: 11800 test result: 500.0
episode: 11850 test result: 488.8
episode: 11900 test result: 486.2
episode: 11950 test result: 440.9
episode: 12000 test result: 416.2
episode: 12050 test result: 221.4
episode: 12100 test result: 442.5
episode: 12150 test result: 464.1
episode: 12200 test result: 120.9
episode: 12250 test result: 115.5
episode: 12300 test result: 161.5
episode: 12350 test result: 374.1
episode: 12400 test result: 492.9
episode: 12450 test result: 500.0
episode: 12500 test result: 482.3
episode: 12550 test result: 236.8
episode: 12600 test result: 467.3
episode: 12650 test result: 462.5
episode: 12700 test result: 450.4
episode: 12750 test result: 439.2
episode: 12800 test result: 450.8
episode: 12850 test result: 386.9
episode: 12900 test result: 474.6
episode: 12950 test result: 429.7
episode: 13000 test result: 444.3
episode: 13050 test result: 314.7
episode: 13100 test result: 442.6
episode: 13150 test result: 437.5
episode: 13200 test result: 233.4
episode: 13250 test result: 434.9
episode: 13300 test result: 445.3
episode: 13350 test result: 481.7
episode: 13400 test result: 271.2
episode: 13450 test result: 185.9
episode: 13500 test result: 482.2
episode: 13550 test result: 166.3
episode: 13600 test result: 213.8
episode: 13650 test result: 429.1
episode: 13700 test result: 482.7
episode: 13750 test result: 356.2
episode: 13800 test result: 500.0
episode: 13850 test result: 351.7
episode: 13900 test result: 359.5
episode: 13950 test result: 379.2
episode: 14000 test result: 300.0
episode: 14050 test result: 287.4
episode: 14100 test result: 429.3
episode: 14150 test result: 447.0
episode: 14200 test result: 306.1
episode: 14250 test result: 474.7
episode: 14300 test result: 211.9
episode: 14350 test result: 445.8
episode: 14400 test result: 380.7
episode: 14450 test result: 452.6
episode: 14500 test result: 446.7
episode: 14550 test result: 460.5
episode: 14600 test result: 466.6
episode: 14650 test result: 185.5
episode: 14700 test result: 473.9
episode: 14750 test result: 499.2
episode: 14800 test result: 352.1
episode: 14850 test result: 487.4
episode: 14900 test result: 369.4
episode: 14950 test result: 382.7
episode: 15000 test result: 329.7
episode: 15050 test result: 475.9
episode: 15100 test result: 303.0
episode: 15150 test result: 381.0
episode: 15200 test result: 188.3
episode: 15250 test result: 313.6
episode: 15300 test result: 402.3
episode: 15350 test result: 332.7
episode: 15400 test result: 208.6
episode: 15450 test result: 433.2
episode: 15500 test result: 438.8
episode: 15550 test result: 210.7
episode: 15600 test result: 352.4
episode: 15650 test result: 419.9
episode: 15700 test result: 421.9
episode: 15750 test result: 354.7
episode: 15800 test result: 297.7
episode: 15850 test result: 276.8
episode: 15900 test result: 266.9
episode: 15950 test result: 276.3
episode: 16000 test result: 421.6
episode: 16050 test result: 206.1
episode: 16100 test result: 304.4
episode: 16150 test result: 385.9
episode: 16200 test result: 286.6
episode: 16250 test result: 408.1
episode: 16300 test result: 224.5
episode: 16350 test result: 223.3
episode: 16400 test result: 383.1
episode: 16450 test result: 182.1
episode: 16500 test result: 432.8
episode: 16550 test result: 407.9
episode: 16600 test result: 418.2
episode: 16650 test result: 445.3
episode: 16700 test result: 424.7
episode: 16750 test result: 223.9
episode: 16800 test result: 456.1
episode: 16850 test result: 381.7
episode: 16900 test result: 321.9
episode: 16950 test result: 271.0
episode: 17000 test result: 432.3
episode: 17050 test result: 241.4
episode: 17100 test result: 330.7
episode: 17150 test result: 216.5
episode: 17200 test result: 348.5
episode: 17250 test result: 412.6
episode: 17300 test result: 318.7
episode: 17350 test result: 444.9
episode: 17400 test result: 468.3
episode: 17450 test result: 213.2
episode: 17500 test result: 483.3
episode: 17550 test result: 485.9
episode: 17600 test result: 203.8
episode: 17650 test result: 202.0
episode: 17700 test result: 493.2
episode: 17750 test result: 490.4
episode: 17800 test result: 341.7
episode: 17850 test result: 164.9
episode: 17900 test result: 497.1
episode: 17950 test result: 495.5
episode: 18000 test result: 453.5
episode: 18050 test result: 462.2
episode: 18100 test result: 446.6
episode: 18150 test result: 443.4
episode: 18200 test result: 396.4
episode: 18250 test result: 408.1
episode: 18300 test result: 263.8
episode: 18350 test result: 439.0
episode: 18400 test result: 357.5
episode: 18450 test result: 254.7
episode: 18500 test result: 245.2
episode: 18550 test result: 468.8
episode: 18600 test result: 284.1
episode: 18650 test result: 416.8
episode: 18700 test result: 473.0
episode: 18750 test result: 477.9
episode: 18800 test result: 377.1
episode: 18850 test result: 287.6
episode: 18900 test result: 467.8
episode: 18950 test result: 465.1
episode: 19000 test result: 487.6
episode: 19050 test result: 470.4
episode: 19100 test result: 386.7
episode: 19150 test result: 407.5
episode: 19200 test result: 399.7
episode: 19250 test result: 220.5
episode: 19300 test result: 304.4
episode: 19350 test result: 233.8
episode: 19400 test result: 396.3
episode: 19450 test result: 256.7
episode: 19500 test result: 447.5
episode: 19550 test result: 475.5
episode: 19600 test result: 493.7
episode: 19650 test result: 463.2
episode: 19700 test result: 472.9
episode: 19750 test result: 497.9
episode: 19800 test result: 337.5
episode: 19850 test result: 228.3
episode: 19900 test result: 252.8
episode: 19950 test result: 374.5
episode: 20000 test result: 220.1

png

修改网络后,在模型收敛后固定参数(不再继续更新),可以避免测试结果出现波动的情况

可以看到,MLP 相较于 LSTM 能够更快收敛

# Experiment cell: train one LSTM-based and one MLP-based A2C agent on
# CartPole-v1 with the same episode budget (20000) and draw both test curves
# on a single figure for comparison.

# LSTM variant. `steps` and `test_results` are created as empty lists in
# A2CLSTM.__init__; presumably train() fills them at each periodic test
# (TODO confirm against train()).
ac2lstmagent = A2CLSTM('CartPole-v1',NUM_EPISODE=20000)
ac2lstmagent.train()
plt.plot(ac2lstmagent.steps,ac2lstmagent.test_results,label='rl+lstm')

# MLP variant, trained and plotted the same way.
# NOTE(review): A2CMLP is defined elsewhere in the notebook; this assumes it
# exposes the same `steps` / `test_results` attributes as A2CLSTM — verify.
ac2mlp = A2CMLP('CartPole-v1',NUM_EPISODE=20000)
ac2mlp.train()
plt.plot(ac2mlp.steps,ac2mlp.test_results,label='rl+mlp')

# Add the legend (uses the `label=` values above) and render the figure.
plt.legend()
plt.show()
<ipython-input-27-cf3fd05cc64c>:120: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
  torch.nn.utils.clip_grad_norm(self.actor_network.parameters(),0.5)
<ipython-input-27-cf3fd05cc64c>:133: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
  torch.nn.utils.clip_grad_norm(self.value_network.parameters(),0.5)


episode: 50 test result: 9.2
episode: 100 test result: 31.3
episode: 150 test result: 107.1
episode: 200 test result: 77.7
episode: 250 test result: 47.9
episode: 300 test result: 154.4
episode: 350 test result: 110.3
episode: 400 test result: 138.6
episode: 450 test result: 149.5
episode: 500 test result: 118.6
episode: 550 test result: 123.0
episode: 600 test result: 140.1
episode: 650 test result: 159.5
episode: 700 test result: 71.5
episode: 750 test result: 157.4
episode: 800 test result: 172.3
episode: 850 test result: 126.4
episode: 900 test result: 222.4
episode: 950 test result: 162.8
episode: 1000 test result: 301.1
episode: 1050 test result: 153.3
episode: 1100 test result: 245.5
episode: 1150 test result: 159.7
episode: 1200 test result: 465.9
episode: 1250 test result: 428.9
episode: 1300 test result: 25.6
episode: 1350 test result: 19.1
episode: 1400 test result: 31.5
episode: 1450 test result: 141.4
episode: 1500 test result: 175.1
episode: 1550 test result: 272.1
episode: 1600 test result: 307.0
episode: 1650 test result: 353.2
episode: 1700 test result: 146.9
episode: 1750 test result: 151.3
episode: 1800 test result: 138.6
episode: 1850 test result: 279.7
episode: 1900 test result: 422.2
episode: 1950 test result: 443.0
episode: 2000 test result: 354.8
episode: 2050 test result: 330.2
episode: 2100 test result: 369.4
episode: 2150 test result: 303.9
episode: 2200 test result: 363.8
episode: 2250 test result: 383.7
episode: 2300 test result: 379.9
episode: 2350 test result: 402.1
episode: 2400 test result: 500.0
episode: 2450 test result: 158.2
episode: 2500 test result: 430.3
episode: 2550 test result: 383.0
episode: 2600 test result: 404.2
episode: 2650 test result: 390.3
episode: 2700 test result: 321.7
episode: 2750 test result: 426.9
episode: 2800 test result: 461.5
episode: 2850 test result: 494.0
episode: 2900 test result: 173.5
episode: 2950 test result: 493.7
episode: 3000 test result: 298.0
episode: 3050 test result: 495.8
episode: 3100 test result: 346.4
episode: 3150 test result: 500.0
episode: 3200 test result: 500.0
episode: 3250 test result: 488.7
episode: 3300 test result: 500.0
episode: 3350 test result: 436.0
episode: 3400 test result: 500.0
episode: 3450 test result: 500.0
episode: 3500 test result: 499.8
episode: 3550 test result: 500.0
episode: 3600 test result: 500.0
episode: 3650 test result: 500.0
episode: 3700 test result: 390.1
episode: 3750 test result: 426.8
episode: 3800 test result: 241.8
episode: 3850 test result: 239.2
episode: 3900 test result: 361.2
episode: 3950 test result: 317.0
episode: 4000 test result: 455.4
episode: 4050 test result: 496.4
episode: 4100 test result: 500.0
episode: 4150 test result: 459.4
episode: 4200 test result: 268.1
episode: 4250 test result: 500.0
episode: 4300 test result: 474.9
episode: 4350 test result: 196.5
episode: 4400 test result: 500.0
episode: 4450 test result: 500.0
episode: 4500 test result: 474.5
episode: 4550 test result: 491.3
episode: 4600 test result: 434.1
episode: 4650 test result: 428.2
episode: 4700 test result: 484.5
episode: 4750 test result: 458.3
episode: 4800 test result: 461.2
episode: 4850 test result: 500.0
episode: 4900 test result: 386.3
episode: 4950 test result: 379.8
episode: 5000 test result: 500.0
episode: 5050 test result: 500.0
episode: 5100 test result: 500.0
episode: 5150 test result: 417.8
episode: 5200 test result: 500.0
episode: 5250 test result: 500.0
episode: 5300 test result: 500.0
episode: 5350 test result: 437.1
episode: 5400 test result: 500.0
episode: 5450 test result: 500.0
episode: 5500 test result: 500.0
episode: 5550 test result: 500.0
episode: 5600 test result: 480.7
episode: 5650 test result: 362.1
episode: 5700 test result: 337.0
episode: 5750 test result: 470.9
episode: 5800 test result: 475.0
episode: 5850 test result: 385.2
episode: 5900 test result: 332.1
episode: 5950 test result: 500.0
episode: 6000 test result: 500.0
episode: 6050 test result: 500.0
episode: 6100 test result: 438.7
episode: 6150 test result: 435.5
episode: 6200 test result: 118.8
episode: 6250 test result: 246.4
episode: 6300 test result: 500.0
episode: 6350 test result: 500.0
episode: 6400 test result: 419.4
episode: 6450 test result: 493.4
episode: 6500 test result: 500.0
episode: 6550 test result: 500.0
episode: 6600 test result: 430.4
episode: 6650 test result: 160.5
episode: 6700 test result: 430.7
episode: 6750 test result: 421.6
episode: 6800 test result: 275.8
episode: 6850 test result: 308.7
episode: 6900 test result: 403.1
episode: 6950 test result: 469.2
episode: 7000 test result: 498.3
episode: 7050 test result: 500.0
episode: 7100 test result: 499.1
episode: 7150 test result: 492.0
episode: 7200 test result: 499.1
episode: 7250 test result: 490.7
episode: 7300 test result: 500.0
episode: 7350 test result: 466.8
episode: 7400 test result: 499.9
episode: 7450 test result: 500.0
episode: 7500 test result: 500.0
episode: 7550 test result: 181.1
episode: 7600 test result: 478.7
episode: 7650 test result: 430.9
episode: 7700 test result: 500.0
episode: 7750 test result: 500.0
evals
episode: 7800 test result: 500.0
episode: 7850 test result: 500.0
episode: 7900 test result: 500.0
episode: 7950 test result: 500.0
episode: 8000 test result: 500.0
episode: 8050 test result: 500.0
episode: 8100 test result: 500.0
episode: 8150 test result: 500.0
episode: 8200 test result: 500.0
episode: 8250 test result: 500.0
episode: 8300 test result: 500.0
episode: 8350 test result: 500.0
episode: 8400 test result: 500.0
episode: 8450 test result: 500.0
episode: 8500 test result: 500.0
episode: 8550 test result: 500.0
episode: 8600 test result: 500.0
episode: 8650 test result: 500.0
episode: 8700 test result: 500.0
episode: 8750 test result: 500.0
episode: 8800 test result: 500.0
episode: 8850 test result: 500.0
episode: 8900 test result: 500.0
episode: 8950 test result: 500.0
episode: 9000 test result: 500.0
episode: 9050 test result: 500.0
episode: 9100 test result: 500.0
episode: 9150 test result: 500.0
episode: 9200 test result: 500.0
episode: 9250 test result: 500.0
episode: 9300 test result: 500.0
episode: 9350 test result: 500.0
episode: 9400 test result: 500.0
episode: 9450 test result: 500.0
episode: 9500 test result: 500.0
episode: 9550 test result: 500.0
episode: 9600 test result: 500.0
episode: 9650 test result: 500.0
episode: 9700 test result: 500.0
episode: 9750 test result: 500.0
episode: 9800 test result: 500.0
episode: 9850 test result: 500.0
episode: 9900 test result: 500.0
episode: 9950 test result: 500.0
episode: 10000 test result: 500.0
episode: 10050 test result: 500.0
episode: 10100 test result: 500.0
episode: 10150 test result: 500.0
episode: 10200 test result: 500.0
episode: 10250 test result: 500.0
episode: 10300 test result: 500.0
episode: 10350 test result: 500.0
episode: 10400 test result: 500.0
episode: 10450 test result: 500.0
episode: 10500 test result: 500.0
episode: 10550 test result: 500.0
episode: 10600 test result: 500.0
episode: 10650 test result: 500.0
episode: 10700 test result: 500.0
episode: 10750 test result: 500.0
episode: 10800 test result: 500.0
episode: 10850 test result: 500.0
episode: 10900 test result: 500.0
episode: 10950 test result: 500.0
episode: 11000 test result: 500.0
episode: 11050 test result: 500.0
episode: 11100 test result: 500.0
episode: 11150 test result: 500.0
episode: 11200 test result: 500.0
episode: 11250 test result: 500.0
episode: 11300 test result: 500.0
episode: 11350 test result: 500.0
episode: 11400 test result: 500.0
episode: 11450 test result: 500.0
episode: 11500 test result: 500.0
episode: 11550 test result: 500.0
episode: 11600 test result: 500.0
episode: 11650 test result: 500.0
episode: 11700 test result: 500.0
episode: 11750 test result: 500.0
episode: 11800 test result: 500.0
episode: 11850 test result: 500.0
episode: 11900 test result: 500.0
episode: 11950 test result: 500.0
episode: 12000 test result: 500.0
episode: 12050 test result: 500.0
episode: 12100 test result: 500.0
episode: 12150 test result: 500.0
episode: 12200 test result: 500.0
episode: 12250 test result: 500.0
episode: 12300 test result: 500.0
episode: 12350 test result: 500.0
episode: 12400 test result: 500.0
episode: 12450 test result: 500.0
episode: 12500 test result: 500.0
episode: 12550 test result: 500.0
episode: 12600 test result: 500.0
episode: 12650 test result: 500.0
episode: 12700 test result: 500.0
episode: 12750 test result: 500.0
episode: 12800 test result: 500.0
episode: 12850 test result: 500.0
episode: 12900 test result: 500.0
episode: 12950 test result: 500.0
episode: 13000 test result: 500.0
episode: 13050 test result: 500.0
episode: 13100 test result: 500.0
episode: 13150 test result: 500.0
episode: 13200 test result: 500.0
episode: 13250 test result: 500.0
episode: 13300 test result: 500.0
episode: 13350 test result: 500.0
episode: 13400 test result: 500.0
episode: 13450 test result: 500.0
episode: 13500 test result: 500.0
episode: 13550 test result: 500.0
episode: 13600 test result: 500.0
episode: 13650 test result: 500.0
episode: 13700 test result: 500.0
episode: 13750 test result: 500.0
episode: 13800 test result: 500.0
episode: 13850 test result: 500.0
episode: 13900 test result: 500.0
episode: 13950 test result: 500.0
episode: 14000 test result: 500.0
episode: 14050 test result: 500.0
episode: 14100 test result: 500.0
episode: 14150 test result: 500.0
episode: 14200 test result: 500.0
episode: 14250 test result: 500.0
episode: 14300 test result: 500.0
episode: 14350 test result: 500.0
episode: 14400 test result: 500.0
episode: 14450 test result: 500.0
episode: 14500 test result: 500.0
episode: 14550 test result: 500.0
episode: 14600 test result: 500.0
episode: 14650 test result: 500.0
episode: 14700 test result: 500.0
episode: 14750 test result: 500.0
episode: 14800 test result: 500.0
episode: 14850 test result: 500.0
episode: 14900 test result: 500.0
episode: 14950 test result: 500.0
episode: 15000 test result: 500.0
episode: 15050 test result: 500.0
episode: 15100 test result: 500.0
episode: 15150 test result: 500.0
episode: 15200 test result: 500.0
episode: 15250 test result: 500.0
episode: 15300 test result: 500.0
episode: 15350 test result: 500.0
episode: 15400 test result: 500.0
episode: 15450 test result: 500.0
episode: 15500 test result: 500.0
episode: 15550 test result: 500.0
episode: 15600 test result: 500.0
episode: 15650 test result: 500.0
episode: 15700 test result: 500.0
episode: 15750 test result: 500.0
episode: 15800 test result: 500.0
episode: 15850 test result: 500.0
episode: 15900 test result: 500.0
episode: 15950 test result: 500.0
episode: 16000 test result: 500.0
episode: 16050 test result: 500.0
episode: 16100 test result: 500.0
episode: 16150 test result: 500.0
episode: 16200 test result: 500.0
episode: 16250 test result: 500.0
episode: 16300 test result: 500.0
episode: 16350 test result: 500.0
episode: 16400 test result: 500.0
episode: 16450 test result: 500.0
episode: 16500 test result: 500.0
episode: 16550 test result: 500.0
episode: 16600 test result: 500.0
episode: 16650 test result: 500.0
episode: 16700 test result: 500.0
episode: 16750 test result: 500.0
episode: 16800 test result: 500.0
episode: 16850 test result: 500.0
episode: 16900 test result: 500.0
episode: 16950 test result: 500.0
episode: 17000 test result: 500.0
episode: 17050 test result: 500.0
episode: 17100 test result: 500.0
episode: 17150 test result: 500.0
episode: 17200 test result: 500.0
episode: 17250 test result: 500.0
episode: 17300 test result: 500.0
episode: 17350 test result: 500.0
episode: 17400 test result: 500.0
episode: 17450 test result: 500.0
episode: 17500 test result: 500.0
episode: 17550 test result: 500.0
episode: 17600 test result: 500.0
episode: 17650 test result: 500.0
episode: 17700 test result: 500.0
episode: 17750 test result: 500.0
episode: 17800 test result: 500.0
episode: 17850 test result: 500.0
episode: 17900 test result: 500.0
episode: 17950 test result: 500.0
episode: 18000 test result: 500.0
episode: 18050 test result: 500.0
episode: 18100 test result: 500.0
episode: 18150 test result: 500.0
episode: 18200 test result: 500.0
episode: 18250 test result: 500.0
episode: 18300 test result: 500.0
episode: 18350 test result: 500.0
episode: 18400 test result: 500.0
episode: 18450 test result: 500.0
episode: 18500 test result: 500.0
episode: 18550 test result: 500.0
episode: 18600 test result: 500.0
episode: 18650 test result: 500.0
episode: 18700 test result: 500.0
episode: 18750 test result: 500.0
episode: 18800 test result: 500.0
episode: 18850 test result: 500.0
episode: 18900 test result: 500.0
episode: 18950 test result: 500.0
episode: 19000 test result: 500.0
episode: 19050 test result: 500.0
episode: 19100 test result: 500.0
episode: 19150 test result: 500.0
episode: 19200 test result: 500.0
episode: 19250 test result: 500.0
episode: 19300 test result: 500.0
episode: 19350 test result: 500.0
episode: 19400 test result: 500.0
episode: 19450 test result: 500.0
episode: 19500 test result: 500.0
episode: 19550 test result: 500.0
episode: 19600 test result: 500.0
episode: 19650 test result: 500.0
episode: 19700 test result: 500.0
episode: 19750 test result: 500.0
episode: 19800 test result: 500.0
episode: 19850 test result: 500.0
episode: 19900 test result: 500.0
episode: 19950 test result: 500.0
episode: 20000 test result: 500.0


<ipython-input-28-3e9b3e25e434>:142: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
  torch.nn.utils.clip_grad_norm(self.actor_network.parameters(),0.5)
<ipython-input-28-3e9b3e25e434>:152: UserWarning: torch.nn.utils.clip_grad_norm is now deprecated in favor of torch.nn.utils.clip_grad_norm_.
  torch.nn.utils.clip_grad_norm(self.value_network.parameters(),0.5)


episode: 50 test result: 78.1
episode: 100 test result: 118.5
episode: 150 test result: 175.8
episode: 200 test result: 309.2
episode: 250 test result: 398.3
episode: 300 test result: 395.4
episode: 350 test result: 483.1
episode: 400 test result: 451.1
episode: 450 test result: 466.5
episode: 500 test result: 498.9
episode: 550 test result: 474.9
episode: 600 test result: 480.2
episode: 650 test result: 500.0
episode: 700 test result: 411.5
episode: 750 test result: 484.0
episode: 800 test result: 474.4
episode: 850 test result: 436.9
episode: 900 test result: 500.0
episode: 950 test result: 400.5
episode: 1000 test result: 500.0
episode: 1050 test result: 500.0
episode: 1100 test result: 452.8
episode: 1150 test result: 455.0
episode: 1200 test result: 451.0
episode: 1250 test result: 500.0
episode: 1300 test result: 500.0
episode: 1350 test result: 321.7
episode: 1400 test result: 492.3
episode: 1450 test result: 294.3
episode: 1500 test result: 466.3
episode: 1550 test result: 469.9
episode: 1600 test result: 413.0
episode: 1650 test result: 498.6
episode: 1700 test result: 395.3
episode: 1750 test result: 500.0
episode: 1800 test result: 500.0
episode: 1850 test result: 237.6
episode: 1900 test result: 426.7
episode: 1950 test result: 176.9
episode: 2000 test result: 500.0
episode: 2050 test result: 466.8
episode: 2100 test result: 500.0
episode: 2150 test result: 415.4
episode: 2200 test result: 458.9
episode: 2250 test result: 473.2
episode: 2300 test result: 500.0
episode: 2350 test result: 500.0
episode: 2400 test result: 476.7
episode: 2450 test result: 427.3
episode: 2500 test result: 491.7
episode: 2550 test result: 200.8
episode: 2600 test result: 459.7
episode: 2650 test result: 423.5
episode: 2700 test result: 465.4
episode: 2750 test result: 440.3
episode: 2800 test result: 445.9
episode: 2850 test result: 413.5
episode: 2900 test result: 163.3
episode: 2950 test result: 312.0
episode: 3000 test result: 456.0
episode: 3050 test result: 378.8
episode: 3100 test result: 435.9
episode: 3150 test result: 483.8
episode: 3200 test result: 500.0
episode: 3250 test result: 500.0
episode: 3300 test result: 477.3
episode: 3350 test result: 488.9
episode: 3400 test result: 483.6
episode: 3450 test result: 500.0
episode: 3500 test result: 477.6
episode: 3550 test result: 485.8
episode: 3600 test result: 436.4
episode: 3650 test result: 148.6
episode: 3700 test result: 114.8
episode: 3750 test result: 107.6
episode: 3800 test result: 101.6
episode: 3850 test result: 116.9
episode: 3900 test result: 147.3
episode: 3950 test result: 256.8
episode: 4000 test result: 493.9
episode: 4050 test result: 467.8
episode: 4100 test result: 441.1
episode: 4150 test result: 500.0
episode: 4200 test result: 500.0
episode: 4250 test result: 498.4
episode: 4300 test result: 500.0
evals
episode: 4350 test result: 500.0
episode: 4400 test result: 500.0
episode: 4450 test result: 500.0
episode: 4500 test result: 500.0
episode: 4550 test result: 500.0
episode: 4600 test result: 500.0
episode: 4650 test result: 500.0
episode: 4700 test result: 500.0
episode: 4750 test result: 500.0
episode: 4800 test result: 500.0
episode: 4850 test result: 500.0
episode: 4900 test result: 500.0
episode: 4950 test result: 500.0
episode: 5000 test result: 500.0
episode: 5050 test result: 500.0
episode: 5100 test result: 500.0
episode: 5150 test result: 500.0
episode: 5200 test result: 500.0
episode: 5250 test result: 500.0
episode: 5300 test result: 500.0
episode: 5350 test result: 500.0
episode: 5400 test result: 500.0
episode: 5450 test result: 500.0
episode: 5500 test result: 500.0
episode: 5550 test result: 500.0
episode: 5600 test result: 500.0
episode: 5650 test result: 500.0
episode: 5700 test result: 500.0
episode: 5750 test result: 500.0
episode: 5800 test result: 500.0
episode: 5850 test result: 500.0
episode: 5900 test result: 500.0
episode: 5950 test result: 500.0
episode: 6000 test result: 500.0
episode: 6050 test result: 500.0
episode: 6100 test result: 500.0
episode: 6150 test result: 500.0
episode: 6200 test result: 500.0
episode: 6250 test result: 500.0
episode: 6300 test result: 500.0
episode: 6350 test result: 500.0
episode: 6400 test result: 500.0
episode: 6450 test result: 500.0
episode: 6500 test result: 500.0
episode: 6550 test result: 500.0
episode: 6600 test result: 500.0
episode: 6650 test result: 500.0
episode: 6700 test result: 500.0
episode: 6750 test result: 500.0
episode: 6800 test result: 500.0
episode: 6850 test result: 500.0
episode: 6900 test result: 500.0
episode: 6950 test result: 500.0
episode: 7000 test result: 500.0
episode: 7050 test result: 500.0
episode: 7100 test result: 500.0
episode: 7150 test result: 500.0
episode: 7200 test result: 500.0
episode: 7250 test result: 500.0
episode: 7300 test result: 500.0
episode: 7350 test result: 500.0
episode: 7400 test result: 500.0
episode: 7450 test result: 500.0
episode: 7500 test result: 500.0
episode: 7550 test result: 500.0
episode: 7600 test result: 500.0
episode: 7650 test result: 500.0
episode: 7700 test result: 500.0
episode: 7750 test result: 500.0
episode: 7800 test result: 500.0
episode: 7850 test result: 500.0
episode: 7900 test result: 500.0
episode: 7950 test result: 500.0
episode: 8000 test result: 500.0
episode: 8050 test result: 500.0
episode: 8100 test result: 500.0
episode: 8150 test result: 500.0
episode: 8200 test result: 500.0
episode: 8250 test result: 500.0
episode: 8300 test result: 500.0
episode: 8350 test result: 500.0
episode: 8400 test result: 500.0
episode: 8450 test result: 500.0
episode: 8500 test result: 500.0
episode: 8550 test result: 500.0
episode: 8600 test result: 500.0
episode: 8650 test result: 500.0
episode: 8700 test result: 500.0
episode: 8750 test result: 500.0
episode: 8800 test result: 500.0
episode: 8850 test result: 500.0
episode: 8900 test result: 500.0
episode: 8950 test result: 500.0
episode: 9000 test result: 500.0
episode: 9050 test result: 500.0
episode: 9100 test result: 500.0
episode: 9150 test result: 500.0
episode: 9200 test result: 500.0
episode: 9250 test result: 500.0
episode: 9300 test result: 500.0
episode: 9350 test result: 500.0
episode: 9400 test result: 500.0
episode: 9450 test result: 500.0
episode: 9500 test result: 500.0
episode: 9550 test result: 500.0
episode: 9600 test result: 500.0
episode: 9650 test result: 500.0
episode: 9700 test result: 500.0
episode: 9750 test result: 500.0
episode: 9800 test result: 500.0
episode: 9850 test result: 500.0
episode: 9900 test result: 500.0
episode: 9950 test result: 500.0
episode: 10000 test result: 500.0
episode: 10050 test result: 500.0
episode: 10100 test result: 500.0
episode: 10150 test result: 500.0
episode: 10200 test result: 500.0
episode: 10250 test result: 500.0
episode: 10300 test result: 500.0
episode: 10350 test result: 500.0
episode: 10400 test result: 500.0
episode: 10450 test result: 500.0
episode: 10500 test result: 500.0
episode: 10550 test result: 500.0
episode: 10600 test result: 500.0
episode: 10650 test result: 500.0
episode: 10700 test result: 500.0
episode: 10750 test result: 500.0
episode: 10800 test result: 500.0
episode: 10850 test result: 500.0
episode: 10900 test result: 500.0
episode: 10950 test result: 500.0
episode: 11000 test result: 500.0
episode: 11050 test result: 500.0
episode: 11100 test result: 500.0
episode: 11150 test result: 500.0
episode: 11200 test result: 500.0
episode: 11250 test result: 500.0
episode: 11300 test result: 500.0
episode: 11350 test result: 500.0
episode: 11400 test result: 500.0
episode: 11450 test result: 500.0
episode: 11500 test result: 500.0
episode: 11550 test result: 500.0
episode: 11600 test result: 500.0
episode: 11650 test result: 500.0
episode: 11700 test result: 500.0
episode: 11750 test result: 500.0
episode: 11800 test result: 500.0
episode: 11850 test result: 500.0
episode: 11900 test result: 500.0
episode: 11950 test result: 500.0
episode: 12000 test result: 500.0
episode: 12050 test result: 500.0
episode: 12100 test result: 500.0
episode: 12150 test result: 500.0
episode: 12200 test result: 500.0
episode: 12250 test result: 500.0
episode: 12300 test result: 500.0
episode: 12350 test result: 500.0
episode: 12400 test result: 500.0
episode: 12450 test result: 500.0
episode: 12500 test result: 500.0
episode: 12550 test result: 500.0
episode: 12600 test result: 500.0
episode: 12650 test result: 500.0
episode: 12700 test result: 500.0
episode: 12750 test result: 500.0
episode: 12800 test result: 500.0
episode: 12850 test result: 500.0
episode: 12900 test result: 500.0
episode: 12950 test result: 500.0
episode: 13000 test result: 500.0
episode: 13050 test result: 500.0
episode: 13100 test result: 500.0
episode: 13150 test result: 500.0
episode: 13200 test result: 500.0
episode: 13250 test result: 500.0
episode: 13300 test result: 500.0
episode: 13350 test result: 500.0
episode: 13400 test result: 500.0
episode: 13450 test result: 500.0
episode: 13500 test result: 500.0
episode: 13550 test result: 500.0
episode: 13600 test result: 500.0
episode: 13650 test result: 500.0
episode: 13700 test result: 500.0
episode: 13750 test result: 500.0
episode: 13800 test result: 500.0
episode: 13850 test result: 500.0
episode: 13900 test result: 500.0
episode: 13950 test result: 500.0
episode: 14000 test result: 500.0
episode: 14050 test result: 500.0
episode: 14100 test result: 500.0
episode: 14150 test result: 500.0
episode: 14200 test result: 500.0
episode: 14250 test result: 500.0
episode: 14300 test result: 500.0
episode: 14350 test result: 500.0
episode: 14400 test result: 500.0
episode: 14450 test result: 500.0
episode: 14500 test result: 500.0
episode: 14550 test result: 500.0
episode: 14600 test result: 500.0
episode: 14650 test result: 500.0
episode: 14700 test result: 500.0
episode: 14750 test result: 500.0
episode: 14800 test result: 500.0
episode: 14850 test result: 500.0
episode: 14900 test result: 500.0
episode: 14950 test result: 500.0
episode: 15000 test result: 500.0
episode: 15050 test result: 500.0
episode: 15100 test result: 500.0
episode: 15150 test result: 500.0
episode: 15200 test result: 500.0
episode: 15250 test result: 500.0
episode: 15300 test result: 500.0
episode: 15350 test result: 500.0
episode: 15400 test result: 500.0
episode: 15450 test result: 500.0
episode: 15500 test result: 500.0
episode: 15550 test result: 500.0
episode: 15600 test result: 500.0
episode: 15650 test result: 500.0
episode: 15700 test result: 500.0
episode: 15750 test result: 500.0
episode: 15800 test result: 500.0
episode: 15850 test result: 500.0
episode: 15900 test result: 500.0
episode: 15950 test result: 500.0
episode: 16000 test result: 500.0
episode: 16050 test result: 500.0
episode: 16100 test result: 500.0
episode: 16150 test result: 500.0
episode: 16200 test result: 500.0
episode: 16250 test result: 500.0
episode: 16300 test result: 500.0
episode: 16350 test result: 500.0
episode: 16400 test result: 500.0
episode: 16450 test result: 500.0
episode: 16500 test result: 500.0
episode: 16550 test result: 500.0
episode: 16600 test result: 500.0
episode: 16650 test result: 500.0
episode: 16700 test result: 500.0
episode: 16750 test result: 500.0
episode: 16800 test result: 500.0
episode: 16850 test result: 500.0
episode: 16900 test result: 500.0
episode: 16950 test result: 500.0
episode: 17000 test result: 500.0
episode: 17050 test result: 500.0
episode: 17100 test result: 500.0
episode: 17150 test result: 500.0
episode: 17200 test result: 500.0
episode: 17250 test result: 500.0
episode: 17300 test result: 500.0
episode: 17350 test result: 500.0
episode: 17400 test result: 500.0
episode: 17450 test result: 500.0
episode: 17500 test result: 500.0
episode: 17550 test result: 500.0
episode: 17600 test result: 500.0
episode: 17650 test result: 500.0
episode: 17700 test result: 500.0
episode: 17750 test result: 500.0
episode: 17800 test result: 500.0
episode: 17850 test result: 500.0
episode: 17900 test result: 500.0
episode: 17950 test result: 500.0
episode: 18000 test result: 500.0
episode: 18050 test result: 500.0
episode: 18100 test result: 500.0
episode: 18150 test result: 500.0
episode: 18200 test result: 500.0
episode: 18250 test result: 500.0
episode: 18300 test result: 500.0
episode: 18350 test result: 500.0
episode: 18400 test result: 500.0
episode: 18450 test result: 500.0
episode: 18500 test result: 500.0
episode: 18550 test result: 500.0
episode: 18600 test result: 500.0
episode: 18650 test result: 500.0
episode: 18700 test result: 500.0
episode: 18750 test result: 500.0
episode: 18800 test result: 500.0
episode: 18850 test result: 500.0
episode: 18900 test result: 500.0
episode: 18950 test result: 500.0
episode: 19000 test result: 500.0
episode: 19050 test result: 500.0
episode: 19100 test result: 500.0
episode: 19150 test result: 500.0
episode: 19200 test result: 500.0
episode: 19250 test result: 500.0
episode: 19300 test result: 500.0
episode: 19350 test result: 500.0
episode: 19400 test result: 500.0
episode: 19450 test result: 500.0
episode: 19500 test result: 500.0
episode: 19550 test result: 500.0
episode: 19600 test result: 500.0
episode: 19650 test result: 500.0
episode: 19700 test result: 500.0
episode: 19750 test result: 500.0
episode: 19800 test result: 500.0
episode: 19850 test result: 500.0
episode: 19900 test result: 500.0
episode: 19950 test result: 500.0
episode: 20000 test result: 500.0

png