```python
from fastrl.envs.gym import *
from fastrl.agents.dqn.target import *
```
DQN Dueling
DQN using a split head for comparing the advantage of different actions
Training DataPipes
DuelingHead
DuelingHead (hidden:int, n_actions:int, lin_cls=<class 'torch.nn.modules.linear.Linear'>)
Same as nn.Module, but no need for subclasses to call super().__init__
|  | Type | Default | Details |
|---|---|---|---|
| hidden | int |  | Input into the DuelingHead, likely a hidden layer input |
| n_actions | int |  | Number/dim of actions to output |
| lin_cls | type | Linear |  |
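For intuition, the dueling architecture (Wang et al., 2016) splits the head into a state-value stream V(s) and an advantage stream A(s, a), then recombines them as Q(s, a) = V(s) + A(s, a) − meanₐ A(s, a). Below is a minimal sketch of that idea in standard PyTorch; it is illustrative only, and fastrl's actual `DuelingHead` may differ in details such as the aggregation step:

```python
import torch
from torch import nn

class SketchDuelingHead(nn.Module):
    "Illustrative dueling head: separate value and advantage streams."
    def __init__(self, hidden:int, n_actions:int, lin_cls=nn.Linear):
        super().__init__()
        self.value     = lin_cls(hidden, 1)          # V(s): worth of the state itself
        self.advantage = lin_cls(hidden, n_actions)  # A(s,a): relative merit of each action

    def forward(self, x):
        v, a = self.value(x), self.advantage(x)
        # Center the advantages so V and A are separately identifiable
        return v + a - a.mean(dim=-1, keepdim=True)

q_values = SketchDuelingHead(64, 2)(torch.randn(1, 64))  # shape: (1, 2)
```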
Try training with basic defaults…
```python
# Setup Loggers
logger_base = ProgressBarLogger(epoch_on_pipe=EpocherCollector,
                                batch_on_pipe=BatchCollector)

# Set up the core NN
torch.manual_seed(0)
model = DQN(4,2,head_layer=DuelingHead) #.cuda()
# Setup the Agent
agent = DQNAgent(model,[logger_base],max_steps=4000)#,device='cuda')
# Setup the DataBlock
block = DataBlock(
    GymTransformBlock(agent=agent,nsteps=1,nskips=1,firstlast=False,n=1000,bs=1)
)
# pipes = L(block.datapipes(['CartPole-v1']*1,n=10))
dls = L(block.dataloaders(['CartPole-v1']*1))
# Setup the Learner
learner = DQNLearner(model,dls,logger_bases=[logger_base],bs=128,max_sz=100_000,
                     dp_augmentation_fns=[
                         TargetModelUpdater.insert_dp(),
                         TargetModelQCalc.replace_dp()
                     ])
learner.fit(3)
# learner.fit(25)
```
| loss | episode | rolling_reward | epoch | batch | epsilon |
|---|---|---|---|---|---|
| 0.010209812 | 53 | 18.826923 | 1 | 1000 | 0.749500 |
| 0.032916732 | 92 | 21.406593 | 2 | 1000 | 0.499250 |
| 0.041681465 | 105 | 28.240000 | 2 | 1000 | 0.249250 |
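The `epsilon` column tracks the exploration schedule: it falls by roughly 0.25 per 1000 batches, which is consistent with a linear anneal from 1.0 to 0.0 over the agent's `max_steps=4000`. A sketch of that kind of schedule (an assumption about the shape, not fastrl's exact code):

```python
def linear_epsilon(step:int, max_steps:int, start:float=1.0, end:float=0.0) -> float:
    "Linear anneal from `start` to `end` over `max_steps`, then held at `end`."
    frac = min(step / max_steps, 1.0)
    return start + frac * (end - start)

print(linear_epsilon(1000, 4000), linear_epsilon(3000, 4000))  # 0.75 0.25
```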
The DQN learns, but I wonder if we can get it to learn faster…
```python
# Setup Loggers
logger_base = ProgressBarLogger(epoch_on_pipe=EpocherCollector,
                                batch_on_pipe=BatchCollector)

# Set up the core NN
torch.manual_seed(0)
model = DQN(4,2,head_layer=DuelingHead)
# Setup the Agent
agent = DQNAgent(model,[logger_base],max_steps=10000)
# Setup the DataBlock
block = DataBlock(
    GymTransformBlock(agent=agent,nsteps=2,nskips=2,firstlast=True,n=1000,bs=1), # We basically merge 2 steps into 1 and skip.
    (GymTransformBlock(agent=agent,nsteps=2,nskips=2,firstlast=True,n=100,include_images=True),VSCodeTransformBlock())
)
dls = L(block.dataloaders(['CartPole-v1']*1))
# Setup the Learner
learner = DQNLearner(model,dls,logger_bases=[logger_base],bs=128,max_sz=20_000,nsteps=2,lr=0.001,
                     dp_augmentation_fns=[
                         TargetModelUpdater.insert_dp(),
                         TargetModelQCalc.replace_dp()
                     ])
learner.fit(3)
# learner.fit(10)
```
| loss | episode | rolling_reward | epoch | batch | epsilon |
|---|---|---|---|---|---|
| 0.24247332 | 72 | 28.780000 | 1 | 1000 | 0.810000 |
| 2.1317954 | 110 | 45.080000 | 2 | 1000 | 0.615800 |
| 3.3318765 | 133 | 71.960000 | 2 | 1000 | 0.419400 |
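Rolling reward climbs much faster here (≈72 vs ≈28 by the third epoch). Intuitively, `nsteps=2` makes each training sample carry a two-step return, so reward information propagates through the Q-function in fewer updates; with `firstlast=True`, the intermediate reward appears to be folded into a single first/last step pair. A hedged sketch of a 2-step TD target, assuming the textbook formulation (fastrl's target-model pipeline computes its own version of this internally):

```python
import torch

def two_step_td_target(r0, r1, next_q_max, done, gamma=0.99):
    """Illustrative 2-step target: r0 + g*r1 + g^2 * max_a Q_target(s_{t+2}, a).
    The bootstrap term is masked out when the episode ended inside the window."""
    return r0 + gamma*r1 + gamma**2 * next_q_max * (1 - done.float())

# Example: reward 1 at both steps, bootstrap value 10, episode still running
t = two_step_td_target(torch.tensor(1.), torch.tensor(1.),
                       torch.tensor(10.), torch.tensor(False))
```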