DQN Dueling

DQN using a split head for comparing the advantage of different actions.

import torch
from fastrl.envs.gym import *
from fastrl.agents.dqn.target import *
Training DataPipes
DuelingHead
DuelingHead (hidden:int, n_actions:int, lin_cls=<class 'torch.nn.modules.linear.Linear'>)
Same as nn.Module, but no need for subclasses to call super().__init__
| | Type | Default | Details |
|---|---|---|---|
| hidden | int | | Input into the DuelingHead, likely the output of a hidden layer |
| n_actions | int | | Number/dim of actions to output |
| lin_cls | type | Linear | |
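For intuition, here is a minimal sketch of what a dueling head does in plain PyTorch: one linear stream estimates the state value V(s), a second estimates per-action advantages A(s, a), and the two are recombined as Q = V + (A - mean(A)). The class name, layer sizes, and mean-centering below follow the standard dueling-DQN formulation and are illustrative assumptions, not necessarily fastrl's exact implementation.

```python
import torch
from torch import nn

class SimpleDuelingHead(nn.Module):
    "Illustrative dueling head: value and advantage streams recombined into Q-values."
    def __init__(self, hidden: int, n_actions: int, lin_cls=nn.Linear):
        super().__init__()
        self.value = lin_cls(hidden, 1)               # V(s): one scalar per state
        self.advantage = lin_cls(hidden, n_actions)   # A(s,a): one value per action
    def forward(self, x):
        v, a = self.value(x), self.advantage(x)
        # Center the advantages so V and A stay identifiable: Q = V + (A - mean(A))
        return v + a - a.mean(dim=1, keepdim=True)

# Usage on a batch of CartPole features after a hidden layer of size 64
head = SimpleDuelingHead(hidden=64, n_actions=2)
q_values = head(torch.randn(8, 64))  # -> shape (8, 2)
```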
Try training with basic defaults…
# Setup Loggers
logger_base = ProgressBarLogger(epoch_on_pipe=EpocherCollector,
batch_on_pipe=BatchCollector)
# Set up the core NN
torch.manual_seed(0)
model = DQN(4,2,head_layer=DuelingHead) #.cuda()
# Setup the Agent
agent = DQNAgent(model,[logger_base],max_steps=4000)#,device='cuda')
# Setup the DataBlock
block = DataBlock(
GymTransformBlock(agent=agent,nsteps=1,nskips=1,firstlast=False,n=1000,bs=1)
)
# pipes = L(block.datapipes(['CartPole-v1']*1,n=10))
dls = L(block.dataloaders(['CartPole-v1']*1))
# Setup the Learner
learner = DQNLearner(model,dls,logger_bases=[logger_base],bs=128,max_sz=100_000,
dp_augmentation_fns=[
TargetModelUpdater.insert_dp(),
TargetModelQCalc.replace_dp()
])
learner.fit(3)
# learner.fit(25)

| loss | episode | rolling_reward | epoch | batch | epsilon |
|---|---|---|---|---|---|
| 0.010209812 | 53 | 18.826923 | 1 | 1000 | 0.749500 |
| 0.032916732 | 92 | 21.406593 | 2 | 1000 | 0.499250 |
| 0.041681465 | 105 | 28.240000 | 2 | 1000 | 0.249250 |
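The `TargetModelUpdater.insert_dp()` and `TargetModelQCalc.replace_dp()` augmentations above wire a target network into the training pipeline: the bootstrapped Q targets come from a frozen copy of the online model, and that copy is periodically re-synced to the online weights. The snippet below is a hand-rolled sketch of that idea; the function names, batch layout, and sync schedule are assumptions, not fastrl's DataPipe implementation.

```python
import copy
import torch

def make_target(model):
    "Start the target network as a frozen copy of the online network."
    return copy.deepcopy(model).eval()

def sync_target(model, target_model):
    "Hard update: copy the online weights into the target network."
    target_model.load_state_dict(model.state_dict())

def td_targets(target_model, reward, next_state, done, gamma=0.99):
    "Bootstrap the TD target from the *target* network, not the online one."
    with torch.no_grad():
        next_q = target_model(next_state).max(dim=1, keepdim=True).values
    return reward + gamma * next_q * (1 - done)
```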
The DQN learns, but I wonder if we can get it to learn faster…
# Setup Loggers
logger_base = ProgressBarLogger(epoch_on_pipe=EpocherCollector,
batch_on_pipe=BatchCollector)
# Set up the core NN
torch.manual_seed(0)
model = DQN(4,2,head_layer=DuelingHead)
# Setup the Agent
agent = DQNAgent(model,[logger_base],max_steps=10000)
# Setup the DataBlock
block = DataBlock(
GymTransformBlock(agent=agent,nsteps=2,nskips=2,firstlast=True,n=1000,bs=1), # Merge 2 steps into 1 and skip every other step (see the n-step sketch after the results below).
(GymTransformBlock(agent=agent,nsteps=2,nskips=2,firstlast=True,n=100,include_images=True),VSCodeTransformBlock())
)
dls = L(block.dataloaders(['CartPole-v1']*1))
# Setup the Learner
learner = DQNLearner(model,dls,logger_bases=[logger_base],bs=128,max_sz=20_000,nsteps=2,lr=0.001,
dp_augmentation_fns=[
TargetModelUpdater.insert_dp(),
TargetModelQCalc.replace_dp()
])
learner.fit(3)
# learner.fit(10)

| loss | episode | rolling_reward | epoch | batch | epsilon |
|---|---|---|---|---|---|
| 0.24247332 | 72 | 28.780000 | 1 | 1000 | 0.810000 |
| 2.1317954 | 110 | 45.080000 | 2 | 1000 | 0.615800 |
| 3.3318765 | 133 | 71.960000 | 2 | 1000 | 0.419400 |
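As a reference for what `nsteps=2` with `firstlast=True` is doing above: two consecutive transitions are collapsed into one, with the intermediate reward folded into a discounted two-step return, and the learner then bootstraps from the state two steps ahead (the matching `nsteps=2` on the learner). The helper below is an illustrative, hand-rolled version of that merging; the dictionary keys and the gamma value are assumptions, not the GymTransformBlock code.

```python
def merge_two_steps(t1, t2, gamma=0.99):
    "Collapse two consecutive (s, a, r, s', done) transitions into one 2-step transition."
    return {
        "state":      t1["state"],                          # where the pair starts
        "action":     t1["action"],                         # the action taken there
        "reward":     t1["reward"] + gamma * t2["reward"],  # discounted 2-step reward
        "next_state": t2["next_state"],                     # state two steps later
        "done":       t2["done"],
    }

# Two CartPole steps worth 1.0 each become a single transition worth 1.0 + 0.99 = 1.99,
# and the learner discounts its bootstrap by gamma**2 when training on it.
```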