# DQN Rainbow
> Combines target, dueling, double, and categorical DQNs into a single agent.

import torch
from fastcore.all import L
# NOTE: aside from fastrl.envs.gym (shown in the original), these module paths
# are assumptions based on the fastrl layout at the time of writing.
from fastrl.loggers.core import *
from fastrl.loggers.vscode_visualizers import *
from fastrl.agents.dqn.basic import *
from fastrl.agents.dqn.target import *
from fastrl.agents.dqn.double import *
from fastrl.agents.dqn.dueling import *
from fastrl.agents.dqn.categorical import *
from fastrl.data.block import *
from fastrl.envs.gym import *
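As a quick recap of the pieces being combined: the *target* network keeps a periodically synced copy $\theta^-$ of the online weights $\theta$, and *double* DQN selects the next action with the online network but evaluates it with the target network, giving the (scalar-Q) target

$$y = r + \gamma\, Q_{\theta^-}\!\big(s',\ \arg\max_{a'} Q_{\theta}(s', a')\big).$$

The *dueling* head splits the estimate into state-value and advantage streams, and the *categorical* (C51-style) head predicts a distribution over returns rather than a single expected value.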
# Set up the loggers
logger_base = ProgressBarLogger(epoch_on_pipe=EpocherCollector,
                                batch_on_pipe=BatchCollector)
# Set up the core NN
torch.manual_seed(0)
model = CategoricalDQN( # CategoricalDQN: predicts a distribution over returns
    4, 2,               # CartPole-v1: 4 state dims, 2 discrete actions
    head_layer=DuelingHead # DuelingDQN: separate value/advantage streams
).to(device='cuda')
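# The dueling decomposition used by DuelingHead can be sketched as follows.
# This is a minimal illustration of the idea, Q(s,a) = V(s) + A(s,a) - mean_a' A(s,a'),
# not fastrl's actual DuelingHead implementation:
import torch.nn as nn

class DuelingHeadSketch(nn.Module):
    "Hypothetical helper: value + advantage streams over a shared feature vector."
    def __init__(self, hidden_sz:int, n_actions:int):
        super().__init__()
        self.value = nn.Linear(hidden_sz, 1)              # V(s)
        self.advantage = nn.Linear(hidden_sz, n_actions)  # A(s,a)

    def forward(self, x):
        v, a = self.value(x), self.advantage(x)
        # Subtracting the mean advantage keeps V and A separately identifiable
        return v + a - a.mean(dim=1, keepdim=True)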
# Set up the Agent
agent = DQNAgent(model,[logger_base],max_steps=4000,device='cuda', # max_steps: steps over which epsilon is annealed
                 dp_augmentation_fns=[
                     MultiModelRunner.replace_dp(device='cuda') # CategoricalDQN
                 ])
# Set up the DataBlock
block = DataBlock(
    GymTransformBlock(agent=agent,nsteps=2,nskips=2,firstlast=True), # Merge 2 env steps into 1 transition and skip frames (sketched below)
    (GymTransformBlock(agent=agent,nsteps=2,nskips=2,firstlast=True,n=100,include_images=True),VSCodeTransformBlock())
)
dls = L(block.dataloaders(['CartPole-v1']*1))
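# What nsteps=2 with firstlast=True produces, sketched as a hypothetical helper
# (illustrative only, not fastrl's implementation): keep the first state/action,
# discount-sum the rewards, and carry the last next-state forward.
def merge_two_steps(step1, step2, gamma=0.99):
    s, a, r1, _ = step1   # (state, action, reward, next_state)
    _, _, r2, sp = step2
    return (s, a, r1 + gamma*r2, sp)  # one 2-step transition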
# Set up the Learner
learner = DQNLearner(model,dls,logger_bases=[logger_base],bs=128,
                     batches=1000,
                     loss_func=PartialCrossEntropy, # CategoricalDQN: cross-entropy over return distributions
                     device='cuda',
                     max_sz=100_000, # replay memory max size
                     lr=0.001,
                     dp_augmentation_fns=[
                         TargetModelUpdater.insert_dp(), # TargetDQN: periodically sync the target net
                         CategoricalTargetQCalc.replace_remove_dp( # CategoricalDQN
                             device='cuda',
                             nsteps=2,
                             double_dqn_strategy=True # DoubleDQN: select with online net, evaluate with target
                         )
                     ])
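# Conceptually, TargetModelUpdater periodically copies the online weights into a
# frozen target copy, and CategoricalTargetQCalc with double_dqn_strategy=True
# selects the next action with the online net while evaluating it with the
# target net. A scalar-Q sketch of both ideas (hypothetical helpers, not
# fastrl's code):
def sync_target(online, target):
    target.load_state_dict(online.state_dict())

def double_dqn_target(online, target, reward, next_state, done, gamma=0.99):
    next_action = online(next_state).argmax(dim=1, keepdim=True)   # select with online net
    next_q = target(next_state).gather(1, next_action).squeeze(1)  # evaluate with target net
    return reward + gamma*next_q*(1 - done.float())                # mask terminal states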
learner.fit(1)
# learner.fit(7)

| loss | episode | rolling_reward | epoch | batch | epsilon |
|---|---|---|---|---|---|
| 2.784912 | 77 | 27.920000 | 0 | 1001 | 0.528000 |
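After 1,000 batches the rolling reward is around 28 and epsilon has only decayed to about 0.53, so most actions are still exploratory; a longer run such as the commented-out `learner.fit(7)` should let epsilon decay further and rewards climb.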