# DQN Rainbow

Combines target, dueling, double, and categorical DQNs.

```python
from fastrl.envs.gym import *
```
```python
# Setup Loggers
logger_base = ProgressBarLogger(epoch_on_pipe=EpocherCollector,
                                batch_on_pipe=BatchCollector)
```
```python
# Set up the core NN
torch.manual_seed(0)
model = CategoricalDQN(4, 2,    # CategoricalDQN
    head_layer=DuelingHead      # DuelingDQN
).to(device='cuda')
```
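Two of the Rainbow pieces live in the model itself: `DuelingHead` splits the network into a state-value stream and an advantage stream, and `CategoricalDQN` outputs, per action, a probability distribution over a fixed support of return "atoms" (C51-style). Below is a minimal plain-PyTorch sketch of both ideas together; the class name, atom count, and support bounds are illustrative assumptions, not fastrl's internals:

```python
import torch
from torch import nn

N_ATOMS, V_MIN, V_MAX = 51, -10.0, 10.0          # common C51 defaults (assumption)
SUPPORT = torch.linspace(V_MIN, V_MAX, N_ATOMS)  # fixed return "atoms"

class DuelingCategoricalSketch(nn.Module):
    """Illustrative dueling + categorical head, not the fastrl implementation."""
    def __init__(self, in_features: int, n_actions: int):
        super().__init__()
        self.n_actions = n_actions
        self.value = nn.Linear(in_features, N_ATOMS)                   # V(s) stream
        self.advantage = nn.Linear(in_features, n_actions * N_ATOMS)   # A(s,a) stream

    def forward(self, x):
        v = self.value(x).unsqueeze(1)                            # (B, 1, atoms)
        a = self.advantage(x).view(-1, self.n_actions, N_ATOMS)   # (B, actions, atoms)
        logits = v + a - a.mean(dim=1, keepdim=True)              # dueling combine
        return logits.softmax(dim=-1)                             # per-action atom probs

probs = DuelingCategoricalSketch(4, 2)(torch.randn(8, 4))  # CartPole: 4 obs dims, 2 actions
q_values = (probs * SUPPORT).sum(-1)                       # expected Q from the distribution
```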
= DQNAgent(model,[logger_base],max_steps=4000,device='cuda',
agent =[
dp_augmentation_fns='cuda') # CategoricalDQN
MultiModelRunner.replace_dp(device
])# Setup the DataBlock
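The agent handles exploration internally, annealing epsilon as training progresses (visible as the `epsilon` column in the results table at the end). A rough sketch of the linear-decay epsilon-greedy rule this approximates; the function name, the use of `max_steps=4000` as the decay horizon, and the start/end values are illustrative assumptions:

```python
import torch

def epsilon_greedy_action(q_values: torch.Tensor, step: int, max_steps: int = 4000,
                          eps_start: float = 1.0, eps_end: float = 0.02) -> int:
    # Anneal epsilon linearly from eps_start to eps_end over max_steps
    eps = max(eps_end, eps_start - (eps_start - eps_end) * step / max_steps)
    if torch.rand(()).item() < eps:
        return int(torch.randint(q_values.shape[-1], (1,)).item())  # explore: random action
    return int(q_values.argmax().item())                            # exploit: greedy action
```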
```python
# Setup the DataBlock
block = DataBlock(
    # We basically merge 2 steps into 1 and skip.
    GymTransformBlock(agent=agent, nsteps=2, nskips=2, firstlast=True),
    (GymTransformBlock(agent=agent, nsteps=2, nskips=2, firstlast=True,
                       n=100, include_images=True),
     VSCodeTransformBlock())
)
dls = L(block.dataloaders(['CartPole-v1'] * 1))
```
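With `nsteps=2`, `nskips=2`, and `firstlast=True`, pairs of consecutive transitions are folded into single transitions carrying a discounted two-step reward. A hypothetical illustration of that merge (tuple layout simplified, done flags ignored):

```python
# (s0, a0, r0, s1) + (s1, a1, r1, s2) -> (s0, a0, r0 + gamma * r1, s2)
def merge_first_last(t0, t1, gamma: float = 0.99):
    s0, a0, r0, _ = t0
    _, _, r1, s2 = t1
    # The learner later bootstraps from s2 with gamma**2 (see nsteps=2 below)
    return s0, a0, r0 + gamma * r1, s2
```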
```python
# Setup the Learner
learner = DQNLearner(model, dls, logger_bases=[logger_base], bs=128,
    batches=1000,
    loss_func=PartialCrossEntropy,  # CategoricalDQN
    device='cuda',
    max_sz=100_000,
    lr=0.001,
    dp_augmentation_fns=[
        TargetModelUpdater.insert_dp(),  # TargetDQN
        CategoricalTargetQCalc.replace_remove_dp(
            device='cuda',
            nsteps=2,
            double_dqn_strategy=True  # DoubleDQN
        )
    ])
learner.fit(1)
# learner.fit(7)
```
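The two `dp_augmentation_fns` supply the remaining Rainbow pieces: `TargetModelUpdater` maintains a periodically re-synced frozen copy of the network (TargetDQN), and `CategoricalTargetQCalc` builds the training target, using the online net to pick the next action and the target net to evaluate it when `double_dqn_strategy=True` (DoubleDQN). A plain-PyTorch sketch of the scalar-Q version of that target; the categorical case projects a whole target distribution instead, and the names and signatures here are illustrative assumptions, not fastrl's API:

```python
import copy
import torch
from torch import nn

def make_target(online: nn.Module) -> nn.Module:
    # TargetDQN idea: keep a frozen copy of the online net, re-synced every N batches
    return copy.deepcopy(online)

def double_dqn_target(online, target, r, s_next, done, gamma=0.99, nsteps=2):
    # done is assumed to be a bool tensor marking terminal transitions
    with torch.no_grad():
        next_a = online(s_next).argmax(dim=-1, keepdim=True)    # DoubleDQN: online picks...
        next_q = target(s_next).gather(-1, next_a).squeeze(-1)  # ...target evaluates
    # nsteps=2 transitions carry 2-step rewards, so bootstrap with gamma**nsteps
    return r + (gamma ** nsteps) * next_q * (~done).float()
```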
| loss | episode | rolling_reward | epoch | batch | epsilon |
|---|---|---|---|---|---|
| 2.784912 | 77 | 27.920000 | 0 | 1001 | 0.528000 |