DQN Async

Components that allow syncing multiple DQN agents across multiple processes.

Using CUDA with the `spawn` start method is a little awkward: PyTorch has a bug (https://github.com/pytorch/pytorch/issues/30401), so queue usage isn't so simple.

Training DataPipes


 ModelSubscriber (*args, **kwds)

If an agent is passed to another process and the ‘spawn’ start method is used, then this module is needed.


 ModelPublisher (*args, **kwds)

Try training with basic defaults…

import torch
from torch.nn import *
import torch.nn.functional as F
from fastrl.loggers.core import *
from fastrl.loggers.jupyter_visualizers import *
from fastrl.learner.core import *
from fastrl.data.block import *
from fastrl.envs.gym import *
from fastrl.agents.core import *
from fastrl.agents.discrete import *
from fastrl.agents.dqn.basic import *

logger_base = ProgressBarLogger(epoch_on_pipe=EpocherCollector,

# Set up the core NN
model = DQN(4,2).cuda()
# model.share_memory() # This will not work in spawn
# Setup the Agent
agent = DQNAgent(model,max_steps=4000,device='cuda',
# Setup the DataBlock
block = DataBlock(
dls = L(block.dataloaders(['CartPole-v1']*1,num_workers=1))
# # Setup the Learner
learner = DQNLearner(model,dls,batches=1000,logger_bases=[logger_base],bs=128,max_sz=100_000,device='cuda',
# learner.fit(2)
# %%python

if __name__=='__main__':
    from torch.multiprocessing import Pool, Process, set_start_method
    except RuntimeError:
    from fastcore.all import *
    import torch
    from torch.nn import *
    import torch.nn.functional as F
    from fastrl.loggers.core import *
    from fastrl.loggers.jupyter_visualizers import *
    from fastrl.learner.core import *
    from fastrl.data.block import *
    from fastrl.envs.gym import *
    from fastrl.agents.core import *
    from fastrl.agents.discrete import *
    from fastrl.agents.dqn.basic import *
    from fastrl.agents.dqn.asynchronous import *
    from torchdata.dataloader2 import DataLoader2
    from torchdata.dataloader2.graph import traverse
    from fastrl.data.dataloader2 import *
    logger_base = ProgressBarLogger(epoch_on_pipe=EpocherCollector,

    # Set up the core NN
    model = DQN(4,2).cuda()
    # model.share_memory() # This will not work in spawn
    # Setup the Agent
    agent = DQNAgent(model,max_steps=4000,device='cuda',
    # Setup the DataBlock
    block = DataBlock(
    dls = L(block.dataloaders(['CartPole-v1']*1,num_workers=1))
    # # Setup the Learner
    learner = DQNLearner(model,dls,batches=1000,logger_bases=[logger_base],bs=128,max_sz=100_000,device='cuda',
    # print(traverse(learner))
