torch.manual_seed(0)


class DQN(Module):
    """Small feed-forward network: ``Linear -> ReLU -> Linear``.

    Args:
        state_sz: Input feature size of the first ``Linear`` layer.
        action_sz: Output feature size of the last ``Linear`` layer.
        hidden: Width of the hidden layer between the two ``Linear`` layers.
    """

    def __init__(self, state_sz: int, action_sz: int, hidden=512):
        # A plain ``torch.nn.Module`` must be initialised before sub-modules
        # can be assigned to it. NOTE(review): if ``Module`` is a base class
        # that already auto-initialises (e.g. fastai's), this call is a
        # harmless no-op -- confirm against the file's imports.
        super().__init__()
        self.layers = Sequential(
            Linear(state_sz, hidden),
            ReLU(),
            Linear(hidden, action_sz),
        )

    def forward(self, x):
        """Return the result of passing ``x`` through ``self.layers``."""
        return self.layers(x)
Agent Core
AgentBase
AgentBase (*args, **kwds)
Acts as the footer of the Agent pipeline. Maintains important state, such as the model that actions are obtained from.
Also optionally accepts `action_iterator`, a reference to a persistent list of actions
for the entire agent pipeline to process.
Important: Must be at the start of the pipeline, and be used with AgentHead at the end.
Important: `action_iterator` is stored in the `iterable` field. However, the recommended way of passing actions to the pipeline is to call an `AgentHead` instance.
AgentHead
AgentHead (*args, **kwds)
Acts as the head of the Agent pipeline. Used for conveniently adding actions to the pipeline to process.
Important: Must be paired with `AgentBase`.
SimpleModelRunner
SimpleModelRunner (*args, **kwds)
Takes input from `source_datapipe` and pushes it through the agent base's model, assuming there is only one model field.
Check that the 1x4 tensor successfully pushes through the model and that we get the expected outputs…
# from fastrl.agents.dqn.basic import DQN
# Set up the core NN
torch.manual_seed(0)
model = DQN(4, 2)
# Set up the agent
agent = AgentBase(model)
agent = SimpleModelRunner(agent)
agent = AgentHead(agent)

input_tensor = tensor([1, 2, 3, 4]).float()

# Calling the head with a list of inputs yields one result per input.
for action in agent([input_tensor]):
    print(action)

# The input tensor must still hold its original values after the run.
test_eq(input_tensor, tensor([1., 2., 3., 4.]))
tensor([[-0.2909, -1.0357]], grad_fn=<AddmmBackward0>)
StepFieldSelector
StepFieldSelector (*args, **kwds)
Grabs field
from source_datapipe
to push to the rest of the pipeline.
Check that, using `StepFieldSelector`, we can grab the `state` field from the `SimpleStep` to push through the model…
# Build the pipeline: base -> select the `state` field -> run the model -> head.
agent = AgentBase(model)
agent = StepFieldSelector(agent, field='state')
agent = SimpleModelRunner(agent)
agent = AgentHead(agent)

# Feed a single step whose `state` field is the 1x4 tensor.
for action in agent([SimpleStep.random(state=tensor([1., 2., 3., 4.]))]):
    print(action)
tensor([[-0.2909, -1.0357]], grad_fn=<AddmmBackward0>)
# Set up the core NN
torch.manual_seed(0)
model = DQN(4, 2)
# Set up the agent
agent = AgentBase(model, [])
# All the things that make this agent unique and special
# In this instance, all this module does is pass the action directly through to the model.
agent = SimpleModelRunner(agent)
# Bring everything together into the AgentHead where actions will be passed and then run through the pipeline
agent = AgentHead(agent)
If we pass a list of tensors, we will get a list of actions:
# One input tensor in, one printed result out.
for action in agent([tensor([1, 2, 3, 4]).float()]):
    print(action)
tensor([[-0.2909, -1.0357]], grad_fn=<AddmmBackward0>)
# Three copies of the same input tensor yield three printed results.
for action in agent([tensor([1, 2, 3, 4]).float()] * 3):
    print(action)
traverse(agent);  # Check that we can traverse it
tensor([[-0.2909, -1.0357]], grad_fn=<AddmmBackward0>)
tensor([[-0.2909, -1.0357]], grad_fn=<AddmmBackward0>)
tensor([[-0.2909, -1.0357]], grad_fn=<AddmmBackward0>)
from fastrl.pipes.core import *
from fastrl.pipes.map.transforms import *
from fastrl.data.block import *
from fastrl.envs.gym import *
# def baseline_test(envs,total_steps,seed=0):
# pipe = dp.map.Mapper(envs)
# pipe = TypeTransformer(pipe,[GymTypeTransform])
# pipe = dp.iter.MapToIterConverter(pipe)
# pipe = dp.iter.InMemoryCacheHolder(pipe)
# pipe = pipe.cycle()
# pipe = GymStepper(pipe,seed=seed)
# steps = [step for _,step in zip(*(range(total_steps),pipe))]
# return steps, pipe
# steps, pipe = baseline_test(['CartPole-v1'],0)
StepModelFeeder
StepModelFeeder (*args, **kwds)
Converts StepTypes
into unified tensors using keys
and feeds them into self.agent_base.model
NumpyConverter
NumpyConverter (*args, **kwds)
Given input Tensor
from source_datapipe
returns a numpy array of same shape with argmax set to 1.
# Run ten single-element tensors through NumpyConverter and exhaust the pipe.
tensors = [tensor([4]) for _ in range(10)]
pipe = NumpyConverter(tensors)
list(pipe);
# Same check with tensors placed on the GPU. Guarded so the example does not
# raise on machines without CUDA.
if torch.cuda.is_available():
    tensors = [tensor([4]).to(device='cuda') for _ in range(10)]
    pipe = NumpyConverter(tensors)
    list(pipe)