Envs Gym

Fastrl API for working with OpenAI Gyms

Pipes


GymTypeTransform

 GymTypeTransform (enc=None, dec=None, split_idx=None, order=None)

Creates a gym.env


GymStepper

 GymStepper (*args, **kwds)

Accepts a source_datapipe or iterable whose next() produces a single gym.Env. Tracks multiple envs using id(env).

Iteration Examples

import pandas as pd
from fastrl.agents.core import *
class ConstantRunner(dp.iter.IterDataPipe):
    "Yield `constant` (or a list repeating it `array_nestings` times) for every item of `source_datapipe`."
    def __init__(self,source_datapipe,constant=1,array_nestings=0):
        self.source_datapipe = source_datapipe
        # Locate the `AgentBase` somewhere upstream in the datapipe graph.
        self.agent_base = find_dp(traverse(self.source_datapipe),AgentBase)
        self.constant = constant
        self.array_nestings = array_nestings

    def __iter__(self):
        for item in self.source_datapipe:
            try:
                if self.array_nestings!=0:
                    yield [self.constant]*self.array_nestings
                else:
                    yield self.constant
            except Exception:
                # Report which upstream item was in flight, then let the error propagate.
                print('Failed on ',item)
                raise

# Agent pipeline: AgentBase -> ConstantRunner (default constant) -> AgentHead.
agent = AgentHead(ConstantRunner(AgentBase(None,[])))

# Env pipeline: names -> type transform -> iter conversion -> cache holder -> cycle() with no count.
pipe = dp.iter.InMemoryCacheHolder(
    dp.iter.MapToIterConverter(
        TypeTransformer(dp.map.Mapper(['CartPole-v1']*3),[GymTypeTransform])
    )
).cycle()
pipe = GymStepper(pipe,agent=agent,seed=0)

# Zipping with `range(10)` stops the collection after ten steps.
pd.DataFrame([step for step,_ in zip(pipe,range(10))])[['state','next_state','action','terminated']]
state next_state action terminated
0 [tensor(0.0137), tensor(-0.0230), tensor(-0.0459), tensor(-0.0483)] [tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)] tensor(1.) tensor(False)
1 [tensor(0.0137), tensor(-0.0230), tensor(-0.0459), tensor(-0.0483)] [tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)] tensor(1.) tensor(False)
2 [tensor(0.0137), tensor(-0.0230), tensor(-0.0459), tensor(-0.0483)] [tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)] tensor(1.) tensor(False)
3 [tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)] [tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)] tensor(1.) tensor(False)
4 [tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)] [tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)] tensor(1.) tensor(False)
5 [tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)] [tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)] tensor(1.) tensor(False)
6 [tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)] [tensor(0.0241), tensor(0.5643), tensor(-0.0672), tensor(-0.9714)] tensor(1.) tensor(False)
7 [tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)] [tensor(0.0241), tensor(0.5643), tensor(-0.0672), tensor(-0.9714)] tensor(1.) tensor(False)
8 [tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)] [tensor(0.0241), tensor(0.5643), tensor(-0.0672), tensor(-0.9714)] tensor(1.) tensor(False)
9 [tensor(0.0241), tensor(0.5643), tensor(-0.0672), tensor(-0.9714)] [tensor(0.0353), tensor(0.7603), tensor(-0.0866), tensor(-1.2844)] tensor(1.) tensor(False)
# Same env pipeline as before, but no `agent` is handed to GymStepper.
pipe = dp.iter.InMemoryCacheHolder(
    dp.iter.MapToIterConverter(
        TypeTransformer(dp.map.Mapper(['CartPole-v1']*3),[GymTypeTransform])
    )
).cycle()
pipe = GymStepper(pipe,seed=0)

# Zipping with `range(10)` stops the collection after ten steps.
pd.DataFrame([step for step,_ in zip(pipe,range(10))])[['state','next_state','action','terminated']]
state next_state action terminated
0 [tensor(0.0137), tensor(-0.0230), tensor(-0.0459), tensor(-0.0483)] [tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)] tensor(1.) tensor(False)
1 [tensor(0.0137), tensor(-0.0230), tensor(-0.0459), tensor(-0.0483)] [tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)] tensor(1.) tensor(False)
2 [tensor(0.0137), tensor(-0.0230), tensor(-0.0459), tensor(-0.0483)] [tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)] tensor(1.) tensor(False)
3 [tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)] [tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)] tensor(1.) tensor(False)
4 [tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)] [tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)] tensor(1.) tensor(False)
5 [tensor(0.0132), tensor(0.1727), tensor(-0.0469), tensor(-0.3552)] [tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)] tensor(1.) tensor(False)
6 [tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)] [tensor(0.0241), tensor(0.5643), tensor(-0.0672), tensor(-0.9714)] tensor(1.) tensor(False)
7 [tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)] [tensor(0.0241), tensor(0.5643), tensor(-0.0672), tensor(-0.9714)] tensor(1.) tensor(False)
8 [tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)] [tensor(0.0241), tensor(0.5643), tensor(-0.0672), tensor(-0.9714)] tensor(1.) tensor(False)
9 [tensor(0.0241), tensor(0.5643), tensor(-0.0672), tensor(-0.9714)] [tensor(0.0353), tensor(0.3702), tensor(-0.0866), tensor(-0.7006)] tensor(0.) tensor(False)
from torch.utils.data.dataloader_experimental import DataLoader2
def seed_worker(worker_id):
    "Seed torch's global RNG with 0; `worker_id` is accepted but not used."
    torch.manual_seed(0)

# Serve the stepper pipe from two workers, each initialised through `seed_worker`.
dl = DataLoader2(pipe,num_workers=2,worker_init_fn=seed_worker)

# Zipping with `range(10)` stops the collection after ten items.
pd.DataFrame([step for step,_ in zip(dl,range(10))])[['state','next_state','action','terminated']]
state next_state action terminated
0 [[tensor(0.0353), tensor(0.3702), tensor(-0.0866), tensor(-0.7006)]] [[tensor(0.0427), tensor(0.1763), tensor(-0.1007), tensor(-0.4364)]] [tensor(0.)] [tensor(False)]
1 [[tensor(0.0353), tensor(0.3702), tensor(-0.0866), tensor(-0.7006)]] [[tensor(0.0427), tensor(0.1763), tensor(-0.1007), tensor(-0.4364)]] [tensor(0.)] [tensor(False)]
2 [[tensor(0.0353), tensor(0.3702), tensor(-0.0866), tensor(-0.7006)]] [[tensor(0.0427), tensor(0.1763), tensor(-0.1007), tensor(-0.4364)]] [tensor(0.)] [tensor(False)]
3 [[tensor(0.0353), tensor(0.3702), tensor(-0.0866), tensor(-0.7006)]] [[tensor(0.0427), tensor(0.1763), tensor(-0.1007), tensor(-0.4364)]] [tensor(0.)] [tensor(False)]
4 [[tensor(0.0241), tensor(0.5643), tensor(-0.0672), tensor(-0.9714)]] [[tensor(0.0353), tensor(0.3702), tensor(-0.0866), tensor(-0.7006)]] [tensor(0.)] [tensor(False)]
5 [[tensor(0.0241), tensor(0.5643), tensor(-0.0672), tensor(-0.9714)]] [[tensor(0.0353), tensor(0.3702), tensor(-0.0866), tensor(-0.7006)]] [tensor(0.)] [tensor(False)]
6 [[tensor(0.0427), tensor(0.1763), tensor(-0.1007), tensor(-0.4364)]] [[tensor(0.0463), tensor(-0.0172), tensor(-0.1094), tensor(-0.1771)]] [tensor(0.)] [tensor(False)]
7 [[tensor(0.0427), tensor(0.1763), tensor(-0.1007), tensor(-0.4364)]] [[tensor(0.0463), tensor(-0.0172), tensor(-0.1094), tensor(-0.1771)]] [tensor(0.)] [tensor(False)]
8 [[tensor(0.0427), tensor(0.1763), tensor(-0.1007), tensor(-0.4364)]] [[tensor(0.0463), tensor(-0.0172), tensor(-0.1094), tensor(-0.1771)]] [tensor(0.)] [tensor(False)]
9 [[tensor(0.0427), tensor(0.1763), tensor(-0.1007), tensor(-0.4364)]] [[tensor(0.0463), tensor(-0.0172), tensor(-0.1094), tensor(-0.1771)]] [tensor(0.)] [tensor(False)]
pipe = dp.map.Mapper(['CartPole-v1']*3)
pipe = TypeTransformer(pipe,[GymTypeTransform])
pipe = dp.iter.MapToIterConverter(pipe)
pipe = dp.iter.InMemoryCacheHolder(pipe)
pipe = pipe.cycle()
pipe = GymStepper(pipe,synchronized_reset=True)

# Iterate the `pipe` built above. Reading from the earlier `dl` would bypass
# `synchronized_reset=True` entirely, since `dl` wraps the previous pipe.
# NOTE(review): rendered output captured from `dl` needs to be regenerated.
pd.DataFrame([step for step,_ in zip(pipe,range(10))])[['state','next_state','action','terminated']]
state next_state action terminated
0 [[tensor(0.0353), tensor(0.3702), tensor(-0.0866), tensor(-0.7006)]] [[tensor(0.0427), tensor(0.1763), tensor(-0.1007), tensor(-0.4364)]] [tensor(0.)] [tensor(False)]
1 [[tensor(0.0353), tensor(0.3702), tensor(-0.0866), tensor(-0.7006)]] [[tensor(0.0427), tensor(0.1763), tensor(-0.1007), tensor(-0.4364)]] [tensor(0.)] [tensor(False)]
2 [[tensor(0.0353), tensor(0.3702), tensor(-0.0866), tensor(-0.7006)]] [[tensor(0.0427), tensor(0.1763), tensor(-0.1007), tensor(-0.4364)]] [tensor(0.)] [tensor(False)]
3 [[tensor(0.0353), tensor(0.3702), tensor(-0.0866), tensor(-0.7006)]] [[tensor(0.0427), tensor(0.1763), tensor(-0.1007), tensor(-0.4364)]] [tensor(0.)] [tensor(False)]
4 [[tensor(0.0241), tensor(0.5643), tensor(-0.0672), tensor(-0.9714)]] [[tensor(0.0353), tensor(0.3702), tensor(-0.0866), tensor(-0.7006)]] [tensor(0.)] [tensor(False)]
5 [[tensor(0.0241), tensor(0.5643), tensor(-0.0672), tensor(-0.9714)]] [[tensor(0.0353), tensor(0.3702), tensor(-0.0866), tensor(-0.7006)]] [tensor(0.)] [tensor(False)]
6 [[tensor(0.0427), tensor(0.1763), tensor(-0.1007), tensor(-0.4364)]] [[tensor(0.0463), tensor(-0.0172), tensor(-0.1094), tensor(-0.1771)]] [tensor(0.)] [tensor(False)]
7 [[tensor(0.0427), tensor(0.1763), tensor(-0.1007), tensor(-0.4364)]] [[tensor(0.0463), tensor(-0.0172), tensor(-0.1094), tensor(-0.1771)]] [tensor(0.)] [tensor(False)]
8 [[tensor(0.0427), tensor(0.1763), tensor(-0.1007), tensor(-0.4364)]] [[tensor(0.0463), tensor(-0.0172), tensor(-0.1094), tensor(-0.1771)]] [tensor(0.)] [tensor(False)]
9 [[tensor(0.0427), tensor(0.1763), tensor(-0.1007), tensor(-0.4364)]] [[tensor(0.0463), tensor(-0.0172), tensor(-0.1094), tensor(-0.1771)]] [tensor(0.)] [tensor(False)]

Tests

We create 3 envs and cycle through them 54 times, for a total of 162 iterations. Each env will run for 18 steps before ending, which means we expect there to be 9 total episodes.

envs = ['CartPole-v1']*3
n_episodes = 3

pipe = dp.map.Mapper(envs)
pipe = TypeTransformer(pipe,[GymTypeTransform])
pipe = dp.iter.MapToIterConverter(pipe)
pipe = dp.iter.InMemoryCacheHolder(pipe)
# We want to cycle through the envs enough times that their episodes sum to 9, 3 episodes each.
# Each cycle visits every env once and an episode lasts 18 steps, so the number of cycles
# depends on the episodes wanted per env (`n_episodes`), not on how many envs there are.
pipe = pipe.cycle(count=18*n_episodes)
pipe = GymStepper(pipe,seed=0)

All of the environments should reach max 18 steps given a seed of 0…
The total number of iterations should be ( 18 * n_envs) * n_episodes_per_env = 162

# Exhaust the pipe, then bucket the steps by their integer step number.
steps = list(pipe)
gsteps = groupby(steps,lambda step:int(step.step_n))
test_len(gsteps.keys(),18)
# Show every tenth step.
pd.DataFrame(list(steps))[['state','terminated','env_id','episode_n','step_n']].iloc[::10]
state terminated env_id episode_n step_n
0 [tensor(0.0137), tensor(-0.0230), tensor(-0.0459), tensor(-0.0483)] tensor(False) tensor(140361096758800) tensor(1) tensor(1)
10 [tensor(0.0241), tensor(0.5643), tensor(-0.0672), tensor(-0.9714)] tensor(False) tensor(140361096757392) tensor(1) tensor(4)
20 [tensor(0.0463), tensor(-0.0172), tensor(-0.1094), tensor(-0.1771)] tensor(False) tensor(140361096758352) tensor(1) tensor(7)
30 [tensor(0.0217), tensor(-0.4009), tensor(-0.0929), tensor(0.2661)] tensor(False) tensor(140361096758800) tensor(1) tensor(11)
40 [tensor(0.0094), tensor(0.1879), tensor(-0.0961), tensor(-0.6926)] tensor(False) tensor(140361096757392) tensor(1) tensor(14)
50 [tensor(0.0325), tensor(0.7771), tensor(-0.1570), tensor(-1.6694)] tensor(False) tensor(140361096758352) tensor(1) tensor(17)
60 [tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)] tensor(False) tensor(140361096758800) tensor(2) tensor(3)
70 [tensor(0.0427), tensor(0.1763), tensor(-0.1007), tensor(-0.4364)] tensor(False) tensor(140361096757392) tensor(2) tensor(6)
80 [tensor(0.0417), tensor(-0.4040), tensor(-0.1113), tensor(0.3342)] tensor(False) tensor(140361096758352) tensor(2) tensor(9)
90 [tensor(0.0096), tensor(-0.0083), tensor(-0.0886), tensor(-0.3733)] tensor(False) tensor(140361096758800) tensor(2) tensor(13)
100 [tensor(0.0209), tensor(0.5806), tensor(-0.1302), tensor(-1.3390)] tensor(False) tensor(140361096757392) tensor(2) tensor(16)
110 [tensor(0.0137), tensor(-0.0230), tensor(-0.0459), tensor(-0.0483)] tensor(False) tensor(140361096758352) tensor(3) tensor(1)
120 [tensor(0.0353), tensor(0.3702), tensor(-0.0866), tensor(-0.7006)] tensor(False) tensor(140361096758800) tensor(3) tensor(5)
130 [tensor(0.0459), tensor(-0.2106), tensor(-0.1129), tensor(0.0792)] tensor(False) tensor(140361096757392) tensor(3) tensor(8)
140 [tensor(0.0217), tensor(-0.4009), tensor(-0.0929), tensor(0.2661)] tensor(False) tensor(140361096758352) tensor(3) tensor(11)
150 [tensor(0.0132), tensor(0.3842), tensor(-0.1099), tensor(-1.0139)] tensor(False) tensor(140361096758800) tensor(3) tensor(15)
160 [tensor(0.0480), tensor(0.9737), tensor(-0.1904), tensor(-2.0066)] tensor(True) tensor(140361096757392) tensor(3) tensor(18)

All of the step groups should be the same length…

# The first group sets the reference size; every later group must match it.
group_sz = None
for name,group in gsteps.items():
    if group_sz is None:
        group_sz = len(group)
        continue
    assert len(group)==group_sz,f' Got lengths {len(group)} and {group_sz} for {name}.\n\n{group}'

Each step group’s state and next_states should match across envs…

group_sz = None
for name,group in gsteps.items():
    # Use the first step of the group as the reference for all the others.
    e1,rest = group[0],group[1:]
    for other in rest: test_eq(e1.state,other.state)
    for other in rest: test_eq(e1.next_state,other.next_state)

Each step group value should not show up/be duplicated in any other step groups…

for name,group in gsteps.items():
    e1 = group[0]
    for other_name,other_group in gsteps.items():
        if other_name==name: continue
        # Compare against every member of the other group. Slicing with `[1:]` here
        # would leave the first step of each other group unchecked.
        for other in other_group: test_ne(e1.state,other.state)
        for other in other_group: test_ne(e1.next_state,other.next_state)

Given 3 envs, single steps, episodes of 18 steps in length, 3 episodes each, run for 162 iterations, we should expect there to be 9 dones.

# Adding up the `terminated` flags counts the finished episodes.
test_eq(sum(step.terminated for step in steps),tensor([9]))

The max episode numbers for each env should sum to 9 where for each env, it should reach and finish 3 episodes…

# Regroup the same steps by the env that produced them; three envs are expected.
gsteps = groupby(steps,lambda o:int(o.env_id))
test_len(gsteps.keys(),3)
# One collection per env; presumably the integer `episode_n` of each step — confirm how the `Self` chain resolves.
env1,env2,env3 = L(gsteps.values()).map(L).map(Self.map(Self.episode_n()).map(int))
# The highest episode number reached by each env should add up to 9.
test_eq(max(env1)+max(env2)+max(env3),9)

Test the synchronized_reset param…

In this case, we will have to iterate through the 3 envs without producing a step on warmup.

envs = ['CartPole-v1']*3
n_episodes = 3

pipe = dp.map.Mapper(envs)
pipe = TypeTransformer(pipe,[GymTypeTransform])
pipe = dp.iter.MapToIterConverter(pipe)
pipe = dp.iter.InMemoryCacheHolder(pipe)
# We want to cycle through the envs enough times that their episodes sum to 9, 3 episodes each.
# Each cycle visits every env once and an episode lasts 18 steps, so the base number of cycles
# depends on the episodes wanted per env (`n_episodes`), not on how many envs there are.
# We add an additional +3 cycles since `synchronized_reset` cycles through the envs additional times
# to make sure they are all reset prior to stepping
pipe = pipe.cycle(count=(18*n_episodes)+3)
pipe = GymStepper(pipe,seed=0,synchronized_reset=True)
steps = list(pipe)
# Bucket the collected steps by their integer step number.
gsteps = groupby(steps,lambda o:int(o.step_n))
test_len(gsteps.keys(),18)
pd.DataFrame([step for step in steps])[['state','terminated','env_id','episode_n','step_n']][::10]
state terminated env_id episode_n step_n
0 [tensor(0.0137), tensor(-0.0230), tensor(-0.0459), tensor(-0.0483)] tensor(False) tensor(140361096587536) tensor(1) tensor(1)
10 [tensor(0.0241), tensor(0.5643), tensor(-0.0672), tensor(-0.9714)] tensor(False) tensor(140361096587472) tensor(1) tensor(4)
20 [tensor(0.0463), tensor(-0.0172), tensor(-0.1094), tensor(-0.1771)] tensor(False) tensor(140361096587920) tensor(1) tensor(7)
30 [tensor(0.0217), tensor(-0.4009), tensor(-0.0929), tensor(0.2661)] tensor(False) tensor(140361096587536) tensor(1) tensor(11)
40 [tensor(0.0094), tensor(0.1879), tensor(-0.0961), tensor(-0.6926)] tensor(False) tensor(140361096587472) tensor(1) tensor(14)
50 [tensor(0.0325), tensor(0.7771), tensor(-0.1570), tensor(-1.6694)] tensor(False) tensor(140361096587920) tensor(1) tensor(17)
60 [tensor(0.0167), tensor(0.3685), tensor(-0.0540), tensor(-0.6622)] tensor(False) tensor(140361096587536) tensor(2) tensor(3)
70 [tensor(0.0427), tensor(0.1763), tensor(-0.1007), tensor(-0.4364)] tensor(False) tensor(140361096587472) tensor(2) tensor(6)
80 [tensor(0.0417), tensor(-0.4040), tensor(-0.1113), tensor(0.3342)] tensor(False) tensor(140361096587920) tensor(2) tensor(9)
90 [tensor(0.0096), tensor(-0.0083), tensor(-0.0886), tensor(-0.3733)] tensor(False) tensor(140361096587536) tensor(2) tensor(13)
100 [tensor(0.0209), tensor(0.5806), tensor(-0.1302), tensor(-1.3390)] tensor(False) tensor(140361096587472) tensor(2) tensor(16)
110 [tensor(0.0137), tensor(-0.0230), tensor(-0.0459), tensor(-0.0483)] tensor(False) tensor(140361096587920) tensor(3) tensor(1)
120 [tensor(0.0353), tensor(0.3702), tensor(-0.0866), tensor(-0.7006)] tensor(False) tensor(140361096587536) tensor(3) tensor(5)
130 [tensor(0.0459), tensor(-0.2106), tensor(-0.1129), tensor(0.0792)] tensor(False) tensor(140361096587472) tensor(3) tensor(8)
140 [tensor(0.0217), tensor(-0.4009), tensor(-0.0929), tensor(0.2661)] tensor(False) tensor(140361096587920) tensor(3) tensor(11)
150 [tensor(0.0132), tensor(0.3842), tensor(-0.1099), tensor(-1.0139)] tensor(False) tensor(140361096587536) tensor(3) tensor(15)
160 [tensor(0.0480), tensor(0.9737), tensor(-0.1904), tensor(-2.0066)] tensor(True) tensor(140361096587472) tensor(3) tensor(18)

All of the step groups should be the same length…

# The first group sets the reference size; every later group must match it.
group_sz = None
for name,group in gsteps.items():
    if group_sz is None:
        group_sz = len(group)
        continue
    assert len(group)==group_sz,f' Got lengths {len(group)} and {group_sz} for {name}.\n\n{group}'

Each step group’s state and next_states should match across envs…

group_sz = None
for name,group in gsteps.items():
    # Use the first step of the group as the reference for all the others.
    e1,rest = group[0],group[1:]
    for other in rest: test_eq(e1.state,other.state)
    for other in rest: test_eq(e1.next_state,other.next_state)

Each step group value should not show up/be duplicated in any other step groups…

for name,group in gsteps.items():
    e1 = group[0]
    for other_name,other_group in gsteps.items():
        if other_name==name: continue
        # Compare against every member of the other group. Slicing with `[1:]` here
        # would leave the first step of each other group unchecked.
        for other in other_group: test_ne(e1.state,other.state)
        for other in other_group: test_ne(e1.next_state,other.next_state)

Given 3 envs, single steps, episodes of 18 steps in length, 3 episodes each, run for 162 iterations, we should expect there to be 9 dones.

# Adding up the `terminated` flags counts the finished episodes.
test_eq(sum(step.terminated for step in steps),tensor([9]))

The max episode numbers for each env should sum to 9 where for each env, it should reach and finish 3 episodes…

# Regroup the same steps by the env that produced them; three envs are expected.
gsteps = groupby(steps,lambda o:int(o.env_id))
test_len(gsteps.keys(),3)
# One collection per env; presumably the integer `episode_n` of each step — confirm how the `Self` chain resolves.
env1,env2,env3 = L(gsteps.values()).map(L).map(Self.map(Self.episode_n()).map(int))
# The highest episode number reached by each env should add up to 9.
test_eq(max(env1)+max(env2)+max(env3),9)
envs = ['CartPole-v1']*10

pipe = dp.iter.InMemoryCacheHolder(
    dp.iter.MapToIterConverter(
        TypeTransformer(dp.map.Mapper(envs),[GymTypeTransform])
    )
).cycle(count=18*len(envs))
# Turn off the seed so that some envs end before others...
pipe = GymStepper(pipe,synchronized_reset=True)
steps = list(pipe)

Since the seed is turned off the only properties we are to expect are:

- If an env finishes, no steps from that env should be seen until all 9 of the other envs finish
def synchronized_reset_checker(steps,n_envs=None):
    """Check that no env is stepped again until every env has terminated.

    `steps` is an iterable of objects exposing `terminated` and `env_id`.
    `n_envs` is the number of distinct envs taking part; when it is `None`
    the length of the module-level `envs` list is used.

    Raises `Exception` if an env that already terminated produces a
    non-terminal step before all envs have terminated, or if there was never
    a point at which all envs had terminated.
    """
    if n_envs is None: n_envs = len(envs)
    env_id_done_tracker = {}
    did_syncs_happen = False
    for idx,step in enumerate(steps):
        env_id = int(step.env_id)
        if bool(step.terminated):
            env_id_done_tracker[env_id] = idx
        elif env_id in env_id_done_tracker:
            # The tracker is cleared as soon as it is full, so a hit here means
            # this env moved on while some other env was still running.
            raise Exception(f'env_id {env_id} was iterated through when it should not have been! idx: {idx}')
        # Checked on every step (terminal ones included) so that a sync which
        # completes on the very last step is still counted.
        if len(env_id_done_tracker)==n_envs:
            did_syncs_happen = True
            env_id_done_tracker = {}

    if not did_syncs_happen:
        raise Exception('There should have at least been 1 time where all the envs had to reset, which did not happen.')
# Raises if a terminated env steps again before all envs have terminated, or if no full sync ever occurred.
synchronized_reset_checker(steps)

As a sanity check, without synchronized_reset we expect envs to be reset and stepped through before the other envs are reset, so synchronized_reset_checker should fail.

# Rebuild the pipeline over `envs`, this time without `synchronized_reset`.
pipe = dp.iter.InMemoryCacheHolder(
    dp.iter.MapToIterConverter(
        TypeTransformer(dp.map.Mapper(envs),[GymTypeTransform])
    )
).cycle(count=18*len(envs))
# Turn off the seed so that some envs end before others...
pipe = GymStepper(pipe)
steps = list(pipe)
# The checker is expected to raise its "iterated through" error here.
with ExceptionExpected(regex='was iterated through when it should not have been'):
    synchronized_reset_checker(steps)

GymTransformBlock

 GymTransformBlock (agent:Union[torch.utils.data.datapipes.datapipe.IterDa
                    taPipe,torch.utils.data.datapipes.datapipe.MapDataPipe
                    ], seed:Union[int,NoneType]=None, nsteps:int=1,
                    nskips:int=1, firstlast:bool=False,
                    type_tfms:Union[List[Callable],NoneType]=None,
                    item_tfms:Union[List[Callable],NoneType]=None,
                    batch_tfms:Union[List[Callable],NoneType]=None,
                    bs:int=1, n:Union[int,NoneType]=None,
                    synchronized_reset:bool=False,
                    include_images:bool=False,
                    terminate_on_truncation:bool=True, dp_augmentation_fns
                    :Tuple[fastrl.pipes.core.DataPipeAugmentationFn]=None)

Basic OpenAI gym DataPipeGraph with first-last, nstep, and nskip capability

Type Default Details
agent typing.Union[torch.utils.data.datapipes.datapipe.IterDataPipe, torch.utils.data.datapipes.datapipe.MapDataPipe] An AgentHead
seed typing.Union[int, NoneType] None The seed for the gym to use
nsteps int 1 Used by NStepper, outputs tuples / chunks of associated steps
nskips int 1 Used by NSkipper to skip a certain number of steps (agent still gets called for each)
firstlast bool False Whether when nsteps>1 to merge it into a single StepType
type_tfms typing.Union[typing.List[typing.Callable], NoneType] None Functions to run once, at the beginning of the pipeline
item_tfms typing.Union[typing.List[typing.Callable], NoneType] None Functions to run over individual steps before batching
batch_tfms typing.Union[typing.List[typing.Callable], NoneType] None Functions to run over batches (as specified by bs)
bs int 1 The batch size, which is different from nsteps in that firstlast will be
run prior to batching, and a batch of steps might come from multiple envs,
where nstep is associated with a single env
n typing.Union[int, NoneType] None The preferred default is for the pipeline to be infinite, and the learner
decides how much to iter. If this is not None, then the pipeline will run for
that number of n
synchronized_reset bool False Whether to reset all the envs at the same time as opposed to resetting them
the moment an episode ends.
include_images bool False Should be used only for validation / logging, will grab a render of the gym
and assign to the StepType image field. This data should not be used for training.
If images are needed for training, then you should wrap the env instead.
terminate_on_truncation bool True If an environment truncates, terminate it.
dp_augmentation_fns typing.Tuple[fastrl.pipes.core.DataPipeAugmentationFn] None Additional pipelines to insert, replace, remove
Returns None
pd.set_option('display.max_rows', 50)

envs = ['CartPole-v1']*3
# Two-step, two-skip, first-last block limited to n=100; no agent is supplied.
block = GymTransformBlock(None,nsteps=2,nskips=2,firstlast=True,bs=1,n=100)
pipes = block(envs)
# Take the first element of each emitted item and display the first 50 rows.
pd.DataFrame([o[0] for o in pipes])[['state','action','terminated','reward']].iloc[:50]
state action terminated reward
0 [tensor(-0.0067), tensor(-0.0143), tensor(-0.0049), tensor(-0.0393)] tensor(0.) tensor(False) tensor(1.9900)
1 [tensor(-0.0420), tensor(0.0171), tensor(-0.0034), tensor(0.0493)] tensor(0.) tensor(False) tensor(1.9900)
2 [tensor(-0.0061), tensor(0.0313), tensor(-0.0255), tensor(-0.0405)] tensor(0.) tensor(False) tensor(1.9900)
3 [tensor(-0.0070), tensor(-0.2094), tensor(-0.0057), tensor(0.2518)] tensor(1.) tensor(False) tensor(1.9900)
4 [tensor(-0.0417), tensor(-0.1780), tensor(-0.0025), tensor(0.3409)] tensor(0.) tensor(False) tensor(1.9900)
5 [tensor(-0.0055), tensor(-0.1635), tensor(-0.0263), tensor(0.2440)] tensor(0.) tensor(False) tensor(1.9900)
6 [tensor(-0.0114), tensor(0.1810), tensor(-0.0015), tensor(-0.3355)] tensor(1.) tensor(False) tensor(1.9900)
7 [tensor(-0.0527), tensor(-0.5682), tensor(0.0170), tensor(0.9269)] tensor(0.) tensor(False) tensor(1.9900)
8 [tensor(-0.0159), tensor(-0.1628), tensor(-0.0109), tensor(0.2289)] tensor(1.) tensor(False) tensor(1.9900)
9 [tensor(-0.0003), tensor(0.5714), tensor(-0.0208), tensor(-0.9239)] tensor(0.) tensor(False) tensor(1.9900)
10 [tensor(-0.0794), tensor(-0.5689), tensor(0.0601), tensor(0.9435)] tensor(1.) tensor(False) tensor(1.9900)
11 [tensor(-0.0185), tensor(-0.1626), tensor(-0.0076), tensor(0.2235)] tensor(1.) tensor(False) tensor(1.9900)
12 [tensor(0.0187), tensor(0.1820), tensor(-0.0520), tensor(-0.3578)] tensor(1.) tensor(False) tensor(1.9900)
13 [tensor(-0.0982), tensor(-0.1807), tensor(0.0923), tensor(0.4035)] tensor(1.) tensor(False) tensor(1.9900)
14 [tensor(-0.0211), tensor(-0.1624), tensor(-0.0046), tensor(0.2201)] tensor(0.) tensor(False) tensor(1.9900)
15 [tensor(0.0299), tensor(0.5737), tensor(-0.0725), tensor(-0.9771)] tensor(1.) tensor(False) tensor(1.9900)
16 [tensor(-0.1016), tensor(0.2065), tensor(0.1032), tensor(-0.1181)] tensor(0.) tensor(False) tensor(1.9900)
17 [tensor(-0.0315), tensor(-0.1623), tensor(0.0101), tensor(0.2186)] tensor(1.) tensor(False) tensor(1.9900)
18 [tensor(0.0567), tensor(0.9659), tensor(-0.1179), tensor(-1.6117)] tensor(0.) tensor(False) tensor(1.9900)
19 [tensor(-0.0973), tensor(0.2036), tensor(0.1050), tensor(-0.0540)] tensor(1.) tensor(False) tensor(1.9900)
20 [tensor(-0.0341), tensor(-0.1627), tensor(0.0130), tensor(0.2263)] tensor(0.) tensor(False) tensor(1.9900)
21 [tensor(0.0915), tensor(0.5794), tensor(-0.1773), tensor(-1.1158)] tensor(0.) tensor(True) tensor(1.9900)
22 [tensor(0.1031), tensor(0.3870), tensor(-0.1996), tensor(-0.8836)] tensor(1.) tensor(True) tensor(1.)
23 [tensor(-0.0852), tensor(0.5906), tensor(0.0977), tensor(-0.5700)] tensor(0.) tensor(False) tensor(1.9900)
24 [tensor(-0.0445), tensor(-0.1631), tensor(0.0280), tensor(0.2360)] tensor(0.) tensor(False) tensor(1.9900)
25 [tensor(-0.0385), tensor(0.0387), tensor(0.0470), tensor(-0.0146)] tensor(1.) tensor(False) tensor(1.9900)
26 [tensor(-0.0655), tensor(0.5880), tensor(0.0813), tensor(-0.5125)] tensor(0.) tensor(False) tensor(1.9900)
27 [tensor(-0.0549), tensor(-0.1640), tensor(0.0435), tensor(0.2552)] tensor(1.) tensor(False) tensor(1.9900)
28 [tensor(-0.0330), tensor(0.4275), tensor(0.0409), tensor(-0.5697)] tensor(0.) tensor(False) tensor(1.9900)
29 [tensor(-0.0459), tensor(0.5859), tensor(0.0671), tensor(-0.4648)] tensor(0.) tensor(False) tensor(1.9900)
30 [tensor(-0.0576), tensor(-0.1653), tensor(0.0481), tensor(0.2841)] tensor(0.) tensor(False) tensor(1.9900)
31 [tensor(-0.0198), tensor(0.4266), tensor(0.0242), tensor(-0.5476)] tensor(0.) tensor(False) tensor(1.9900)
32 [tensor(-0.0264), tensor(0.1940), tensor(0.0548), tensor(0.1586)] tensor(0.) tensor(False) tensor(1.9900)
33 [tensor(-0.0681), tensor(-0.1667), tensor(0.0656), tensor(0.3163)] tensor(0.) tensor(False) tensor(1.9900)
34 [tensor(-0.0067), tensor(0.4260), tensor(0.0083), tensor(-0.5359)] tensor(1.) tensor(False) tensor(1.9900)
35 [tensor(-0.0226), tensor(0.1924), tensor(0.0673), tensor(0.1942)] tensor(1.) tensor(False) tensor(1.9900)
36 [tensor(-0.0787), tensor(-0.1687), tensor(0.0845), tensor(0.3598)] tensor(0.) tensor(False) tensor(1.9900)
37 [tensor(0.0143), tensor(0.8162), tensor(-0.0189), tensor(-1.1194)] tensor(0.) tensor(False) tensor(1.9900)
38 [tensor(-0.0110), tensor(0.1904), tensor(0.0697), tensor(0.2378)] tensor(0.) tensor(False) tensor(1.9900)
39 [tensor(-0.0894), tensor(-0.1712), tensor(0.1053), tensor(0.4154)] tensor(1.) tensor(False) tensor(1.9900)
40 [tensor(0.0430), tensor(0.8170), tensor(-0.0579), tensor(-1.1380)] tensor(0.) tensor(False) tensor(1.9900)
41 [tensor(-0.0073), tensor(0.1884), tensor(0.0855), tensor(0.2833)] tensor(1.) tensor(False) tensor(1.9900)
42 [tensor(-0.0924), tensor(0.2157), tensor(0.1167), tensor(-0.0971)] tensor(0.) tensor(False) tensor(1.9900)
43 [tensor(0.0718), tensor(0.4287), tensor(-0.0980), tensor(-0.5978)] tensor(0.) tensor(False) tensor(1.9900)
44 [tensor(0.0041), tensor(0.5759), tensor(0.0915), tensor(-0.2439)] tensor(0.) tensor(False) tensor(1.9900)
45 [tensor(-0.0877), tensor(0.2124), tensor(0.1194), tensor(-0.0244)] tensor(0.) tensor(False) tensor(1.9900)
46 [tensor(0.0851), tensor(0.4316), tensor(-0.1167), tensor(-0.6628)] tensor(0.) tensor(False) tensor(1.9900)
47 [tensor(0.0232), tensor(0.5734), tensor(0.0882), tensor(-0.1879)] tensor(0.) tensor(False) tensor(1.9900)
48 [tensor(-0.0831), tensor(-0.1808), tensor(0.1250), tensor(0.6311)] tensor(0.) tensor(False) tensor(1.9900)
49 [tensor(0.0985), tensor(0.4350), tensor(-0.1381), tensor(-0.7397)] tensor(0.) tensor(False) tensor(1.9900)
pd.set_option('display.max_rows', 50)

envs = ['CartPole-v1']*3
# Single-step, single-skip, first-last block limited to n=100; no agent is supplied.
block = GymTransformBlock(None,nsteps=1,nskips=1,firstlast=True,bs=1,n=100)
pipes = block(envs)

# Take the first element of each emitted item and display the first 50 rows.
pd.DataFrame([o[0] for o in pipes])[['state','action','terminated','reward']].iloc[:50]
state action terminated reward
0 [tensor(0.0350), tensor(-0.0038), tensor(0.0399), tensor(-0.0427)] tensor(0.) tensor(False) tensor(1.)
1 [tensor(0.0178), tensor(0.0382), tensor(0.0337), tensor(0.0479)] tensor(0.) tensor(False) tensor(1.)
2 [tensor(0.0076), tensor(-0.0453), tensor(-0.0180), tensor(0.0093)] tensor(0.) tensor(False) tensor(1.)
3 [tensor(0.0350), tensor(-0.1995), tensor(0.0391), tensor(0.2623)] tensor(1.) tensor(False) tensor(1.)
4 [tensor(0.0186), tensor(-0.1574), tensor(0.0346), tensor(0.3510)] tensor(1.) tensor(False) tensor(1.)
5 [tensor(0.0067), tensor(-0.2402), tensor(-0.0178), tensor(0.2963)] tensor(1.) tensor(False) tensor(1.)
6 [tensor(0.0310), tensor(-0.0050), tensor(0.0443), tensor(-0.0178)] tensor(0.) tensor(False) tensor(1.)
7 [tensor(0.0154), tensor(0.0372), tensor(0.0416), tensor(0.0694)] tensor(0.) tensor(False) tensor(1.)
8 [tensor(0.0019), tensor(-0.0448), tensor(-0.0119), tensor(-0.0020)] tensor(1.) tensor(False) tensor(1.)
9 [tensor(0.0309), tensor(-0.2007), tensor(0.0440), tensor(0.2885)] tensor(0.) tensor(False) tensor(1.)
10 [tensor(0.0162), tensor(-0.1585), tensor(0.0430), tensor(0.3749)] tensor(0.) tensor(False) tensor(1.)
11 [tensor(0.0010), tensor(0.1505), tensor(-0.0119), tensor(-0.2984)] tensor(1.) tensor(False) tensor(1.)
12 [tensor(0.0269), tensor(-0.3964), tensor(0.0498), tensor(0.5948)] tensor(1.) tensor(False) tensor(1.)
13 [tensor(0.0130), tensor(-0.3542), tensor(0.0505), tensor(0.6808)] tensor(1.) tensor(False) tensor(1.)
14 [tensor(0.0040), tensor(0.3458), tensor(-0.0179), tensor(-0.5948)] tensor(0.) tensor(False) tensor(1.)
15 [tensor(0.0189), tensor(-0.2020), tensor(0.0616), tensor(0.3182)] tensor(0.) tensor(False) tensor(1.)
16 [tensor(0.0059), tensor(-0.1598), tensor(0.0641), tensor(0.4045)] tensor(1.) tensor(False) tensor(1.)
17 [tensor(0.0109), tensor(0.1509), tensor(-0.0298), tensor(-0.3078)] tensor(0.) tensor(False) tensor(1.)
18 [tensor(0.0149), tensor(-0.3980), tensor(0.0680), tensor(0.6296)] tensor(0.) tensor(False) tensor(1.)
19 [tensor(0.0027), tensor(0.0343), tensor(0.0722), tensor(0.1327)] tensor(1.) tensor(False) tensor(1.)
20 [tensor(0.0139), tensor(-0.0438), tensor(-0.0360), tensor(-0.0247)] tensor(0.) tensor(False) tensor(1.)
21 [tensor(0.0069), tensor(-0.5940), tensor(0.0806), tensor(0.9429)] tensor(0.) tensor(False) tensor(1.)
22 [tensor(0.0034), tensor(0.2283), tensor(0.0749), tensor(-0.1364)] tensor(0.) tensor(False) tensor(1.)
23 [tensor(0.0131), tensor(-0.2384), tensor(-0.0365), tensor(0.2564)] tensor(1.) tensor(False) tensor(1.)
24 [tensor(-0.0049), tensor(-0.7901), tensor(0.0995), tensor(1.2598)] tensor(1.) tensor(False) tensor(1.)
25 [tensor(0.0080), tensor(0.0322), tensor(0.0721), tensor(0.1790)] tensor(0.) tensor(False) tensor(1.)
26 [tensor(0.0083), tensor(-0.0428), tensor(-0.0313), tensor(-0.0475)] tensor(0.) tensor(False) tensor(1.)
27 [tensor(-0.0207), tensor(-0.5964), tensor(0.1247), tensor(0.9999)] tensor(0.) tensor(False) tensor(1.)
28 [tensor(0.0086), tensor(-0.1638), tensor(0.0757), tensor(0.4935)] tensor(1.) tensor(False) tensor(1.)
29 [tensor(0.0074), tensor(-0.2374), tensor(-0.0323), tensor(0.2351)] tensor(0.) tensor(False) tensor(1.)
30 [tensor(-0.0327), tensor(-0.7929), tensor(0.1447), tensor(1.3290)] tensor(1.) tensor(False) tensor(1.)
31 [tensor(0.0053), tensor(0.0301), tensor(0.0856), tensor(0.2256)] tensor(1.) tensor(False) tensor(1.)
32 [tensor(0.0027), tensor(-0.4321), tensor(-0.0276), tensor(0.5174)] tensor(1.) tensor(False) tensor(1.)
33 [tensor(-0.0485), tensor(-0.5999), tensor(0.1712), tensor(1.0848)] tensor(0.) tensor(False) tensor(1.)
34 [tensor(0.0059), tensor(0.2239), tensor(0.0901), tensor(-0.0389)] tensor(0.) tensor(False) tensor(1.)
35 [tensor(-0.0060), tensor(-0.2366), tensor(-0.0172), tensor(0.2162)] tensor(0.) tensor(False) tensor(1.)
36 [tensor(-0.0605), tensor(-0.7968), tensor(0.1929), tensor(1.4260)] tensor(1.) tensor(True) tensor(1.)
37 [tensor(0.0104), tensor(0.0276), tensor(0.0893), tensor(0.2808)] tensor(0.) tensor(False) tensor(1.)
38 [tensor(-0.0107), tensor(-0.4314), tensor(-0.0129), tensor(0.5034)] tensor(0.) tensor(False) tensor(1.)
39 [tensor(-0.0243), tensor(-0.0176), tensor(0.0349), tensor(-0.0015)] tensor(1.) tensor(False) tensor(1.)
40 [tensor(0.0110), tensor(-0.1686), tensor(0.0950), tensor(0.6003)] tensor(1.) tensor(False) tensor(1.)
41 [tensor(-0.0193), tensor(-0.6264), tensor(-0.0028), tensor(0.7920)] tensor(0.) tensor(False) tensor(1.)
42 [tensor(-0.0246), tensor(0.1771), tensor(0.0349), tensor(-0.2830)] tensor(0.) tensor(False) tensor(1.)
43 [tensor(0.0076), tensor(0.0250), tensor(0.1070), tensor(0.3390)] tensor(1.) tensor(False) tensor(1.)
44 [tensor(-0.0318), tensor(-0.8215), tensor(0.0130), tensor(1.0838)] tensor(0.) tensor(False) tensor(1.)
45 [tensor(-0.0211), tensor(-0.0185), tensor(0.0292), tensor(0.0205)] tensor(0.) tensor(False) tensor(1.)
46 [tensor(0.0081), tensor(0.2185), tensor(0.1137), tensor(0.0818)] tensor(1.) tensor(False) tensor(1.)
47 [tensor(-0.0483), tensor(-1.0168), tensor(0.0347), tensor(1.3805)] tensor(1.) tensor(False) tensor(1.)
48 [tensor(-0.0215), tensor(-0.2141), tensor(0.0296), tensor(0.3223)] tensor(0.) tensor(False) tensor(1.)
49 [tensor(0.0125), tensor(0.4118), tensor(0.1154), tensor(-0.1729)] tensor(1.) tensor(False) tensor(1.)
# Three copies of the same CartPole environment id feed the block.
envs = ['CartPole-v1'] * 3
block = GymTransformBlock(
    None,
    nsteps=2,
    nskips=1,
    firstlast=True,
    bs=1,
    n=100,
)
pipes = block(envs)
# Each item from `pipes` is indexed with [0] to get a single step record.
# NOTE(review): presumably that is because bs=1 wraps every record in a
# one-element batch, and the 1.99 rewards shown in the table below come from
# firstlast merging two steps -- confirm against GymTransformBlock.
first_records = [batch[0] for batch in pipes]
pd.DataFrame(first_records)[['state','action','terminated','reward']].head(50)
state action terminated reward
0 [tensor(-0.0313), tensor(0.0204), tensor(-0.0340), tensor(-0.0372)] tensor(1.) tensor(False) tensor(1.9900)
1 [tensor(-0.0237), tensor(-0.0383), tensor(-0.0466), tensor(0.0060)] tensor(1.) tensor(False) tensor(1.9900)
2 [tensor(-0.0127), tensor(-0.0256), tensor(-0.0099), tensor(-0.0028)] tensor(0.) tensor(False) tensor(1.9900)
3 [tensor(-0.0309), tensor(0.2160), tensor(-0.0347), tensor(-0.3404)] tensor(0.) tensor(False) tensor(1.9900)
4 [tensor(-0.0245), tensor(0.1574), tensor(-0.0465), tensor(-0.3010)] tensor(0.) tensor(False) tensor(1.9900)
5 [tensor(-0.0132), tensor(-0.2206), tensor(-0.0100), tensor(0.2867)] tensor(0.) tensor(False) tensor(1.9900)
6 [tensor(-0.0266), tensor(0.0214), tensor(-0.0415), tensor(-0.0589)] tensor(0.) tensor(False) tensor(1.9900)
7 [tensor(-0.0213), tensor(-0.0370), tensor(-0.0525), tensor(-0.0234)] tensor(0.) tensor(False) tensor(1.9900)
8 [tensor(-0.0176), tensor(-0.4156), tensor(-0.0043), tensor(0.5762)] tensor(0.) tensor(False) tensor(1.9900)
9 [tensor(-0.0262), tensor(-0.1731), tensor(-0.0427), tensor(0.2204)] tensor(0.) tensor(False) tensor(1.9900)
10 [tensor(-0.0221), tensor(-0.2314), tensor(-0.0530), tensor(0.2523)] tensor(1.) tensor(False) tensor(1.9900)
11 [tensor(-0.0260), tensor(-0.6106), tensor(0.0073), tensor(0.8676)] tensor(0.) tensor(False) tensor(1.9900)
12 [tensor(-0.0296), tensor(-0.3676), tensor(-0.0383), tensor(0.4993)] tensor(0.) tensor(False) tensor(1.9900)
13 [tensor(-0.0267), tensor(-0.0355), tensor(-0.0479), tensor(-0.0566)] tensor(0.) tensor(False) tensor(1.9900)
14 [tensor(-0.0382), tensor(-0.8058), tensor(0.0246), tensor(1.1625)] tensor(1.) tensor(False) tensor(1.9900)
15 [tensor(-0.0370), tensor(-0.5622), tensor(-0.0283), tensor(0.7797)] tensor(0.) tensor(False) tensor(1.9900)
16 [tensor(-0.0274), tensor(-0.2299), tensor(-0.0491), tensor(0.2205)] tensor(0.) tensor(False) tensor(1.9900)
17 [tensor(-0.0543), tensor(-0.6111), tensor(0.0479), tensor(0.8777)] tensor(0.) tensor(False) tensor(1.9900)
18 [tensor(-0.0482), tensor(-0.7569), tensor(-0.0127), tensor(1.0633)] tensor(1.) tensor(False) tensor(1.9900)
19 [tensor(-0.0320), tensor(-0.4243), tensor(-0.0447), tensor(0.4973)] tensor(1.) tensor(False) tensor(1.9900)
20 [tensor(-0.0665), tensor(-0.8068), tensor(0.0654), tensor(1.1850)] tensor(0.) tensor(False) tensor(1.9900)
21 [tensor(-0.0634), tensor(-0.5616), tensor(0.0086), tensor(0.7667)] tensor(1.) tensor(False) tensor(1.9900)
22 [tensor(-0.0405), tensor(-0.2286), tensor(-0.0347), tensor(0.1909)] tensor(1.) tensor(False) tensor(1.9900)
23 [tensor(-0.0826), tensor(-1.0027), tensor(0.0891), tensor(1.4975)] tensor(1.) tensor(False) tensor(1.9900)
24 [tensor(-0.0746), tensor(-0.3666), tensor(0.0239), tensor(0.4767)] tensor(1.) tensor(False) tensor(1.9900)
25 [tensor(-0.0451), tensor(-0.0330), tensor(-0.0309), tensor(-0.1125)] tensor(1.) tensor(False) tensor(1.9900)
26 [tensor(-0.1027), tensor(-0.8088), tensor(0.1191), tensor(1.2339)] tensor(0.) tensor(False) tensor(1.9900)
27 [tensor(-0.0819), tensor(-0.1718), tensor(0.0334), tensor(0.1917)] tensor(1.) tensor(False) tensor(1.9900)
28 [tensor(-0.0457), tensor(0.1626), tensor(-0.0332), tensor(-0.4148)] tensor(0.) tensor(False) tensor(1.9900)
29 [tensor(-0.1189), tensor(-1.0052), tensor(0.1438), tensor(1.5614)] tensor(0.) tensor(True) tensor(1.9900)
30 [tensor(-0.1390), tensor(-1.2017), tensor(0.1750), tensor(1.8952)] tensor(1.) tensor(True) tensor(1.)
31 [tensor(-0.0854), tensor(0.0228), tensor(0.0373), tensor(-0.0903)] tensor(1.) tensor(False) tensor(1.9900)
32 [tensor(-0.0425), tensor(-0.0321), tensor(-0.0415), tensor(-0.1327)] tensor(0.) tensor(False) tensor(1.9900)
33 [tensor(-0.0849), tensor(0.2174), tensor(0.0355), tensor(-0.3710)] tensor(0.) tensor(False) tensor(1.9900)
34 [tensor(-0.0431), tensor(-0.2266), tensor(-0.0441), tensor(0.1466)] tensor(0.) tensor(False) tensor(1.9900)
35 [tensor(0.0108), tensor(0.0430), tensor(-0.0169), tensor(-0.0207)] tensor(1.) tensor(False) tensor(1.9900)
36 [tensor(-0.0806), tensor(0.0218), tensor(0.0280), tensor(-0.0674)] tensor(1.) tensor(False) tensor(1.9900)
37 [tensor(-0.0477), tensor(-0.4210), tensor(-0.0412), tensor(0.4250)] tensor(1.) tensor(False) tensor(1.9900)
38 [tensor(0.0116), tensor(0.2383), tensor(-0.0174), tensor(-0.3187)] tensor(0.) tensor(False) tensor(1.9900)
39 [tensor(-0.0801), tensor(0.2165), tensor(0.0267), tensor(-0.3511)] tensor(0.) tensor(False) tensor(1.9900)
40 [tensor(-0.0561), tensor(-0.2254), tensor(-0.0327), tensor(0.1197)] tensor(0.) tensor(False) tensor(1.9900)
41 [tensor(0.0164), tensor(0.0435), tensor(-0.0237), tensor(-0.0315)] tensor(1.) tensor(False) tensor(1.9900)
42 [tensor(-0.0758), tensor(0.0210), tensor(0.0197), tensor(-0.0501)] tensor(0.) tensor(False) tensor(1.9900)
43 [tensor(-0.0606), tensor(-0.4200), tensor(-0.0303), tensor(0.4019)] tensor(1.) tensor(False) tensor(1.9900)
44 [tensor(0.0173), tensor(0.2389), tensor(-0.0244), tensor(-0.3316)] tensor(0.) tensor(False) tensor(1.9900)
45 [tensor(-0.0754), tensor(-0.1744), tensor(0.0187), tensor(0.2487)] tensor(0.) tensor(False) tensor(1.9900)
46 [tensor(-0.0690), tensor(-0.2245), tensor(-0.0222), tensor(0.0998)] tensor(0.) tensor(False) tensor(1.9900)
47 [tensor(0.0220), tensor(0.0442), tensor(-0.0310), tensor(-0.0467)] tensor(0.) tensor(False) tensor(1.9900)
48 [tensor(-0.0789), tensor(-0.3698), tensor(0.0236), tensor(0.5472)] tensor(1.) tensor(False) tensor(1.9900)
49 [tensor(-0.0735), tensor(-0.4193), tensor(-0.0202), tensor(0.3854)] tensor(1.) tensor(False) tensor(1.9900)

Multi Processing

import torch
import torchdata.datapipes as dp
from torch.utils.data.dataloader_experimental import DataLoader2
       
class PointlessLoop(dp.iter.IterDataPipe):
    """Endless dummy datapipe used to exercise a data loader.

    Every step yields a freshly allocated 4-element ``torch.LongTensor``.
    The tensor is never filled in, so its values are whatever happened to be
    in memory. Downstream code must bound the iteration itself, since
    ``__iter__`` never stops.
    """

    def __init__(self, datapipe=None):
        # Stored only; nothing in this class reads from it.
        self.datapipe = datapipe

    def __iter__(self):
        while True:
            # Allocate without initialising, then hand out an independent copy.
            scratch = torch.LongTensor(4)
            yield scratch.detach().clone()
            

if __name__=='__main__':
    # Script entry point: iterate a small, bounded PointlessLoop pipe through
    # DataLoader2 with one worker process and print every batch.
    from torch.multiprocessing import set_start_method

    # Ask for the 'spawn' start method. set_start_method raises RuntimeError
    # when a start method has already been fixed for this process; in that
    # case the existing setting is deliberately kept (best effort).
    try:
        set_start_method('spawn')
    except RuntimeError:
        pass

    pipe = PointlessLoop()
    # PointlessLoop never terminates on its own, so cap it at 10 items.
    pipe = pipe.header(limit=10)
    dls = [DataLoader2(pipe,num_workers=1)]
    # Report the concrete loader type that was built, then drain the loader.
    print('type: ',type(dls[0]))
    for o in dls[0]:
        print(o)
Overwriting ../external_run_scripts/spawn_multiproc.py
pass
type:  <class 'torch.utils.data.dataloader.DataLoader'>
tensor([[139884568362288,  94350800652960, 139882444517456,   5896925594232]])
tensor([[     139884568362656,       94350809486752, -2738188573198908631,
          6879089491455928577]])
tensor([[139882428343472, 139882428343504, 139882428343536, 139882428343568]])
tensor([[139884568362144, 139884568362144,               0,               0]])
tensor([[139884568362144, 139884568362144,               0,               0]])
tensor([[    139884568362144,     139884568362144, 7575175955194277983,
          737013014016696435]])
tensor([[    139884568362304,     139884568362304, 8297992743086744941,
         7022364571614997605]])
tensor([[8247626271654158368, 7954879166212546670, 7957695015157983604,
         2318341632102134867]])
tensor([[             0,              0, 94350811801232, 94350790702720]])
tensor([[1, 4, 0, 0]])