Hyperparameters

HyperParameter classes

source

HyperParamDPG

 HyperParamDPG (BatchSize:int=4, NStates:int=90, NActions:int=68,
                ActionBias:float=0.0, NLayerActor:int=2,
                NLayerCritic:int=2, Gamma:float=0.99,
                TauActor:float=0.005, TauCritic:float=0.005,
                ActorLR:float=0.001, CriticLR:float=0.002,
                CkptInterval:int=5)

*Generic Hyperparameters for the RL agent

Attributes:

- BatchSize: int = 4  # batch size for training
- NStates: int = default_truck.observation_numel  # number of states in the state space
- NActions: int = default_truck.torque_flash_numel  # number of actions in the action space
- ActionBias: float = 0.0  # bias for action output
- NLayerActor: int = 2  # number of layers for the actor network
- NLayerCritic: int = 2  # number of layers for the critic network
- Gamma: float = 0.99  # Gamma value for RL discount
- TauActor: float = 0.005  # Tau value for Polyak averaging for the actor network
- TauCritic: float = 0.005  # Tau value for Polyak averaging for the critic network
- ActorLR: float = 0.001  # learning rate for the actor network
- CriticLR: float = 0.002  # learning rate for the critic network
- CkptInterval: int = 5  # checkpoint interval*
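
Each agent-specific class below extends these base fields, so a run is configured by constructing the class with keyword overrides, as the signature above suggests. A minimal usage sketch (the import path is a placeholder, not the actual module name):

```python
# Hypothetical import path -- adjust to the module that actually defines the class.
from hyperparams import HyperParamDPG

# Start from the documented defaults and override a few fields for an experiment.
hp = HyperParamDPG(BatchSize=16, Gamma=0.95, ActorLR=5e-4)

print(hp.NStates, hp.NActions)    # 90 and 68 unless the truck spec changes them
print(hp.TauActor, hp.TauCritic)  # Polyak coefficients for the target networks
```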

source

HyperParamDDPG

 HyperParamDDPG (BatchSize:int=4, NStates:int=90, NActions:int=68,
                 ActionBias:float=0.0, NLayerActor:int=2,
                 NLayerCritic:int=2, Gamma:float=0.99,
                 TauActor:float=0.005, TauCritic:float=0.005,
                 ActorLR:float=0.001, CriticLR:float=0.002,
                 CkptInterval:int=5,
                 CriticStateInputDenseDimension1:int=16,
                 CriticStateInputDenseDimension2:int=32,
                 CriticActionInputDenseDimension:int=32,
                 CriticOutputDenseDimension1:int=256,
                 CriticOutputDenseDimension2:int=256,
                 ActorInputDenseDimension1:int=256,
                 ActorInputDenseDimension2:int=256)

*Hyperparameters for the DDPG agent

Attributes:

- CriticStateInputDenseDimension1: int = 16  # output dimension for the first state input Dense layer
- CriticStateInputDenseDimension2: int = 32  # output dimension for the second state input Dense layer
- CriticActionInputDenseDimension: int = 32  # output dimension for the action input Dense layer
- CriticOutputDenseDimension1: int = 256  # output dimension for the first critic output Dense layer
- CriticOutputDenseDimension2: int = 256  # output dimension for the second critic output Dense layer
- ActorInputDenseDimension1: int = 256  # output dimension for the first actor input Dense layer
- ActorInputDenseDimension2: int = 256  # output dimension for the second actor input Dense layer*
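
The Critic* dimensions describe a two-branch critic: the state passes through two Dense layers, the action through one, and the concatenated features go through two output Dense layers before the Q-value head. A Keras-style sketch of how those dimensions could be wired (illustrative only; the framework, activations, and wiring order are assumptions, not part of this module):

```python
from tensorflow import keras

def build_critic(hp):
    """Illustrative critic wired from the HyperParamDDPG dimensions."""
    state_in = keras.Input(shape=(hp.NStates,), name="state")
    s = keras.layers.Dense(hp.CriticStateInputDenseDimension1, activation="relu")(state_in)
    s = keras.layers.Dense(hp.CriticStateInputDenseDimension2, activation="relu")(s)

    action_in = keras.Input(shape=(hp.NActions,), name="action")
    a = keras.layers.Dense(hp.CriticActionInputDenseDimension, activation="relu")(action_in)

    x = keras.layers.Concatenate()([s, a])
    x = keras.layers.Dense(hp.CriticOutputDenseDimension1, activation="relu")(x)
    x = keras.layers.Dense(hp.CriticOutputDenseDimension2, activation="relu")(x)
    q = keras.layers.Dense(1)(x)  # scalar Q(s, a)
    return keras.Model(inputs=[state_in, action_in], outputs=q)
```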

source

HyperParamRDPG

 HyperParamRDPG (BatchSize:int=4, NStates:int=90, NActions:int=68,
                 ActionBias:float=0.0, NLayerActor:int=2,
                 NLayerCritic:int=2, Gamma:float=0.99,
                 TauActor:float=0.005, TauCritic:float=0.005,
                 ActorLR:float=0.001, CriticLR:float=0.002,
                 CkptInterval:int=5, HiddenDimension:int=256,
                 PaddingValue:float=-10000, tbptt_k1:int=200,
                 tbptt_k2:int=200)

*Hyperparameters for the RDPG agent

Attributes:

- HiddenDimension: int = 256  # number of hidden units for the action input layer
- PaddingValue: float = -10000  # padding value for the input; an impossible value for observation, action, or reward
- tbptt_k1: int = 200  # truncated backpropagation through time: forward steps
- tbptt_k2: int = 200  # truncated backpropagation through time: backward steps*
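
Because episodes have different lengths, PaddingValue marks time steps that carry no data, and tbptt_k1/tbptt_k2 bound how far gradients flow along the sequence. A small self-contained sketch of the padding-and-chunking idea (the helper name and the NumPy-only framing are illustrative assumptions):

```python
import numpy as np

def pad_and_chunk(episodes, padding_value=-10000.0, k1=200):
    """Hypothetical helper: pad variable-length episodes, then split for TBPTT."""
    max_len = max(len(ep) for ep in episodes)
    padded = np.full((len(episodes), max_len), padding_value, dtype=np.float32)
    for i, ep in enumerate(episodes):
        padded[i, : len(ep)] = ep
    # Cut the time axis into forward segments of length k1 (tbptt_k1); the last
    # chunk may be shorter. Padded positions keep the sentinel and must be masked.
    return [padded[:, t : t + k1] for t in range(0, max_len, k1)]

chunks = pad_and_chunk([[0.1, 0.2, 0.3], [0.5]], padding_value=-10000.0, k1=2)
print(chunks[0].shape)  # (2, 2); the second episode is padded with -10000.0
```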

source

HyperParamIDQL

 HyperParamIDQL (BatchSize:int=4, NStates:int=90, NActions:int=68,
                 ActionBias:float=0.0, NLayerActor:int=2,
                 NLayerCritic:int=2, Gamma:float=0.99,
                 TauActor:float=0.005, TauCritic:float=0.005,
                 ActorLR:float=0.001, CriticLR:float=0.002,
                 CkptInterval:int=5, HiddenDimension:int=256,
                 PaddingValue:float=-10000, tbptt_k1:int=200,
                 tbptt_k2:int=200)

*Hyperparameters for the IDQL agent

Attributes:

- HiddenDimension: int = 256  # number of hidden units for the action input layer
- PaddingValue: float = -10000  # padding value for the input; an impossible value for observation, action, or reward
- tbptt_k1: int = 200  # truncated backpropagation through time: forward steps
- tbptt_k2: int = 200  # truncated backpropagation through time: backward steps*
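
All of these classes share the base fields, so Gamma, TauActor/TauCritic, and CkptInterval are used the same way regardless of agent. A short numeric sketch of what those values mean (a generic illustration, not this package's actual training loop):

```python
import numpy as np

gamma, tau, ckpt_interval = 0.99, 0.005, 5  # Gamma, TauActor/TauCritic, CkptInterval defaults

# Gamma: discounted return of a short reward sequence, r0 + gamma*r1 + gamma^2*r2.
rewards = [1.0, 1.0, 1.0]
discounted = sum(gamma**t * r for t, r in enumerate(rewards))  # 1 + 0.99 + 0.9801 = 2.9701

# TauActor / TauCritic: Polyak averaging pulls target weights slowly toward online weights.
online = np.array([1.0, 2.0])
target = np.array([0.0, 0.0])
target = tau * online + (1.0 - tau) * target  # [0.005, 0.01] after one update

# CkptInterval: save a checkpoint every ckpt_interval episodes.
for episode in range(1, 11):
    if episode % ckpt_interval == 0:
        pass  # checkpointing would happen here (episodes 5 and 10)
```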