Source code for world_models.reward.dreamer_v1_value

import torch
import torch.nn as nn
import torch.nn.functional as F


class ValueModel(nn.Module):
    """Estimate scalar value from Dreamer latent belief and state vectors.

    This MLP is trained on imagined returns and used for actor/value updates.

    The network is four linear layers (three hidden layers of width
    ``hidden_size`` followed by a single-unit output layer) with the chosen
    activation applied after each hidden layer.

    Args:
        belief_size: Size of the last dimension of the ``belief`` input.
        state_size: Size of the last dimension of the ``state`` input.
        hidden_size: Width of each hidden layer.
        activation_function: Name of a function in ``torch.nn.functional``
            (e.g. ``"relu"``, ``"elu"``) used as the hidden activation.

    Raises:
        AttributeError: If ``activation_function`` does not name an attribute
            of ``torch.nn.functional``.
    """

    def __init__(
        self,
        belief_size: int,
        state_size: int,
        hidden_size: int,
        activation_function: str = "relu",
    ) -> None:
        super().__init__()
        # Resolve the activation by name once, at construction time.
        self.act_fn = getattr(F, activation_function)
        self.fc1 = nn.Linear(belief_size + state_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, 1)

    def forward(self, belief: torch.Tensor, state: torch.Tensor) -> torch.Tensor:
        """Compute the value estimate for each belief/state pair.

        Args:
            belief: Tensor of shape ``(..., belief_size)``.
            state: Tensor of shape ``(..., state_size)`` whose leading
                dimensions match those of ``belief``.

        Returns:
            Tensor of shape ``(...)``: one scalar per leading index, with the
            trailing singleton feature dimension removed.
        """
        # Concatenate on the feature (last) axis rather than a hard-coded
        # axis 1, so any number of leading batch/time dimensions is accepted.
        # For 2-D inputs this is identical to concatenating on dim=1.
        x = torch.cat([belief, state], dim=-1)
        hidden = self.act_fn(self.fc1(x))
        hidden = self.act_fn(self.fc2(hidden))
        hidden = self.act_fn(self.fc3(hidden))
        # fc4 yields a trailing dimension of size 1; drop exactly that axis.
        value = self.fc4(hidden).squeeze(dim=-1)
        return value