From 39584ab60ac3e7d5567273a973bba29823c917fc Mon Sep 17 00:00:00 2001 From: albert bou Date: Wed, 22 Nov 2023 15:37:27 +0100 Subject: [PATCH] fixes --- torchrl/objectives/value/advantages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchrl/objectives/value/advantages.py b/torchrl/objectives/value/advantages.py index 8e641dfb8c1..8f4c9c09ae8 100644 --- a/torchrl/objectives/value/advantages.py +++ b/torchrl/objectives/value/advantages.py @@ -821,7 +821,7 @@ def value_estimate( if self.average_rewards: reward = reward - reward.mean() - reward = reward / reward.std().clamp_min(1e-4) + reward = reward / reward.std().clamp_min(1e-5) tensordict.set( ("next", self.tensor_keys.reward), reward ) # we must update the rewards if they are used later in the code