Source code for l2rpn_baselines.utils.replayBuffer

# Original author Abhinav Sagar in https://github.com/abhinavsagar/Reinforcement-Learning-Tutorial
# Since then deleted

# Copyright (c) 2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of L2RPN Baselines, a repository to host baselines for l2rpn competitions.

from collections import deque
import numpy as np
import random
import copy
import pdb


class ReplayBuffer:
    """Bounded first-in-first-out store of experiences with random sampling.

    Experiences are ``(s, a, r, d, s2)`` tuples kept in a ``deque``. Once
    ``buffer_size`` experiences are stored, each new one evicts the oldest.

    Attributes are plain and public:

    - ``buffer_size``: maximum number of experiences retained.
    - ``count``: number of experiences currently stored.
    - ``buffer``: the underlying ``collections.deque``.
    """

    def __init__(self, buffer_size):
        """Create an empty buffer holding at most ``buffer_size`` experiences."""
        self.buffer_size = buffer_size
        self.count = 0
        self.buffer = deque()

    def add(self, s, a, r, d, s2):
        """Add an experience to the buffer.

        Parameters
        ----------
        s:
            Current state.
        a:
            Action.
        r:
            Reward.
        d:
            Whether it is the end.
        s2:
            Next state.

        Raises
        ------
        RuntimeError
            If ``s``, ``s2`` or ``r`` contains a value for which
            ``np.isfinite`` is false (infinity or NaN).
        """
        # TODO proper handling of infinite values somewhere !!!!
        if any(np.any(~np.isfinite(value)) for value in (s, s2, r)):
            raise RuntimeError("Infinite value somwhere in at least one of the state")

        # A zero (or negative) capacity buffer cannot hold anything; without
        # this guard the eviction below would pop from an empty deque.
        if self.buffer_size <= 0:
            return

        # Deep-copy so later in-place changes made by the caller to the
        # objects it passed in do not alter what is stored.
        experience = copy.deepcopy((s, a, r, d, s2))

        if self.count >= self.buffer_size:
            # Full: evict the oldest experience, the count stays the same.
            self.buffer.popleft()
        else:
            self.count += 1
        self.buffer.append(experience)

    def size(self):
        """Return the number of experiences currently stored."""
        return self.count

    def sample(self, batch_size):
        """Draw a random batch of experiences without replacement.

        ``batch_size`` elements are drawn if the buffer contains at least that
        many; otherwise all stored elements are returned (in random order).

        Returns
        -------
        tuple
            ``(s_batch, a_batch, r_batch, d_batch, s2_batch)``, each built
            with ``np.array`` from the matching field of the drawn
            experiences. When nothing can be drawn (empty buffer or
            ``batch_size == 0``) five empty arrays are returned.
        """
        n_drawn = min(batch_size, self.count)
        if n_drawn == 0:
            # zip(*[]) yields nothing, so the 5-way unpacking below would
            # fail; hand back empty batches instead.
            return tuple(np.array([]) for _ in range(5))

        batch = random.sample(self.buffer, n_drawn)

        # Transpose the list of experiences into one array per field.
        s_batch, a_batch, r_batch, d_batch, s2_batch = map(np.array, zip(*batch))
        return s_batch, a_batch, r_batch, d_batch, s2_batch

    def clear(self):
        """Remove every stored experience and reset the count to zero."""
        self.buffer.clear()
        self.count = 0