# Original author Abhinav Sagar in https://github.com/abhinavsagar/Reinforcement-Learning-Tutorial
# Since then deleted
# Copyright (c) 2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of L2RPN Baselines, L2RPN Baselines a repository to host baselines for l2rpn competitions.
from collections import deque
import numpy as np
import random
import copy
import pdb
class ReplayBuffer:
    """Fixed-capacity FIFO store of past transitions with uniform sampling.

    Each stored experience is a tuple ``(s, a, r, d, s2)``: current state,
    action, reward, end-of-episode flag and next state. Once ``buffer_size``
    experiences are held, adding a new one evicts the oldest.

    Attributes
    ----------
    buffer_size:
        Maximum number of experiences kept. Expected to be positive: with a
        non-positive value ``add`` attempts to evict from an empty deque.
    count:
        Number of experiences currently stored.
    buffer:
        The underlying ``collections.deque`` (oldest experience on the left).
    """

    def __init__(self, buffer_size):
        self.buffer_size = buffer_size
        self.count = 0
        self.buffer = deque()

    def add(self, s, a, r, d, s2):
        """Add an experience to the buffer.

        Parameters
        ----------
        s: current state
        a: action
        r: reward
        d: whether the transition ends the episode
        s2: next state

        Raises
        ------
        RuntimeError
            If ``s``, ``s2`` or ``r`` contains a non-finite value (NaN or
            +/- infinity). The action and the done flag are not checked.
        """
        if np.any(~np.isfinite(s)) or np.any(~np.isfinite(s2)) or np.any(~np.isfinite(r)):
            # TODO proper handling of infinite values somewhere !!!!
            raise RuntimeError("Infinite value somwhere in at least one of the state")

        # Deep-copy so that later in-place changes made by the caller to the
        # objects it passed in cannot alter what is stored here.
        experience = copy.deepcopy((s, a, r, d, s2))

        if self.count < self.buffer_size:
            self.count += 1
        else:
            # Buffer is full: drop the oldest experience to make room.
            self.buffer.popleft()
        self.buffer.append(experience)

    def size(self):
        """Return the number of experiences currently stored."""
        return self.count

    def sample(self, batch_size):
        """Draw a random batch of experiences, without replacement.

        ``batch_size`` experiences are drawn if the buffer holds at least that
        many; otherwise every stored experience is returned (in random order).

        Returns
        -------
        tuple
            ``(s_batch, a_batch, r_batch, d_batch, s2_batch)``, each built by
            calling ``np.array`` on the corresponding field of the sampled
            experiences.

        Raises
        ------
        ValueError
            If nothing can be sampled (empty buffer or ``batch_size == 0``),
            or if ``batch_size`` is negative (raised by ``random.sample``).
        """
        n_samples = min(batch_size, self.count)
        batch = random.sample(self.buffer, n_samples)
        if not batch:
            # zip(*[]) yields nothing, so the five-way unpacking below would
            # fail with a confusing "not enough values to unpack" error.
            raise ValueError(
                "Cannot sample from the replay buffer: no experience available "
                "(buffer size: {}, requested batch size: {})".format(self.count, batch_size)
            )

        # Transpose the list of experiences into one array per field.
        s_batch, a_batch, r_batch, d_batch, s2_batch = map(np.array, zip(*batch))
        return s_batch, a_batch, r_batch, d_batch, s2_batch

    def clear(self):
        """Remove every stored experience and reset the counter."""
        self.buffer.clear()
        self.count = 0