# Copyright (c) 2020-2022 RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of L2RPN Baselines, L2RPN Baselines a repository to host baselines for l2rpn competitions.
from abc import abstractmethod
import copy
from typing import List, Optional
from grid2op.Agent import BaseAgent
from grid2op.Observation import BaseObservation
from grid2op.Action import BaseAction
from l2rpn_baselines.utils.gymenv_custom import GymEnvWithHeuristics
class GymAgent(BaseAgent):
    """
    This class maps a neural network (trained using ray / rllib or stable baselines for example).

    It can then be used as a "regular" grid2op agent, in a runner, grid2viz, grid2game etc.

    It is also compatible with the "l2rpn baselines" interface.

    Use it only with a trained agent. It does not provide the "save" method and
    is not suitable for training.

    .. note::
        To load a previously saved agent the function `GymAgent.load` will be called
        and you must provide the `nn_path` keyword argument.

        To build a new agent, the function `GymAgent.build` is called and
        you must provide the `nn_kwargs` keyword argument.

    Examples
    ---------
    Some examples of such agents are provided in the classes:

    - :class:`l2rpn_baselines.PPO_SB3.PPO_SB3` that implements such an agent with the "stable baselines3" RL framework
    - :class:`l2rpn_baselines.PPO_RLLIB.PPO_RLLIB` that implements such an agent with the "ray / rllib" RL framework

    Both can benefit from the features of this class, most notably the possibility to include "heuristics" (such as:
    "if a powerline can be reconnected, do it" or "do not act if the grid is not in danger").

    Notes
    -----
    The main goal of this class is to be able to use "heuristics" (both for training and at inference time) quite
    simply and with out of the box support of external libraries.

    All top performers in all l2rpn competitions (as of writing) used some kind of heuristics in their agent (such as:
    "if a powerline can be reconnected, do it" or "do not act if the grid is not in danger"). This is why we made some
    effort to develop a generic class that allows to train agents directly using these "heuristics".

    This feature is split in two parts:

    - At training time, the "*heuristics*" are part of the environment. The agent will see only observations that are
      relevant to it (and not the state handled by the heuristics).
    - At inference time, the "*heuristics*" of the environment used to train the agent are included in the "agent.act"
      function. If a heuristic has been used at training time, the agent will first "ask" the environment if a
      heuristic should be performed on the grid (in this case it will do it), otherwise it will ask the underlying
      neural network what to do.

    Some examples are provided in the "examples" code (under the "examples/ppo_stable_baselines") repository that
    demonstrates the use of :class:`l2rpn_baselines.utils.GymEnvWithRecoWithDN`.
    """

    def __init__(self,
                 g2op_action_space,
                 gym_act_space,
                 gym_obs_space,
                 *,  # to prevent positional arguments
                 nn_path=None,
                 nn_kwargs=None,
                 gymenv=None,
                 _check_both_set=True,
                 _check_none_set=True):
        """
        Build the agent.

        Exactly one of `nn_path` (load a trained model from disk) or `nn_kwargs`
        (build a fresh model from these parameters) must be provided, unless the
        private `_check_*` flags are disabled by a subclass.

        Parameters
        ----------
        g2op_action_space:
            The grid2op action space (forwarded to :class:`grid2op.Agent.BaseAgent`).
        gym_act_space:
            The gym action space used to convert NN outputs back to grid2op actions.
        gym_obs_space:
            The gym observation space used to convert grid2op observations for the NN.
        nn_path: ``str``, optional
            Path from which to load the neural network (triggers :func:`GymAgent.load`).
        nn_kwargs: ``dict``, optional
            Parameters used to build the neural network (triggers :func:`GymAgent.build`).
            Deep-copied so later mutations by the caller cannot affect this agent.
        gymenv: :class:`GymEnvWithHeuristics`, optional
            The (possibly heuristic-aware) gym environment used at training time.
            If it is a :class:`GymEnvWithHeuristics`, its heuristics are replayed in
            :func:`GymAgent.act`.

        Raises
        ------
        RuntimeError
            If neither or both of `nn_path` / `nn_kwargs` are provided (subject to
            the `_check_none_set` / `_check_both_set` flags).
        """
        super().__init__(g2op_action_space)
        self._gym_act_space = gym_act_space
        self._gym_obs_space = gym_obs_space

        # heuristic support: only active when a heuristic-capable gymenv is given
        self._has_heuristic: bool = False
        self.gymenv: Optional[GymEnvWithHeuristics] = gymenv
        # pending heuristic actions to replay before asking the NN (None if no heuristic)
        self._action_list: Optional[List] = None
        if self.gymenv is not None and isinstance(self.gymenv, GymEnvWithHeuristics):
            self._has_heuristic = True
            self._action_list = []

        if _check_none_set and (nn_path is None and nn_kwargs is None):
            raise RuntimeError("Impossible to build a GymAgent without providing at "
                               "least one of `nn_path` (to load the agent from disk) "
                               "or `nn_kwargs` (to create the underlying agent).")
        if _check_both_set and (nn_path is not None and nn_kwargs is not None):
            raise RuntimeError("Impossible to build a GymAgent by providing both "
                               "`nn_path` (*ie* you want load the agent from disk) "
                               "and `nn_kwargs` (*ie* you want to create the underlying agent from these "
                               "parameters).")

        self._nn_path = nn_path if nn_path is not None else None
        # deep copy so the agent owns its building parameters
        self._nn_kwargs = copy.deepcopy(nn_kwargs) if nn_kwargs is not None else None

        self.nn_model = None
        if nn_path is not None:
            self.load()
        else:
            self.build()

    @abstractmethod
    def get_act(self, gym_obs, reward, done):
        """
        Retrieve the action from the NN model, given a gym observation.
        """
        pass

    @abstractmethod
    def load(self):
        """
        Load the NN model from `self._nn_path`.

        .. info::
            Only called if the agent has been built with `nn_path` not None and `nn_kwargs=None`.
        """
        pass

    @abstractmethod
    def build(self):
        """
        Build the NN model from `self._nn_kwargs`.

        .. info::
            Only called if the agent has been built with `nn_path=None` and `nn_kwargs` not None.
        """
        pass

    def clean_heuristic_actions(self, observation: BaseObservation, reward: float, done: bool) -> None:
        """This function allows to cure the heuristic actions.

        It is called at each step, just after the heuristic actions are computed
        (but before they are selected). It can be used, for example, to reorder
        the `self._action_list`.

        It is not used during training.

        Args:
            observation (BaseObservation): The current observation
            reward (float): the current reward
            done (bool): the current flag "done"
        """
        pass

    def act(self, observation: BaseObservation, reward: float, done: bool) -> BaseAction:
        """This function is called to "map" the grid2op world
        into a usable format by a neural network (for example in a format
        usable by stable baselines or ray/rllib).

        Parameters
        ----------
        observation : BaseObservation
            The grid2op observation
        reward : ``float``
            The reward
        done : ``bool``
            the flag "done" by open ai gym.

        Returns
        -------
        BaseAction
            The action taken by the agent, in a form of a grid2op BaseAction.

        Notes
        -------
        In case your "real agent" wants to implement some "non learned" heuristic,
        you can also put them here.

        In this case the "gym agent" will only be used in particular settings.
        """
        grid2op_act = None

        # heuristic part: replay the training-time heuristics first, if any
        if self._has_heuristic:
            if not self._action_list:
                # the list of actions is empty, I query the heuristic to see if there's something I can do
                self._action_list = self.gymenv.heuristic_actions(observation, reward, done, {})
            self.clean_heuristic_actions(observation, reward, done)
            if self._action_list:
                # some heuristic actions have been selected, I select the first one
                grid2op_act = self._action_list.pop(0)

        # the heuristic did not select any action, so ask the NN what to do
        if grid2op_act is None:
            gym_obs = self._gym_obs_space.to_gym(observation)
            gym_act = self.get_act(gym_obs, reward, done)
            grid2op_act = self._gym_act_space.from_gym(gym_act)

        # fix the action if needed (for example by limiting curtailment and storage)
        if self._has_heuristic:
            grid2op_act = self.gymenv.fix_action(grid2op_act, observation)
        return grid2op_act