Source code for l2rpn_baselines.PPO_SB3.utils

# Copyright (c) 2020-2022 RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of L2RPN Baselines, a repository to host baselines for l2rpn competitions.

import warnings
import os
import json
from typing import List, Optional

from l2rpn_baselines.utils import GymAgent

try:
    from stable_baselines3 import PPO
    _CAN_USE_STABLE_BASELINE = True
except ImportError:
    _CAN_USE_STABLE_BASELINE = False
    class PPO(object):
        """
        Do not use, this class is a template when stable baselines3 is not installed.
        
        It represents `from stable_baselines3 import PPO`
        """
        
        
default_obs_attr_to_keep = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q",
                            "actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line",
                            "time_before_cooldown_sub", "rho", "timestep_overflow", "line_status",
                            "storage_power", "storage_charge"]


default_act_attr_to_keep = ["redispatch", "curtail", "set_storage"]


def remove_non_usable_attr(grid2openv, act_attr_to_keep: List[str]) -> List[str]:
    """This function modifies the attributes (of the actions) to remove the ones
    that are not usable with your gym environment.

    It only filters things if the default variables are used
    (see `default_act_attr_to_keep`).

    Parameters
    ----------
    grid2openv : grid2op.Environment.Environment
        The used grid2op environment

    act_attr_to_keep : List[str]
        The attributes of the actions to keep.

    Returns
    -------
    List[str]
        The same as `act_attr_to_keep` if the user modified the default,
        or the attributes from the default list that are usable by the environment.

    """
    modif_attr = act_attr_to_keep
    if act_attr_to_keep == default_act_attr_to_keep:
        # by default, i remove all the attributes that are not supported by the action type
        # i do not do that if the user specified specific attributes to keep: this is
        # their responsibility in this case
        modif_attr = []
        for el in act_attr_to_keep:
            if grid2openv.action_space.supports_type(el):
                modif_attr.append(el)
            else:
                warnings.warn(f"attribute {el} cannot be processed by the allowed "
                              "action type. It has been removed from the "
                              "gym space as well.")
    return modif_attr
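
# Usage sketch (not part of the original module): shows how `remove_non_usable_attr`
# is typically called before building the gym action space. The environment name
# "l2rpn_case14_sandbox" is only an example, any grid2op environment works.
def _example_remove_non_usable_attr():
    import grid2op
    env = grid2op.make("l2rpn_case14_sandbox")
    # with the default list, attributes not supported by the action space
    # (e.g. "set_storage" on a grid without storage units) are dropped,
    # and a warning is emitted for each removed attribute
    usable_attr = remove_non_usable_attr(env, default_act_attr_to_keep)
    return usable_attr
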
def save_used_attribute(save_path: Optional[str],
                        name: str,
                        obs_attr_to_keep: List[str],
                        act_attr_to_keep: List[str]) -> bool:
    """Serialize, as json, the `obs_attr_to_keep` and `act_attr_to_keep`.

    This is typically called in the `train` function.

    Parameters
    ----------
    save_path : Optional[str]
        Where to save the used attributes (put ``None`` if you don't want to save them)

    name : str
        Name of the model

    obs_attr_to_keep : List[str]
        List of observation attributes to keep

    act_attr_to_keep : List[str]
        List of action attributes to keep

    Returns
    -------
    bool
        Whether the data have been saved or not
    """
    res = False
    if save_path is not None:
        my_path = os.path.join(save_path, name)
        if not os.path.exists(save_path):
            os.mkdir(save_path)
        if not os.path.exists(my_path):
            os.mkdir(my_path)

        with open(os.path.join(my_path, "obs_attr_to_keep.json"), encoding="utf-8", mode="w") as f:
            json.dump(fp=f, obj=obs_attr_to_keep)
        with open(os.path.join(my_path, "act_attr_to_keep.json"), encoding="utf-8", mode="w") as f:
            json.dump(fp=f, obj=act_attr_to_keep)
        res = True
    return res
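
# Usage sketch (not part of the original module): persist the attribute lists next to
# a model named "my_ppo_agent" in "./saved_model". Both the path and the model name
# are placeholder values.
def _example_save_used_attribute():
    saved = save_used_attribute("./saved_model",
                                "my_ppo_agent",
                                default_obs_attr_to_keep,
                                default_act_attr_to_keep)
    # `saved` is True if the two json files have been written,
    # False if save_path was None
    return saved
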

class SB3Agent(GymAgent):
    """This class represents the Agent (directly usable with the grid2op framework).

    This agent uses the stable-baselines3 `nn_type` (by default PPO) as
    the neural network to take decisions on the grid.

    To be built, it requires:

    - `g2op_action_space`: a grid2op action space (used for initializing the grid2op agent)
    - `gym_act_space`: a gym action space (used for the neural networks)
    - `gym_obs_space`: a gym observation space (used for the neural networks)

    It can also accept different types of parameters:

    - `nn_type`: the type of "neural network" from stable baselines (by default PPO)
    - `nn_path`: the path where the neural network can be loaded from
    - `nn_kwargs`: the parameters used to build the neural network from scratch.

    Exactly one of `nn_path` and `nn_kwargs` should be provided. No more, no less.

    TODO heuristic part !

    Examples
    ---------
    The best way to have such an agent is either to train it:

    .. code-block:: python

        from l2rpn_baselines.PPO_SB3 import train
        agent = train(...)  # see the doc of the `train` function !

    Or you can also load it when you evaluate it (after it has been trained !):

    .. code-block:: python

        from l2rpn_baselines.PPO_SB3 import evaluate
        agent = evaluate(...)  # see the doc of the `evaluate` function !

    To create such an agent from scratch (NOT RECOMMENDED), you can do:

    .. code-block:: python

        import grid2op
        from grid2op.gym_compat import BoxGymObsSpace, BoxGymActSpace, GymEnv
        from lightsim2grid import LightSimBackend
        from stable_baselines3.ppo import MlpPolicy
        from l2rpn_baselines.PPO_SB3 import PPO_SB3

        env_name = "l2rpn_case14_sandbox"  # or any other name

        # customize the observation / action you want to keep
        obs_attr_to_keep = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q",
                            "actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line",
                            "time_before_cooldown_sub", "rho", "timestep_overflow", "line_status",
                            "storage_power", "storage_charge"]
        act_attr_to_keep = ["redispatch", "curtail", "set_storage"]

        # create the grid2op environment
        env = grid2op.make(env_name, backend=LightSimBackend())

        # define the action space and observation space that your agent
        # will be able to use
        env_gym = GymEnv(env)
        env_gym.observation_space.close()
        env_gym.observation_space = BoxGymObsSpace(env.observation_space,
                                                   attr_to_keep=obs_attr_to_keep)
        env_gym.action_space.close()
        env_gym.action_space = BoxGymActSpace(env.action_space,
                                              attr_to_keep=act_attr_to_keep)

        # create the key word arguments used for the NN
        nn_kwargs = {
            "policy": MlpPolicy,
            "env": env_gym,
            "verbose": 0,
            "learning_rate": 1e-3,
            "tensorboard_log": ...,
            "policy_kwargs": {
                "net_arch": [100, 100, 100]
            }
        }

        # create a grid2op agent based on that (this builds the neural
        # network from scratch, it does not load any saved weights)
        grid2op_agent = PPO_SB3(env.action_space,
                                env_gym.action_space,
                                env_gym.observation_space,
                                nn_kwargs=nn_kwargs  # don't load it from anywhere
                                )

    """
    def __init__(self,
                 g2op_action_space,
                 gym_act_space,
                 gym_obs_space,
                 nn_type=PPO,
                 nn_path=None,
                 nn_kwargs=None,
                 custom_load_dict=None,
                 gymenv=None,
                 iter_num=None,
                 ):
        self._nn_type = nn_type
        if custom_load_dict is not None:
            self.custom_load_dict = custom_load_dict
        else:
            self.custom_load_dict = {}
        self._iter_num : Optional[int] = iter_num
        super().__init__(g2op_action_space,
                         gym_act_space,
                         gym_obs_space,
                         nn_path=nn_path,
                         nn_kwargs=nn_kwargs,
                         gymenv=gymenv
                         )

    def get_act(self, gym_obs, reward, done):
        """Retrieve the gym action from the gym observation and the reward.
        It only works (for now) for non-recurrent policies.

        Parameters
        ----------
        gym_obs : gym observation
            The gym observation

        reward : ``float``
            the current reward

        done : ``bool``
            whether the episode is over or not.

        Returns
        -------
        gym action
            The gym action, that is processed in the :func:`GymAgent.act`
            to be used with grid2op
        """
        action, _ = self.nn_model.predict(gym_obs, deterministic=True)
        return action

    def load(self):
        """
        Load the NN model.

        In the case of a PPO agent, this is equivalent to performing:

        .. code-block:: python

            PPO.load(nn_path)
        """
        custom_objects = {"action_space": self._gym_act_space,
                          "observation_space": self._gym_obs_space}
        for key, val in self.custom_load_dict.items():
            custom_objects[key] = val
        path_load = self._nn_path
        if self._iter_num is not None:
            path_load = path_load + f"_{self._iter_num}_steps"
        self.nn_model = self._nn_type.load(path_load,
                                           custom_objects=custom_objects)

    def build(self):
        """Create the underlying NN model from scratch.

        In the case of a PPO agent, this is equivalent to performing:

        .. code-block:: python

            PPO(**nn_kwargs)
        """
        self.nn_model = PPO(**self._nn_kwargs)


if __name__ == "__main__":
    PPO_SB3 = SB3Agent

    import grid2op
    from grid2op.gym_compat import BoxGymObsSpace, BoxGymActSpace, GymEnv
    from lightsim2grid import LightSimBackend
    from stable_baselines3.ppo import MlpPolicy
    # from l2rpn_baselines.PPO_SB3 import PPO_SB3

    env_name = "l2rpn_case14_sandbox"  # or any other name

    # customize the observation / action you want to keep
    obs_attr_to_keep = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q",
                        "actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line",
                        "time_before_cooldown_sub", "rho", "timestep_overflow", "line_status",
                        "storage_power", "storage_charge"]
    act_attr_to_keep = ["redispatch", "curtail", "set_storage"]

    # create the grid2op environment
    env = grid2op.make(env_name, backend=LightSimBackend())

    # define the action space and observation space that your agent
    # will be able to use
    env_gym = GymEnv(env)
    env_gym.observation_space.close()
    env_gym.observation_space = BoxGymObsSpace(env.observation_space,
                                               attr_to_keep=obs_attr_to_keep)
    env_gym.action_space.close()
    env_gym.action_space = BoxGymActSpace(env.action_space,
                                          attr_to_keep=act_attr_to_keep)

    # create the key word arguments used for the NN
    nn_kwargs = {
        "policy": MlpPolicy,
        "env": env_gym,
        "verbose": 0,
        "learning_rate": 1e-3,
        "tensorboard_log": ...,
        "policy_kwargs": {
            "net_arch": [100, 100, 100]
        }
    }

    # create a grid2op agent based on that (this builds the neural
    # network from scratch, it does not load any saved weights)
    grid2op_agent = PPO_SB3(env.action_space,
                            env_gym.action_space,
                            env_gym.observation_space,
                            nn_kwargs=nn_kwargs  # don't load it from anywhere
                            )
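

# Usage sketch (not part of the original module): building the agent from saved
# weights via `nn_path` instead of `nn_kwargs`. The path "./saved_model/my_ppo_agent"
# is a placeholder, and the gym spaces are assumed to be built exactly as above
# (or rebuilt from the json files written by `save_used_attribute`).
def _example_load_trained_agent(g2op_env, gym_act_space, gym_obs_space):
    agent = SB3Agent(g2op_env.action_space,
                     gym_act_space,
                     gym_obs_space,
                     nn_path="./saved_model/my_ppo_agent")
    # the resulting object is a regular grid2op agent: it consumes grid2op
    # observations and returns grid2op actions
    obs = g2op_env.reset()
    act = agent.act(obs, reward=0., done=False)
    return act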