Source code for l2rpn_baselines.PPO_SB3.utils

# Copyright (c) 2020-2022 RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of L2RPN Baselines, a repository to host baselines for l2rpn competitions.

import warnings
import os
import json
from typing import List, Optional

from l2rpn_baselines.utils import GymAgent

try:
    from stable_baselines3 import PPO
    _CAN_USE_STABLE_BASELINE = True
except ImportError:
    _CAN_USE_STABLE_BASELINE = False
    class PPO(object):
        """
        Do not use, this class is a template when stable baselines3 is not installed.
        
        It represents `from stable_baselines3 import PPO`
        """
        
        
default_obs_attr_to_keep = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q",
                            "actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line",
                            "time_before_cooldown_sub", "rho", "timestep_overflow", "line_status",
                            "storage_power", "storage_charge"]


default_act_attr_to_keep = ["redispatch", "curtail", "set_storage"]


def remove_non_usable_attr(grid2openv, act_attr_to_keep: List[str]) -> List[str]:
    """This function modifies the attributes (of the actions) to remove the ones
    that are not usable with your gym environment.

    It only filters things if the default variables are used
    (see `default_act_attr_to_keep`).

    Parameters
    ----------
    grid2openv : grid2op.Environment.Environment
        The used grid2op environment

    act_attr_to_keep : List[str]
        The attributes of the actions to keep.

    Returns
    -------
    List[str]
        The same as `act_attr_to_keep` if the user modified the default,
        or the attributes from the default list that are usable by the environment.

    """
    modif_attr = act_attr_to_keep
    if act_attr_to_keep == default_act_attr_to_keep:
        # by default, i remove all the attributes that are not supported by the action type
        # i do not do that if the user specified specific attributes to keep: this is
        # their responsibility in this case
        modif_attr = []
        for el in act_attr_to_keep:
            if grid2openv.action_space.supports_type(el):
                modif_attr.append(el)
            else:
                warnings.warn(f"attribute {el} cannot be processed by the allowed "
                              "action type. It has been removed from the "
                              "gym space as well.")
    return modif_attr
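
# Usage sketch (not part of the original module): shows how `remove_non_usable_attr`
# is typically called before building the gym action space. The environment name
# "l2rpn_case14_sandbox" is only an example, any grid2op environment works.
def _example_remove_non_usable_attr():
    import grid2op
    env = grid2op.make("l2rpn_case14_sandbox")
    # with the default list, attributes not supported by the action space
    # (e.g. "set_storage" on a grid without storage units) are dropped,
    # and a warning is emitted for each removed attribute
    usable_attr = remove_non_usable_attr(env, default_act_attr_to_keep)
    return usable_attr
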
def save_used_attribute(save_path: Optional[str],
                        name: str,
                        obs_attr_to_keep: List[str],
                        act_attr_to_keep: List[str]) -> bool:
    """Serialize, as json, the `obs_attr_to_keep` and `act_attr_to_keep`.

    This is typically called in the `train` function.

    Parameters
    ----------
    save_path : Optional[str]
        Where to save the used attributes (put ``None`` if you don't want to save them)

    name : str
        Name of the model

    obs_attr_to_keep : List[str]
        List of observation attributes to keep

    act_attr_to_keep : List[str]
        List of action attributes to keep

    Returns
    -------
    bool
        Whether the data have been saved or not
    """
    res = False
    if save_path is not None:
        my_path = os.path.join(save_path, name)
        if not os.path.exists(save_path):
            os.mkdir(save_path)
        if not os.path.exists(my_path):
            os.mkdir(my_path)

        with open(os.path.join(my_path, "obs_attr_to_keep.json"), encoding="utf-8", mode="w") as f:
            json.dump(fp=f, obj=obs_attr_to_keep)
        with open(os.path.join(my_path, "act_attr_to_keep.json"), encoding="utf-8", mode="w") as f:
            json.dump(fp=f, obj=act_attr_to_keep)
        res = True
    return res
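
# Usage sketch (not part of the original module): persist the attribute lists next to
# a model named "my_ppo_agent" in "./saved_model". Both the path and the model name
# are placeholder values.
def _example_save_used_attribute():
    saved = save_used_attribute("./saved_model",
                                "my_ppo_agent",
                                default_obs_attr_to_keep,
                                default_act_attr_to_keep)
    # `saved` is True if the two json files have been written,
    # False if save_path was None
    return saved
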

class SB3Agent(GymAgent):
    """This class represents the Agent (directly usable with the grid2op framework).

    This agent uses the stable-baselines3 `nn_type` (by default PPO) as
    the neural network to take decisions on the grid.

    To be built, it requires:

    - `g2op_action_space`: a grid2op action space (used for initializing the grid2op agent)
    - `gym_act_space`: a gym action space (used for the neural networks)
    - `gym_obs_space`: a gym observation space (used for the neural networks)

    It can also accept different types of parameters:

    - `nn_type`: the type of "neural network" from stable baselines (by default PPO)
    - `nn_path`: the path where the neural network can be loaded from
    - `nn_kwargs`: the parameters used to build the neural network from scratch.

    Exactly one of `nn_path` and `nn_kwargs` should be provided. No more, no less.

    TODO heuristic part !

    Examples
    ---------
    The best way to have such an agent is either to train it:

    .. code-block:: python

        from l2rpn_baselines.PPO_SB3 import train
        agent = train(...)  # see the doc of the `train` function !

    Or you can also load it when you evaluate it (after it has been trained !):

    .. code-block:: python

        from l2rpn_baselines.PPO_SB3 import evaluate
        agent = evaluate(...)  # see the doc of the `evaluate` function !

    To create such an agent from scratch (NOT RECOMMENDED), you can do:

    .. code-block:: python

        import grid2op
        from grid2op.gym_compat import BoxGymObsSpace, BoxGymActSpace, GymEnv
        from lightsim2grid import LightSimBackend
        from stable_baselines3.ppo import MlpPolicy
        from l2rpn_baselines.PPO_SB3 import PPO_SB3

        env_name = "l2rpn_case14_sandbox"  # or any other name

        # customize the observation / action you want to keep
        obs_attr_to_keep = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q",
                            "actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line",
                            "time_before_cooldown_sub", "rho", "timestep_overflow", "line_status",
                            "storage_power", "storage_charge"]
        act_attr_to_keep = ["redispatch", "curtail", "set_storage"]

        # create the grid2op environment
        env = grid2op.make(env_name, backend=LightSimBackend())

        # define the action space and observation space that your agent
        # will be able to use
        env_gym = GymEnv(env)
        env_gym.observation_space.close()
        env_gym.observation_space = BoxGymObsSpace(env.observation_space,
                                                   attr_to_keep=obs_attr_to_keep)
        env_gym.action_space.close()
        env_gym.action_space = BoxGymActSpace(env.action_space,
                                              attr_to_keep=act_attr_to_keep)

        # create the key word arguments used for the NN
        nn_kwargs = {
            "policy": MlpPolicy,
            "env": env_gym,
            "verbose": 0,
            "learning_rate": 1e-3,
            "tensorboard_log": ...,
            "policy_kwargs": {
                "net_arch": [100, 100, 100]
            }
        }

        # create a grid2op agent based on that (this builds the neural
        # network from scratch, it does not load any saved weights)
        grid2op_agent = PPO_SB3(env.action_space,
                                env_gym.action_space,
                                env_gym.observation_space,
                                nn_kwargs=nn_kwargs  # don't load it from anywhere
                                )

    """
    def __init__(self,
                 g2op_action_space,
                 gym_act_space,
                 gym_obs_space,
                 nn_type=PPO,
                 nn_path=None,
                 nn_kwargs=None,
                 custom_load_dict=None,
                 gymenv=None,
                 iter_num=None,
                 ):
        self._nn_type = nn_type
        if custom_load_dict is not None:
            self.custom_load_dict = custom_load_dict
        else:
            self.custom_load_dict = {}
        self._iter_num : Optional[int] = iter_num
        super().__init__(g2op_action_space,
                         gym_act_space,
                         gym_obs_space,
                         nn_path=nn_path,
                         nn_kwargs=nn_kwargs,
                         gymenv=gymenv
                         )

    def get_act(self, gym_obs, reward, done):
        """Retrieve the gym action from the gym observation and the reward.
        It only works (for now) for non-recurrent policies.

        Parameters
        ----------
        gym_obs : gym observation
            The gym observation

        reward : ``float``
            the current reward

        done : ``bool``
            whether the episode is over or not.

        Returns
        -------
        gym action
            The gym action, that is processed in the :func:`GymAgent.act`
            to be used with grid2op
        """
        action, _ = self.nn_model.predict(gym_obs, deterministic=True)
        return action

    def load(self):
        """
        Load the NN model.

        In the case of a PPO agent, this is equivalent to performing:

        .. code-block:: python

            PPO.load(nn_path)
        """
        custom_objects = {"action_space": self._gym_act_space,
                          "observation_space": self._gym_obs_space}
        for key, val in self.custom_load_dict.items():
            custom_objects[key] = val
        path_load = self._nn_path
        if self._iter_num is not None:
            path_load = path_load + f"_{self._iter_num}_steps"
        self.nn_model = self._nn_type.load(path_load,
                                           custom_objects=custom_objects)

    def build(self):
        """Create the underlying NN model from scratch.

        In the case of a PPO agent, this is equivalent to performing:

        .. code-block:: python

            PPO(**nn_kwargs)
        """
        self.nn_model = PPO(**self._nn_kwargs)


if __name__ == "__main__":
    PPO_SB3 = SB3Agent

    import grid2op
    from grid2op.gym_compat import BoxGymObsSpace, BoxGymActSpace, GymEnv
    from lightsim2grid import LightSimBackend
    from stable_baselines3.ppo import MlpPolicy
    # from l2rpn_baselines.PPO_SB3 import PPO_SB3

    env_name = "l2rpn_case14_sandbox"  # or any other name

    # customize the observation / action you want to keep
    obs_attr_to_keep = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q",
                        "actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line",
                        "time_before_cooldown_sub", "rho", "timestep_overflow", "line_status",
                        "storage_power", "storage_charge"]
    act_attr_to_keep = ["redispatch", "curtail", "set_storage"]

    # create the grid2op environment
    env = grid2op.make(env_name, backend=LightSimBackend())

    # define the action space and observation space that your agent
    # will be able to use
    env_gym = GymEnv(env)
    env_gym.observation_space.close()
    env_gym.observation_space = BoxGymObsSpace(env.observation_space,
                                               attr_to_keep=obs_attr_to_keep)
    env_gym.action_space.close()
    env_gym.action_space = BoxGymActSpace(env.action_space,
                                          attr_to_keep=act_attr_to_keep)

    # create the key word arguments used for the NN
    nn_kwargs = {
        "policy": MlpPolicy,
        "env": env_gym,
        "verbose": 0,
        "learning_rate": 1e-3,
        "tensorboard_log": ...,
        "policy_kwargs": {
            "net_arch": [100, 100, 100]
        }
    }

    # create a grid2op agent based on that (this builds the neural
    # network from scratch, it does not load any saved weights)
    grid2op_agent = PPO_SB3(env.action_space,
                            env_gym.action_space,
                            env_gym.observation_space,
                            nn_kwargs=nn_kwargs  # don't load it from anywhere
                            )
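

# Usage sketch (not part of the original module): building the agent from saved
# weights via `nn_path` instead of `nn_kwargs`. The path "./saved_model/my_ppo_agent"
# is a placeholder, and the gym spaces are assumed to be built exactly as above
# (or rebuilt from the json files written by `save_used_attribute`).
def _example_load_trained_agent(g2op_env, gym_act_space, gym_obs_space):
    agent = SB3Agent(g2op_env.action_space,
                     gym_act_space,
                     gym_obs_space,
                     nn_path="./saved_model/my_ppo_agent")
    # the resulting object is a regular grid2op agent: it consumes grid2op
    # observations and returns grid2op actions
    obs = g2op_env.reset()
    act = agent.act(obs, reward=0., done=False)
    return act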