Source code for l2rpn_baselines.PPO_RLLIB.train

# Copyright (c) 2020-2022 RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of L2RPN Baselines, a repository to host baselines for l2rpn competitions.

import os
import grid2op
import copy
from grid2op.gym_compat import GymEnv, BoxGymObsSpace, BoxGymActSpace

from l2rpn_baselines.PPO_RLLIB.env_rllib import Env_RLLIB
from l2rpn_baselines.PPO_SB3 import (default_obs_attr_to_keep, 
                                     default_act_attr_to_keep,
                                     save_used_attribute,
                                     remove_non_usable_attr
                                    )
from l2rpn_baselines.PPO_RLLIB.rllibagent import RLLIBAgent

try:
    import ray
    from ray.tune.logger import pretty_print
    _CAN_USE_RLLIB = True
except ImportError as exc_:
    _CAN_USE_RLLIB = False


def train(env,
          name="ppo_rllib",
          iterations=1,
          save_path=None,
          load_path=None,
          net_arch=None,
          learning_rate=3e-4,
          verbose=False,
          save_every_xxx_steps=None,
          obs_attr_to_keep=copy.deepcopy(default_obs_attr_to_keep),
          act_attr_to_keep=copy.deepcopy(default_act_attr_to_keep),
          env_kwargs=None,
          **kwargs):
    """
    This function uses rllib to train a PPO agent on a grid2op environment "env".

    It uses the grid2op "gym_compat" module to convert the action space to a
    BoxGymActSpace and the observation space to a BoxGymObsSpace.

    It is suited for studying the impact of continuous actions:

    - on storage units
    - on dispatchable generators
    - on generators with renewable energy sources

    .. warning::
        The environment used by RLLIB is copied and remade. This function does not
        work if you over-specialize the environment! For example, the opponent is
        not taken into account (yet), nor the chronics class, etc.

        If you want such a level of control, please use the `env_kwargs` parameter!

    Parameters
    ----------
    env: :class:`grid2op.Environment`
        The environment on which you need to train your agent. Only the name of
        the environment and its backend are used; the rest is recreated by rllib.

    name: ``str``
        The name of your agent.

    iterations: ``int``
        For how many iterations you want to train the model. These are **NOT**
        environment steps, but rllib's internal number of training iterations.

    save_path: ``str``
        Where you want to save your baseline.

    load_path: ``str``
        If you want to reload your baseline, specify the path where it is located.
        **NB** if a baseline is reloaded, some of the arguments provided to this
        function will not be used.

    net_arch:
        The neural network architecture used to create the neural network of the
        PPO. It is passed to rllib as the "fcnet_hiddens" entry of the model config.

    learning_rate: ``float``
        The learning rate (passed to rllib as "lr").

    save_every_xxx_steps: ``int``
        If set (by default it's None), the model is saved to the hard drive every
        `save_every_xxx_steps` training iterations.

    obs_attr_to_keep: list of string
        Grid2op attributes used to build the BoxGymObsSpace. It is passed as the
        "attr_to_keep" value of the BoxGymObsSpace (see
        https://grid2op.readthedocs.io/en/latest/gym.html#grid2op.gym_compat.BoxGymObsSpace)

    act_attr_to_keep: list of string
        Grid2op attributes used to build the BoxGymActSpace. It is passed as the
        "attr_to_keep" value of the BoxGymActSpace (see
        https://grid2op.readthedocs.io/en/latest/gym.html#grid2op.gym_compat.BoxGymActSpace)

    verbose: ``bool``
        Whether something is printed on the terminal (a better logging strategy
        will be put in place at some point).

    env_kwargs: Optional[dict]
        Extra keyword arguments passed to the building of the grid2op environment.

    kwargs:
        Extra parameters passed to the rllib trainer.

    Returns
    -------
    baseline:
        The trained agent, as an :class:`RLLIBAgent` (which wraps the rllib PPO
        trainer).

    .. _Example-ppo_stable_baseline:

    Examples
    ---------

    Here is an example of how to train a PPO agent with rllib. First define a
    python script, for example:

    .. code-block:: python

        import re
        import grid2op
        import ray
        from grid2op.Reward import LinesCapacityReward  # or any other rewards
        from grid2op.Chronics import MultifolderWithCache  # highly recommended
        from lightsim2grid import LightSimBackend  # highly recommended for training !
        from l2rpn_baselines.PPO_RLLIB import train

        env_name = "l2rpn_case14_sandbox"
        env = grid2op.make(env_name, backend=LightSimBackend())

        ray.init()  # if needed (you might have it already working somewhere)
        try:
            train(env,
                  iterations=10,  # any number of iterations you want
                  save_path="./saved_model",  # where the NN weights will be saved
                  name="test",  # name of the baseline
                  net_arch=[100, 100, 100],  # architecture of the NN
                  save_every_xxx_steps=2,  # save the NN every 2 training steps
                  env_kwargs={"reward_class": LinesCapacityReward,
                              "chronics_class": MultifolderWithCache,  # highly recommended
                              "data_feeding_kwargs": {
                                  'filter_func': lambda x: re.match(".*00$", x) is not None
                                  # use one over 100 chronics to train (for speed)
                                  }
                              },
                  verbose=True)
        finally:
            env.close()
            ray.shutdown()  # if needed (you might have it already working somewhere)

    """
    import jsonpickle
    if not _CAN_USE_RLLIB:
        raise ImportError("RLLIB is not installed on your machine")

    path_expe = None
    if save_path is not None:
        if not os.path.exists(save_path):
            os.mkdir(save_path)
        path_expe = os.path.join(save_path, name)
        if not os.path.exists(path_expe):
            os.mkdir(path_expe)

    # save the attributes kept
    act_attr_to_keep = remove_non_usable_attr(env, act_attr_to_keep)
    need_saving_final = save_used_attribute(save_path, name,
                                            obs_attr_to_keep, act_attr_to_keep)
    need_saving = need_saving_final and save_every_xxx_steps is not None

    if env_kwargs is None:
        env_kwargs = {}

    env_params = env.get_kwargs()
    env_config = {"env_name": env.env_name,
                  "backend_class": env_params["_raw_backend_class"],
                  "obs_attr_to_keep": obs_attr_to_keep,
                  "act_attr_to_keep": act_attr_to_keep,
                  **env_kwargs}
    model_dict = {}
    if net_arch is not None:
        model_dict["fcnet_hiddens"] = net_arch
    env_config_ppo = {
        # config to pass to env class
        "env_config": env_config,
        # neural network config
        "lr": learning_rate,
        "model": model_dict,
        **kwargs
    }

    # store the rllib configuration (only if a save_path was provided,
    # otherwise path_expe does not exist)
    if path_expe is not None:
        encoded = jsonpickle.encode(env_config_ppo)
        with open(os.path.join(path_expe, "env_config.json"),
                  "w", encoding="utf-8") as f:
            f.write(encoded)

    # define the gym environment from the grid2op env
    env_gym = GymEnv(env)
    env_gym.observation_space.close()
    env_gym.observation_space = BoxGymObsSpace(env.observation_space,
                                               attr_to_keep=obs_attr_to_keep)
    env_gym.action_space.close()
    env_gym.action_space = BoxGymActSpace(env.action_space,
                                          attr_to_keep=act_attr_to_keep)

    # then define a "trainer"
    agent = RLLIBAgent(g2op_action_space=env.action_space,
                       gym_act_space=env_gym.action_space,
                       gym_obs_space=env_gym.observation_space,
                       nn_config=env_config_ppo,
                       nn_path=load_path)

    for step in range(iterations):
        # perform one iteration of training the policy with PPO
        result = agent.nn_model.train()
        if verbose:
            print(pretty_print(result))

        if need_saving and step % save_every_xxx_steps == 0:
            agent.nn_model.save(checkpoint_dir=path_expe)

    if need_saving_final:
        agent.nn_model.save(checkpoint_dir=path_expe)

    return agent
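

# --- Illustrative sketch, not part of the original module --------------------
# The docstring above explains that `train` targets continuous actions on
# storage units, dispatchable generators and renewable generators. The helper
# below is a hedged example of restricting `obs_attr_to_keep` /
# `act_attr_to_keep` to such attributes; the helper name and the exact
# attribute selection are illustrative assumptions, not recommendations.
def _example_train_with_custom_attributes(env):
    """Hypothetical helper: call ``train`` with hand-picked gym attributes."""
    return train(env,
                 iterations=2,                    # kept tiny for the example
                 save_path="./saved_model",       # assumed output directory
                 name="custom_attr",              # hypothetical baseline name
                 obs_attr_to_keep=["rho", "gen_p", "load_p", "storage_charge"],
                 act_attr_to_keep=["redispatch", "curtail", "set_storage"],
                 verbose=True)
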
if __name__ == "__main__":
    import re
    import grid2op
    from grid2op.Reward import LinesCapacityReward  # or any other rewards
    from grid2op.Chronics import MultifolderWithCache  # highly recommended
    from lightsim2grid import LightSimBackend  # highly recommended for training !
    import ray

    env_name = "l2rpn_case14_sandbox"
    env = grid2op.make(env_name, backend=LightSimBackend())

    ray.init()
    try:
        train(env,
              iterations=10,  # any number of iterations you want
              save_path="./saved_model",  # where the NN weights will be saved
              name="test3",  # name of the baseline
              net_arch=[100, 100, 100],  # architecture of the NN
              save_every_xxx_steps=2,  # save the NN every 2 training steps
              env_kwargs={"reward_class": LinesCapacityReward,
                          "chronics_class": MultifolderWithCache,  # highly recommended
                          "data_feeding_kwargs": {
                              'filter_func': lambda x: re.match(".*00$", x) is not None
                              # use one over 100 chronics to train (for speed)
                              }
                          },
              verbose=True)
    finally:
        env.close()
        ray.shutdown()
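

# --- Illustrative sketch, not part of the original module --------------------
# A hedged example of rebuilding an RLLIBAgent from the files written by
# `train`, mirroring the construction performed inside `train` itself. The
# "./saved_model" / "test3" layout matches the ``__main__`` block above; the
# helper name is hypothetical, ray may need to be initialised beforehand, and
# the exact checkpoint path expected by `nn_path` can depend on your ray/rllib
# version, so treat this as a sketch rather than a guaranteed recipe.
def _example_rebuild_agent(save_path="./saved_model", name="test3"):
    """Hypothetical helper: rebuild the trained agent from disk."""
    import jsonpickle
    from lightsim2grid import LightSimBackend  # assumed, as in the example above

    path_expe = os.path.join(save_path, name)
    with open(os.path.join(path_expe, "env_config.json"),
              "r", encoding="utf-8") as f:
        # note: exotic entries stored through env_kwargs (e.g. lambdas in
        # data_feeding_kwargs) may not survive the jsonpickle round-trip
        nn_config = jsonpickle.decode(f.read())
    env_config = nn_config["env_config"]

    # recreate the grid2op environment and the gym spaces exactly as in `train`
    env = grid2op.make(env_config["env_name"], backend=LightSimBackend())
    gym_obs_space = BoxGymObsSpace(env.observation_space,
                                   attr_to_keep=env_config["obs_attr_to_keep"])
    gym_act_space = BoxGymActSpace(env.action_space,
                                   attr_to_keep=env_config["act_attr_to_keep"])

    agent = RLLIBAgent(g2op_action_space=env.action_space,
                       gym_act_space=gym_act_space,
                       gym_obs_space=gym_obs_space,
                       nn_config=nn_config,
                       nn_path=path_expe)
    return agent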