Source code for l2rpn_baselines.PPO_RLLIB.evaluate

# Copyright (c) 2020-2022 RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of L2RPN Baselines, a repository to host baselines for l2rpn competitions.

import os
import json
from grid2op.Runner import Runner

from l2rpn_baselines.utils.save_log_gif import save_log_gif

from grid2op.gym_compat import BoxGymActSpace, BoxGymObsSpace

from l2rpn_baselines.PPO_RLLIB.rllibagent import RLLIBAgent

def evaluate(env,
             name="ppo_rllib",
             load_path=".",
             logs_path=None,
             nb_episode=1,
             nb_process=1,
             max_steps=-1,
             verbose=False,
             save_gif=False,
             **kwargs):
    """
    This function uses the rllib package to evaluate a previously trained PPO agent
    (trained with rllib) on a grid2op environment "env".

    It uses the grid2op "gym_compat" module to convert the action space to a
    BoxGymActSpace and the observation space to a BoxGymObsSpace.

    It is suited for studying the impact of continuous actions:

    - on storage units
    - on dispatchable generators
    - on generators with renewable energy sources

    Parameters
    ----------
    env: :class:`grid2op.Environment`
        The environment on which you evaluate your agent.

    name: ``str``
        The name of your agent.

    load_path: ``str``
        If you want to reload your baseline, specify the path where it is located. **NB** if a baseline
        is reloaded, some of the arguments provided to this function will not be used.

    logs_path: ``str``
        Where to store the logs of the evaluation (used by the grid2op runner).
        ``None`` if you don't want to log them.

    nb_episode: ``int``
        How many episodes to run during the assessment of the performances.

    nb_process: ``int``
        On how many processes the assessment will be made. (setting this > 1 can lead to some speed ups
        but can be unstable on some platforms)

    max_steps: ``int``
        How many steps at maximum your agent will be assessed.

    verbose: ``bool``
        Whether to display a progress bar and print a summary of the evaluation.

    save_gif: ``bool``
        Whether or not you want to save, as a gif, the performance of your agent. It might cause
        memory issues (might take a lot of ram) and drastically increase computation time.

    kwargs:
        extra parameters passed to the ``run`` method of the grid2op runner.

    Returns
    -------
    baseline:
        The loaded agent (a :class:`l2rpn_baselines.PPO_RLLIB.RLLIBAgent`) together with the
        results returned by the grid2op runner.

    Examples
    ---------

    Here is an example on how to evaluate a PPO agent (trained using RLLIB):

    .. code-block:: python

        import grid2op
        from grid2op.Reward import LinesCapacityReward  # or any other rewards
        from lightsim2grid import LightSimBackend  # highly recommended !
        from l2rpn_baselines.PPO_RLLIB import evaluate

        nb_episode = 7
        nb_process = 1
        verbose = True

        env_name = "l2rpn_case14_sandbox"
        env = grid2op.make(env_name,
                           reward_class=LinesCapacityReward,
                           backend=LightSimBackend()
                           )

        try:
            evaluate(env,
                     nb_episode=nb_episode,
                     load_path="./saved_model",  # should be the same as what has been called in the train function !
                     name="test",  # should be the same as what has been called in the train function !
                     nb_process=1,
                     verbose=verbose,
                     )

            # you can also compare your agent with the do nothing agent relatively
            # easily
            runner_params = env.get_params_for_runner()
            runner = Runner(**runner_params)

            res = runner.run(nb_episode=nb_episode,
                             nb_process=nb_process
                             )

            # Print summary
            if verbose:
                print("Evaluation summary for DN:")
                for _, chron_name, cum_reward, nb_time_step, max_ts in res:
                    msg_tmp = "chronics at: {}".format(chron_name)
                    msg_tmp += "\ttotal score: {:.6f}".format(cum_reward)
                    msg_tmp += "\ttime steps: {:.0f}/{:.0f}".format(nb_time_step, max_ts)
                    print(msg_tmp)
        finally:
            env.close()

    """
    import jsonpickle  # lazy loading to save import time

    # load the attributes kept
    my_path = os.path.join(load_path, name)
    if not os.path.exists(load_path):
        os.mkdir(load_path)
    if not os.path.exists(my_path):
        os.mkdir(my_path)

    with open(os.path.join(my_path, "obs_attr_to_keep.json"), encoding="utf-8", mode="r") as f:
        obs_attr_to_keep = json.load(fp=f)
    with open(os.path.join(my_path, "act_attr_to_keep.json"), encoding="utf-8", mode="r") as f:
        act_attr_to_keep = json.load(fp=f)

    # create the action and observation space
    gym_observation_space = BoxGymObsSpace(env.observation_space, attr_to_keep=obs_attr_to_keep)
    gym_action_space = BoxGymActSpace(env.action_space, attr_to_keep=act_attr_to_keep)

    # retrieve the env config (for rllib)
    with open(os.path.join(my_path, "env_config.json"), "r", encoding="utf-8") as f:
        str_ = f.read()
    env_config_ppo = jsonpickle.decode(str_)

    # create a grid2op agent based on that (this will reload the saved weights)
    full_path = os.path.join(load_path, name)
    grid2op_agent = RLLIBAgent(env.action_space,
                               gym_action_space,
                               gym_observation_space,
                               nn_config=env_config_ppo,
                               nn_path=os.path.join(full_path))

    # Build runner
    runner_params = env.get_params_for_runner()
    runner_params["verbose"] = verbose
    runner = Runner(**runner_params,
                    agentClass=None,
                    agentInstance=grid2op_agent)

    # Run the agent on the scenarios
    if logs_path is not None:
        os.makedirs(logs_path, exist_ok=True)

    res = runner.run(path_save=logs_path,
                     nb_episode=nb_episode,
                     nb_process=nb_process,
                     max_iter=max_steps,
                     pbar=verbose,
                     **kwargs)

    # Print summary
    if verbose:
        print("Evaluation summary:")
        for _, chron_name, cum_reward, nb_time_step, max_ts in res:
            msg_tmp = "chronics at: {}".format(chron_name)
            msg_tmp += "\ttotal score: {:.6f}".format(cum_reward)
            msg_tmp += "\ttime steps: {:.0f}/{:.0f}".format(nb_time_step, max_ts)
            print(msg_tmp)

    if save_gif:
        if verbose:
            print("Saving the gif of the episodes")
        save_log_gif(logs_path, res)

    return grid2op_agent, res
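
# Rough sketch of the directory layout that ``evaluate`` expects under
# ``os.path.join(load_path, name)``, inferred from the files read above.
# The checkpoint file names are an assumption: they depend on what the companion
# ``train`` function (and rllib) actually wrote to disk.
#
#     saved_model/
#     └── test/
#         ├── obs_attr_to_keep.json   # observation attributes kept by BoxGymObsSpace
#         ├── act_attr_to_keep.json   # action attributes kept by BoxGymActSpace
#         ├── env_config.json         # rllib / neural network config, jsonpickle encoded
#         └── ...                     # rllib checkpoint files reloaded through RLLIBAgent
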
if __name__ == "__main__":

    import grid2op
    from grid2op.Reward import LinesCapacityReward  # or any other rewards
    from lightsim2grid import LightSimBackend  # highly recommended !

    nb_episode = 7
    nb_process = 1
    verbose = True

    env_name = "l2rpn_case14_sandbox"
    env = grid2op.make(env_name,
                       reward_class=LinesCapacityReward,
                       backend=LightSimBackend()
                       )

    try:
        evaluate(env,
                 nb_episode=nb_episode,
                 load_path="./saved_model",  # should be the same as what has been called in the train function !
                 name="test3",  # should be the same as what has been called in the train function !
                 nb_process=1,
                 verbose=verbose,
                 )

        # you can also compare your agent with the do nothing agent relatively
        # easily
        runner_params = env.get_params_for_runner()
        runner = Runner(**runner_params)

        res = runner.run(nb_episode=nb_episode,
                         nb_process=nb_process
                         )

        # Print summary
        if verbose:
            print("Evaluation summary for DN:")
            for _, chron_name, cum_reward, nb_time_step, max_ts in res:
                msg_tmp = "chronics at: {}".format(chron_name)
                msg_tmp += "\ttotal score: {:.6f}".format(cum_reward)
                msg_tmp += "\ttime steps: {:.0f}/{:.0f}".format(nb_time_step, max_ts)
                print(msg_tmp)
    finally:
        env.close()
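
# A minimal sketch of reusing the agent returned by ``evaluate`` outside of the
# grid2op Runner. This assumes the standard grid2op ``BaseAgent`` interface
# (``act(observation, reward, done)``); the variable names are illustrative only.
#
#     grid2op_agent, res = evaluate(env, load_path="./saved_model", name="test3")
#     obs = env.reset()
#     reward, done = env.reward_range[0], False
#     while not done:
#         act = grid2op_agent.act(obs, reward, done)
#         obs, reward, done, info = env.step(act)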