# Source code for l2rpn_baselines.DuelQSimple.evaluate

#!/usr/bin/env python3

# Copyright (c) 2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of L2RPN Baselines, a repository to host baselines for l2rpn competitions.

import os

from grid2op.MakeEnv import make
from grid2op.Runner import Runner

from l2rpn_baselines.utils.save_log_gif import save_log_gif
from l2rpn_baselines.DuelQSimple.duelQSimple import DuelQSimple, DEFAULT_NAME
from l2rpn_baselines.DuelQSimple.duelQ_NNParam import DuelQ_NNParam
from l2rpn_baselines.DuelQSimple.duelQ_NN import DuelQ_NN


# Default values of the optional arguments of `evaluate`.

# Folder where the evaluation logs are written when `logs_path` is not given.
# NOTE(review): the "do-nothing-baseline" suffix looks inherited from another
# baseline -- confirm it is the intended folder name for DuelQSimple.
DEFAULT_LOGS_DIR = "./logs-eval/do-nothing-baseline"
# Number of episodes run during the evaluation.
DEFAULT_NB_EPISODE = 1
# Number of processes used by the runner.
DEFAULT_NB_PROCESS = 1
# Maximum number of steps per episode (forwarded to the runner as `max_iter`).
# NOTE(review): -1 presumably means "no limit" -- confirm against the Runner.
DEFAULT_MAX_STEPS = -1


def evaluate(env,
             name=DEFAULT_NAME,
             load_path=None,
             logs_path=DEFAULT_LOGS_DIR,
             nb_episode=DEFAULT_NB_EPISODE,
             nb_process=DEFAULT_NB_PROCESS,
             max_steps=DEFAULT_MAX_STEPS,
             verbose=False,
             save_gif=False,
             filter_action_fun=None):
    """
    Evaluate the performances of a trained DuelQSimple agent.

    The agent is rebuilt from the neural network architecture stored next to
    the saved model, its weights are loaded from ``load_path`` and it is then
    run on ``env`` through a :class:`grid2op.Runner.Runner`.

    .. warning::
        This baseline recodes the entire RL training procedure. You can use it if you
        want to have a deeper look at the Deep Q Learning algorithm and a possible
        (non optimized, slow, etc.) implementation.

        For a much better implementation, you can reuse the code of the "PPO_RLLIB"
        or the "PPO_SB3" baseline.

    Parameters
    ----------
    env: :class:`grid2op.Environment`
        The environment on which you evaluate your agent.

    name: ``str``
        The name of the trained baseline

    load_path: ``str``
        Path where the agent has been stored. It is mandatory.

    logs_path: ``str``
        Where to write the results of the assessment. If ``None``, no folder is
        created and ``None`` is forwarded to the runner as ``path_save`` (in that
        case no gif can be saved either).

    nb_episode: ``int``
        How many episodes to run during the assessment of the performances

    nb_process: ``int``
        On how many process the assessment will be made. (setting this > 1 can lead to some speed ups but can be
        unstable on some platform). The actions played by the agent are only stored when ``nb_process == 1``.

    max_steps: ``int``
        How many steps at maximum your agent will be assessed

    verbose: ``bool``
        Whether to print the model summary, a progress bar during the evaluation and
        a summary of the results. It is also forwarded to the runner parameters.

    save_gif: ``bool``
        Whether or not you want to save, as a gif, the performance of your agent. It might cause memory issues (might
        take a lot of ram) and drastically increase computation time. Ignored (with a warning) if ``logs_path``
        is ``None``.

    filter_action_fun: ``function``
        A function to filter the action space. See
        `IdToAct.filter_action <https://grid2op.readthedocs.io/en/latest/converter.html#grid2op.Converter.IdToAct.filter_action>`_
        documentation.

    Returns
    -------
    agent: :class:`DuelQSimple`
        The loaded agent that has been evaluated thanks to the runner.

    res: ``list``
        The results of the Runner on which the agent was tested.

    Raises
    ------
    RuntimeError
        If ``load_path`` is ``None``: there is no model to evaluate.

    Examples
    --------
    You can evaluate a DuelQSimpleBaseline this way:

    .. code-block:: python

        from grid2op.MakeEnv import make
        from grid2op.Reward import L2RPNSandBoxScore, L2RPNReward
        from l2rpn_baselines.DuelQSimple import evaluate

        # Create dataset env
        env = make("l2rpn_case14_sandbox",
                   reward_class=L2RPNSandBoxScore,
                   other_rewards={
                       "reward": L2RPNReward
                   })

        # Call evaluation interface
        evaluate(env,
                 name="MyAwesomeAgent",
                 load_path="/WHERE/I/SAVED/THE/MODEL",
                 logs_path=None,
                 nb_episode=10,
                 nb_process=1,
                 max_steps=-1,
                 verbose=False,
                 save_gif=False)


    """
    # Fail fast: checking the arguments first avoids paying for the tensorflow
    # import and the runner setup when there is nothing to evaluate.
    if load_path is None:
        raise RuntimeError("Cannot evaluate a model if there is nothing to be loaded.")

    import warnings
    import tensorflow as tf

    # Limit gpu usage. Memory growth has to be configured identically on every
    # visible GPU, and it can no longer be changed once the GPUs have been
    # initialized (tensorflow raises a RuntimeError in that case), for example
    # when a model was already trained in the same process.
    for gpu in tf.config.list_physical_devices('GPU'):
        try:
            tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError as exc_:
            warnings.warn("Impossible to limit the gpu memory usage: {}".format(exc_))

    runner_params = env.get_params_for_runner()
    runner_params["verbose"] = verbose

    # Only the path of the main model is needed to retrieve the architecture.
    path_model, _ = DuelQ_NN.get_path_model(load_path, name)
    nn_archi = DuelQ_NNParam.from_json(os.path.join(path_model, "nn_architecture.json"))

    # Create agent (actions are only stored when a single process is used)
    agent = DuelQSimple(action_space=env.action_space,
                        name=name,
                        store_action=nb_process == 1,
                        nn_archi=nn_archi,
                        observation_space=env.observation_space,
                        filter_action_fun=filter_action_fun)

    # Load weights from file
    agent.load(load_path)

    # Build runner
    runner = Runner(**runner_params,
                    agentClass=None,
                    agentInstance=agent)

    # Print model summary (only built when it is actually displayed)
    if verbose:
        summary_lines = []
        agent.deep_q._model.summary(print_fn=summary_lines.append)
        print("\n".join(summary_lines))

    # Run. `logs_path=None` is allowed: nothing has to be created on disk then.
    if logs_path is not None:
        os.makedirs(logs_path, exist_ok=True)
    res = runner.run(path_save=logs_path,
                     nb_episode=nb_episode,
                     nb_process=nb_process,
                     max_iter=max_steps,
                     pbar=verbose)

    # Print summary
    if verbose:
        print("Evaluation summary:")
        for _, chron_name, cum_reward, nb_time_step, max_ts in res:
            msg_tmp = "chronics at: {}".format(chron_name)
            msg_tmp += "\ttotal score: {:.6f}".format(cum_reward)
            msg_tmp += "\ttime steps: {:.0f}/{:.0f}".format(nb_time_step, max_ts)
            print(msg_tmp)

        if agent.dict_action:
            # I output some of the actions played
            print("The agent played {} different action".format(len(agent.dict_action)))
            for id_, (nb, act, _) in agent.dict_action.items():
                print("Action with ID {} was played {} times".format(id_, nb))
                print("{}".format(act))
                print("-----------")

    if save_gif:
        if logs_path is None:
            # The gif is generated from the logs written by the runner.
            warnings.warn("\"save_gif\" is ignored because \"logs_path\" is None: "
                          "there are no saved episodes to generate the gif from.")
        else:
            if verbose:
                print("Saving the gif of the episodes")
            save_log_gif(logs_path, res)
    return agent, res


if __name__ == "__main__":
    # Script entry point: evaluate a saved agent from the command line.
    from grid2op.Reward import L2RPNSandBoxScore, L2RPNReward
    from l2rpn_baselines.utils import cli_eval

    # Read the evaluation options given on the command line
    args = cli_eval().parse_args()

    # Build the environment the agent is evaluated on
    extra_rewards = {"reward": L2RPNReward}
    env = make(args.env_name,
               reward_class=L2RPNSandBoxScore,
               other_rewards=extra_rewards)

    # Gather the options forwarded to the evaluation interface
    eval_options = dict(
        name=args.name,
        load_path=os.path.abspath(args.load_path),
        logs_path=args.logs_dir,
        nb_episode=args.nb_episode,
        nb_process=args.nb_process,
        max_steps=args.max_steps,
        verbose=args.verbose,
        save_gif=args.save_gif,
    )

    # Run the evaluation
    evaluate(env, **eval_options)