Source code for l2rpn_baselines.LeapNetEncoded.leapNetEncoded_NN

# Copyright (c) 2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of L2RPN Baselines, L2RPN Baselines a repository to host baselines for l2rpn competitions.

import numpy as np
import os

# tf2.0 friendly
import warnings

try:
    import tensorflow as tf
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=FutureWarning)
        from tensorflow.keras.models import Sequential, Model
        from tensorflow.keras.layers import Activation
        from tensorflow.keras.layers import Input, Lambda, subtract, add
        import tensorflow.keras.backend as K
    # TODO implement that in the leap net package too
    from tensorflow.keras.layers import Dense
    _CAN_USE_TENSORFLOW = True
except ImportError:
    _CAN_USE_TENSORFLOW = False
    
from l2rpn_baselines.utils import BaseDeepQ, TrainingParam
from l2rpn_baselines.DuelQLeapNet.duelQLeapNet_NN import LtauBis


[docs]class LeapNetEncoded_NN(BaseDeepQ):
    """
    .. warning::
        This baseline recodes the entire RL training procedure. You can use it if you
        want to have a deeper look at the Deep Q Learning algorithm and a possible (non
        optimized, slow, etc.) implementation.
        
        For a much better implementation, you can reuse the code of "PPO_RLLIB" 
        or the "PPO_SB3" baseline.
        
    Constructs the desired neural networks.

    More information on the leap net can be found at `Leap Net on Github <https://github.com/BDonnot/leap_net>`_

    These are:

    - a "state encoder" that uses a leap net to "encode" the observation, or at least the part
      related to powergrid
    - a q network, that uses the output of the state encoder to predict which action is best.

    The Q network can have other types of input, and can also be a leap net, see the class
    :class:`l2rpn_baselines.LeapNetEncoded.leapNetEncoded_NNParam.LeapNetEncoded_NNParam` for more information

    """
    def __init__(self,
                 nn_params,
                 training_param=None):
        """
        Store the training settings and build the networks.

        Parameters
        ----------
        nn_params:
            Architecture description, forwarded unchanged to ``BaseDeepQ``.
        training_param:
            Training settings. A default ``TrainingParam()`` is created when
            this is ``None``.

        Raises
        ------
        RuntimeError
            If tensorflow could not be imported when this module was loaded.
        """
        if not _CAN_USE_TENSORFLOW:
            raise RuntimeError("Cannot import tensorflow, this function cannot be used.")

        training_param = TrainingParam() if training_param is None else training_param
        BaseDeepQ.__init__(self, nn_params, training_param)

        # custom layer class, registered by name in the custom objects mapping
        self._custom_objects = {"LtauBis": LtauBis}

        # clipping thresholds read by train_on_batch / _clipped_batch_loss
        self._max_loss = training_param.max_loss
        self._max_value_grad = training_param.max_value_grad
        self._max_global_norm_grad = training_param.max_global_norm_grad

        # last learning rate seen by train_on_batch
        self.train_lr = 1.0

        # attributes specific to this baseline, filled in by construct_q_network / train
        self._qnet_variables = []
        self.encoded_state = None
        self.grid_model = None
        self._schedule_grid_model = None
        self._optimizer_grid_model = None
        self.grid_model_losses_npy = None

        self.construct_q_network()

[docs]    def construct_q_network(self):
        """
        Builds the Q network.
        """
        # Uses the network architecture found in DeepMind paper
        # The inputs and outputs size have changed, as well as replacing the convolution by dense layers.
        self._model = Sequential()
        inputs_x = [Input(shape=(el,), name="x_{}".format(nm_)) for el, nm_ in
                    zip(self._nn_archi.x_dims, self._nn_archi.list_attr_obs_x)]
        inputs_q = [Input(shape=(el,), name="input_q_{}".format(nm_)) for el, nm_ in
                    zip(self._nn_archi.input_q_dims, self._nn_archi.list_attr_obs_input_q)]
        inputs_tau = [Input(shape=(el,), name="tau_{}".format(nm_)) for el, nm_ in
                      zip(self._nn_archi.tau_dims, self._nn_archi.list_attr_obs_tau)]
        input_topo = Input(shape=(2*self._nn_archi.dim_topo,), name="topo")
        models_all_inputs = [*inputs_x, *inputs_q, *inputs_tau, input_topo]

        # encode each data type in initial layers
        encs_out = []
        for init_val, nm_ in zip(inputs_x, self._nn_archi.list_attr_obs_x):
            lay = init_val
            for i, size in enumerate(self._nn_archi.sizes_enc):
                lay = Dense(size, name="enc_{}_{}".format(nm_, i))(lay) # TODO resnet instead of Dense
                lay = Activation("relu")(lay)
            encs_out.append(lay)

        # concatenate all that
        lay = tf.keras.layers.concatenate(encs_out)
        # now "lay" is the encoded observation

        # i do a few layer
        for i, size in enumerate(self._nn_archi.sizes_main):
            lay = Dense(size, name="main_{}".format(i))(lay)  # TODO resnet instead of Dense
            lay = Activation("relu")(lay)

        # now i do the leap net to encode the state
        encoded_state = tf.keras.layers.add([lay, LtauBis(name="leap_topo")([lay, input_topo])],
                                            name="encoded_state")
        self.encoded_state = tf.keras.backend.stop_gradient(encoded_state)

        # i predict the full state of the grid given the "control" variables
        outputs_gm = []
        grid_model_losses = {}
        lossWeights = {}  # TODO
        for sz_out, nm_ in zip(self._nn_archi.gm_out_dims,
                               self._nn_archi.list_attr_obs_gm_out):
            lay = encoded_state  # carefull i need my gradients here ! (don't use self.encoded_state)
            for i, size in enumerate(self._nn_archi.sizes_out_gm):
                lay = Dense(size, name="{}_{}".format(nm_, i))(lay)
                lay = Activation("relu")(lay)

            # predict now the variable
            name_output = "{}_hat".format(nm_)
            pred_ = Dense(sz_out, name=name_output)(lay)
            outputs_gm.append(pred_)
            grid_model_losses[name_output] = "mse"

        # NB grid_model does not use inputs_tau
        self.grid_model = Model(inputs=models_all_inputs, outputs=outputs_gm, name="grid_model")
        self._schedule_grid_model, self._optimizer_grid_model = self.make_optimiser()
        self.grid_model.compile(loss=grid_model_losses, optimizer=self._optimizer_grid_model) # , loss_weights=lossWeights

        # And now let's predict the Q values of each actions given the encoded grid state
        input_Qnet = inputs_q + [self.encoded_state]
        # TODO do i pre process the data coming from inputs_q ???

        lay = tf.keras.layers.concatenate(input_Qnet, name="input_Q_network")
        for i, size in enumerate(self._nn_archi.sizes_Qnet):
            tmp = Dense(size, name="qvalue_{}".format(i))  # TODO resnet instead of Dense
            lay = tmp(lay)
            lay = Activation("relu")(lay)
            self._qnet_variables += tmp.trainable_weights

        # And i predict the Q value of the action
        l_tau = lay
        for el, nm_ in zip(inputs_tau, self._nn_archi.list_attr_obs_tau):
            tmp = LtauBis(name="leap_{}".format(nm_))
            l_tau = l_tau + tmp([lay, el])
            self._qnet_variables += tmp.trainable_weights

        tmp = Dense(self._action_size)
        advantage = tmp(l_tau)
        self._qnet_variables += tmp.trainable_weights
        tmp = Dense(1, name="value")
        value = tmp(l_tau)
        self._qnet_variables += tmp.trainable_weights

        meaner = Lambda(lambda x: K.mean(x, axis=1))
        mn_ = meaner(advantage)
        tmp = subtract([advantage, mn_])
        policy = add([tmp, value], name="policy")

        model_all_outputs = [policy]
        self._model = Model(inputs=models_all_inputs, outputs=model_all_outputs)
        self._schedule_model, self._optimizer_model = self.make_optimiser()
        self._model.compile(loss='mse', optimizer=self._optimizer_model)

        self._target_model = Model(inputs=models_all_inputs, outputs=model_all_outputs)

    def _make_x_tau(self, data):
        # for the x's
        data_x = []
        prev = 0
        for sz, add_, mul_ in zip(self._nn_archi.x_dims,
                                  self._nn_archi.x_adds,
                                  self._nn_archi.x_mults):
            tmp = (data[:, prev:(prev+sz)] + add_) * mul_
            data_x.append(tmp)
            prev += sz

        # for the input of the q network
        data_q = []
        for sz, add_, mul_ in zip(self._nn_archi.input_q_dims,
                                  self._nn_archi.input_q_adds,
                                  self._nn_archi.input_q_mults):
            data_q.append((data[:, prev:(prev+sz)] + add_) * mul_)
            prev += sz

        # for the taus
        data_tau = []
        for sz, add_, mul_ in zip(self._nn_archi.tau_dims,
                                  self._nn_archi.tau_adds,
                                  self._nn_archi.tau_mults):
            data_tau.append((data[:, prev:(prev+sz)] + add_) * mul_)
            prev += sz

        # TODO pre process that into different vector
        data_topo = self._process_topo(data[:, prev:(prev+self._nn_archi.dim_topo)])

        prev += self._nn_archi.dim_topo
        # TODO predict also gen_q and load_v here, and p_or and q_or and p_ex and q_ex
        data_flow = []
        for sz, add_, mul_ in zip(self._nn_archi.gm_out_dims,
                                  self._nn_archi.gm_out_adds,
                                  self._nn_archi.gm_out_mults):
            data_flow.append((data[:, prev:(prev+sz)] + add_) * mul_)
            prev += sz

        res = [*data_x, *data_q, *data_tau, data_topo], data_flow
        return res

    def _process_topo(self, topo_vect):
        """process the topology vector.

         As input grid2op encode it:
         
         - -1 disconnected
         - 1 connected to bus 1
         - 2 connected to bus 2

         I transform it in a vector having twice as many component with the encoding, if we move
         "by pairs":
         
         - [1,0] -> disconnected
         - [0,0] -> connected to bus 1  # normal situation
         - [0,1] -> connected to bus 2
         """
        res = np.zeros((topo_vect.shape[0], 2*topo_vect.shape[1]),
                       dtype=np.float32)
        tmp_ = np.where(topo_vect == -1.)
        res[tmp_[0], 2*tmp_[1]] = 1.
        tmp_ = np.where(topo_vect == 2.)
        res[tmp_[0], 2*tmp_[1]+1] = 1.
        return res

[docs]    def predict_movement(self, data, epsilon, batch_size=None, training=False):
        """Predict movement of game controller where is epsilon
        probability randomly move."""
        if batch_size is None:
            batch_size = data.shape[0]
        data_nn, true_output_grid = self._make_x_tau(data)
        res = super().predict_movement(data_nn, epsilon=epsilon, batch_size=batch_size, training=training)
        return res

[docs]    def train(self, s_batch, a_batch, r_batch, d_batch, s2_batch, tf_writer=None, batch_size=None):
        if batch_size is None:
            batch_size = s_batch.shape[0]
        data_nn, true_output_grid = self._make_x_tau(s_batch)
        data_nn2, true_output_grid2 = self._make_x_tau(s2_batch)

        # train the grid model to accurately predict the state of the grid
        # TODO predict also gen_q and load_v here, and p_or and q_or and p_ex and q_ex
        loss1 = self.grid_model.train_on_batch(data_nn, true_output_grid)
        loss2 = self.grid_model.train_on_batch(data_nn2, true_output_grid2)

        # and now train the q network
        res = super().train(data_nn,
                            a_batch,
                            r_batch,
                            d_batch,
                            data_nn2,
                            tf_writer=tf_writer,
                            batch_size=batch_size)

        self.grid_model_losses_npy = 0.5*(np.array(loss1) + np.array(loss2))
        return res

[docs]    def train_on_batch(self, model, optimizer_model, x, y_true):
        """
        clip the loss
        """
        with tf.GradientTape() as tape:
            # Get y_pred for batch
            y_pred = model(x)
            # Compute loss for each sample in the batch
            # and then clip it
            batch_loss = self._clipped_batch_loss(y_true, y_pred)
            # Compute mean scalar loss
            loss = tf.math.reduce_mean(batch_loss)
        loss_npy = loss.numpy()

        # Compute gradients
        grads = tape.gradient(loss, self._qnet_variables)

        # clip gradients
        if self._max_global_norm_grad is not None:
            grads, _ = tf.clip_by_global_norm(grads, self._max_global_norm_grad)
        if self._max_value_grad is not None:
            grads = [tf.clip_by_value(grad, -self._max_value_grad, self._max_value_grad)
                     for grad in grads]

        # Apply gradients
        optimizer_model.apply_gradients(zip(grads, self._qnet_variables))
        # Store LR
        if hasattr(optimizer_model, "_decayed_lr"):
            self.train_lr = optimizer_model._decayed_lr('float32').numpy()
        else:
            self.train_lr = optimizer_model.learning_rate.numpy()

        # Return loss scalar
        return loss_npy

    def _clipped_batch_loss(self, y_true, y_pred):
        """
        Return the squared error summed over axis 1, clipped to ``[0, self._max_loss]``.

        No clipping is applied when ``self._max_loss`` is ``None``.
        """
        per_sample = tf.math.reduce_sum(tf.math.square(y_true - y_pred, name="sq_error"),
                                        axis=1,
                                        name="batch_sq_error")
        if self._max_loss is None:
            return per_sample
        return tf.clip_by_value(per_sample, 0.0, self._max_loss, name="batch_sq_error_clip")

[docs]    def save_tensorboard(self, current_step):
        if self.grid_model_losses_npy is not None:
            for i, el in enumerate(self._nn_archi.list_attr_obs_gm_out):
                tf.summary.scalar("loss_gridmodel_{}".format(el),
                                  self.grid_model_losses_npy[i],
                                  current_step,
                                  description="Loss of the neural network representing the powergrid "
                                              "for predicting {}"
                                              "".format(el))

    @staticmethod
    def _get_path_model(path, name=None):
        if name is None:
            path_model = path
        else:
            path_model = os.path.join(path, name)
        path_target_model = "{}_target".format(path_model)
        path_grid_model = "{}_grid_model".format(path_model)
        return path_model, path_target_model, path_grid_model

[docs]    def save_network(self, path, name=None, ext="h5"):
        """
        Saves all the models with unique names
        """
        path_model, path_target_model, path_grid_model = self._get_path_model(path, name)
        self._model.save('{}.{}'.format(path_model, ext))
        self._target_model.save('{}.{}'.format(path_target_model, ext))
        self.grid_model.save('{}.{}'.format(path_grid_model, ext))

[docs]    def load_network(self, path, name=None, ext="h5"):
        """
        We load all the models using the keras "load_model" function.
        """
        path_model, path_target_model, path_grid_model = self._get_path_model(path, name)
        self.construct_q_network()
        self._model.load_weights('{}.{}'.format(path_model, ext))
        self._target_model.load_weights('{}.{}'.format(path_target_model, ext))
        self.grid_model.load_weights('{}.{}'.format(path_grid_model, ext))
        if self.verbose:
            print("Succesfully loaded network.")