Source code for l2rpn_baselines.LeapNetEncoded.leapNetEncoded_NN

# Copyright (c) 2020, RTE (https://www.rte-france.com)
# See AUTHORS.txt
# This Source Code Form is subject to the terms of the Mozilla Public License, version 2.0.
# If a copy of the Mozilla Public License, version 2.0 was not distributed with this file,
# you can obtain one at http://mozilla.org/MPL/2.0/.
# SPDX-License-Identifier: MPL-2.0
# This file is part of L2RPN Baselines, a repository to host baselines for l2rpn competitions.

import numpy as np
import os

# tf2.0 friendly
import warnings

try:
    import tensorflow as tf
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=FutureWarning)
        from tensorflow.keras.models import Sequential, Model
        from tensorflow.keras.layers import Activation
        from tensorflow.keras.layers import Input, Lambda, subtract, add
        import tensorflow.keras.backend as K
    # TODO implement that in the leap net package too
    from tensorflow.keras.layers import Dense
    _CAN_USE_TENSORFLOW = True
except ImportError:
    _CAN_USE_TENSORFLOW = False
    
from l2rpn_baselines.utils import BaseDeepQ, TrainingParam
from l2rpn_baselines.DuelQLeapNet.duelQLeapNet_NN import LtauBis


class LeapNetEncoded_NN(BaseDeepQ):
    """
    .. warning::
        This baseline re-implements the entire RL training procedure. You can
        use it if you want a deeper look at the Deep Q Learning algorithm and
        a possible (non-optimized, slow, etc.) implementation.

        For a much better implementation, you can reuse the code of the
        "PPO_RLLIB" or the "PPO_SB3" baseline.

    Constructs the desired neural networks.

    More information on the leap net can be found at
    `Leap Net on Github <https://github.com/BDonnot/leap_net>`_.

    These networks are:

    - a "state encoder" that uses a leap net to "encode" the observation, or
      at least the part related to the powergrid
    - a "Q network" that uses the output of the state encoder to predict
      which action is best

    The Q network can have other types of input, and can also be a leap net;
    see the class
    :class:`l2rpn_baselines.LeapNetEncoded.leapNetEncoded_NNParam.LeapNetEncoded_NNParam`
    for more information.
    """
    def __init__(self, nn_params, training_param=None):
        if not _CAN_USE_TENSORFLOW:
            raise RuntimeError("Cannot import tensorflow, this function cannot be used.")

        if training_param is None:
            training_param = TrainingParam()
        BaseDeepQ.__init__(self, nn_params, training_param)
        self._custom_objects = {"LtauBis": LtauBis}
        self._max_global_norm_grad = training_param.max_global_norm_grad
        self._max_value_grad = training_param.max_value_grad
        self._max_loss = training_param.max_loss
        self.train_lr = 1.0

        # attributes added on top of BaseDeepQ
        self.encoded_state = None
        self.grid_model = None
        self._schedule_grid_model = None
        self._optimizer_grid_model = None
        self._qnet_variables = []
        self.grid_model_losses_npy = None

        self.construct_q_network()
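    # A hedged usage sketch (not part of the original module). The exact
    # fields of LeapNetEncoded_NNParam are defined in
    # leapNetEncoded_NNParam.py; the "..." below stands for the grid-specific
    # dimensions you have to provide:
    #
    #   >>> from l2rpn_baselines.LeapNetEncoded.leapNetEncoded_NNParam import LeapNetEncoded_NNParam
    #   >>> from l2rpn_baselines.utils import TrainingParam
    #   >>> nn_archi = LeapNetEncoded_NNParam(...)  # grid-specific sizes
    #   >>> nn = LeapNetEncoded_NN(nn_archi, training_param=TrainingParam())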
    def construct_q_network(self):
        """
        Builds the Q network.
        """
        # uses the network architecture found in the DeepMind paper; the input
        # and output sizes have changed, and the convolutions are replaced by
        # dense layers.
        self._model = Sequential()

        inputs_x = [Input(shape=(el,), name="x_{}".format(nm_))
                    for el, nm_ in zip(self._nn_archi.x_dims,
                                       self._nn_archi.list_attr_obs_x)]
        inputs_q = [Input(shape=(el,), name="input_q_{}".format(nm_))
                    for el, nm_ in zip(self._nn_archi.input_q_dims,
                                       self._nn_archi.list_attr_obs_input_q)]
        inputs_tau = [Input(shape=(el,), name="tau_{}".format(nm_))
                      for el, nm_ in zip(self._nn_archi.tau_dims,
                                         self._nn_archi.list_attr_obs_tau)]
        input_topo = Input(shape=(2 * self._nn_archi.dim_topo,), name="topo")
        models_all_inputs = [*inputs_x, *inputs_q, *inputs_tau, input_topo]

        # encode each data type in the initial layers
        encs_out = []
        for init_val, nm_ in zip(inputs_x, self._nn_archi.list_attr_obs_x):
            lay = init_val
            for i, size in enumerate(self._nn_archi.sizes_enc):
                lay = Dense(size, name="enc_{}_{}".format(nm_, i))(lay)  # TODO resnet instead of Dense
                lay = Activation("relu")(lay)
            encs_out.append(lay)

        # concatenate all the encodings
        lay = tf.keras.layers.concatenate(encs_out)

        # now "lay" is the encoded observation: add a few more layers
        for i, size in enumerate(self._nn_archi.sizes_main):
            lay = Dense(size, name="main_{}".format(i))(lay)  # TODO resnet instead of Dense
            lay = Activation("relu")(lay)

        # use the leap net to encode the state
        encoded_state = tf.keras.layers.add([lay, LtauBis(name="leap_topo")([lay, input_topo])],
                                            name="encoded_state")
        self.encoded_state = tf.keras.backend.stop_gradient(encoded_state)

        # predict the full state of the grid given the "control" variables
        outputs_gm = []
        grid_model_losses = {}
        lossWeights = {}  # TODO
        for sz_out, nm_ in zip(self._nn_archi.gm_out_dims,
                               self._nn_archi.list_attr_obs_gm_out):
            # careful: the gradients are needed here (do not use self.encoded_state)
            lay = encoded_state
            for i, size in enumerate(self._nn_archi.sizes_out_gm):
                lay = Dense(size, name="{}_{}".format(nm_, i))(lay)
                lay = Activation("relu")(lay)

            # now predict the variable
            name_output = "{}_hat".format(nm_)
            pred_ = Dense(sz_out, name=name_output)(lay)
            outputs_gm.append(pred_)
            grid_model_losses[name_output] = "mse"

        # NB the grid model does not use inputs_tau
        self.grid_model = Model(inputs=models_all_inputs, outputs=outputs_gm, name="grid_model")
        self._schedule_grid_model, self._optimizer_grid_model = self.make_optimiser()
        self.grid_model.compile(loss=grid_model_losses, optimizer=self._optimizer_grid_model)  # , loss_weights=lossWeights

        # and now predict the Q value of each action given the encoded grid state
        input_Qnet = inputs_q + [self.encoded_state]
        # TODO do i pre process the data coming from inputs_q ???
        lay = tf.keras.layers.concatenate(input_Qnet, name="input_Q_network")
        for i, size in enumerate(self._nn_archi.sizes_Qnet):
            tmp = Dense(size, name="qvalue_{}".format(i))  # TODO resnet instead of Dense
            lay = tmp(lay)
            lay = Activation("relu")(lay)
            self._qnet_variables += tmp.trainable_weights

        # and predict the Q value of each action
        l_tau = lay
        for el, nm_ in zip(inputs_tau, self._nn_archi.list_attr_obs_tau):
            tmp = LtauBis(name="leap_{}".format(nm_))
            l_tau = l_tau + tmp([lay, el])
            self._qnet_variables += tmp.trainable_weights

        tmp = Dense(self._action_size)
        advantage = tmp(l_tau)
        self._qnet_variables += tmp.trainable_weights
        tmp = Dense(1, name="value")
        value = tmp(l_tau)
        self._qnet_variables += tmp.trainable_weights

        meaner = Lambda(lambda x: K.mean(x, axis=1))
        mn_ = meaner(advantage)
        tmp = subtract([advantage, mn_])
        policy = add([tmp, value], name="policy")

        model_all_outputs = [policy]
        self._model = Model(inputs=models_all_inputs, outputs=model_all_outputs)
        self._schedule_model, self._optimizer_model = self.make_optimiser()
        self._model.compile(loss='mse', optimizer=self._optimizer_model)
        self._target_model = Model(inputs=models_all_inputs, outputs=model_all_outputs)
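    # The head above implements the "dueling" decomposition
    # Q(s, a) = V(s) + A(s, a) - mean_a' A(s, a').
    # A minimal numpy check of that identity (illustrative values only):
    #
    #   >>> adv = np.array([[1., 2., 3.]])   # advantage of each action
    #   >>> val = np.array([[0.5]])          # state value
    #   >>> val + (adv - adv.mean(axis=1, keepdims=True))
    #   array([[-0.5,  0.5,  1.5]])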
    def _make_x_tau(self, data):
        """Slice the flat observation vector into the different network inputs,
        scaling each slice with its (add, mult) normalization."""
        # for the x's
        data_x = []
        prev = 0
        for sz, add_, mul_ in zip(self._nn_archi.x_dims,
                                  self._nn_archi.x_adds,
                                  self._nn_archi.x_mults):
            tmp = (data[:, prev:(prev + sz)] + add_) * mul_
            data_x.append(tmp)
            prev += sz

        # for the input of the q network
        data_q = []
        for sz, add_, mul_ in zip(self._nn_archi.input_q_dims,
                                  self._nn_archi.input_q_adds,
                                  self._nn_archi.input_q_mults):
            data_q.append((data[:, prev:(prev + sz)] + add_) * mul_)
            prev += sz

        # for the taus
        data_tau = []
        for sz, add_, mul_ in zip(self._nn_archi.tau_dims,
                                  self._nn_archi.tau_adds,
                                  self._nn_archi.tau_mults):
            data_tau.append((data[:, prev:(prev + sz)] + add_) * mul_)
            prev += sz

        # TODO pre process that into different vector
        data_topo = self._process_topo(data[:, prev:(prev + self._nn_archi.dim_topo)])
        prev += self._nn_archi.dim_topo

        # TODO predict also gen_q and load_v here, and p_or and q_or and p_ex and q_ex
        data_flow = []
        for sz, add_, mul_ in zip(self._nn_archi.gm_out_dims,
                                  self._nn_archi.gm_out_adds,
                                  self._nn_archi.gm_out_mults):
            data_flow.append((data[:, prev:(prev + sz)] + add_) * mul_)
            prev += sz

        res = [*data_x, *data_q, *data_tau, data_topo], data_flow
        return res

    def _process_topo(self, topo_vect):
        """Process the topology vector.

        As input, grid2op encodes it as:

        - -1: disconnected
        - 1: connected to bus 1
        - 2: connected to bus 2

        I transform it into a vector with twice as many components, encoded,
        if we read it "by pairs", as:

        - [1, 0] -> disconnected
        - [0, 0] -> connected to bus 1  # normal situation
        - [0, 1] -> connected to bus 2
        """
        res = np.zeros((topo_vect.shape[0], 2 * topo_vect.shape[1]), dtype=np.float32)
        tmp_ = np.where(topo_vect == -1.)
        res[tmp_[0], 2 * tmp_[1]] = 1.
        tmp_ = np.where(topo_vect == 2.)
        res[tmp_[0], 2 * tmp_[1] + 1] = 1.
        return res
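    # A worked example of the encoding above (values chosen for illustration,
    # with ``nn`` an instance of this class): a grid with 3 elements, the
    # first disconnected (-1), the second on bus 1, the third on bus 2:
    #
    #   >>> topo = np.array([[-1., 1., 2.]])
    #   >>> nn._process_topo(topo)
    #   array([[1., 0., 0., 0., 0., 1.]], dtype=float32)
    #
    # _make_x_tau expects the flat observation to be laid out as consecutive
    # slices [x's | input_q's | tau's | topo | grid model outputs], with sizes
    # taken from x_dims, input_q_dims, tau_dims, dim_topo and gm_out_dims.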
    def predict_movement(self, data, epsilon, batch_size=None, training=False):
        """Predict the next action, moving randomly with probability epsilon
        (epsilon-greedy exploration)."""
        if batch_size is None:
            batch_size = data.shape[0]
        data_nn, true_output_grid = self._make_x_tau(data)
        res = super().predict_movement(data_nn, epsilon=epsilon, batch_size=batch_size, training=training)
        return res
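    # Hedged rollout sketch: with probability ``epsilon`` a random action is
    # taken, otherwise the argmax of the predicted Q values (the exact return
    # value is determined by BaseDeepQ.predict_movement):
    #
    #   >>> obs_vect = ...  # flat observation, same layout as for train()
    #   >>> res = nn.predict_movement(obs_vect, epsilon=0.05)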
    def train(self, s_batch, a_batch, r_batch, d_batch, s2_batch, tf_writer=None, batch_size=None):
        if batch_size is None:
            batch_size = s_batch.shape[0]

        data_nn, true_output_grid = self._make_x_tau(s_batch)
        data_nn2, true_output_grid2 = self._make_x_tau(s2_batch)

        # train the grid model to accurately predict the state of the grid
        # TODO predict also gen_q and load_v here, and p_or and q_or and p_ex and q_ex
        loss1 = self.grid_model.train_on_batch(data_nn, true_output_grid)
        loss2 = self.grid_model.train_on_batch(data_nn2, true_output_grid2)

        # and now train the q network
        res = super().train(data_nn,
                            a_batch,
                            r_batch,
                            d_batch,
                            data_nn2,
                            tf_writer=tf_writer,
                            batch_size=batch_size)
        self.grid_model_losses_npy = 0.5 * (np.array(loss1) + np.array(loss2))
        return res
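    # Hedged training-loop sketch; ``replay_buffer`` and its ``sample``
    # method (and ``writer``) are assumptions, not part of this module:
    #
    #   >>> s, a, r, d, s2 = replay_buffer.sample(batch_size)
    #   >>> loss = nn.train(s, a, r, d, s2, tf_writer=writer)
    #   >>> nn.grid_model_losses_npy  # averaged losses of the grid model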
    def train_on_batch(self, model, optimizer_model, x, y_true):
        """Train the Q network on one batch, clipping the loss and the gradients."""
        with tf.GradientTape() as tape:
            # Get y_pred for batch
            y_pred = model(x)
            # Compute loss for each sample in the batch
            # and then clip it
            batch_loss = self._clipped_batch_loss(y_true, y_pred)
            # Compute mean scalar loss
            loss = tf.math.reduce_mean(batch_loss)
        loss_npy = loss.numpy()

        # Compute gradients
        grads = tape.gradient(loss, self._qnet_variables)

        # clip gradients
        if self._max_global_norm_grad is not None:
            grads, _ = tf.clip_by_global_norm(grads, self._max_global_norm_grad)
        if self._max_value_grad is not None:
            grads = [tf.clip_by_value(grad, -self._max_value_grad, self._max_value_grad)
                     for grad in grads]

        # Apply gradients
        optimizer_model.apply_gradients(zip(grads, self._qnet_variables))

        # Store LR
        if hasattr(optimizer_model, "_decayed_lr"):
            self.train_lr = optimizer_model._decayed_lr('float32').numpy()
        else:
            self.train_lr = optimizer_model.learning_rate.numpy()

        # Return loss scalar
        return loss_npy
    def _clipped_batch_loss(self, y_true, y_pred):
        """Squared error summed per sample, optionally clipped at self._max_loss."""
        sq_error = tf.math.square(y_true - y_pred, name="sq_error")
        batch_sq_error = tf.math.reduce_sum(sq_error, axis=1, name="batch_sq_error")
        if self._max_loss is not None:
            res = tf.clip_by_value(batch_sq_error, 0.0, self._max_loss, name="batch_sq_error_clip")
        else:
            res = batch_sq_error
        return res
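    # Numeric sketch of the clipping above (illustrative values): with
    # y_true = [[0., 0.]] and y_pred = [[3., 4.]], the per-sample squared
    # error is 3**2 + 4**2 = 25; with self._max_loss = 10. that sample
    # contributes a loss of 10. instead of 25.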
    def save_tensorboard(self, current_step):
        if self.grid_model_losses_npy is not None:
            for i, el in enumerate(self._nn_archi.list_attr_obs_gm_out):
                tf.summary.scalar("loss_gridmodel_{}".format(el),
                                  self.grid_model_losses_npy[i],
                                  current_step,
                                  description="Loss of the neural network representing the powergrid "
                                              "for predicting {}".format(el))
    @staticmethod
    def _get_path_model(path, name=None):
        if name is None:
            path_model = path
        else:
            path_model = os.path.join(path, name)
        path_target_model = "{}_target".format(path_model)
        path_grid_model = "{}_grid_model".format(path_model)
        return path_model, path_target_model, path_grid_model
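    # For example (on a posix system):
    #
    #   >>> LeapNetEncoded_NN._get_path_model("saved_networks", "leapnet")
    #   ('saved_networks/leapnet', 'saved_networks/leapnet_target', 'saved_networks/leapnet_grid_model')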
    def save_network(self, path, name=None, ext="h5"):
        """
        Saves all the models with unique names.
        """
        path_model, path_target_model, path_grid_model = self._get_path_model(path, name)
        self._model.save('{}.{}'.format(path_model, ext))
        self._target_model.save('{}.{}'.format(path_target_model, ext))
        self.grid_model.save('{}.{}'.format(path_grid_model, ext))
    def load_network(self, path, name=None, ext="h5"):
        """
        Loads all the models using the keras "load_model" function.
        """
        path_model, path_target_model, path_grid_model = self._get_path_model(path, name)
        self.construct_q_network()
        self._model.load_weights('{}.{}'.format(path_model, ext))
        self._target_model.load_weights('{}.{}'.format(path_target_model, ext))
        self.grid_model.load_weights('{}.{}'.format(path_grid_model, ext))
        if self.verbose:
            print("Successfully loaded network.")
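    # Hedged save / load round trip (paths and names are placeholders):
    #
    #   >>> nn.save_network("./saved", name="leapnet", ext="h5")
    #   >>> nn2 = LeapNetEncoded_NN(nn_archi, training_param)
    #   >>> nn2.load_network("./saved", name="leapnet", ext="h5")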