Source code for epynn.commons.maths

# EpyNN/epynn/commons/maths.py
# Related third party imports
import numpy as np


# To prevent divide-by-zero floating point errors
E_SAFE = 1e-16


def activation_tune(se_hPars):
    """Set layer's hyperparameters as temporary globals.

    For each forward and backward pass the function is called from within
    the layer.

    :param se_hPars: Local hyperparameters for layers.
    :type se_hPars: dict[str, str or float]
    """
    # Declare global
    global layer_hPars

    # Set global
    layer_hPars = se_hPars

    return None

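# Usage sketch (illustrative, not part of the module source): a layer calls
# activation_tune() with its local hyperparameters before each forward and
# backward pass, so the alpha-dependent activations below can read them.
# The dictionary content is assumed for the example.
se_hPars_example = {'ELU_alpha': 0.01, 'LRELU_alpha': 0.01}
activation_tune(se_hPars_example)    # layer_hPars is now available as a temporary global
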
def hadamard(dA, dLinear):
    """Element-wise matrix multiplication with support for the softmax
    derivative.

    This is implemented for the Dense layer and is compatible with other
    layers that satisfy the same requirements.

    :param dA: Input of backward propagation of shape (m, n).
    :type dA: :class:`numpy.ndarray`

    :param dLinear: Linear activation product passed through the derivative
        of the non-linear activation function, with shape (m, n) or (m, n, n).
    :type dLinear: :class:`numpy.ndarray`
    """
    # Non-softmax processing
    if dLinear.ndim == 2:
        dZ = dA * dLinear

    # Softmax processing
    elif dLinear.ndim == 3:
        dA = np.expand_dims(dA, 2)
        dZ = dA * dLinear
        dZ = np.sum(dZ, axis=1)

    return dZ

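# Usage sketch (illustrative, not part of the module source): hadamard()
# accepts both a (m, n) element-wise term and a (m, n, n) per-sample jacobian,
# as produced by softmax(x, deriv=True). Shapes and inputs are assumed.
m, n = 4, 3
dA_example = np.random.standard_normal((m, n))
dLinear_2d = np.random.standard_normal((m, n))        # e.g. sigmoid(x, deriv=True)
dLinear_3d = np.random.standard_normal((m, n, n))     # e.g. softmax(x, deriv=True)
assert hadamard(dA_example, dLinear_2d).shape == (m, n)
assert hadamard(dA_example, dLinear_3d).shape == (m, n)
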
### Activation functions and derivatives

# Identity function
def identity(x, deriv=False):
    """Compute identity activation or derivative.

    Note this is for testing purposes and cannot be used with backpropagation.

    :param x: Input array to pass in function.
    :type x: :class:`numpy.ndarray`

    :param deriv: To compute derivative, defaults to False.
    :type deriv: bool, optional

    :return: Output array passed in function.
    :rtype: :class:`numpy.ndarray`
    """
    if not deriv:
        pass

    elif deriv:
        x = np.ones_like(x)

    return x

# Rectifier Linear Unit (ReLU)
def relu(x, deriv=False):
    """Compute ReLU activation or derivative.

    :param x: Input array to pass in function.
    :type x: :class:`numpy.ndarray`

    :param deriv: To compute derivative, defaults to False.
    :type deriv: bool, optional

    :return: Output array passed in function.
    :rtype: :class:`numpy.ndarray`
    """
    if not deriv:
        x = np.maximum(0, x)

    elif deriv:
        x = np.greater(x, 0).astype(int)

    return x

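# Usage sketch (illustrative, not part of the module source): forward pass
# and derivative of ReLU on a small input array.
relu(np.array([-2., 0., 3.]))               # array([0., 0., 3.])
relu(np.array([-2., 0., 3.]), deriv=True)   # array([0, 0, 1])
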
# Leaky Rectifier Linear Unit (LReLU)
def lrelu(x, deriv=False):
    """Compute LReLU activation or derivative.

    :param x: Input array to pass in function.
    :type x: :class:`numpy.ndarray`

    :param deriv: To compute derivative, defaults to False.
    :type deriv: bool, optional

    :return: Output array passed in function.
    :rtype: :class:`numpy.ndarray`
    """
    # Retrieve alpha from layer hyperparameters (temporary globals)
    a = layer_hPars['LRELU_alpha']

    if not deriv:
        x = np.maximum(a * x, x)

    elif deriv:
        x = np.where(x > 0, 1, a)

    return x

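# Usage sketch (illustrative, not part of the module source): LRELU_alpha is
# read from the temporary global set by activation_tune(); the value 0.3 is
# assumed for the example.
activation_tune({'LRELU_alpha': 0.3})
lrelu(np.array([-2., 3.]))                  # array([-0.6, 3.])
lrelu(np.array([-2., 3.]), deriv=True)      # array([0.3, 1.])
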
# Exponential Linear Unit (ELU)
def elu(x, deriv=False):
    """Compute ELU activation or derivative.

    :param x: Input array to pass in function.
    :type x: :class:`numpy.ndarray`

    :param deriv: To compute derivative, defaults to False.
    :type deriv: bool, optional

    :return: Output array passed in function.
    :rtype: :class:`numpy.ndarray`
    """
    # Retrieve alpha from layer hyperparameters (temporary globals)
    a = layer_hPars['ELU_alpha']

    if not deriv:
        x = np.where(x > 0, x, a * (np.exp(x, where=x <= 0) - 1))

    elif deriv:
        x = np.where(x > 0, 1, elu(x) + a)

    return x

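# Usage sketch (illustrative, not part of the module source): ELU_alpha is
# read from the temporary global set by activation_tune(); the value 1.0 is
# assumed for the example.
activation_tune({'ELU_alpha': 1.0})
elu(np.array([-1., 2.]))                    # array([exp(-1) - 1, 2.]) ≈ array([-0.632, 2.])
elu(np.array([-1., 2.]), deriv=True)        # array([exp(-1), 1.]) ≈ array([0.368, 1.])
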
# Sigmoid (σ)
def sigmoid(x, deriv=False):
    """Compute Sigmoid activation or derivative.

    :param x: Input array to pass in function.
    :type x: :class:`numpy.ndarray`

    :param deriv: To compute derivative, defaults to False.
    :type deriv: bool, optional

    :return: Output array passed in function.
    :rtype: :class:`numpy.ndarray`
    """
    if not deriv:
        # Numerically stable version of the sigmoid function
        x = np.where(
            x >= 0,                        # condition
            1 / (1+np.exp(-x)),            # For positive values
            np.exp(x) / (1+np.exp(x))      # For negative values
        )

    elif deriv:
        x = sigmoid(x) * (1-sigmoid(x))

    return x

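# Usage sketch (illustrative, not part of the module source): the derivative
# equals s * (1 - s), where s is the sigmoid output.
z_example = np.array([-3., 0., 3.])
s_example = sigmoid(z_example)              # ≈ array([0.0474, 0.5, 0.9526])
assert np.allclose(sigmoid(z_example, deriv=True), s_example * (1 - s_example))
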
# Hyperbolic tangent (tanh)
def tanh(x, deriv=False):
    """Compute tanh activation or derivative.

    :param x: Input array to pass in function.
    :type x: :class:`numpy.ndarray`

    :param deriv: To compute derivative, defaults to False.
    :type deriv: bool, optional

    :return: Output array passed in function.
    :rtype: :class:`numpy.ndarray`
    """
    if not deriv:
        x = (np.exp(x)-np.exp(-x)) / (np.exp(x)+np.exp(-x))

    elif deriv:
        x = 1 - tanh(x)**2

    return x

# Softmax
def softmax(x, deriv=False):
    """Compute softmax activation or derivative.

    For Dense layer only. For other layers, you can replace the element-wise
    matrix multiplication operator '*' with :func:`epynn.commons.maths.hadamard`,
    which handles the softmax derivative jacobian matrix.

    :param x: Input array to pass in function.
    :type x: :class:`numpy.ndarray`

    :param deriv: To compute derivative, defaults to False.
    :type deriv: bool, optional

    :return: Output array passed in function.
    :rtype: :class:`numpy.ndarray`
    """
    # Retrieve temperature from layers hyperparameters (temporary globals)
    T = 1

    if not deriv:
        # Numerically stable version of the softmax function
        x_safe = x - np.max(x, axis=1, keepdims=True)

        x_exp = np.exp(x_safe / T)
        x_sum = np.sum(x_exp, axis=1, keepdims=True)

        x = x_exp / x_sum

    elif deriv:
        x = np.array([np.diag(s) - np.outer(s, s) for s in softmax(x)])

    return x

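# Usage sketch (illustrative, not part of the module source): each row of the
# output is a probability distribution, and the derivative returns one
# jacobian matrix per sample. The input shape is assumed.
z_example = np.random.standard_normal((4, 3))
p_example = softmax(z_example)
assert np.allclose(np.sum(p_example, axis=1), 1)
assert softmax(z_example, deriv=True).shape == (4, 3, 3)
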
### Weight initialization

# Xavier
def xavier(shape, rng=np.random):
    """Xavier Normal Distribution initialization for weight array.

    :param shape: Shape of weight array.
    :type shape: tuple[int]

    :param rng: Pseudo-random number generator, defaults to `np.random`.
    :type rng: :class:`numpy.random`

    :return: Initialized weight array.
    :rtype: :class:`numpy.ndarray`
    """
    W = rng.standard_normal(shape)      # Normal distribution, zero-centered
    W *= np.sqrt(2 / sum(list(shape)))  # Scale

    return W

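# Usage sketch (illustrative, not part of the module source): the seeded
# generator and the shape are assumed for the example.
W_example = xavier((64, 128), rng=np.random.default_rng(0))
# W_example.std() ≈ np.sqrt(2 / (64 + 128)) ≈ 0.102
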
# Orthogonal
def orthogonal(shape, rng=np.random):
    """Orthogonal initialization for weight array.

    :param shape: Shape of weight array.
    :type shape: tuple[int]

    :param rng: Pseudo-random number generator, defaults to `np.random`.
    :type rng: :class:`numpy.random`

    :return: Initialized weight array.
    :rtype: :class:`numpy.ndarray`
    """
    W = rng.standard_normal(shape)

    W = W.T if shape[0] < shape[1] else W

    # Compute QR factorization
    q, r = np.linalg.qr(W)

    # Make Q uniform by correcting signs from the diagonal of R
    d = np.diag(r, 0)
    ph = np.sign(d)
    q *= ph

    W = q.T if shape[0] < shape[1] else q

    return W

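# Usage sketch (illustrative, not part of the module source): for a (5, 3)
# shape the columns are orthonormal; for a (3, 5) shape the rows are. The
# seeded generator is assumed for the example.
W_example = orthogonal((5, 3), rng=np.random.default_rng(0))
assert np.allclose(W_example.T @ W_example, np.eye(3))
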
### Gradient clipping
def clip_gradient(layer, max_norm=0.25):
    """Clip gradients to avoid vanishing or exploding gradients.

    :param layer: An instance of trainable layer.
    :type layer: Object

    :param max_norm: Maximum gradient norm allowed, defaults to 0.25.
    :type max_norm: float, optional
    """
    total_norm = 0

    # Compute L2 norm squared for each gradient
    for grad in layer.g.values():
        grad_norm = np.sum(np.power(grad + E_SAFE, 2))
        total_norm += grad_norm    # Add to total norm

    total_norm = np.sqrt(total_norm)

    # Compute clipping coefficient
    clip_coef = max_norm / (total_norm+E_SAFE)

    # Clip the gradient if norm is greater than max norm
    if clip_coef < 1:
        for g in layer.g.keys():
            layer.g[g] *= clip_coef

    return None

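# Usage sketch (illustrative, not part of the module source): a minimal
# stand-in for a trainable layer exposing its gradients in the dict attribute
# `g`; the gradient values are assumed for the example.
from types import SimpleNamespace
layer_example = SimpleNamespace(g={'dW': np.full((3, 3), 10.0), 'db': np.full((3,), 10.0)})
clip_gradient(layer_example, max_norm=0.25)
total = np.sqrt(sum(np.sum(g ** 2) for g in layer_example.g.values()))
assert total <= 0.25 + 1e-6    # gradients rescaled so the global L2 norm is at most max_norm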