# EpyNN/epynn/commons/maths.py
# Related third party imports
import numpy as np

# To prevent floating point errors in divisions
E_SAFE = 1e-16

def activation_tune(se_hPars):
    """Set a layer's hyperparameters as temporary globals.

    The function is called from within the layer for each forward
    and backward pass.

    :param se_hPars: Local hyperparameters for layers.
    :type se_hPars: dict[str, str or float]
    """
    # Declare global
    global layer_hPars

    # Set global
    layer_hPars = se_hPars

    return None
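
# Usage sketch (illustrative): hyperparameters set here become visible to the
# activation functions below through the `layer_hPars` temporary global.
#     activation_tune({'LRELU_alpha': 0.05})
#     lrelu(np.array([-2.0]))   # -> [-0.1], alpha read from the temporary global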

def hadamard(dA, dLinear):
    """Element-wise matrix multiplication with support for the softmax
    derivative.

    This is implemented for the Dense layer and is compatible with other
    layers satisfying the shape requirements below.

    :param dA: Input of backward propagation of shape (m, n).
    :type dA: :class:`numpy.ndarray`

    :param dLinear: Linear activation product passed through the derivative
        of the non-linear activation function, of shape (m, n) or (m, n, n)
        for the softmax jacobian.
    :type dLinear: :class:`numpy.ndarray`

    :return: Element-wise product of shape (m, n).
    :rtype: :class:`numpy.ndarray`
    """
    # Non-softmax processing: plain element-wise product
    if dLinear.ndim == 2:
        dZ = dA * dLinear

    # Softmax processing: contract dA with each sample's (n, n) jacobian
    elif dLinear.ndim == 3:
        dA = np.expand_dims(dA, 2)
        dZ = dA * dLinear
        dZ = np.sum(dZ, axis=1)

    return dZ
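
# Usage sketch (illustrative): with a 3D jacobian, hadamard() applies the
# chain rule per sample instead of a plain element-wise product.
#     dA = np.ones((1, 2))
#     J = np.array([[[0.25, -0.25], [-0.25, 0.25]]])   # (1, 2, 2) softmax jacobian
#     hadamard(dA, J)   # -> [[0., 0.]]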

### Activation functions and derivatives

# Identity function
def identity(x, deriv=False):
    """Compute identity activation or derivative.

    Note this is for testing purposes and cannot be used with
    backpropagation.

    :param x: Input array to pass in function.
    :type x: :class:`numpy.ndarray`

    :param deriv: To compute derivative, defaults to False.
    :type deriv: bool, optional

    :return: Output array passed in function.
    :rtype: :class:`numpy.ndarray`
    """
    if not deriv:
        pass

    elif deriv:
        x = np.ones_like(x)

    return x
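
# Usage sketch (illustrative):
#     identity(np.array([-2.0, 3.0]))               # -> [-2.0, 3.0] (unchanged)
#     identity(np.array([-2.0, 3.0]), deriv=True)   # -> [1.0, 1.0]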

# Rectifier Linear Unit (ReLU)
def relu(x, deriv=False):
    """Compute ReLU activation or derivative.

    :param x: Input array to pass in function.
    :type x: :class:`numpy.ndarray`

    :param deriv: To compute derivative, defaults to False.
    :type deriv: bool, optional

    :return: Output array passed in function.
    :rtype: :class:`numpy.ndarray`
    """
    if not deriv:
        x = np.maximum(0, x)

    elif deriv:
        x = np.greater(x, 0).astype(int)

    return x
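
# Usage sketch (illustrative):
#     relu(np.array([-2.0, 0.0, 3.0]))               # -> [0.0, 0.0, 3.0]
#     relu(np.array([-2.0, 0.0, 3.0]), deriv=True)   # -> [0, 0, 1]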

# Leaky Rectifier Linear Unit (LReLU)
def lrelu(x, deriv=False):
    """Compute LReLU activation or derivative.

    :param x: Input array to pass in function.
    :type x: :class:`numpy.ndarray`

    :param deriv: To compute derivative, defaults to False.
    :type deriv: bool, optional

    :return: Output array passed in function.
    :rtype: :class:`numpy.ndarray`
    """
    # Retrieve alpha from the layer's hyperparameters (temporary globals)
    a = layer_hPars['LRELU_alpha']

    if not deriv:
        x = np.maximum(a * x, x)

    elif deriv:
        x = np.where(x > 0, 1, a)

    return x
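
# Usage sketch (illustrative, assuming alpha = 0.05):
#     activation_tune({'LRELU_alpha': 0.05})
#     lrelu(np.array([-2.0, 3.0]))               # -> [-0.1, 3.0]
#     lrelu(np.array([-2.0, 3.0]), deriv=True)   # -> [0.05, 1.0]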

# Exponential Linear Unit (ELU)
def elu(x, deriv=False):
    """Compute ELU activation or derivative.

    :param x: Input array to pass in function.
    :type x: :class:`numpy.ndarray`

    :param deriv: To compute derivative, defaults to False.
    :type deriv: bool, optional

    :return: Output array passed in function.
    :rtype: :class:`numpy.ndarray`
    """
    # Retrieve alpha from the layer's hyperparameters (temporary globals)
    a = layer_hPars['ELU_alpha']

    if not deriv:
        # exp() is only evaluated where x <= 0 to avoid overflow;
        # positive entries take the identity branch of np.where
        x = np.where(x > 0, x, a * (np.exp(x, where=x<=0)-1))

    elif deriv:
        # For x <= 0, d/dx [a * (exp(x)-1)] = a * exp(x) = elu(x) + a
        x = np.where(x > 0, 1, elu(x) + a)

    return x
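
# Usage sketch (illustrative, assuming alpha = 1.0):
#     activation_tune({'ELU_alpha': 1.0})
#     elu(np.array([-1.0, 2.0]))               # -> [exp(-1) - 1 ≈ -0.632, 2.0]
#     elu(np.array([-1.0, 2.0]), deriv=True)   # -> [exp(-1) ≈ 0.368, 1.0]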

# Sigmoid (σ)
def sigmoid(x, deriv=False):
    """Compute Sigmoid activation or derivative.

    :param x: Input array to pass in function.
    :type x: :class:`numpy.ndarray`

    :param deriv: To compute derivative, defaults to False.
    :type deriv: bool, optional

    :return: Output array passed in function.
    :rtype: :class:`numpy.ndarray`
    """
    if not deriv:
        # Numerically stable version of the sigmoid function
        x = np.where(
            x >= 0,                      # Condition
            1 / (1+np.exp(-x)),          # For positive values
            np.exp(x) / (1+np.exp(x))    # For negative values
        )

    elif deriv:
        x = sigmoid(x) * (1-sigmoid(x))

    return x
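
# Usage sketch (illustrative):
#     sigmoid(np.array([0.0]))               # -> [0.5]
#     sigmoid(np.array([0.0]), deriv=True)   # -> [0.25], i.e. σ(0) * (1 - σ(0))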

# Hyperbolic tangent (tanh)
def tanh(x, deriv=False):
    """Compute tanh activation or derivative.

    :param x: Input array to pass in function.
    :type x: :class:`numpy.ndarray`

    :param deriv: To compute derivative, defaults to False.
    :type deriv: bool, optional

    :return: Output array passed in function.
    :rtype: :class:`numpy.ndarray`
    """
    if not deriv:
        x = (np.exp(x)-np.exp(-x)) / (np.exp(x)+np.exp(-x))

    elif deriv:
        x = 1 - tanh(x)**2

    return x
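
# Usage sketch (illustrative):
#     tanh(np.array([0.0, 1.0]))          # -> [0.0, ~0.7616]
#     tanh(np.array([0.0]), deriv=True)   # -> [1.0], i.e. 1 - tanh(0)**2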

# Softmax
def softmax(x, deriv=False):
    """Compute softmax activation or derivative.

    For Dense layer only. For other layers, you can replace the
    element-wise matrix multiplication operator '*' by
    :func:`epynn.commons.maths.hadamard`, which handles the jacobian
    matrix of the softmax derivative.

    :param x: Input array to pass in function.
    :type x: :class:`numpy.ndarray`

    :param deriv: To compute derivative, defaults to False.
    :type deriv: bool, optional

    :return: Output array passed in function.
    :rtype: :class:`numpy.ndarray`
    """
    # Retrieve temperature from the layer's hyperparameters
    # (temporary globals), defaulting to 1 when unset
    T = layer_hPars.get('softmax_temperature', 1)

    if not deriv:
        # Numerically stable version of the softmax function
        x_safe = x - np.max(x, axis=1, keepdims=True)

        x_exp = np.exp(x_safe / T)
        x_sum = np.sum(x_exp, axis=1, keepdims=True)

        x = x_exp / x_sum

    elif deriv:
        # Per-sample jacobian: diag(s) - outer(s, s) for each softmax row s
        x = np.array([np.diag(s) - np.outer(s, s) for s in softmax(x)])

    return x
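
# Usage sketch (illustrative; the 'softmax_temperature' key is assumed from
# the hyperparameter retrieval pattern used above):
#     activation_tune({})   # no temperature set, T defaults to 1
#     softmax(np.array([[1.0, 1.0, 1.0]]))          # -> [[1/3, 1/3, 1/3]]
#     softmax(np.array([[0.0, 0.0]]), deriv=True)   # -> jacobian of shape (1, 2, 2)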

### Weight initialization

# Xavier
def xavier(shape, rng=np.random):
    """Xavier Normal Distribution initialization for weight array.

    :param shape: Shape of weight array.
    :type shape: tuple[int]

    :param rng: Pseudo-random number generator, defaults to `np.random`.
    :type rng: :class:`numpy.random`

    :return: Initialized weight array.
    :rtype: :class:`numpy.ndarray`
    """
    W = rng.standard_normal(shape)    # Normal distribution, zero-centered
    W *= np.sqrt(2 / sum(shape))      # Scale with the sum of dimensions

    return W
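
# Usage sketch (illustrative):
#     W = xavier((8, 4))
#     W.shape   # -> (8, 4); entries have variance 2 / (8 + 4)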

# Orthogonal
def orthogonal(shape, rng=np.random):
    """Orthogonal initialization for weight array.

    :param shape: Shape of weight array.
    :type shape: tuple[int]

    :param rng: Pseudo-random number generator, defaults to `np.random`.
    :type rng: :class:`numpy.random`

    :return: Initialized weight array.
    :rtype: :class:`numpy.ndarray`
    """
    W = rng.standard_normal(shape)

    W = W.T if shape[0] < shape[1] else W

    # Compute QR factorization
    q, r = np.linalg.qr(W)

    # Make Q uniform by sign-correcting with the diagonal of R
    d = np.diag(r, 0)
    ph = np.sign(d)
    q *= ph

    W = q.T if shape[0] < shape[1] else q

    return W
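
# Usage sketch (illustrative): rows (or columns) of W are orthonormal.
#     W = orthogonal((3, 5))
#     np.allclose(W @ W.T, np.eye(3))   # -> True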

### Gradients clipping

def clip_gradient(layer, max_norm=0.25):
    """Clip gradients to avoid vanishing or exploding gradients.

    :param layer: An instance of trainable layer.
    :type layer: Object

    :param max_norm: Maximal clipping coefficient allowed, defaults to 0.25.
    :type max_norm: float, optional
    """
    total_norm = 0

    # Compute the squared L2 norm for each gradient
    for grad in layer.g.values():
        grad_norm = np.sum(np.power(grad + E_SAFE, 2))
        total_norm += grad_norm    # Add to total norm

    total_norm = np.sqrt(total_norm)

    # Compute clipping coefficient
    clip_coef = max_norm / (total_norm+E_SAFE)

    # Clip the gradients if the total norm is greater than max norm
    if clip_coef < 1:
        for g in layer.g.keys():
            layer.g[g] *= clip_coef

    return None
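
# Usage sketch (illustrative; `_Layer` is a hypothetical stand-in exposing
# the `g` gradients dict expected by clip_gradient):
#     class _Layer: pass
#     layer = _Layer()
#     layer.g = {'dW': np.full((2, 2), 10.0)}    # L2 norm = 20.0
#     clip_gradient(layer, max_norm=1.0)
#     np.linalg.norm(layer.g['dW'])              # -> ~1.0 after clipping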