# EpyNN/epynn/commons/io.py
# Related third party imports
import numpy as np
def index_elements_auto(X_data):
    """Build one-hot converters from the distinct elements of a dataset.

    The array is flattened, its distinct elements are sorted, and each
    element is assigned its rank in that ordering as one-hot index.

    :param X_data: Dataset containing samples features or samples label.
    :type X_data: :class:`numpy.ndarray`

    :return: One-hot encoding converter (element to index).
    :rtype: dict[str or int or float, int]

    :return: One-hot decoding converter (index to element).
    :rtype: dict[int, str or int or float]

    :return: Vocabulary size.
    :rtype: int
    """
    # Distinct values of the flattened array, in ascending order
    vocabulary = sorted(set(X_data.flatten().tolist()))

    # Decoder first; the encoder is simply its inverse
    idx_to_element = dict(enumerate(vocabulary))
    element_to_idx = {element: idx for idx, element in idx_to_element.items()}

    return element_to_idx, idx_to_element, len(vocabulary)
def scale_features(X_data):
    """Scale input array within [0, 1].

    Min-max normalization using the global minimum and maximum of the
    whole array. When every value is identical the range is zero and the
    result is an array of zeros rather than NaN.

    :param X_data: Raw data.
    :type X_data: :class:`numpy.ndarray`

    :return: Normalized data.
    :rtype: :class:`numpy.ndarray`

    :raises ValueError: If the input is empty (raised by :func:`numpy.min`).
    """
    lower = np.min(X_data)
    span = np.max(X_data) - lower

    if span == 0:
        # Constant input: the numerator is all zeros, so dividing by 1
        # yields zeros and avoids the 0/0 -> NaN of a plain min-max.
        span = 1

    return (X_data - lower) / span
def one_hot_encode(i, elements_size):
    """Return a one-hot vector with a single 1.0 at position ``i``.

    :param i: One-hot index for current word.
    :type i: int

    :param elements_size: Number of keys in the word to index encoder.
    :type elements_size: int

    :return: One-hot encoding array for current word.
    :rtype: :class:`numpy.ndarray`
    """
    # Start from an all-zero vector, then switch on the requested slot
    vector = np.zeros(elements_size)
    vector[i] = 1.0

    return vector
def one_hot_encode_sequence(sequence, element_to_idx, elements_size):
    """One-hot encode sequence.

    The result always has shape ``(len(sequence), elements_size)``,
    including for an empty sequence.

    :param sequence: Sequential data.
    :type sequence: list or :class:`numpy.ndarray`

    :param element_to_idx: Converter with word as key and index as value.
    :type element_to_idx: dict[str or int or float, int]

    :param elements_size: Number of keys in converter.
    :type elements_size: int

    :return: One-hot encoded sequence.
    :rtype: :class:`numpy.ndarray`

    :raises KeyError: If an element of the sequence is not in the converter.
    :raises IndexError: If a converter index does not fit in ``elements_size``.
    """
    # Resolve every element to its one-hot column; explicit integer dtype
    # keeps the index array valid even when the sequence is empty
    columns = np.array([element_to_idx[word] for word in sequence], dtype=np.intp)

    # Allocate the whole matrix once, then set one entry per row
    encoding = np.zeros((columns.size, elements_size))
    encoding[np.arange(columns.size), columns] = 1.0

    return encoding
def one_hot_decode_sequence(sequence, idx_to_element):
    """Convert a one-hot encoded sequence back to its elements.

    Each encoded item is decoded to the element whose index is the
    position of the item's maximum value.

    :param sequence: One-hot encoded sequence.
    :type sequence: list or :class:`numpy.ndarray`

    :param idx_to_element: Converter with index as key and word as value.
    :type idx_to_element: dict[int, str or int or float]

    :return: One-hot decoded sequence.
    :rtype: list[str or int or float]
    """
    decoded = []

    for one_hot in sequence:
        hottest = np.argmax(one_hot)    # Position of the largest value
        decoded.append(idx_to_element[hottest])

    return decoded
def encode_dataset(X_data, element_to_idx, elements_size):
    """One-hot encode a set of sequences.

    Sequences are taken by iterating over ``X_data``, so an array (iterated
    along its first axis) or a plain list of sequences are both accepted.

    :param X_data: Contains sequences.
    :type X_data: :class:`numpy.ndarray` or list

    :param element_to_idx: Converter with word as key and index as value.
    :type element_to_idx: dict[str or int or float, int]

    :param elements_size: Number of keys in converter.
    :type elements_size: int

    :return: One-hot encoded dataset.
    :rtype: list[:class:`numpy.ndarray`]
    """
    # One encoded array per sequence, in dataset order
    return [
        one_hot_encode_sequence(sequence, element_to_idx, elements_size)
        for sequence in X_data
    ]
def padding(X_data, padding, forward=True):
    """Add or remove zero padding on axes 1 and 2 of a 4-D array.

    :param X_data: Array representing a set of images.
    :type X_data: :class:`numpy.ndarray`

    :param padding: Number of zeros to add in each side of the image.
    :type padding: int

    :param forward: Set to False to remove padding, defaults to `True`.
    :type forward: bool, optional

    :return: Padded array if `forward`, cropped array otherwise; the input
        itself when `padding` is falsy.
    :rtype: :class:`numpy.ndarray`
    """
    # Nothing to add or remove
    if not padding:
        return X_data

    if forward:
        # First and last axes are left alone, the two middle ones are padded
        pad_width = ((0, 0), (padding, padding), (padding, padding), (0, 0))
        return np.pad(X_data, pad_width, mode='constant', constant_values=(0, 0))

    # Strip `padding` entries from both ends of axes 1 and 2
    inner = slice(padding, -padding)
    return X_data[:, inner, inner, :]