Source code for mzutils.gym_space_management

import numpy as np


[docs]def normalize_spaces(space, max_space=None, min_space=None, skip_columns=None, fill_value=0.0): """ normalize each column of observation/action space to be in [-1,1] such that it looks like a Box space can be the whole original space (X by D) or just one row in the original space (D,) :param space: numpy array :param max_space: numpy array, the maximum value of each column of the space, normally we would get this from reading the dataset or prior knowledge :param min_space: numpy array, the minimum value of each column of the space, normally we would get this from reading the dataset or prior knowledge :param skip_columns: numpy array or list, columns to skip from normalization :param fill_value: float, the value to fill in the normalized space if the original space is masked here. so, if you don't want a part of the space to be normalized, you can pass in a masked array. The value will be automatically filled with fill_value in the normalized space. The returned will be tuple of filled_re_space, max_space, min_space, and the original re_space with mask. e.g. a = np.array(range(24), dtype=np.float64).reshape(4,6) a = np.where(a > 21, np.nan, a) a = np.ma.array(a, mask=np.isnan(a)) b, max, min = mzutils.normalize_spaces(a) """ assert not isinstance(space, list) if max_space is None: max_space = space.max(axis=0) if min_space is None: min_space = space.min(axis=0) gap = max_space - min_space gap += 1e-8 # to avoid div by 0 full_sum = max_space + min_space re_space = (2 * space - full_sum) / gap if skip_columns is not None: if len(space.shape) == 1: re_space[skip_columns] = space[skip_columns] else: re_space[:, skip_columns] = space[:, skip_columns] if np.ma.is_masked(re_space): # if re_space has all masks is False, this sentence can also be false. return re_space.filled(fill_value=fill_value), max_space, min_space, re_space return np.array(re_space), max_space, min_space
[docs]def denormalize_spaces(space_normalized, max_space=None, min_space=None, skip_columns=None, fill_value=0.0): """ same as above, and space_normalized can be the whole normalized original space or just one row in the normalized space """ assert not isinstance(space_normalized, list) if max_space is None: max_space = space_normalized.max(axis=0) if min_space is None: min_space = space_normalized.min(axis=0) gap = max_space - min_space gap += 1e-8 # to avoid div by 0 full_sum = max_space + min_space re_space = (space_normalized * gap + full_sum) / 2 if skip_columns is not None: if len(space_normalized.shape) == 1: re_space[skip_columns] = space_normalized[skip_columns] else: re_space[:, skip_columns] = space_normalized[:, skip_columns] if np.ma.is_masked(re_space): # if re_space has all masks is False, this sentence can also be false. return re_space.filled(fill_value=fill_value), max_space, min_space, re_space return np.array(re_space), max_space, min_space
[docs]def list_of_str_to_numpy_onehot_dict(lst): """ create a onehot lookup dictionary according to the list of strings passed in """ re_dict = {} for i, key in enumerate(lst): onehot_vec = np.zeros(len(lst), dtype=np.float64) onehot_vec[i] = 1.0 re_dict[key] = onehot_vec return re_dict