Source code for ruckus.base

import numpy as _np
from functools import reduce as _reduce

from sklearn.base import BaseEstimator as _BaseEstimator
from sklearn.base import TransformerMixin as _TransformerMixin
from sklearn.utils.validation import check_is_fitted as _check_is_fitted
from sklearn.exceptions import NotFittedError as _NotFittedError
from sklearn.pipeline import Pipeline as _Pipeline
from sklearn.linear_model import Ridge as _Ridge
# UTILITY RKHS's

[docs]class RKHS(_TransformerMixin,_BaseEstimator): r""" Base instance of a Reproducing Kernel Hilbert Space [1]. An RKHS consists of a Hilbert space :math:`H`, a feature mapping :math:`\phi:X \rightarrow H` from the data space :math:`X` into :math:`H`, and a kernel :math:`k(x,y)` on :math:`X^2` defined by :math:`k(x,y) = \left<\phi(x),\phi(y)\right>_H`. This base RKHS sets :math:`H=X` by default, with :math:`\phi(x)=x` and :math:`k(x,y)=x^T y`. Certain functions :math:`f` may be represented in :math:`H` with a vector :math:`F` satisfying :math:`\left<F,\phi(x)\right>_H=f(x)` for all :math:`x \in X`. This representation can be discovered using ridge regression [2]. The set of valid functions depends on :math:`H` and :math:`k`. This base RKHS class can only represent *linear* functions. The :py:func:`fit` method will typically determine the dimensions and shapes of :math:`H` and :math:`X`, as well as any other necessary parameters for determining the feature mapping :math:`\phi`. The :py:func:`transform` method will implement the feature mapping :math:`\phi`. The :py:func:`kernel` method will evaluate the kernel :math:`k`. The :py:func:`fit_function` method will find the representation of a function :math:`f` given the vector :math:`y_i=f(x_i)` of its values on the predictor variables. RKHS instances can be combined with one another via composition, direct sum and tensor product. These produce compound RKHS classes, :py:class:`CompositeRKHS`, :py:class:`DirectSumRKHS`, and :py:class:`ProductRKHS`. These combinations can be instantiated with the corresponding class, or generated from arbitrary RKHS instances using the operations ``@`` for composition, ``+`` for direct sum, and ``*`` for tensor product. See the corresponding classes for further details. 1. `Aronszajn, N. "Theory of reproducing kernels." Trans. Amer. Math. Soc. 68 (1950), 337-404. <https://www.ams.org/journals/tran/1950-068-03/S0002-9947-1950-0051437-7/>`_ 2. Murphy, K. P. "Machine Learning: A Probabilistic Perspective", The MIT Press. chapter 14.4.3, pp. 492-493 ========== Parameters ========== :param take: Default = ``None``. Specifies which values to take from the datapoint for transformation. If ``None``, the entire datapoint will be taken in its original shape. If ``bool`` array, acts as a mask setting values marked ``False`` to ``0`` and leaving values marked True unchanged. If ``int`` array, the integers specify the indices (along the first feature dimension) which are to be taken, in the order/shape of the desired input. If ``tuple`` of ``int`` arrays, allows for drawing indices across multiple dimensions, similar to passing a ``tuple`` to a ``numpy`` array. :type take: :py:class:`numpy.ndarray` of ``dtype int`` or ``bool``, or ``tuple`` of :py:class:`numpy.ndarray` instances of type ``int``, or ``None`` :param filter: Default = ``None``. Specifies a linear preprocessing of the data. Applied after take. If ``None``, no changes are made to the input data. If the same shape as the input datapoints, ``filter`` and the datapoint are multiplied elementwise. If ``filter`` has a larger dimension than the datapoint, then its first dimensions will be contracted with the datapoint via :py:func:`numpy.tensordot`. The final shape is determined by the remaining dimensions of filter. :type filter: :py:class:`numpy.ndarray` of ``dtype float`` or ``None`` :param copy_X: Default = ``True``. If ``True``, input ``X`` is copied and stored by the model in the ``X_fit_`` attribute. 
If no further changes will be done to ``X``, setting ``copy_X=False`` saves memory by storing a reference. :type copy_X: ``bool`` ========== Attributes ========== :param shape_in\_: The required shape of the input datapoints, aka the shape of the domain space :math:`X`. :type shape_in\_: ``tuple`` :param shape_out\_: The final shape of the transformed datapoints, aka the shape of the Hilbert space :math:`H`. :type shape_out\_: ``tuple`` :param X_fit\_: The data which was used to fit the model. :type X_fit\_: :py:class:`numpy.ndarray` of shape `(n_samples,)+self.shape_in_` """ def __init__(self,*,take=None,filter=None,copy_X = True): self.take = take self.filter = filter self.copy_X = copy_X return None
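A minimal sketch of the ``take`` and ``filter`` preprocessing options described above (the arrays below are purely illustrative)::

    import numpy as np
    from ruckus.base import RKHS

    X = np.arange(12.0).reshape(3, 4)          # 3 samples, 4 features

    # `take` with an integer array selects feature indices before the feature map.
    sel = RKHS(take=np.array([0, 2])).fit(X)
    print(sel.transform(X))                    # columns 0 and 2 of X

    # `filter` with more dimensions than a datapoint is contracted in via
    # numpy.tensordot; here a 4x2 matrix acts as a linear projection.
    proj = RKHS(filter=np.ones((4, 2))).fit(X)
    print(proj.shape_out_)                     # (2,)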
[docs] def fit(self,X,y=None): """ Fit the model from data in ``X``. :param X: Training vector, where ``n_samples`` is the number of samples and ``(n_features_1,...,n_features_d)`` is the shape of the input data. Must be consistent with preprocessing instructions in ``self.take`` and ``self.filter``. :type X: :py:class:`numpy.ndarray` of shape ``(n_samples, n_features_1,...,n_features_d)`` :param y: Not used, present for API consistency by convention. :type y: Ignored :returns: The instance itself :rtype: :py:class:`RKHS` """ self.X_fit_ = self._validate_data(X, accept_sparse="csr", copy=self.copy_X,allow_nd=True,ensure_2d=False) self.shape_in_ = self.X_fit_.shape[1:] self.shape_out_ = self._apply_filter(self.X_fit_).shape[1:] return self
[docs] def transform(self,X): """ Transform ``X``. :param X: Data vector, where ``n_samples`` is the number of samples and ``(n_features_1,...,n_features_d)`` is the shape of the input data. These must match ``self.shape_in_``. :type X: :py:class:`numpy.ndarray` of shape ``(n_samples, n_features_1,...,n_features_d)`` :returns: The transformed data :rtype: :py:class:`numpy.ndarray` of shape ``(n_samples,)+self.shape_out_`` """ _check_is_fitted(self) X = self._validate_data(X, accept_sparse="csr", copy=self.copy_X,allow_nd=True,ensure_2d=False) if X.shape[1:] == self.shape_in_: return self._apply_filter(X).reshape((X.shape[0],)+self.shape_out_) else: raise ValueError('The input shape of the data, %s, does not match the required input shape, %s' % (str(X.shape[1:]),str(self.shape_in_)))
[docs] def fit_transform(self,X,y=None): """ Fit the model from data in ``X`` and transform ``X``. :param X: Training vector, where ``n_samples`` is the number of samples and ``(n_features_1,...,n_features_d)`` is the shape of the input data. Must be consistent with preprocessing instructions in ``self.take`` and ``self.filter``. :type X: :py:class:`numpy.ndarray` of shape ``(n_samples, n_features_1,...,n_features_d)`` :returns: The transformed data :rtype: :py:class:`numpy.ndarray` of shape ``(n_samples,)+self.shape_out_`` """ self.fit(X, y) X_transformed = self.transform(X) return X_transformed
[docs] def kernel(self,X,Y=None): """ Evaluates the kernel on ``X`` and ``Y`` (or ``X`` and ``X``). :param X: Data vector, where ``n_samples`` is the number of samples and ``(n_features_1,...,n_features_d)`` is the shape of the input data. These must match ``self.shape_in_``. :type X: :py:class:`numpy.ndarray` of shape ``(n_samples, n_features_1,...,n_features_d)`` :param Y: Default = ``None``. Data vector, where ``n_samples`` is the number of samples and ``(n_features_1,...,n_features_d)`` is the shape of the input data. These must match ``self.shape_in_``. If ``None``, ``X`` is used. :type Y: :py:class:`numpy.ndarray` of shape ``(n_samples, n_features_1,...,n_features_d)`` :returns: The matrix ``K[i,j] = k(X[i],Y[j])`` :rtype: :py:class:`numpy.ndarray` of shape ``(n_samples_1,n_samples_2)`` """ X = self._validate_data(X, accept_sparse="csr", copy=self.copy_X,allow_nd=True,ensure_2d=False) if Y is None: Y = X else: Y = self._validate_data(Y, accept_sparse="csr", copy=self.copy_X,allow_nd=True,ensure_2d=False) return _np.tensordot(self.transform(X),self.transform(Y),axes=[tuple(range(1,len(self.shape_out_)+1))]*2)
[docs] def fit_function(self,y,X=None,regressor=None,alpha=1): """ Fit a function using its values on the predictor data and a regressor. :param y: Target vector, where ``n_samples`` is the number of samples and ``n_targets`` is the number of target functions. :type y: :py:class:`numpy.ndarray` of shape ``(n_samples, n_targets)`` :param X: Default = ``None``. Training vector, where ``n_samples`` is the number of samples and ``(n_features_1,...,n_features_d)`` is the shape of the input data. These must match ``self.shape_in_``. If ``None``, ``self.X_fit_`` is used. :type X: :py:class:`numpy.ndarray` of shape ``(n_samples, n_features_1,...,n_features_d)`` :param regressor: The regressor object to use to fit the function. If ``None``, a :py:class:`sklearn.linear_model.Ridge` instance is used with ``fit_intercept=False`` and ``alpha`` specified below. :type regressor: :py:class:`sklearn.base.BaseEstimator` :param alpha: The ridge parameter used in the default Ridge regressor. :type alpha: float :returns: ``regressor``, fitted to provide the function representation. :rtype: object """ if X is None: X = self.X_fit_ else: X = self._validate_data(X, accept_sparse="csr", copy=self.copy_X,allow_nd=True,ensure_2d=False) if regressor is None: regressor = _Ridge(alpha=alpha,fit_intercept=False) X_reps = self.transform(X) y = self._validate_data(y, accept_sparse="csr", copy=self.copy_X,allow_nd=True,ensure_2d=False) return regressor.fit(X_reps,y)
def __matmul__(self,other): """ Constructs the :py:class:`CompositeRKHS` from ``self`` and ``other``. """ return CompositeRKHS([other,self],copy_X = other.copy_X) def __mul__(self,other): """ Constructs the :py:class:`ProductRKHS` from ``self`` and ``other``. """ return ProductRKHS([self,other],copy_X = self.copy_X or other.copy_X) def __add__(self,other): """ Constructs the :py:class:`DirectSumRKHS` from ``self`` and ``other``. """ return DirectSumRKHS([self,other],copy_X = self.copy_X or other.copy_X) def _apply_filter(self,X,): """ Applies ``self.take`` and ``self.filter`` to the input data as a preprocessing step. """ X = self._apply_take(X,) if self.filter is None: return X elif self.filter.ndim == X.ndim-1: return X*self.filter[None] elif self.filter.ndim > X.ndim-1: if self.filter.shape[:X.ndim-1] == X.shape[1:]: return _np.tensordot(X,self.filter,axes=(tuple(range(1,X.ndim)),tuple(range(0,X.ndim-1)))) else: raise ValueError('First %d axes of filter must have same shape as the last %d axes of apply_take(X,take).' % ((X.ndim-1,)*2)) else: raise ValueError('Dimension of filter must be at least %d' % (X.ndim-1,)) def _apply_take(self,X,): """ Applies ``take`` to the input data as a preprocessing step. """ if self.take is None: return X elif type(self.take) is tuple: return _np.array([X[k][self.take] for k in range(X.shape[0])]) elif self.take.dtype == bool or _np.issubdtype(self.take.dtype,_np.integer): return X[:,self.take] else: raise ValueError('take is not of the right form (must either be a boolean mask, an array of indices, or a tuple of integer arrays)')
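As a quick illustration of the base class (a sketch, assuming the package is importable as ``ruckus.base``; data and tolerances are illustrative): the identity feature map gives the ordinary dot-product kernel, and ``fit_function`` recovers the coefficients of a linear function by ridge regression::

    import numpy as np
    from ruckus.base import RKHS

    rng = np.random.default_rng(0)
    X = rng.normal(size=(100, 5))

    rkhs = RKHS().fit(X)
    print(rkhs.shape_in_, rkhs.shape_out_)      # (5,) (5,)

    # For the base RKHS, k(x, y) = x^T y.
    K = rkhs.kernel(X[:3], X[:4])
    print(np.allclose(K, X[:3] @ X[:4].T))      # True

    # Represent the linear function f(x) = w . x from its sampled values.
    w = rng.normal(size=5)
    f = rkhs.fit_function(X @ w, alpha=1e-6)    # default Ridge, fit_intercept=False
    print(np.allclose(f.coef_.ravel(), w, atol=1e-3))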
# Compound RKHS's
[docs]class CompositeRKHS(RKHS): r""" Given a sequence of RKHS's with Hilbert spaces :math:`H_1`, ..., :math:`H_n` and feature maps :math:`\phi_1`, ..., :math:`\phi_n`, their composition lives in the final Hilbert space :math:`H_n` but has feature map :math:`\phi_n \circ \dots \circ \phi_1` [1]. Correspondingly, a ``CompositeRKHS`` class has the ``shape_out_`` of its final component, the ``shape_in_`` of its first component, and :py:func:`transform` is applied to the data by implementing ``transform`` sequentially for each of the component spaces. This is useful for building pipelines and deep kernels. 1. `Cho, Y., Lawrence, S. "Kernel Methods for Deep Learning." Advances in Neural Information Processing Systems 22 (NIPS 2009) <https://papers.nips.cc/paper/2009/hash/5751ec3e9a4feab575962e78e006250d-Abstract.html>`_ ========== Parameters ========== :param components: The component :py:class:`RKHS` objects, listed from the first to be applied to the last. :type components: list of :py:class:`RKHS` objects :param copy_X: Default = ``True``. If ``True``, input ``X`` is copied and stored by the model in the ``X_fit_`` attribute. If no further changes will be done to ``X``, setting ``copy_X=False`` saves memory by storing a reference. :type copy_X: ``bool`` ========== Attributes ========== :param shape_in\_: The required shape of the input datapoints, aka the shape of the domain space :math:`X`. :type shape_in\_: ``tuple`` :param shape_out\_: The final shape of the transformed datapoints, aka the shape of the Hilbert space :math:`H`. :type shape_out\_: ``tuple`` :param X_fit\_: The data which was used to fit the model. :type X_fit\_: :py:class:`numpy.ndarray` of shape `(n_samples,)+self.shape_in_` """ def __init__( self, components, *, copy_X=True ): self.components = components self.copy_X = copy_X
[docs] def fit_transform(self,X,y=None): """ Fit the model from data in ``X`` and transform ``X``. :param X: Training vector, where ``n_samples`` is the number of samples and ``(n_features_1,...,n_features_d)`` is the shape of the input data. Must be consistent with preprocessing instructions in `self.components[0].take` and `self.components[0].filter`. :type X: :py:class:`numpy.ndarray` of shape ``(n_samples, n_features_1,...,n_features_d)`` :returns: The transformed data :rtype: :py:class:`numpy.ndarray` of shape ``(n_samples,)+self.shape_out_`` """ if self.copy_X: X = X.copy() self.X_fit_ = X self.shape_in_ = self.X_fit_.shape[1:] current_X = self.X_fit_ for rkhs in self.components[:-1]: new_X = rkhs.fit_transform(current_X) current_X = new_X current_X = self.components[-1].fit_transform(current_X) self.shape_out_ = self.components[-1].shape_out_ return current_X
[docs] def fit(self,X,y=None): """ Fit the model from data in ``X``. :param X: Training vector, where ``n_samples`` is the number of samples and ``(n_features_1,...,n_features_d)`` is the shape of the input data. Must be consistent with preprocessing instructions in `self.components[0].take` and `self.components[0].filter`. :type X: :py:class:`numpy.ndarray` of shape ``(n_samples, n_features_1,...,n_features_d)`` :returns: The instance itself :rtype: :py:class:`RKHS` """ self.fit_transform(X,y) return self
[docs] def transform(self,X): """ Transform ``X``. :param X: Data vector, where ``n_samples`` is the number of samples and ``(n_features_1,...,n_features_d)`` is the shape of the input data. These must match ``self.shape_in_``. :type X: :py:class:`numpy.ndarray` of shape ``(n_samples, n_features_1,...,n_features_d)`` :returns: The transformed data :rtype: :py:class:`numpy.ndarray` of shape ``(n_samples,)+self.shape_out_`` """ if self.copy_X: X = X.copy() current_X = X for rkhs in self.components: new_X = rkhs.transform(current_X) current_X = new_X return current_X
[docs] def kernel(self,X,Y=None): """ Evaluates the kernel on ``X`` and ``Y`` (or ``X`` and ``X``) by iterating over component embeddings. As such, ``CompositeRKHS`` kernels can only be evaluated after fitting to data. :param X: Data vector, where ``n_samples`` is the number of samples and ``(n_features_1,...,n_features_d)`` is the shape of the input data. These must match ``self.shape_in_``. :type X: :py:class:`numpy.ndarray` of shape ``(n_samples, n_features_1,...,n_features_d)`` :param Y: Default = ``None``. Data vector, where ``n_samples`` is the number of samples and ``(n_features_1,...,n_features_d)`` is the shape of the input data. These must match ``self.shape_in_``. If ``None``, ``X`` is used. :type Y: :py:class:`numpy.ndarray` of shape ``(n_samples, n_features_1,...,n_features_d)`` :returns: The matrix ``K[i,j] = k(X[i],Y[j])`` :rtype: :py:class:`numpy.ndarray` of shape ``(n_samples_1,n_samples_2)`` """ try: _check_is_fitted(self) if self.copy_X: X = X.copy() current_X = X if Y is None: current_Y = current_X else: if self.copy_X: Y = Y.copy() current_Y = Y for rkhs in self.components[:-1]: current_X = rkhs.transform(current_X) current_Y = rkhs.transform(current_Y) return self.components[-1].kernel(current_X,current_Y) except _NotFittedError: raise _NotFittedError("Composite RKHS's must be fitted before their kernels can be evaluated.")
def __matmul__(self,other): """ Constructs the ``CompositeRKHS`` from ``self`` and ``other``. Flattens the component list to avoid unnecessary recursion. """ if type(other) is CompositeRKHS: return CompositeRKHS(other.components+self.components,copy_X = other.copy_X) else: return CompositeRKHS([other]+self.components,copy_X = self.copy_X)
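A brief composition sketch (illustrative arrays only): composing with ``@`` applies the right-hand operand first, so a projection followed by the identity embedding reproduces the projected data::

    import numpy as np
    from ruckus.base import RKHS

    rng = np.random.default_rng(0)
    X = rng.normal(size=(50, 4))
    proj = rng.normal(size=(4, 2))              # illustrative 4 -> 2 projection

    first = RKHS(filter=proj)                   # applied first
    second = RKHS()                             # identity on the 2-d image
    composite = second @ first                  # CompositeRKHS([first, second])

    Z = composite.fit_transform(X)
    print(composite.shape_in_, composite.shape_out_)   # (4,) (2,)
    print(np.allclose(Z, X @ proj))                     # True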
[docs]class ProductRKHS(RKHS): r""" Given a sequence of RKHS's with Hilbert spaces :math:`H_1`, ..., :math:`H_n` and feature maps :math:`\phi_1`, ..., :math:`\phi_n`, their tensor product lives in the tensor product Hilbert space :math:`H_1\otimes \dots \otimes H_n` and has feature map :math:`\phi_1 \otimes \dots \otimes \phi_n` [1]. Correspondingly, the ``shape_out_`` of a ``ProductRKHS`` instance is the tuple-sum of the ``shape_out_`` tuples of its factors, while all its factors share the same ``shape_in_``. Product RKHS's are particularly useful for working with kernel embeddings of distributions and their conditional probabilities [2]. A ``ProductRKHS`` can be reduced to its marginal along a set of factors using the :py:func:`marginal` method, and can be reduced into a marginal space paired with a ridge-regressed conditional map using the :py:func:`conditional` method. 1. `Aronszajn, N. "Theory of reproducing kernels." Trans. Amer. Math. Soc. 68 (1950), 337-404. <https://www.ams.org/journals/tran/1950-068-03/S0002-9947-1950-0051437-7/>`_ 2. `Muandet, K., Fukumizu, K., Sriperumbudur, B., Schölkopf, B. "Kernel Mean Embedding of Distributions: A Review and Beyond." Foundations and Trends in Machine Learning: Vol. 10: No. 1-2, pp 1-141 (2017) <https://arxiv.org/abs/1605.09522/>`_ ========== Parameters ========== :param factors: The factor :py:class:`RKHS` objects, listed in the order that their dimensions will appear in indexing. :type factors: list of :py:class:`RKHS` objects :param copy_X: Default = ``True``. If ``True``, input ``X`` is copied and stored by the model in the ``X_fit_`` attribute. If no further changes will be done to ``X``, setting ``copy_X=False`` saves memory by storing a reference. :type copy_X: ``bool`` ========== Attributes ========== :param shape_in\_: The required shape of the input datapoints, aka the shape of the domain space :math:`X`. :type shape_in\_: ``tuple`` :param shape_out\_: The final shape of the transformed datapoints, aka the shape of the Hilbert space :math:`H`. :type shape_out\_: ``tuple`` :param X_fit\_: The data which was used to fit the model. :type X_fit\_: :py:class:`numpy.ndarray` of shape `(n_samples,)+self.shape_in_` """ def __init__( self, factors, *, copy_X=True ): self.factors = factors self.copy_X = copy_X
[docs] def fit(self,X,y=None): """ Fit the model from data in ``X``. :param X: Training vector, where ``n_samples`` is the number of samples and ``(n_features_1,...,n_features_d)`` is the shape of the input data. Must be consistent with preprocessing instructions in `fac.take` and `fac.filter` for each `fac` in `self.factors`. :type X: :py:class:`numpy.ndarray` of shape ``(n_samples, n_features_1,...,n_features_d)`` :returns: The instance itself :rtype: :py:class:`RKHS` """ if self.copy_X: X = X.copy() self.X_fit_ = X self.shape_in_ = self.X_fit_.shape[1:] for j in range(len(self.factors)): self.factors[j].fit(self.X_fit_) self.shape_out_ = _reduce(lambda x,y:x+y, [f.shape_out_ for f in self.factors], ()) return self
[docs] def transform(self,X): """ Transform ``X``. :param X: Data vector, where ``n_samples`` is the number of samples and ``(n_features_1,...,n_features_d)`` is the shape of the input data. These must match ``self.shape_in_``. :type X: :py:class:`numpy.ndarray` of shape ``(n_samples, n_features_1,...,n_features_d)`` :returns: The transformed data :rtype: :py:class:`numpy.ndarray` of shape ``(n_samples,)+self.shape_out_`` """ if self.copy_X: X = X.copy() Ys = [] for j in range(len(self.factors)): Ys.append(self.factors[j].transform(X)) # Performs a vectorized tensor product of the feature dimensions tensor_func = lambda A,B: A.reshape(A.shape+(1,)*(B.ndim-1))*B.reshape((B.shape[0],)+(1,)*(A.ndim-1)+B.shape[1:]) return _reduce(tensor_func,Ys[1:],Ys[0])
[docs] def kernel(self,X,Y=None): """ Evaluates the kernel on ``X`` and ``Y`` (or ``X`` and ``X``) by multiplying the kernels of the factors. :param X: Data vector, where ``n_samples`` is the number of samples and ``(n_features_1,...,n_features_d)`` is the shape of the input data. These must match ``self.shape_in_``. :type X: :py:class:`numpy.ndarray` of shape ``(n_samples, n_features_1,...,n_features_d)`` :param Y: Default = ``None``. Data vector, where ``n_samples`` is the number of samples and ``(n_features_1,...,n_features_d)`` is the shape of the input data. These must match ``self.shape_in_``. If ``None``, ``X`` is used. :type Y: :py:class:`numpy.ndarray` of shape ``(n_samples, n_features_1,...,n_features_d)`` :returns: The matrix ``K[i,j] = k(X[i],Y[j])`` :rtype: :py:class:`numpy.ndarray` of shape ``(n_samples_1,n_samples_2)`` """ if self.copy_X: X = X.copy() if Y is None: Y = X else: if self.copy_X: Y = Y.copy() Ks = [] for j in range(len(self.factors)): Ks.append(self.factors[j].kernel(X,Y)) return _np.prod(Ks,axis=0)
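For a concrete check of the product structure (a sketch with illustrative factors and data): the kernel of a ``ProductRKHS`` is the elementwise product of its factors' kernels, and ``shape_out_`` is the tuple-sum of the factor shapes::

    import numpy as np
    from ruckus.base import RKHS, ProductRKHS

    rng = np.random.default_rng(0)
    X = rng.normal(size=(20, 4))

    fac_a = RKHS(take=np.array([0, 1]))         # features 0 and 1
    fac_b = RKHS(take=np.array([2, 3]))         # features 2 and 3
    prod = (fac_a * fac_b).fit(X)               # ProductRKHS([fac_a, fac_b])

    print(prod.shape_out_)                      # (2, 2)
    K = prod.kernel(X)
    print(np.allclose(K, fac_a.kernel(X) * fac_b.kernel(X)))   # True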
[docs] def marginal(self,var_inds,copy_X=False): """ Construct a ``ProductRKHS`` from only the factors specified by ``var_inds``. Only to be used if the ``ProductRKHS`` is already fitted and you'd rather not fit again. :param var_inds: List of indices of the factors in ``self.factors`` from which to construct the marginal ``ProductRKHS``. :type var_inds: ``array`` -like of ``int`` :param copy_X: Default = ``False``. If ``True``, input ``self.X_fit_`` is copied and stored as the new model's ``X_fit_`` attribute. If no further changes will be done to ``X``, setting ``copy_X=False`` saves memory by storing a reference. :type copy_X: ``bool`` :returns: The marginal ``ProductRKHS`` of the ``var_inds``. :rtype: ``ProductRKHS`` """ new_rkhs = ProductRKHS(list(map(self.factors.__getitem__, var_inds))) if copy_X: new_rkhs.X_fit_ = self.X_fit_.copy() else: new_rkhs.X_fit_ = self.X_fit_ new_rkhs.shape_in_ = self.X_fit_.shape[1:] new_rkhs.shape_out_ = _reduce(lambda x,y:x+y, [f.shape_out_ for f in new_rkhs.factors], ()) return new_rkhs
[docs] def conditional(self,predictor_inds,response_inds,regressor=None,alpha=1.0): """ Returns a pair of outputs, the first being a :py:class:`sklearn.pipeline.Pipeline` consisting of the marginal RKHS of ``predictor_inds`` and a regressor which represents the conditional distribution embedding, and the second being the marginal RKHS of ``response_inds``. For two systems :math:`X` and :math:`Y`, embedded in Hilbert spaces :math:`H_1` and :math:`H_2` respectively, the conditional distribution embedding is a linear map :math:`C_{Y|X}:H_1\\rightarrow H_2` such that :math:`C_{Y|X}\phi_1(x)` gives the kernel embedding of the distribution of :math:`Y` conditioned on :math:`X=x`. This is typically determined by using a ridge regression, though we allow the user to pass a custom regressor for model selection purposes. See [1] for details. 1. `Muandet, K., Fukumizu, K., Sriperumbudur, B., Schölkopf, B. "Kernel Mean Embedding of Distributions: A Review and Beyond." Foundations and Trends in Machine Learning: Vol. 10: No. 1-2, pp 1-141 (2017) <https://arxiv.org/abs/1605.09522/>`_ :param predictor_inds: List of indices of the factors in ``self.factors`` on which the ``response_inds`` will be conditioned. :type predictor_inds: ``array`` -like of ``int`` :param response_inds: List of indices of the factors in ``self.factors`` which are to be conditioned on the ``predictor_inds``. :type response_inds: ``array`` -like of ``int`` :param regressor: The regressor object to use to fit the conditional embedding. If ``None``, a :py:class:`sklearn.linear_model.Ridge` instance is used with ``fit_intercept=False`` and ``alpha`` specified below. :type regressor: :py:class:`sklearn.base.BaseEstimator` :param alpha: The ridge parameter used in the default Ridge regressor. :type alpha: float :returns: (``pipe``,``response``), where ``pipe`` is a pipeline consisting of the marginal of ``predictor_inds`` and the fitted ``regressor``, and ``response`` is the marginal of ``response_inds``. :rtype: (:py:class:`sklearn.pipeline.Pipeline`, ``ProductRKHS``) """ if regressor is None: regressor = _Ridge(fit_intercept=False,alpha=alpha) rkhs_predictor = self.marginal(predictor_inds) rkhs_response = self.marginal(response_inds) X_in = rkhs_predictor.transform(rkhs_predictor.X_fit_) y_in = rkhs_response.transform(rkhs_response.X_fit_) regressor.fit(X_in.reshape([X_in.shape[0],_np.prod(X_in.shape[1:],dtype=int)]), y_in.reshape([y_in.shape[0],_np.prod(y_in.shape[1:],dtype=int)])) pipe = _Pipeline([('embedding',rkhs_predictor),('regressor',regressor)]) return pipe,rkhs_response
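A small end-to-end sketch of ``conditional`` with linear (base ``RKHS``) factors and illustrative data: each factor picks out one coordinate with ``take``, so the returned pipeline reduces to a ridge regression of ``y`` on ``x``::

    import numpy as np
    from ruckus.base import RKHS, ProductRKHS

    rng = np.random.default_rng(0)
    x = rng.normal(size=(200, 1))
    y = 2.0 * x + 0.1 * rng.normal(size=(200, 1))
    data = np.concatenate([x, y], axis=1)             # columns: [x, y]

    prod = ProductRKHS([RKHS(take=np.array([0])),     # factor 0: x
                        RKHS(take=np.array([1]))])    # factor 1: y
    prod.fit(data)

    # Conditional embedding of factor 1 (y) given factor 0 (x).
    pipe, response = prod.conditional([0], [1], alpha=1e-3)
    print(pipe.predict(data[:5]))                     # approximately 2 * x[:5]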
def __mul__(self,other): """ Constructs the ``ProductRKHS`` from ``self`` and ``other``. Flattens the factor list to avoid unnecessary recursion. """ if type(other) is ProductRKHS: return ProductRKHS(self.factors+other.factors,copy_X = self.copy_X or other.copy_X) else: return ProductRKHS(self.factors+[other],copy_X = self.copy_X or other.copy_X)
[docs]class DirectSumRKHS(RKHS): r""" Given a sequence of RKHS's with Hilbert spaces :math:`H_1`, ..., :math:`H_n` and feature maps :math:`\phi_1`, ..., :math:`\phi_n`, their direct sum lives in the direct sum Hilbert space :math:`H_1\oplus \dots \oplus H_n` and has feature map of stacked vectors :math:`[\phi_1^T,\ \dots,\ \phi_n^T]^T` [1]. Correspondingly, the ``shape_out_`` of a ``DirectSumRKHS`` instance is determined in the same manner as when using :py:func:`numpy.concatenate` on the specified axis, while all its subspaces share the same ``shape_in_``. 1. `Aronszajn, N. "Theory of reproducing kernels." Trans. Amer. Math. Soc. 68 (1950), 337-404. <https://www.ams.org/journals/tran/1950-068-03/S0002-9947-1950-0051437-7/>`_ ========== Parameters ========== :param subspaces: The subspace :py:class:`RKHS` objects, listed in the order that their indices will appear along the first axis. :type subspaces: list of :py:class:`RKHS` objects :param axis: The axis along which the data will be concatenated. Data dimension must match on all other axes. :type axis: int :param copy_X: Default = ``True``. If ``True``, input ``X`` is copied and stored by the model in the ``X_fit_`` attribute. If no further changes will be done to ``X``, setting ``copy_X=False`` saves memory by storing a reference. :type copy_X: ``bool`` ========== Attributes ========== :param shape_in\_: The required shape of the input datapoints, aka the shape of the domain space :math:`X`. :type shape_in\_: ``tuple`` :param shape_out\_: The final shape of the transformed datapoints, aka the shape of the Hilbert space :math:`H`. :type shape_out\_: ``tuple`` :param X_fit\_: The data which was used to fit the model. :type X_fit\_: :py:class:`numpy.ndarray` of shape `(n_samples,)+self.shape_in_` """ def __init__( self, subspaces, axis=0, *, copy_X=True ): self.subspaces = subspaces self.axis = axis self.copy_X = copy_X
[docs] def fit(self,X,y=None): """ Fit the model from data in ``X``. :param X: Training vector, where ``n_samples`` is the number of samples and ``(n_features_1,...,n_features_d)`` is the shape of the input data. Must be consistent with preprocessing instructions in `sub.take` and `sub.filter` for each `sub` in `self.subspaces`. :type X: :py:class:`numpy.ndarray` of shape ``(n_samples, n_features_1,...,n_features_d)`` :returns: The instance itself :rtype: :py:class:`RKHS` """ if self.copy_X: X = X.copy() self.X_fit_ = X self.shape_in_ = self.X_fit_.shape[1:] for j in range(len(self.subspaces)): self.subspaces[j].fit(X) shapes_out = _np.array([list(s.shape_out_) for s in self.subspaces]) axes = list(range(shapes_out.shape[1])) axes.remove(self.axis) axis_mask = _np.zeros(shapes_out.shape[1],dtype=int) axis_mask[self.axis] = 1 if _np.all(shapes_out[1:,axes]==shapes_out[None,0,axes]): shapesum = lambda sh1,sh2:sh1+axis_mask*sh2 self.shape_out_ = tuple(_reduce(shapesum,shapes_out[1:],shapes_out[0])) else: raise ValueError('Subspaces have incompatible shapes for direct sum') return self
[docs] def transform(self,X): """ Transform ``X``. :param X: Data vector, where ``n_samples`` is the number of samples and ``(n_features_1,...,n_features_d)`` is the shape of the input data. These must match ``self.shape_in_``. :type X: :py:class:`numpy.ndarray` of shape ``(n_samples, n_features_1,...,n_features_d)`` :returns: The transformed data :rtype: :py:class:`numpy.ndarray` of shape ``(n_samples,)+self.shape_out_`` """ if self.copy_X: X = X.copy() Ys = [] for j in range(len(self.subspaces)): Ys.append(self.subspaces[j].transform(X)) return _np.concatenate(Ys,axis=1+self.axis)
[docs] def kernel(self,X,Y=None): """ Evaluates the kernel on ``X`` and ``Y`` (or ``X`` and ``X``) by summing the kernels of the factors. :param X: Data vector, where ``n_samples`` is the number of samples and ``(n_features_1,...,n_features_d)`` is the shape of the input data. These must match ``self.shape_in_``. :type X: :py:class:`numpy.ndarray` of shape ``(n_samples, n_features_1,...,n_features_d)`` :param Y: Default = ``None``. Data vector, where ``n_samples`` is the number of samples and ``(n_features_1,...,n_features_d)`` is the shape of the input data. These must match ``self.shape_in_``. If ``None``, ``X`` is used. :type Y: :py:class:`numpy.ndarray` of shape ``(n_samples, n_features_1,...,n_features_d)`` :returns: The matrix ``K[i,j] = k(X[i],Y[j])`` :rtype: :py:class:`numpy.ndarray` of shape ``(n_samples_1,n_samples_2)`` """ if self.copy_X: X = X.copy() if Y is None: Y = X else: if self.copy_X: Y = Y.copy() Ks = [] for j in range(len(self.subspaces)): Ks.append(self.subspaces[j].kernel(X,Y)) return _np.sum(Ks,axis=0)
def __add__(self,other): """ Constructs the ``DirectSumRKHS`` from ``self`` and ``other``. Flattens the subspace list to avoid unnecessary recursion. """ if type(other) is DirectSumRKHS: return DirectSumRKHS(self.subspaces+other.subspaces,axis=self.axis,copy_X = self.copy_X or other.copy_X) else: return DirectSumRKHS(self.subspaces+[other],axis=self.axis,copy_X = self.copy_X or other.copy_X)
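Finally, a direct-sum sketch (illustrative data only): summands built with ``+`` concatenate their features, and the combined kernel is the sum of the summand kernels::

    import numpy as np
    from ruckus.base import RKHS, DirectSumRKHS

    rng = np.random.default_rng(0)
    X = rng.normal(size=(30, 3))

    # Identity embedding plus a rescaled copy of the same features.
    dsum = RKHS() + RKHS(filter=2.0 * np.eye(3))        # DirectSumRKHS, axis=0
    Z = dsum.fit(X).transform(X)
    print(Z.shape)                                      # (30, 6)

    K = dsum.kernel(X)
    print(np.allclose(K, X @ X.T + 4.0 * (X @ X.T)))    # True: k = k1 + k2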