Source code for sklmer._estimators

"""
This module contains sklearn wrappers for pymer4
"""
import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils.validation import check_X_y, check_is_fitted
from pymer4 import Lmer
import inspect
import pandas as pd


class LmerRegressor(BaseEstimator, RegressorMixin):
    """A regressor that wraps pymer4's lme4 implementation.

    This regressor requires a formula be defined in LME4's style, see
    pymer4's cheatsheet: http://eshinjolly.com/pymer4/rfx_cheatsheet.html

    Parameters
    ----------
    formula : str
        Lmer formatted formula string. The text to the left of ``~`` is used
        as the name of the response column.
    X_cols : list
        List of the names of the X columns.
    predict_rfx : bool, default=False
        Whether or not the predict method should use random effects in the
        prediction.
    family : str, default='gaussian'
        What family of distributions to use for the link function for the
        generalized model.
    fit_kwargs : dict or None, default=None
        Dictionary of options to pass to lmer fit. ``None`` means no extra
        options. See http://eshinjolly.com/pymer4/api.html

    Attributes
    ----------
    data_ : pandas.DataFrame
        The data frame the model was fitted on (set by ``fit``).
    model : pymer4.Lmer
        The underlying fitted pymer4 model (set by ``fit``).
    converged : bool
        ``False`` if any warning reported by the pymer4 model mentions
        "converge", ``True`` otherwise (set by ``fit``).
    coef_ : ndarray
        First column of ``model.coefs`` with the first row dropped.
        NOTE(review): this assumes the first row is the intercept.
    intercept_ : float
        First row, first column of ``model.coefs``.
    """

    def __init__(
        self, formula, X_cols, predict_rfx=False, family="gaussian", fit_kwargs=None
    ):
        # Store every constructor argument unmodified under its own name, as
        # scikit-learn's get_params/set_params/clone expect. Doing this
        # explicitly avoids depending on frame-locals introspection.
        self.formula = formula
        self.X_cols = X_cols
        self.predict_rfx = predict_rfx
        self.family = family
        self.fit_kwargs = fit_kwargs

    def _make_data(self, X=None, y=None, data=None, x_only=False):
        """Build the data frame handed to pymer4.

        Parameters
        ----------
        X : array-like, optional
            Predictor values; columns are labelled with ``self.X_cols``.
        y : array-like, optional
            Response values; only used when ``x_only`` is False.
        data : pandas.DataFrame, optional
            If given, a copy of it is returned and ``X``/``y`` are ignored.
        x_only : bool, default=False
            When True only ``X`` is required and no response column is added.

        Returns
        -------
        pandas.DataFrame

        Raises
        ------
        ValueError
            If ``data`` is None and the required ``X`` (and ``y``) are missing.
        """
        if data is not None:
            # Copy so that nothing downstream can modify the caller's frame.
            return data.copy()

        if x_only:
            if X is None:
                raise ValueError("If you don't pass data you must pass X")
            return pd.DataFrame(X, columns=self.X_cols)

        if X is None or y is None:
            raise ValueError("If you don't pass data you must pass X and y")

        # Check that X and y have correct shape
        X, y = check_X_y(X, y)

        frame = pd.DataFrame(X, columns=self.X_cols)
        # _response_name is set at the start of fit(), before this is called.
        frame[self._response_name] = y
        return frame

    def fit(self, X=None, y=None, data=None):
        """Fit the specified mixed effects model.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The training input samples.
        y : array-like, shape (n_samples,)
            The target values (class labels in classification, real numbers in
            regression).
        data : pandas.DataFrame
            Data can also be passed to fit as a dataframe, in which case
            ``X`` and ``y`` are not used.

        Returns
        -------
        self : object
            Returns self.
        """
        # The response column is whatever precedes "~" in the formula.
        self._response_name = self.formula.split("~")[0].strip()

        self.data_ = self._make_data(X=X, y=y, data=data)
        self.model = Lmer(self.formula, data=self.data_, family=self.family)

        # Resolve the None default here rather than in __init__, so the
        # constructor keeps the parameter exactly as it was passed.
        extra_fit_options = {} if self.fit_kwargs is None else self.fit_kwargs
        self.model.fit(summarize=False, verbose=False, **extra_fit_options)

        # The fit counts as converged unless a reported warning mentions
        # "converge" (either the container itself or any entry in it).
        fit_warnings = self.model.warnings
        self.converged = not (
            fit_warnings is not None
            and (
                "converge" in fit_warnings
                or any("converge" in warning for warning in fit_warnings)
            )
        )

        self.coef_ = self.model.coefs.iloc[1:, 0].values
        self.intercept_ = self.model.coefs.iloc[0, 0]
        return self

    def predict(self, X=None, data=None, **kwargs):
        """Predict based on the fitted mixed effects model.

        Will use random effects if the estimator's predict_rfx attribute is
        true, unless ``use_rfx`` is passed explicitly.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The input samples to predict for.
        data : pandas.DataFrame
            Data can also be passed as a dataframe.
        **kwargs
            Passed through to the pymer4.Lmer.predict method. A ``use_rfx``
            entry overrides the estimator's ``predict_rfx`` setting for this
            call.

        Returns
        -------
        y : ndarray, shape (n_samples,)
            Returns predicted values
        """
        check_is_fitted(self, ["data_", "converged"])

        data = self._make_data(X, data=data, x_only=True)
        # A per-call override wins over the estimator-level default; it is
        # removed from kwargs so it is not passed to pymer4 twice.
        use_rfx = kwargs.pop("use_rfx", self.predict_rfx)
        return self.model.predict(data, use_rfx, **kwargs)