Source code for sklmer._estimators

"""
This module contains sklearn wrappers for pymer4
"""
import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils.validation import check_X_y, check_is_fitted
from pymer4 import Lmer
import inspect
import pandas as pd


class LmerRegressor(BaseEstimator, RegressorMixin):
    """
    A regressor that wraps pymer4's lme4 implementation.

    This regressor requires a formula be defined in LME4's style, see
    pymer4's cheatsheet: http://eshinjolly.com/pymer4/rfx_cheatsheet.html

    Parameters
    ----------
    formula : str
        Lmer formatted formula string.
    X_cols : list
        List of the names of the X columns.
    predict_rfx : bool, default=False
        Whether or not the predict method should use random effects in the
        prediction.
    family : str, default='gaussian'
        What family of distributions to use for the link function for the
        generalized model.
    fit_kwargs : dict, default=None
        Dictionary of options to pass to lmer fit. ``None`` means no extra
        options. See http://eshinjolly.com/pymer4/api.html

    Attributes
    ----------
    data_ : pandas.DataFrame
        The dataframe the model was fitted on (set by ``fit``).
    model : pymer4.Lmer
        The underlying pymer4 model (set by ``fit``).
    converged : bool
        False when any warning reported by the model mentions "converge",
        True otherwise (set by ``fit``).
    coef_ : ndarray
        First column of ``model.coefs`` for every row after the first
        (presumably the non-intercept estimates -- confirm against pymer4).
    intercept_ : scalar
        First column of the first row of ``model.coefs``.
    """

    def __init__(
        self, formula, X_cols, predict_rfx=False, family="gaussian", fit_kwargs=None
    ):
        # Parameters are stored explicitly and unmodified. The previous
        # frame-locals introspection depended on ``f_locals`` being a detached
        # snapshot, which no longer holds on Python 3.13+ (PEP 667).
        # ``fit_kwargs`` defaults to None rather than a shared mutable ``{}``.
        self.formula = formula
        self.X_cols = X_cols
        self.predict_rfx = predict_rfx
        self.family = family
        self.fit_kwargs = fit_kwargs

    def _make_data(self, X=None, y=None, data=None, x_only=False):
        """
        Build the dataframe handed to pymer4.

        Parameters
        ----------
        X : array-like, optional
            Feature values; columns are labelled with ``self.X_cols``.
        y : array-like, optional
            Response values; required unless ``x_only`` is True or ``data``
            is given.
        data : pandas.DataFrame, optional
            If given, a copy of it is returned and X / y are ignored.
        x_only : bool, default=False
            When True only X is required and no response column is added.

        Returns
        -------
        pandas.DataFrame

        Raises
        ------
        ValueError
            If ``data`` is None and the required X (and y) are missing.
        """
        if data is not None:
            # Copy so the caller's dataframe is never shared with the model.
            return data.copy()

        if x_only:
            if X is None:
                raise ValueError("If you don't pass data you must pass X")
            return pd.DataFrame(X, columns=self.X_cols)

        if X is None or y is None:
            raise ValueError("If you don't pass data you must pass X and y")
        # Check that X and y have correct shape
        X, y = check_X_y(X, y)
        frame = pd.DataFrame(X, columns=self.X_cols)
        frame[self._response_name] = y
        return frame

    @staticmethod
    def _has_convergence_warning(warnings):
        """Return True if ``warnings`` (None, a str, or an iterable of str)
        mentions "converge"."""
        if warnings is None:
            return False
        if isinstance(warnings, str):
            return "converge" in warnings
        return any("converge" in message for message in warnings)

    def fit(self, X=None, y=None, data=None):
        """
        Fit the specified mixed effects model.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The training input samples.
        y : array-like, shape (n_samples,)
            The target values (class labels in classification, real numbers in
            regression).
        data : pandas.DataFrame
            Data can also be passed to fit as a dataframe.

        Returns
        -------
        self : object
            Returns self.
        """
        # The response column name is whatever precedes "~" in the formula.
        self._response_name = self.formula.split("~")[0].strip()
        self.data_ = self._make_data(X=X, y=y, data=data)

        self.model = Lmer(self.formula, data=self.data_, family=self.family)
        extra_options = {} if self.fit_kwargs is None else self.fit_kwargs
        self.model.fit(summarize=False, verbose=False, **extra_options)

        self.converged = not self._has_convergence_warning(self.model.warnings)

        # First row of the coefficient table is treated as the intercept.
        self.coef_ = self.model.coefs.iloc[1:, 0].values
        self.intercept_ = self.model.coefs.iloc[0, 0]
        return self

    def predict(self, X=None, data=None, **kwargs):
        """
        Predict based on the fitted mixed effects model.

        Will use random effects if the estimator's ``predict_rfx`` attribute
        is true, unless overridden with a ``use_rfx`` keyword argument.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            The input samples to predict for.
        data : pandas.DataFrame
            Data can also be passed as a dataframe.
        **kwargs :
            Passed through to the pymer4.Lmer.predict method. A ``use_rfx``
            entry takes precedence over ``self.predict_rfx``.

        Returns
        -------
        y : ndarray, shape (n_samples,)
            Returns predicted values
        """
        check_is_fitted(self, ["data_", "converged"])
        data = self._make_data(X, data=data, x_only=True)
        # Remove use_rfx from kwargs so it is not passed twice below.
        use_rfx = kwargs.pop("use_rfx", self.predict_rfx)
        return self.model.predict(data, use_rfx, **kwargs)