# Source code for lalegpl.lib.r.util

# Copyright 2019 IBM Corporation
#
# Licensed under the GNU General Public License 3.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.gnu.org/licenses/gpl-3.0.txt
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
import os
import pandas
try:
  import rpy2.robjects
  import rpy2.robjects.packages
except ImportError:
  raise ImportError("""This functionality needs a Python package called `rpy2`. 
  Please install it using `pip install rpy2` or install lalegpl[full] which will install it for you.""")

import lale.helpers

def install_r_package(pkg_name: str):
    """Import an R package through rpy2, installing it first if necessary.

    The library directory is taken from the ``R_LIBS_USER`` environment
    variable when it is set, and defaults to ``~/R/lib`` otherwise. The
    directory is created if it does not exist and is passed to R's
    ``.libPaths`` before the package is looked up.

    Args:
        pkg_name: Name of the R package to import.

    Returns:
        The object returned by ``rpy2.robjects.packages.importr(pkg_name)``.

    Raises:
        ValueError: If the package is still not installed after the
            installation attempt.
    """
    lib_dir = os.environ.get('R_LIBS_USER')
    if lib_dir is None:
        lib_dir = os.path.expanduser(os.path.join('~', 'R', 'lib'))
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test when several processes start at the same time.
    os.makedirs(lib_dir, exist_ok=True)

    libPaths_fun = rpy2.robjects.r['.libPaths']
    libPaths_fun(lib_dir)
    rutils = rpy2.robjects.packages.importr('utils')
    rutils.chooseCRANmirror(ind=1)  # https://cran.r-project.org/mirrors.html

    if not rpy2.robjects.packages.isinstalled(pkg_name):
        lale.helpers.println_pos(
            f'installing R package {pkg_name} to libPaths {libPaths_fun()}')
        rutils.install_packages(pkg_name)
        # Check again after the attempt, so a failed installation is
        # reported here rather than as an import error below.
        if not rpy2.robjects.packages.isinstalled(pkg_name):
            lale.helpers.println_pos(f'failed to install R package {pkg_name}')
            raise ValueError(f'failed to install R package {pkg_name}')

    return rpy2.robjects.packages.importr(pkg_name)
def create_r_dataframe(X, y=None):
    """Build an rpy2 ``DataFrame`` from features ``X`` and optional labels ``y``.

    Args:
        X: Either a ``pandas.DataFrame`` or an object with a two-element
            ``shape`` that ``pandas.DataFrame`` accepts. In the latter case
            the columns are named ``f0``, ``f1``, ...
        y: Optional labels. Anything that is not a ``pandas.Series`` is
            wrapped in one named ``"target"``. A Series without a name is
            also stored under ``"target"``.

    Returns:
        An ``rpy2.robjects.DataFrame`` with one ``IntVector`` per column of
        ``X`` and, when ``y`` is given, one additional ``FactorVector``
        column built from ``IntVector(y)``.
    """
    if not isinstance(X, pandas.DataFrame):
        # pandas' default index is already 0..n_rows-1, so only the column
        # names need to be supplied.
        X = pandas.DataFrame(
            X, columns=['f' + str(i) for i in range(X.shape[1])])
    if y is not None and not isinstance(y, pandas.Series):
        y = pandas.Series(y, name="target")

    # TODO: make this code work for other types besides categorical strings
    # Every feature column goes through IntVector; this will work for the
    # car dataset for now.
    columns = {name: rpy2.robjects.IntVector(X[name]) for name in X.columns}

    if y is not None:
        # An unnamed Series has name None, which is not a usable R column
        # name; fall back to the name used for non-Series labels.
        y_name = y.name if y.name is not None else "target"
        columns[y_name] = rpy2.robjects.FactorVector(
            rpy2.robjects.IntVector(y))

    return rpy2.robjects.DataFrame(columns)