Source code for phdu.stats.rtopy.resample

"""
Adaptation of resample R package: https://cran.r-project.org/web/packages/resample/resample.pdf
Based on the article: https://arxiv.org/abs/1411.5279
"""
import numpy as np
import pandas as pd
try:
    import rpy2.robjects as ro
    from rpy2.robjects import r, pandas2ri, numpy2ri
    from rpy2.robjects.packages import importr
    pandas2ri.activate()
    ro.numpy2ri.activate()
    from ._helper import attr_preprocess, load_R_pkg
except:
    pass

    
[docs]def stat_computer(obj_name, nsamples=1): load_R_pkg("data.table") if nsamples == 1: return r(f"{obj_name}$stats") else: return dict(stats=r(f"{obj_name}$stats"), individual_stats=r(f"rbindlist(lapply(lapply({obj_name}$resultsBoth, (function (l) l$stats)), as.data.frame.list), idcol=TRUE)").set_index(".id") )
[docs]def bootstrap(x, method, y=None, stat="mean", N=int(1e5), seed=0, block_size=1000, **kwargs): """ Attrs: - method: ["t", "percentile", "bca", "bootstrapT"] Returns: bootstrap object (R) """ kwargs_str = attr_preprocess(bootstrap, locals()) load_R_pkg("resample") if y is None: # one sample if method == "bootstrapT": ro.globalenv["bs"] = r(f"bootstrap(x, c({stat} = {stat}(x), sd = sd(x)), R=N, seed=0, block.size=block.size {kwargs_str})") else: ro.globalenv["bs"] = r(f"bootstrap(x, c({stat} = {stat}(x)), R=N, seed=0, block.size=block.size {kwargs_str})") else: if method == "bootstrapT": raise ValueError(f"method {method} not implemented for the 2-sample case") else: ro.globalenv["DF"] = pd.concat([pd.DataFrame(dict(val=x)).assign(Group="x"), pd.DataFrame(dict(val=y)).assign(Group="y") ], ignore_index=True, axis=0) ro.globalenv["bs"] = r(f"bootstrap2(DF, {stat}(val), treatment=Group, R=N, seed=seed, block.size=block.size {kwargs_str})") return ro.globalenv["bs"]
[docs]def CI_bootstrap(x, y=None, stat="mean", method="bootstrapT", expand=True, alpha=0.05, probs=None, alternative="two-sided", return_stats=False, **kwargs): """ Check for the stats. Unbiased, functional statistics such as the mean should have zero bootstrap bias. If not, increase N. NOTE: s2 will have non-zero bootstrap bias because it is not functional. Attrs: - method: ["t", "percentile", "bca", "bootstrapT"]. - alternative: ["two-sided", "less", "greater"]. """ if probs is None: if alternative == "less": ro.globalenv["probs"] = np.array([1 - alpha]) elif alternative == "greater": ro.globalenv["probs"] = np.array([alpha]) elif alternative == "two-sided": ro.globalenv["probs"] = np.array([alpha/2, 1 - alpha/2]) else: raise ValueError(f"alternative {alternative} not valid. Available: 'greater', 'less', 'two-sided'") bs = bootstrap(x, method, y=y, stat=stat, **kwargs) if method == "bootstrapT": CI = r(f"CI.{method}(bs, probs=probs)") # bootstrap T is not affected by narrowness bias. Table 6 else: ro.globalenv["expand"] = expand CI = r(f"CI.{method}(bs, probs=probs, expand=expand)") if CI.shape[0] == 1: CI = CI[0] if probs is None: if alternative == "less": CI = np.array([-np.inf, CI[0]]) # Only implemented for one at a time. elif alternative == "greater": CI = np.array([CI[0], np.inf]) if return_stats: return bs, stat_computer("bs", nsamples=1 + int(y is not None)), CI else: return CI
[docs]def permutation(x, y=None, stat="mean", N=int(1e5), seed=0, block_size=1000, **kwargs): """ Returns: permutationTest object (R) and the results of the test. The implementation in numba is faster and includes pairing option, which this one does not. """ kwargs_str = attr_preprocess(permutation, locals()) load_R_pkg("resample") if y is None: # one sample ro.globalenv["perm"] = r(f"permutationTest(x, c({stat} = {stat}(x)), R=N, seed=0, block.size=block.size {kwargs_str})") else: ro.globalenv["DF"] = pd.concat([pd.DataFrame(dict(val=x)).assign(Group="x"), pd.DataFrame(dict(val=y)).assign(Group="y") ], ignore_index=True, axis=0) ro.globalenv["perm"] = r(f"permutationTest2(DF, {stat}(val), treatment=Group, R=N, seed=seed, block.size=block.size {kwargs_str})") return ro.globalenv["perm"], stat_computer("perm", nsamples= 1 + int(y is not None))