Source code for phdu.stats.rtopy.resample

"""
Adaptation of resample R package: https://cran.r-project.org/web/packages/resample/resample.pdf
Based on the article: https://arxiv.org/abs/1411.5279
"""
import numpy as np
import pandas as pd
try:
    import rpy2.robjects as ro
    from rpy2.robjects import r, pandas2ri, numpy2ri
    from rpy2.robjects.packages import importr
    pandas2ri.activate()
    ro.numpy2ri.activate()
    from ._helper import attr_preprocess, load_R_pkg
except:
    pass

    
[docs]def stat_computer(obj_name, nsamples=1):
    load_R_pkg("data.table")
    if nsamples == 1:
        return r(f"{obj_name}$stats") 
    else:
        return dict(stats=r(f"{obj_name}$stats"),
                    individual_stats=r(f"rbindlist(lapply(lapply({obj_name}$resultsBoth, (function (l) l$stats)), as.data.frame.list), idcol=TRUE)").set_index(".id")
                   )

[docs]def bootstrap(x, method, y=None, stat="mean", N=int(1e5), seed=0, block_size=1000, **kwargs):
    """
    Attrs:
        - method: ["t", "percentile", "bca", "bootstrapT"]
        
    Returns: bootstrap object (R)
    """
    kwargs_str = attr_preprocess(bootstrap, locals())
    load_R_pkg("resample")
    if y is None: # one sample       
        if method == "bootstrapT":
            ro.globalenv["bs"] = r(f"bootstrap(x, c({stat} = {stat}(x), sd = sd(x)), R=N, seed=0, block.size=block.size {kwargs_str})")
        else:
            ro.globalenv["bs"] = r(f"bootstrap(x, c({stat} = {stat}(x)), R=N, seed=0, block.size=block.size {kwargs_str})")
    else:
        if method == "bootstrapT":
            raise ValueError(f"method {method} not implemented for the 2-sample case")
        else:
            ro.globalenv["DF"] = pd.concat([pd.DataFrame(dict(val=x)).assign(Group="x"),
                                            pd.DataFrame(dict(val=y)).assign(Group="y")
                                           ], ignore_index=True, axis=0)
            ro.globalenv["bs"] = r(f"bootstrap2(DF, {stat}(val), treatment=Group, R=N, seed=seed, block.size=block.size {kwargs_str})")
    return ro.globalenv["bs"]

[docs]def CI_bootstrap(x, y=None, stat="mean", method="bootstrapT", expand=True, alpha=0.05, probs=None, alternative="two-sided", return_stats=False, **kwargs):
    """
    Check for the stats. Unbiased, functional statistics such as the mean should have zero bootstrap bias. If not, increase N.
    NOTE: s2 will have non-zero bootstrap bias because it is not functional.
    
    Attrs:
        - method: ["t", "percentile", "bca", "bootstrapT"].
        - alternative: ["two-sided", "less", "greater"].
    """
    if probs is None:
        if alternative == "less":
            ro.globalenv["probs"] = np.array([1 - alpha])
        elif alternative == "greater":
            ro.globalenv["probs"] = np.array([alpha])
        elif alternative == "two-sided":
            ro.globalenv["probs"] = np.array([alpha/2, 1 - alpha/2])
        else:
            raise ValueError(f"alternative {alternative} not valid. Available: 'greater', 'less', 'two-sided'")
            
    bs = bootstrap(x, method, y=y, stat=stat, **kwargs)
    
    if method == "bootstrapT":
        CI = r(f"CI.{method}(bs, probs=probs)") # bootstrap T is not affected by narrowness bias. Table 6
    else:
        ro.globalenv["expand"] = expand
        CI = r(f"CI.{method}(bs, probs=probs, expand=expand)")
    if CI.shape[0] == 1:
        CI = CI[0]
    
    if probs is None:
        if alternative == "less":
            CI = np.array([-np.inf, CI[0]]) # Only implemented for one at a time.
        elif alternative == "greater":
            CI = np.array([CI[0], np.inf])
            
    if return_stats:
        return bs, stat_computer("bs", nsamples=1 + int(y is not None)), CI
    else:
        return CI
    
[docs]def permutation(x, y=None, stat="mean", N=int(1e5), seed=0, block_size=1000,  **kwargs):
    """
    Returns: permutationTest object (R) and the results of the test. 
    
    The implementation in numba is faster and includes pairing option, which this one does not.
    """
    kwargs_str = attr_preprocess(permutation, locals())
    load_R_pkg("resample")   
    if y is None: # one sample       
        ro.globalenv["perm"] = r(f"permutationTest(x, c({stat} = {stat}(x)), R=N, seed=0, block.size=block.size {kwargs_str})")
    else:
        ro.globalenv["DF"] = pd.concat([pd.DataFrame(dict(val=x)).assign(Group="x"),
                                        pd.DataFrame(dict(val=y)).assign(Group="y")
                                       ], ignore_index=True, axis=0)
        ro.globalenv["perm"] = r(f"permutationTest2(DF, {stat}(val), treatment=Group, R=N, seed=seed, block.size=block.size {kwargs_str})")
        
    return ro.globalenv["perm"], stat_computer("perm", nsamples= 1 + int(y is not None))