Source code for phdu.pd_utils

"""
pandas utils

Includes    latex_table:      f:  pd.DataFrame -> latex table.
"""
import pandas as pd
import numpy as np
from functools import reduce     

[docs]def latex_table(df, index=False, **kwargs): """Pandas DataFrame -> Latex table.""" col_format = "c" if isinstance(df, pd.core.series.Series) else "c"*len(df.columns) if index: col_format += "c" table_replacements = (("\\toprule", "\\toprule "*2), ("\\bottomrule", "\\bottomrule "*2) ) text_replacements = (("\\textbackslash ", "\\"), ("\{", "{"), ("\}", "}"), ("\$", "$"), ("\_", "_"), ("\\textasciicircum ", "^") ) table_formatter = lambda x: reduce(lambda a, kv: a.replace(*kv), table_replacements, x) text_formatter = lambda x: reduce(lambda a, kv: a.replace(*kv), text_replacements, x) formatter = lambda x: text_formatter(table_formatter(x)) print(formatter(df.to_latex(index=index, column_format=col_format, **kwargs))) return
[docs]def expand_sequences(df, dt=1, maxlen=None): """ Input: DataFrame. Each element is an array and all arrays start at the same time and have the same time step dt. Returns: MultiColumn DataFrame: (df.index, (df.columns, time_steps)) """ if df.isna().values.any(): if maxlen is None: maxlen = int(df.applymap(lambda x: x.size if isinstance(x, np.ndarray) else np.NaN).max().max()) df_padded = df.applymap(lambda x: np.hstack((x, np.NaN*np.empty((maxlen-x.size)))) if isinstance(x, np.ndarray) else np.NaN*np.empty((maxlen))) else: if maxlen is None: maxlen = int(df.applymap(lambda x: x.size).values.max()) df_padded = df.applymap(lambda x: np.hstack((x, np.NaN*np.empty((maxlen-x.size))))) df_padded_arr = np.stack([np.vstack(x) for x in df_padded.values]) # shape (df.shape[0], df.shape[1], time_steps) return pd.DataFrame(df_padded_arr.reshape((df.shape[0], -1)), index = df.index, columns = pd.MultiIndex.from_product([df.columns, dt*np.arange(maxlen)]))