Source code for baloo.io.csv

from pandas import read_csv as pd_read_csv

from ..core import DataFrame


[docs]def read_csv(filepath, sep=',', header='infer', names=None, usecols=None, dtype=None, converters=None, skiprows=None, nrows=None): """Read CSV into DataFrame. Eager implementation using pandas, i.e. entire file is read at this point. Only common/relevant parameters available at the moment; for full list, could use pandas directly and then convert to baloo. Parameters ---------- filepath : str sep : str, optional Separator used between values. header : 'infer' or None, optional Whether to infer the column names from the first row or not. names : list of str, optional List of column names to use. Overrides inferred header. usecols : list of (int or str), optional Which columns to parse. dtype : dict, optional Dict of column -> type to parse as. converters : dict, optional Dict of functions for converting values in certain columns. skiprows : int, optional Number of lines to skip at start of file. nrows : int, optional Number of rows to read. Returns ------- DataFrame See Also -------- pandas.read_csv : https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html """ pd_df = pd_read_csv(filepath, sep=sep, header=header, names=names, usecols=usecols, dtype=dtype, converters=converters, skiprows=skiprows, nrows=nrows) return DataFrame.from_pandas(pd_df)
# TODO: should avoid going to Pandas def to_csv(df, filepath, sep=',', header=True, index=True): """Save DataFrame as csv. Note data is expected to be evaluated. Currently delegates to Pandas. Parameters ---------- df : DataFrame filepath : str sep : str, optional Separator used between values. header : bool, optional Whether to save the header. index : bool, optional Whether to save the index columns. Returns ------- None See Also -------- pandas.DataFrame.to_csv : https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_csv.html """ df.to_pandas().to_csv(filepath, sep=sep, header=header, index=index)