class paguro.Collection

A collection of Datasets.

Constructors

Collection(data, ...)

Initialize the Collection from *data*; see ``help(Collection)`` for the exact signature.

Config

set_scope(key: str | Iterable[str] | None = None) → Self
set_scope_by_index(idx: int | Iterable[int] | None = None) → Self

Delegated

set_sorted(column: str, *, descending: bool = False) → Self
approx_n_unique() → Self
bottom_k(k: int, *, by: IntoExpr | Iterable[IntoExpr], ...) → Self
cast(dtypes, ...) → Self
clear(n: int = 0) → Self
clone() → Self
collect_model_blueprint(...) → dict[str, str]
collect_schema() → dict[str, Schema]
corr(**kwargs: Any) → Self
count() → Self
describe(...) → Self
drop(*columns, ...) → Self
drop_in_place(...) → dict[str, polars.Series]
drop_nans(...) → Self
drop_nulls(...) → Self
equals(other: polars.DataFrame, *, ...) → dict[str, bool]
estimated_size(unit: SizeUnit = 'b') → dict[str, int | float]
explode(columns, ...) → Self
extend(other: polars.DataFrame) → Self
fill_nan(value: Expr | int | float | None) → Self
fill_null(value: Any | Expr | None = None, ...) → Self
filter(*predicates, ...) → Self
fold(operation) → dict[str, polars.Series]
gather_every(n: int, offset: int = 0) → Self
get_column(name: str, *, ...) → dict[str, polars.Series | Any]
get_column_index(name: str) → dict[str, int]
get_columns() → dict[str, list[polars.Series]]
glimpse(*, ...) → dict[str, str | None]
hash_rows(...) → dict[str, polars.Series]
head(n: int = 5) → Self
hstack(columns: list[polars.Series] | polars.DataFrame, ...) → Self
insert_column(index: int, column: IntoExprColumn) → Self
interpolate() → Self
is_duplicated() → dict[str, polars.Series]
is_empty() → dict[str, bool]
is_unique() → dict[str, polars.Series]
item(row: int | None = None, ...) → dict[str, Any]
iter_columns() → dict[str, Iterator[polars.Series]]
iter_rows(...) → dict[str, Iterator[tuple[Any, ...]] | Iterator[dict[str, Any]]]
iter_slices(...) → dict[str, Iterator[polars.DataFrame]]
limit(n: int = 5) → Self
map_columns(column_names, ...) → Self
map_rows(function: Callable[[tuple[Any, ...]], Any], ...) → Self
match_to_schema(schema: SchemaDict | Schema, *, ...) → Self
max() → Self
max_horizontal() → dict[str, polars.Series]
mean() → Self
mean_horizontal(...) → dict[str, polars.Series]
median() → Self
melt(...) → Self
merge_sorted(other: Dataset[U] | polars.DataFrame, key: str) → Self
min() → Self
min_horizontal() → dict[str, polars.Series]
n_chunks(...) → dict[str, int | list[int]]
n_unique(...) → dict[str, int]
null_count() → Self
partition_by(...) → dict[str, list[polars.DataFrame] | dict[tuple[Any, ...], polars.DataFrame]]
pipe(function, ...) → dict[str, T]
pivot(on, ...) → Self
product() → Self
quantile(quantile: float, ...) → Self
rechunk() → Self
remove(*predicates, ...) → Self
rename(mapping, ...) → Self
replace_column(index: int, column: polars.Series) → Self
reverse() → Self
row(...) → dict[str, tuple[Any, ...] | dict[str, Any]]
rows(...) → dict[str, list[tuple[Any, ...]] | list[dict[str, Any]]]
rows_by_key(key, ...) → dict[str, dict[Any, Any]]
sample(n: int | polars.Series | None = None, *, ...) → Self
select(*exprs: IntoExpr | Iterable[IntoExpr], **named_exprs) → Self
select_seq(*exprs: IntoExpr | Iterable[IntoExpr], ...) → Self
shift(n: int = 1, *, fill_value: IntoExpr | None = None) → Self
shrink_to_fit(*, in_place: bool = False) → Self
skim(...) → dict[str, Collection]
slice(offset: int, length: int | None = None) → Self
sort(by: IntoExpr | Iterable[IntoExpr], *more_by, ...) → Self
sql(query: str, *, table_name: str = 'self') → Self
std(ddof: int = 1) → Self
sum() → Self
sum_horizontal(...) → dict[str, polars.Series]
tail(n: int = 5) → Self
top_k(k: int, *, by: IntoExpr | Iterable[IntoExpr], ...) → Self
transpose(*, include_header: bool = False, ...) → Self
unique(...) → Self
unnest(columns, ...) → Self
unpivot(...) → Self
unstack(*, step: int, ...) → Self
update(other: polars.DataFrame, ...) → Self
upsample(time_column: str, *, every: str | timedelta, ...) → Self
var(ddof: int = 1) → Self
vstack(other: Dataset[VFM] | polars.DataFrame, *, ...) → Self
with_columns(*exprs: IntoExpr | Iterable[IntoExpr], ...) → Self
with_columns_seq(*exprs: IntoExpr | Iterable[IntoExpr], ...) → Self
with_name(name: str | None) → Self
with_row_count(name: str = 'row_nr', offset: int = 0) → Self
with_row_index(name: str = 'index', offset: int = 0) → Self
without_model() → dict[str, Dataset[Any]]
to_arrow(*, ...) → dict[str, pa.Table]
to_dataframe() → dict[str, polars.DataFrame]
to_dummies(...) → Self
to_init_repr(n: int = 1000) → dict[str, str]
to_jax(...) → dict[str, jax.Array | dict[str, jax.Array]]
to_lazyframe() → dict[str, polars.LazyFrame]
to_numpy(*, ...) → dict[str, np.ndarray[Any, Any]]
to_pandas(*, ...) → dict[str, pd.DataFrame]
to_polars() → dict[str, polars.DataFrame]
to_series(...) → dict[str, polars.Series]
to_struct(...) → dict[str, polars.Series]
to_torch(...) → dict[str, torch.Tensor | dict[str, torch.Tensor] | PolarsDataset]

Methods

lazy() → LazyCollection
items() → ItemsView[str, _DST]
keys() → KeysView[str]
values() → ValuesView[_DST]
concat(*, with_key_column: bool | str = False, ...) → _DST

Concatenate all items into a single Dataset/LazyDataset.

group_by(*by, ...) → _CollectionGroupBy
join(other, ...) → Self

Properties

property columns : dict[str, Any]

group: Delegated

property dtypes : dict[str, Any]

group: Delegated

property flags : dict[str, Any]

group: Delegated

property height : dict[str, Any]

group: Delegated

property model : dict[str, Any]

group: Delegated

property plot : dict[str, Any]

group: Delegated

property schema : dict[str, Any]

group: Delegated

property shape : dict[str, Any]

group: Delegated

property style : dict[str, Any]

group: Delegated

property vcol : dict[str, Any]

group: Delegated

property width : dict[str, Any]

group: Delegated

Export

to_dict(*, to_polars: bool = False) → dict