class paguro.LazyCollection

A collection of LazyDatasets.

Constructors

LazyCollection(data, ...)

Create a LazyCollection from `data`. The signature shown above is abbreviated; see help(paguro.LazyCollection) for the full signature.

Config

set_scope(key: str | Iterable[str] | None = None) Self
set_scope_by_index(idx: int | Iterable[int] | None = None) Self

Delegated

set_sorted(column: str, *, descending: bool = False) Self
collect(**kwargs: Any) Collection
approx_n_unique() Self
bottom_k(k: int, *, by: IntoExpr | Iterable[IntoExpr], ...) Self
cache() Self
cast(dtypes, ...) Self
clear(n: int = 0) Self
clone() Self
collect_async(...) dict[str, Awaitable[polars.DataFrame] | _GeventDataFrameResult[polars.DataFrame]]
collect_batches(*, ...) dict[str, Iterator[polars.DataFrame]]
collect_model_blueprint(...) dict[str, str]
collect_schema() dict[str, Schema]
count() Self
describe(...) dict[str, polars.DataFrame]
drop(*columns, ...) Self
drop_nans(...) Self
drop_nulls(...) Self
explain(*, format: ExplainFormat = 'plain', ...) dict[str, str]
explode(columns, ...) Self
fetch(...) dict[str, polars.DataFrame]
fill_nan(value: int | float | Expr | None) Self
fill_null(value: Any | Expr | None = None, ...) Self
filter(*predicates, ...) Self
first() Self
gather_every(n: int, offset: int = 0) Self
head(n: int = 5) Self
inspect(fmt: str = '{}') Self
interpolate() Self
last() Self
limit(n: int = 5) Self
map_batches(function, ...) Self
match_to_schema(schema: SchemaDict | Schema, *, ...) Self
max() Self
mean() Self
median() Self
melt(...) Self
merge_sorted(other: LazyDataset[U] | polars.LazyFrame, key) Self
min() Self
null_count() Self
pipe(function, ...) dict[str, T]
pipe_with_schema(function) Self
profile(...) dict[str, tuple[polars.DataFrame, polars.DataFrame]]
quantile(quantile: float | Expr, ...) Self
remote(...) dict[str, pc.LazyFrameRemote]
remove(*predicates, ...) Self
rename(mapping, ...) Self
reverse() Self
select(*exprs: IntoExpr | Iterable[IntoExpr], **named_exprs) Self
select_seq(*exprs: IntoExpr | Iterable[IntoExpr], ...) Self
shift(n: int | IntoExprColumn = 1, *, ...) Self
show_graph(*, optimized: bool = True, ...) dict[str, str | None]
skim(...) dict[str, Collection]
slice(offset: int, length: int | None = None) Self
sort(by: IntoExpr | Iterable[IntoExpr], *more_by, ...) Self
sql(query: str, *, table_name: str = 'self') Self
std(ddof: int = 1) Self
sum() Self
tail(n: int = 5) Self
top_k(k: int, *, by: IntoExpr | Iterable[IntoExpr], ...) Self
unique(...) Self
unnest(columns, ...) Self
unpivot(...) Self
update(other: polars.LazyFrame, ...) Self
var(ddof: int = 1) Self
with_columns(*exprs: IntoExpr | Iterable[IntoExpr], ...) Self
with_columns_seq(*exprs: IntoExpr | Iterable[IntoExpr], ...) Self
with_context(other: Self | list[Self]) Self
with_name(name: str | None) Self
with_row_count(name: str = 'row_nr', offset: int = 0) Self
with_row_index(name: str = 'index', offset: int = 0) Self
without_model() dict[str, LazyDataset[Any]]
to_dataframe() dict[str, polars.DataFrame]
to_lazyframe() dict[str, polars.LazyFrame]
to_polars() dict[str, polars.LazyFrame]

Methods

items() ItemsView[str, _DST]
keys() KeysView[str]
values() ValuesView[_DST]
concat(*, with_key_column: bool | str = False, ...) _DST

Concatenate all items into a single Dataset/LazyDataset.

group_by(*by, ...) _CollectionGroupBy
join(other, ...) Self

Properties

property columns : dict[str, Any]

group: Delegated

property dtypes : dict[str, Any]

group: Delegated

property model : dict[str, Any]

group: Delegated

property schema : dict[str, Any]

group: Delegated

property vcol : dict[str, Any]

group: Delegated

property width : dict[str, Any]

group: Delegated

Export

to_dict(*, to_polars: bool = False) dict