User Guide
Please refer to the index page for some basic usage examples.
Versions used in the tutorials
In [1]: import paguro as pg
In [2]: pg.show_versions()
-------- Version info ---------
paguro: 0.3.1
---- Required dependencies ----
polars: 1.35.2
---- Optional dependencies ----
pyarrow: <not installed>
numpy: 2.3.4
rich: 14.2.0
-------------------------------
Platform: Linux-6.11.0-1018-azure-x86_64-with-glibc2.39
Python: 3.12.12 (main, Oct 10 2025, 01:01:16) [GCC 13.3.0]
Data Used in Tutorials
The snippets below assume that polars has been imported with `import polars as pl`.
customers
customers = pl.DataFrame(
{
"id": ["C001", "C002", "C003", "C004"],
"name": ["Alice Wong", "Bob Smith", "Carol Jones", None],
"email": ["alice@company.com", None, "caroljones", "david@company.com"],
"age": [29, 34, 41, -5],
}
)
In [3]: print(customers)
shape: (4, 4)
┌──────┬─────────────┬───────────────────┬─────┐
│ id ┆ name ┆ email ┆ age │
│ --- ┆ --- ┆ --- ┆ --- │
│ str ┆ str ┆ str ┆ i64 │
╞══════╪═════════════╪═══════════════════╪═════╡
│ C001 ┆ Alice Wong ┆ alice@company.com ┆ 29 │
│ C002 ┆ Bob Smith ┆ null ┆ 34 │
│ C003 ┆ Carol Jones ┆ caroljones ┆ 41 │
│ C004 ┆ null ┆ david@company.com ┆ -5 │
└──────┴─────────────┴───────────────────┴─────┘
orders
orders = (
pl.DataFrame(
{
"id": [1001, 1002, 1003, 1004],
"customer_id": ["C001", "C002", "C003", "C005"],
"order_date": ["2024-03-10", "2025-01-01", "2025-03-15", None],
"delivery_date": ["2024-03-14", "2024-09-01", "2025-03-18", "2025-03-20"],
"total_amount": [None, 180, -50, 120],
}
).with_columns(
pl.col("order_date", "delivery_date").cast(pl.Date),
)
)
In [4]: print(orders)
shape: (4, 5)
┌──────┬─────────────┬────────────┬───────────────┬──────────────┐
│ id ┆ customer_id ┆ order_date ┆ delivery_date ┆ total_amount │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ str ┆ date ┆ date ┆ i64 │
╞══════╪═════════════╪════════════╪═══════════════╪══════════════╡
│ 1001 ┆ C001 ┆ 2024-03-10 ┆ 2024-03-14 ┆ null │
│ 1002 ┆ C002 ┆ 2025-01-01 ┆ 2024-09-01 ┆ 180 │
│ 1003 ┆ C003 ┆ 2025-03-15 ┆ 2025-03-18 ┆ -50 │
│ 1004 ┆ C005 ┆ null ┆ 2025-03-20 ┆ 120 │
└──────┴─────────────┴────────────┴───────────────┴──────────────┘
customers_nested
customers_nested = pl.DataFrame([
{
"id": "C001",
"contact": {"name": "Alice Wong", "email": "alice@company.com"},
"meta": {"age": 29, "country": "US"},
},
{
"id": "C002",
"contact": {"name": "Bob Smith", "email": None},
"meta": {"age": 34, "country": "Canada"},
},
{
"id": "C003",
"contact": {"name": "Carol Jones", "email": "caroljones"},
"meta": {"age": 41, "country": "US"},
},
{
"id": "C004",
"contact": {"name": None, "email": "david@company.com"},
"meta": {"age": -5, "country": "England"},
},
])
In [5]: print(customers_nested)
shape: (4, 3)
┌──────┬─────────────────────────────────┬────────────────┐
│ id ┆ contact ┆ meta │
│ --- ┆ --- ┆ --- │
│ str ┆ struct[2] ┆ struct[2] │
╞══════╪═════════════════════════════════╪════════════════╡
│ C001 ┆ {"Alice Wong","alice@company.c… ┆ {29,"US"} │
│ C002 ┆ {"Bob Smith",null} ┆ {34,"Canada"} │
│ C003 ┆ {"Carol Jones","caroljones"} ┆ {41,"US"} │
│ C004 ┆ {null,"david@company.com"} ┆ {-5,"England"} │
└──────┴─────────────────────────────────┴────────────────┘
orders_nested
orders_nested = (
orders
.with_columns(
customer_id=pl.DataFrame(customers_nested).to_struct()
)
.rename({"customer_id": "customer"})
)
In [6]: print(orders_nested)
shape: (4, 5)
┌──────┬─────────────────────────────────┬────────────┬───────────────┬──────────────┐
│ id ┆ customer ┆ order_date ┆ delivery_date ┆ total_amount │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ struct[3] ┆ date ┆ date ┆ i64 │
╞══════╪═════════════════════════════════╪════════════╪═══════════════╪══════════════╡
│ 1001 ┆ {"C001",{"Alice Wong","alice@c… ┆ 2024-03-10 ┆ 2024-03-14 ┆ null │
│ 1002 ┆ {"C002",{"Bob Smith",null},{34… ┆ 2025-01-01 ┆ 2024-09-01 ┆ 180 │
│ 1003 ┆ {"C003",{"Carol Jones","carolj… ┆ 2025-03-15 ┆ 2025-03-18 ┆ -50 │
│ 1004 ┆ {"C004",{null,"david@company.c… ┆ null ┆ 2025-03-20 ┆ 120 │
└──────┴─────────────────────────────────┴────────────┴───────────────┴──────────────┘
🔊 Stay tuned for tutorial releases! 🔊