User Guide

Please refer to the index page for some basic usage examples.

Versions Used in Tutorials
In [1]: import paguro as pg

In [2]: pg.show_versions()

-------- Version info ---------
paguro:   0.3.1

---- Required dependencies ----
polars:   1.35.2

---- Optional dependencies ----
pyarrow:  <not installed>
numpy:    2.3.4
rich:     14.2.0

-------------------------------
Platform: Linux-6.11.0-1018-azure-x86_64-with-glibc2.39
Python:   3.12.12 (main, Oct 10 2025, 01:01:16) [GCC 13.3.0]
Data Used in Tutorials
The snippets below assume polars has been imported with `import polars as pl`.
customers
customers = pl.DataFrame(
    {
        "id": ["C001", "C002", "C003", "C004"],
        "name": ["Alice Wong", "Bob Smith", "Carol Jones", None],
        "email": ["alice@company.com", None, "caroljones", "david@company.com"],
        "age": [29, 34, 41, -5],
    }
)
In [3]: print(customers)
shape: (4, 4)
┌──────┬─────────────┬───────────────────┬─────┐
│ id   ┆ name        ┆ email             ┆ age │
│ ---  ┆ ---         ┆ ---               ┆ --- │
│ str  ┆ str         ┆ str               ┆ i64 │
╞══════╪═════════════╪═══════════════════╪═════╡
│ C001 ┆ Alice Wong  ┆ alice@company.com ┆ 29  │
│ C002 ┆ Bob Smith   ┆ null              ┆ 34  │
│ C003 ┆ Carol Jones ┆ caroljones        ┆ 41  │
│ C004 ┆ null        ┆ david@company.com ┆ -5  │
└──────┴─────────────┴───────────────────┴─────┘
orders
orders = (
    pl.DataFrame(
        {
            "id": [1001, 1002, 1003, 1004],
            "customer_id": ["C001", "C002", "C003", "C005"],
            "order_date": ["2024-03-10", "2025-01-01", "2025-03-15", None],
            "delivery_date": ["2024-03-14", "2024-09-01", "2025-03-18", "2025-03-20"],
            "total_amount": [None, 180, -50, 120],
        }
    ).with_columns(
        pl.col("order_date", "delivery_date").cast(pl.Date),
    )
)
In [4]: print(orders)
shape: (4, 5)
┌──────┬─────────────┬────────────┬───────────────┬──────────────┐
│ id   ┆ customer_id ┆ order_date ┆ delivery_date ┆ total_amount │
│ ---  ┆ ---         ┆ ---        ┆ ---           ┆ ---          │
│ i64  ┆ str         ┆ date       ┆ date          ┆ i64          │
╞══════╪═════════════╪════════════╪═══════════════╪══════════════╡
│ 1001 ┆ C001        ┆ 2024-03-10 ┆ 2024-03-14    ┆ null         │
│ 1002 ┆ C002        ┆ 2025-01-01 ┆ 2024-09-01    ┆ 180          │
│ 1003 ┆ C003        ┆ 2025-03-15 ┆ 2025-03-18    ┆ -50          │
│ 1004 ┆ C005        ┆ null       ┆ 2025-03-20    ┆ 120          │
└──────┴─────────────┴────────────┴───────────────┴──────────────┘
customers_nested
customers_nested = pl.DataFrame([
    {
        "id": "C001",
        "contact": {"name": "Alice Wong", "email": "alice@company.com"},
        "meta": {"age": 29, "country": "US"},
    },
    {
        "id": "C002",
        "contact": {"name": "Bob Smith", "email": None},
        "meta": {"age": 34, "country": "Canada"},
    },
    {
        "id": "C003",
        "contact": {"name": "Carol Jones", "email": "caroljones"},
        "meta": {"age": 41, "country": "US"},
    },
    {
        "id": "C004",
        "contact": {"name": None, "email": "david@company.com"},
        "meta": {"age": -5, "country": "England"},
    },
])
In [5]: print(customers_nested)
shape: (4, 3)
┌──────┬─────────────────────────────────┬────────────────┐
│ id   ┆ contact                         ┆ meta           │
│ ---  ┆ ---                             ┆ ---            │
│ str  ┆ struct[2]                       ┆ struct[2]      │
╞══════╪═════════════════════════════════╪════════════════╡
│ C001 ┆ {"Alice Wong","alice@company.c… ┆ {29,"US"}      │
│ C002 ┆ {"Bob Smith",null}              ┆ {34,"Canada"}  │
│ C003 ┆ {"Carol Jones","caroljones"}    ┆ {41,"US"}      │
│ C004 ┆ {null,"david@company.com"}      ┆ {-5,"England"} │
└──────┴─────────────────────────────────┴────────────────┘
orders_nested
orders_nested = (
    orders
    .with_columns(
        customer_id=pl.DataFrame(customers_nested).to_struct()
    )
    .rename({"customer_id": "customer"})
)
In [6]: print(orders_nested)
shape: (4, 5)
┌──────┬─────────────────────────────────┬────────────┬───────────────┬──────────────┐
│ id   ┆ customer                        ┆ order_date ┆ delivery_date ┆ total_amount │
│ ---  ┆ ---                             ┆ ---        ┆ ---           ┆ ---          │
│ i64  ┆ struct[3]                       ┆ date       ┆ date          ┆ i64          │
╞══════╪═════════════════════════════════╪════════════╪═══════════════╪══════════════╡
│ 1001 ┆ {"C001",{"Alice Wong","alice@c… ┆ 2024-03-10 ┆ 2024-03-14    ┆ null         │
│ 1002 ┆ {"C002",{"Bob Smith",null},{34… ┆ 2025-01-01 ┆ 2024-09-01    ┆ 180          │
│ 1003 ┆ {"C003",{"Carol Jones","carolj… ┆ 2025-03-15 ┆ 2025-03-18    ┆ -50          │
│ 1004 ┆ {"C004",{null,"david@company.c… ┆ null       ┆ 2025-03-20    ┆ 120          │
└──────┴─────────────────────────────────┴────────────┴───────────────┴──────────────┘

🔊 Stay tuned for tutorial releases! 🔊