Skip to content

Getting Started

This guide shows you how to use the geozarr-toolkit library to create and validate GeoZarr-compliant metadata.

Installation

pip install "geozarr-toolkit"
# or with uv
uv pip install "geozarr-toolkit"

Quick Start

Creating Spatial Metadata

The spatial convention describes the relationship between array indices and spatial coordinates.

from geozarr_toolkit import (
    create_spatial_attrs,
    create_zarr_conventions,
    SpatialConventionMetadata,
)

# Create spatial attributes
spatial_attrs = create_spatial_attrs(
    dimensions=["Y", "X"],
    transform=[10.0, 0.0, 500000.0, 0.0, -10.0, 5000000.0],  # Affine coefficients
    bbox=[500000.0, 4900000.0, 600000.0, 5000000.0],
    shape=[10000, 10000],
)

# Add the zarr_conventions metadata
spatial_attrs["zarr_conventions"] = create_zarr_conventions(
    SpatialConventionMetadata()
)

import json
print(json.dumps(spatial_attrs, indent=2))
{
  "spatial:dimensions": [
    "Y",
    "X"
  ],
  "spatial:bbox": [
    500000.0,
    4900000.0,
    600000.0,
    5000000.0
  ],
  "spatial:transform_type": "affine",
  "spatial:transform": [
    10.0,
    0.0,
    500000.0,
    0.0,
    -10.0,
    5000000.0
  ],
  "spatial:shape": [
    10000,
    10000
  ],
  "spatial:registration": "pixel",
  "zarr_conventions": [
    {
      "uuid": "689b58e2-cf7b-45e0-9fff-9cfc0883d6b4",
      "schema_url": "https://raw.githubusercontent.com/zarr-conventions/spatial/refs/tags/v1/schema.json",
      "spec_url": "https://github.com/zarr-conventions/spatial/blob/v1/README.md",
      "name": "spatial:",
      "description": "Spatial coordinate information"
    }
  ]
}

Adding CRS Information

The proj convention encodes Coordinate Reference System information.

from geozarr_toolkit import (
    create_proj_attrs,
    create_zarr_conventions,
    ProjConventionMetadata,
)

# Using EPSG code
proj_attrs = create_proj_attrs(code="EPSG:32633")  # UTM zone 33N

# Add the zarr_conventions metadata
proj_attrs["zarr_conventions"] = create_zarr_conventions(
    ProjConventionMetadata()
)

import json
print(json.dumps(proj_attrs, indent=2))
{
  "proj:code": "EPSG:32633",
  "zarr_conventions": [
    {
      "uuid": "f17cb550-5864-4468-aeb7-f3180cfb622f",
      "schema_url": "https://raw.githubusercontent.com/zarr-experimental/geo-proj/refs/tags/v1/schema.json",
      "spec_url": "https://github.com/zarr-experimental/geo-proj/blob/v1/README.md",
      "name": "proj:",
      "description": "Coordinate reference system information for geospatial data"
    }
  ]
}

Instead of an EPSG code, you can also describe the CRS with a WKT2 string or with PROJJSON. For example, using WKT2:

# Using WKT2
proj_attrs_wkt = create_proj_attrs(wkt2='GEOGCS["WGS 84",DATUM["WGS_1984"]]')

Building a Multiscales Pyramid

The multiscales convention describes hierarchical resolution levels.

from geozarr_toolkit import (
    create_multiscales_layout,
    create_zarr_conventions,
    MultiscalesConventionMetadata,
)

# Create a 3-level pyramid
multiscales = create_multiscales_layout([
    {"asset": "0"},
    {"asset": "1", "derived_from": "0", "transform": {"scale": [2.0, 2.0]}},
    {"asset": "2", "derived_from": "1", "transform": {"scale": [2.0, 2.0]}},
], resampling_method="average")

# Add the zarr_conventions metadata
multiscales["zarr_conventions"] = create_zarr_conventions(
    MultiscalesConventionMetadata()
)

import json
print(json.dumps(multiscales, indent=2))
{
  "multiscales": {
    "layout": [
      {
        "asset": "0"
      },
      {
        "asset": "1",
        "derived_from": "0",
        "transform": {
          "scale": [
            2.0,
            2.0
          ]
        }
      },
      {
        "asset": "2",
        "derived_from": "1",
        "transform": {
          "scale": [
            2.0,
            2.0
          ]
        }
      }
    ],
    "resampling_method": "average"
  },
  "zarr_conventions": [
    {
      "uuid": "d35379db-88df-4056-af3a-620245f8e347",
      "schema_url": "https://raw.githubusercontent.com/zarr-conventions/multiscales/refs/tags/v1/schema.json",
      "spec_url": "https://github.com/zarr-conventions/multiscales/blob/v1/README.md",
      "name": "multiscales",
      "description": "Multiscale layout of zarr datasets"
    }
  ]
}

Complete GeoZarr Metadata

Combine multiple conventions with a single zarr_conventions array:

from geozarr_toolkit import create_geozarr_attrs

# Create complete metadata with zarr_conventions (spatial + proj)
attrs = create_geozarr_attrs(
    dimensions=["Y", "X"],
    crs="EPSG:32633",
    transform=[10.0, 0.0, 500000.0, 0.0, -10.0, 5000000.0],
    bbox=[500000.0, 4900000.0, 600000.0, 5000000.0],
    shape=[10000, 10000],
)

import json
print(json.dumps(attrs, indent=2))
{
  "spatial:dimensions": [
    "Y",
    "X"
  ],
  "spatial:bbox": [
    500000.0,
    4900000.0,
    600000.0,
    5000000.0
  ],
  "spatial:transform_type": "affine",
  "spatial:transform": [
    10.0,
    0.0,
    500000.0,
    0.0,
    -10.0,
    5000000.0
  ],
  "spatial:shape": [
    10000,
    10000
  ],
  "spatial:registration": "pixel",
  "proj:code": "EPSG:32633",
  "zarr_conventions": [
    {
      "uuid": "689b58e2-cf7b-45e0-9fff-9cfc0883d6b4",
      "schema_url": "https://raw.githubusercontent.com/zarr-conventions/spatial/refs/tags/v1/schema.json",
      "spec_url": "https://github.com/zarr-conventions/spatial/blob/v1/README.md",
      "name": "spatial:",
      "description": "Spatial coordinate information"
    },
    {
      "uuid": "f17cb550-5864-4468-aeb7-f3180cfb622f",
      "schema_url": "https://raw.githubusercontent.com/zarr-experimental/geo-proj/refs/tags/v1/schema.json",
      "spec_url": "https://github.com/zarr-experimental/geo-proj/blob/v1/README.md",
      "name": "proj:",
      "description": "Coordinate reference system information for geospatial data"
    }
  ]
}

Working with Existing Data

From rioxarray

Extract metadata from a rioxarray DataArray:

import rioxarray
import xarray as xr
from geozarr_toolkit import (
    from_rioxarray,
    create_zarr_conventions,
    SpatialConventionMetadata,
    ProjConventionMetadata,
)

# Load data with rioxarray
da = xr.open_dataarray("data.tif", engine="rasterio")

# Extract convention metadata (spatial + proj attributes)
attrs = from_rioxarray(da)

# Add the zarr_conventions metadata
attrs["zarr_conventions"] = create_zarr_conventions(
    SpatialConventionMetadata(),
    ProjConventionMetadata(),
)

From GDAL GeoTransform

Convert a GDAL-style GeoTransform to convention attributes:

from geozarr_toolkit import (
    from_geotransform,
    create_zarr_conventions,
    SpatialConventionMetadata,
    ProjConventionMetadata,
)

# GDAL format: [origin_x, pixel_width, rotation, origin_y, rotation, pixel_height]
gdal_gt = (500000.0, 10.0, 0.0, 5000000.0, 0.0, -10.0)
crs_wkt = 'PROJCS["UTM zone 33N"]'
shape = (10000, 10000)

# Extract convention metadata (spatial + proj attributes)
attrs = from_geotransform(gdal_gt, crs_wkt, shape)

# Add the zarr_conventions metadata
attrs["zarr_conventions"] = create_zarr_conventions(
    SpatialConventionMetadata(),
    ProjConventionMetadata(),
)

import json
print(json.dumps(attrs, indent=2))
{
  "spatial:dimensions": [
    "Y",
    "X"
  ],
  "spatial:bbox": [
    500000.0,
    4900000.0,
    600000.0,
    5000000.0
  ],
  "spatial:transform_type": "affine",
  "spatial:transform": [
    10.0,
    0.0,
    500000.0,
    0.0,
    -10.0,
    5000000.0
  ],
  "spatial:shape": [
    10000,
    10000
  ],
  "spatial:registration": "pixel",
  "proj:wkt2": "PROJCS[\"UTM zone 33N\"]",
  "zarr_conventions": [
    {
      "uuid": "689b58e2-cf7b-45e0-9fff-9cfc0883d6b4",
      "schema_url": "https://raw.githubusercontent.com/zarr-conventions/spatial/refs/tags/v1/schema.json",
      "spec_url": "https://github.com/zarr-conventions/spatial/blob/v1/README.md",
      "name": "spatial:",
      "description": "Spatial coordinate information"
    },
    {
      "uuid": "f17cb550-5864-4468-aeb7-f3180cfb622f",
      "schema_url": "https://raw.githubusercontent.com/zarr-experimental/geo-proj/refs/tags/v1/schema.json",
      "spec_url": "https://github.com/zarr-experimental/geo-proj/blob/v1/README.md",
      "name": "proj:",
      "description": "Coordinate reference system information for geospatial data"
    }
  ]
}

Validation

Validate Attributes

from geozarr_toolkit import validate_spatial, validate_proj

# Valid spatial attributes
attrs = {"spatial:dimensions": ["Y", "X"]}
is_valid, errors = validate_spatial(attrs)
print(f"Valid: {is_valid}, Errors: {errors}")

# Invalid - missing required CRS
attrs = {}
is_valid, errors = validate_proj(attrs)
print(f"Valid: {is_valid}, Errors: {errors}")
Valid: True, Errors: []
Valid: False, Errors: ["{'type': 'value_error', 'loc': (), 'msg': 'Value error, At least one of proj:code, proj:wkt2, or proj:projjson must be provided', 'input': {}, 'ctx': {'error': ValueError('At least one of proj:code, proj:wkt2, or proj:projjson must be provided')}, 'url': 'https://errors.pydantic.dev/2.12/v/value_error'}"]

Validate a Zarr Store

import zarr
from geozarr_toolkit import validate_group, detect_conventions

# Open Zarr store
group = zarr.open_group("data.zarr", mode="r")

# Auto-detect and validate
conventions = detect_conventions(dict(group.attrs))
print(f"Detected: {conventions}")

results = validate_group(group)
for conv, errors in results.items():
    if errors:
        print(f"[FAIL] {conv}: {errors}")
    else:
        print(f"[OK] {conv}")

Using the Pydantic Models

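For more control, use the Pydantic models directly. The first example below builds spatial attributes with the Spatial model; the second builds CRS attributes with the Proj model: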

from geozarr_toolkit import Spatial, SpatialConventionMetadata

# Create and validate a Spatial object
spatial = Spatial(**{
    "spatial:dimensions": ["Y", "X"],
    "spatial:transform": [10.0, 0.0, 500000.0, 0.0, -10.0, 5000000.0],
})

# Serialize to dict with aliases
attrs = spatial.model_dump(by_alias=True, exclude_none=True)

# Add zarr_conventions using the convention metadata model
attrs["zarr_conventions"] = [
    SpatialConventionMetadata().model_dump(exclude_none=True)
]

import json
print(json.dumps(attrs, indent=2))
{
  "spatial:dimensions": [
    "Y",
    "X"
  ],
  "spatial:transform_type": "affine",
  "spatial:transform": [
    10.0,
    0.0,
    500000.0,
    0.0,
    -10.0,
    5000000.0
  ],
  "spatial:registration": "pixel",
  "zarr_conventions": [
    {
      "uuid": "689b58e2-cf7b-45e0-9fff-9cfc0883d6b4",
      "schema_url": "https://raw.githubusercontent.com/zarr-conventions/spatial/refs/tags/v1/schema.json",
      "spec_url": "https://github.com/zarr-conventions/spatial/blob/v1/README.md",
      "name": "spatial:",
      "description": "Spatial coordinate information"
    }
  ]
}
from geozarr_toolkit import Proj, ProjConventionMetadata

# Create Proj with validation
proj = Proj(**{"proj:code": "EPSG:4326"})
attrs = proj.model_dump(by_alias=True, exclude_none=True)

# Add zarr_conventions
attrs["zarr_conventions"] = [
    ProjConventionMetadata().model_dump(exclude_none=True)
]

import json
print(json.dumps(attrs, indent=2))
{
  "proj:code": "EPSG:4326",
  "zarr_conventions": [
    {
      "uuid": "f17cb550-5864-4468-aeb7-f3180cfb622f",
      "schema_url": "https://raw.githubusercontent.com/zarr-experimental/geo-proj/refs/tags/v1/schema.json",
      "spec_url": "https://github.com/zarr-experimental/geo-proj/blob/v1/README.md",
      "name": "proj:",
      "description": "Coordinate reference system information for geospatial data"
    }
  ]
}

Next Steps

  • See the API Reference for complete documentation
  • See the CLI Reference for command-line usage