{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "f72daea68b7013b9",
   "metadata": {},
   "source": "# PyArrow Demo"
  },
  {
   "cell_type": "code",
   "id": "21037cce3cad568c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-01T01:36:27.380008892Z",
     "start_time": "2026-05-01T01:36:27.059443693Z"
    }
   },
   "source": [
    "import footbridge as ft\n",
    "import pyarrow as pa\n",
    "import pyarrow.compute as pc"
   ],
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "ERROR 1: PROJ: proj_create_from_database: Open of /home/worker/footbridge/.pixi/envs/dev/share/proj failed\n"
     ]
    }
   ],
   "execution_count": 1
  },
  {
   "cell_type": "code",
   "id": "e28154137bdf7d42",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-01T01:36:39.150343635Z",
     "start_time": "2026-05-01T01:36:27.381695608Z"
    }
   },
   "source": [
    "# Sample data: the US National Highway System (NHS) geodatabase, published at\n",
    "# https://hepgis-usdot.hub.arcgis.com/datasets/dce9f09392eb474c8ad8e6a78416279b_0\n",
    "# The NHS.gdb path is relative, so it is resolved against the kernel's working directory.\n",
    "\n",
    "# Opening the geodatabase is the slow step of this notebook; expect a noticeable pause.\n",
    "gdb = ft.GeoDatabase(\"NHS.gdb\")\n",
    "\n",
    "# These names are the keys used to index gdb in the next cell.\n",
    "gdb.fc_names"
   ],
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['National_Highway_System__NHS_']"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 2
  },
  {
   "cell_type": "code",
   "id": "7ec89a62104fb109",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-01T01:36:39.249525301Z",
     "start_time": "2026-05-01T01:36:39.201982175Z"
    }
   },
   "source": [
    "# Pick the single entry listed by gdb.fc_names and report its length.\n",
    "fc = gdb[\"National_Highway_System__NHS_\"]\n",
    "len(fc)"
   ],
   "outputs": [
    {
     "data": {
      "text/plain": [
       "492005"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 3
  },
  {
   "cell_type": "code",
   "id": "f6da27eb099f863e",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-01T01:36:39.973655315Z",
     "start_time": "2026-05-01T01:36:39.250179768Z"
    }
   },
   "source": [
    "# Export the feature class's dataframe to Arrow and materialise the export as a\n",
    "# pyarrow.Table, which is what the pyarrow.compute calls below operate on.\n",
    "# NOTE(review): fc.gdf is presumably a geopandas GeoDataFrame - confirm against footbridge.\n",
    "arrow_table = pa.table(fc.gdf.to_arrow())"
   ],
   "outputs": [],
   "execution_count": 4
  },
  {
   "cell_type": "code",
   "id": "3c070bdbb2060025",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2026-05-01T01:36:40.069522036Z",
     "start_time": "2026-05-01T01:36:40.021878737Z"
    }
   },
   "source": [
    "# Mean absolute gap between the milepost span (ENDPOINT - BEGINPOINT) and the listed MILES.\n",
    "# Every step is a column-wise pyarrow.compute call, so this runs very fast.\n",
    "begin_point = arrow_table[\"BEGINPOINT\"]\n",
    "end_point = arrow_table[\"ENDPOINT\"]\n",
    "listed_miles = arrow_table[\"MILES\"]\n",
    "\n",
    "point_mileage = pc.subtract(end_point, begin_point)\n",
    "signed_delta = pc.subtract(point_mileage, listed_miles)\n",
    "mileage_delta = pc.abs(signed_delta)\n",
    "mean_difference = pc.mean(mileage_delta).as_py()\n",
    "\n",
    "# Reported as a whole number of tenths of a mile.\n",
    "f\"{round(mean_difference * 10)} tenths of a mile average difference (distance between mileposts and stated mileage)\""
   ],
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'2 tenths of a mile average difference (distance between mileposts and stated mileage)'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 5
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}