PyArrow Demo

[1]:
import footbridge as ft
import pyarrow as pa
import pyarrow.compute as pc
ERROR 1: PROJ: proj_create_from_database: Open of /home/worker/footbridge/.pixi/envs/dev/share/proj failed
[2]:
# Data source: sample dataset of the US National Highway System (NHS)
# https://hepgis-usdot.hub.arcgis.com/datasets/dce9f09392eb474c8ad8e6a78416279b_0
NHS_GDB_PATH = "NHS.gdb"

# Opening the geodatabase can take a while.
gdb = ft.GeoDatabase(NHS_GDB_PATH)

# Display the names reported by the geodatabase as the cell output.
gdb.fc_names
[2]:
['National_Highway_System__NHS_']
[3]:
# Select the NHS entry from the geodatabase by name, then show its length.
NHS_FC_NAME = "National_Highway_System__NHS_"
fc = gdb[NHS_FC_NAME]
len(fc)
[3]:
492005
[4]:
# Export the data behind `fc.gdf` to Arrow and wrap the export in a
# pyarrow Table in one step, so `arrow_table` only ever names the final Table.
# NOTE(review): presumably `fc.gdf` is a GeoDataFrame whose `to_arrow()` result
# is accepted by `pa.table()` -- confirm against the footbridge/geopandas docs.
arrow_table = pa.table(fc.gdf.to_arrow())
[5]:
# This runs very fast: each pyarrow.compute call below is applied to a whole column.
# Goal: the average absolute difference between the distance implied by the
# begin/end points and the listed distance in the MILES column.
begin_point = arrow_table["BEGINPOINT"]
end_point = arrow_table["ENDPOINT"]
listed_miles = arrow_table["MILES"]

# Distance implied by the begin/end point values of each row.
point_mileage = pc.subtract(end_point, begin_point)

# Absolute disagreement between the implied distance and the listed one.
mileage_delta = pc.abs(pc.subtract(point_mileage, listed_miles))

# Collapse to one Python number; the f-string below reports it in tenths of a mile.
mean_difference = pc.mean(mileage_delta).as_py()

f"{round(mean_difference * 10)} tenths of a mile average difference (distance between mileposts and stated mileage)"
[5]:
'2 tenths of a mile average difference (distance between mileposts and stated mileage)'