Create a subset of digitised maps by searching for coordinates
How do you find digitised maps in Trove by their location? Trove provides a very broad 'Place' facet which lets you filter by state or country, but this is based on text values in the spatial
field of the metadata, not on their actual coordinates. The NLA's Mapsearch interface (which is strangely hidden away) does let you search by spatial coordinates, but it returns less than half the digitised maps I found with coordinates in their metadata and doesn't let you download a list of results. My own cluster map of digitised map locations is fun to explore, but doesn't help you create a dataset for further analysis or annotation.
This notebook helps you create subsets of digitised maps by searching for maps whose centre points fall within a specified bounding box. Alternatively, you can search for maps whose bounds intersect with a specified bounding box. It uses a dataset harvested from Trove and processed to find and parse geospatial coordinates. Be warned that the metadata is inconsistent and error-prone. Some maps have no coordinates, some only have centre points, some have bounding boxes. I've tried to clean up some of the obvious problems, but errors will remain.
How to use
- From the menu select 'Run' > 'Run All Cells'
- Scroll down the notebook until you see the map
- click on the map's rectangle button
- click on the map to set the first corner of the bounding box
- drag the bounding box to the desired size
click again to set the other corner of the bounding box
Your dataset will be displayed below the map, as well as links to download it as a CSV file and a GeoJSON file.
import altair as alt
import buckaroo
import geopandas as gpd
import ipywidgets as widgets
import pandas as pd
from ipyleaflet import GeoData, GeomanDrawControl, Map
from IPython.display import HTML
from ipywidgets import Layout
from shapely.errors import GEOSException
from shapely.geometry import Point, Polygon, box
from vega_datasets import data as vega_data
Prepare the data
# Fetch the two harvested CSV files: map metadata (dfm) and parsed coordinates (dfc).
# keep_default_na=False stops pandas converting its default NaN markers
# (such as empty cells) into NaN.
dfm = pd.read_csv(
    "https://raw.githubusercontent.com/GLAM-Workbench/trove-maps-data/main/single_maps.csv",
    keep_default_na=False,
)
dfc = pd.read_csv(
    "https://raw.githubusercontent.com/GLAM-Workbench/trove-maps-data/main/single_maps_coordinates.csv",
    keep_default_na=False,
)

# Attach the metadata to each coordinates row.
# A left join keeps every row of the coordinates dataset.
df = dfc.merge(dfm, how="left", on=["url", "title", "coordinates"])
def create_geometry(row):
    """
    Build a Shapely geometry for a row: a bounding box when the row has an
    "east" value, otherwise a point when it has a "latitude" value.

    Returns None when the row has neither value, or when the box cannot be
    constructed from the row's west/south/east/north values.
    """
    # NOTE(review): the checks below rely on truthiness, so a numeric 0 in
    # "east" or "latitude" is treated the same as a missing value -- confirm
    # whether that is intended for maps on the equator / prime meridian.
    if not row["east"]:
        # No bounding box available, so fall back to the centre point (if any)
        return Point(row["longitude"], row["latitude"]) if row["latitude"] else None
    try:
        # box() expects xmin, ymin, xmax, ymax
        return box(row["west"], row["south"], row["east"], row["north"])
    except (GEOSException, ValueError):
        # Bounds that can't be turned into a box are treated as having no geometry
        return None
def add_centroid(row):
    """
    Return the row's centre as a Shapely Point(longitude, latitude),
    or None when the row's "latitude" value is falsy.
    """
    # NOTE(review): a numeric latitude of 0 is falsy and therefore yields
    # None here -- confirm whether that is intended.
    return Point(row["longitude"], row["latitude"]) if row["latitude"] else None
# Add geometry and centroids to df
df["geometry"] = df.apply(create_geometry, axis=1)
df["centroid"] = df.apply(add_centroid, axis=1)

# Convert to a GeoDataFrame, keeping only the rows that ended up with a geometry
gdf = gpd.GeoDataFrame(df.loc[df["geometry"].notnull()], crs="EPSG:4326")

# Area could be useful in filtering out things like world maps.
# Measure it on a temporary copy reprojected to EPSG:8859 rather than
# reprojecting gdf there and back again: the round trip costs a second
# reprojection and does not return exactly the original coordinates.
# Dividing by 10**6 converts the projected area to square kilometres
# (assumes EPSG:8859 uses metres -- TODO confirm).
gdf["area"] = gdf.to_crs("EPSG:8859").geometry.area / 10**6

# Set copy with geometry set to centroids
# We'll use centroids for the map
gdfc = gdf.set_geometry("centroid")
Create the map interface
# Output widget used to show search results; search_maps() clears it and
# writes the results table and download links into it.
out = widgets.Output()
def save_for_ghap(gdf, file_stub):
    """
    Save search results as a CSV file with GHAP-style column names.

    The "date" column is split on its first hyphen into "DateStart" and
    "DateEnd". Rows whose date has no hyphen get an empty "DateEnd".
    The remaining columns are renamed to Placename, Description, Linkback,
    Latitude and Longitude.

    Parameters:
        gdf: (Geo)DataFrame with the columns title, date, description, url,
            latitude and longitude.
        file_stub: output path without extension; ".csv" is appended.
    """
    df = pd.DataFrame(gdf)
    # Split on the first hyphen only. When no date in this subset contains a
    # hyphen, split() returns a single column, so reindex to guarantee that
    # both the start and end columns exist before they are selected below.
    date_parts = (
        df["date"]
        .str.split("-", n=1, expand=True)
        .reindex(columns=[0, 1])
        .rename(columns={0: "DateStart", 1: "DateEnd"})
    )
    df = df.join(date_parts)
    # Select the output columns in order, then rename without mutating a slice in place
    df = df[
        ["title", "DateStart", "DateEnd", "description", "url", "latitude", "longitude"]
    ].rename(
        columns={
            "title": "Placename",
            "description": "Description",
            "url": "Linkback",
            "latitude": "Latitude",
            "longitude": "Longitude",
        }
    )
    df.to_csv(f"{file_stub}.csv", index=False)
def search_maps(self, action, geo_json):
    """
    Draw-event callback: find the maps inside a newly drawn rectangle,
    save them as GeoJSON and CSV files, and show them below the map.

    Parameters:
        self: first argument supplied by the draw control's callback (unused).
        action: name of the draw event; only "create" triggers a search.
        geo_json: list of GeoJSON features; the first one is read as the
            drawn rectangle.

    The results widget is cleared on every event. If no maps fall inside the
    rectangle a short message is shown and no files are written.
    """
    out.clear_output()
    if action != "create":
        return

    # Get coordinates of bounding box from the outer ring of the drawn polygon.
    # min/max (rather than indexing the sorted unique values) also copes with
    # a rectangle that has zero width or height.
    ring = geo_json[0]["geometry"]["coordinates"][0]
    lons = [point[0] for point in ring]
    lats = [point[1] for point in ring]
    west, east = min(lons), max(lons)
    south, north = min(lats), max(lats)

    # Find maps with centroids in bounding box
    results = gdfc.cx[west:east, south:north]
    # If you want to find intersections between the bounding box and maps bounds
    # use the following instead
    # results = gdf.cx[west:east, south:north]
    results = results.set_geometry("geometry")

    if results.empty:
        # Nothing to describe or save; building the description column and
        # writing files would fail on an empty selection.
        with out:
            display(HTML("<p>No maps found in the selected area.</p>"))
        return

    # File names are built from the bounds, with "." swapped for "-"
    file_stub = "_".join(
        str(v).replace(".", "-") for v in (west, east, south, north)
    )

    # Combine the non-empty descriptive fields into a single description
    description_columns = ["creators", "publication", "extent", "scale"]
    results["description"] = [
        " | ".join(value for value in values if value)
        for values in results[description_columns].values.tolist()
    ]
    results = results[
        ["title", "date", "description", "url", "latitude", "longitude", "geometry"]
    ]
    results.to_file(
        f"{file_stub}.geojson",
        driver="GeoJSON",
    )
    save_for_ghap(results, file_stub)
    with out:
        display(HTML("<h4>Browse results</h4>"))
        display(results)
        display(HTML("<h4>Download results</h4>"))
        display(
            HTML(
                f'<ul><li><a href="{file_stub}.csv" download>Download CSV</a></li>\n<li><a href="{file_stub}.geojson" download>Download GeoJSON</a></li></ul>'
            )
        )
# Base map with a fixed starting centre, zoom level and size
m = Map(
    center=(-25, 135),
    zoom=3,
    prefer_canvas=True,
    layout=Layout(width="800px", height="500px"),
)

# Appearance and behaviour of the rectangle drawing tool
rectangle_options = {
    "pathOptions": {
        "fillColor": "red",
        "color": "red",
        "opacity": 0.5,
        "fillOpacity": 0.2,
    },
    "markerEditable": False,
    "snappable": False,
}

# Drawing toolbar with editing, cutting and rotating switched off
draw_control = GeomanDrawControl(edit=False, cut=False, rotate=False)
draw_control.rectangle = rectangle_options
# The other shapes get empty options
# (presumably this removes them from the toolbar -- confirm against ipyleaflet docs)
for shape in ("polygon", "circlemarker", "polyline"):
    setattr(draw_control, shape, {})
# Run a search whenever a draw event fires
draw_control.on_draw(search_maps)

# Small, semi-transparent blue dots marking each map's centroid
centroid_style = {
    "radius": 2,
    "color": "blue",
    "fillColor": "blue",
    "opacity": 0.3,
    "fillOpacity": 0.3,
    "weight": 0,
}
geo_data = GeoData(
    geo_dataframe=gdfc[["title", "centroid"]],
    point_style=centroid_style,
)

# Add the centroid layer first, then the drawing toolbar
m.add(geo_data)
m.add(draw_control)

# Show the heading, the map, and the results area beneath it
display(HTML("<h2>Select an area</h2>\n<p>Draw a rectangle to select maps from a particular region.</p>"))
display(m)
display(out)
Created by Tim Sherratt for the GLAM Workbench.