Map Trove newspaper results by place of publication over time

In another notebook, I constructed a heatmap displaying the places of publication of articles returned by a search of Trove's newspaper category.

I suggested that it would be interesting to visualise changes over time. This notebook does just that by creating an animated heatmap.

The key difference here is that instead of making and processing a single Trove API request, we'll need to fire off a series of API requests, one for each time interval.

You can use this notebook to visualise your own search queries; just edit the search parameters where indicated.

If you haven't used one of these notebooks before, they're basically web pages in which you can write, edit, and run live code. They're meant to encourage experimentation, so don't feel nervous. Just try running a few cells and see what happens!

Some tips:

  • Code cells have boxes around them.
  • To run a code cell, click on the cell and then hit Shift+Enter. The Shift+Enter combo will also move you to the next cell, so it's a quick way to work through the notebook.
  • While a cell is running, a * appears in the square brackets next to the cell. Once the cell has finished running, the asterisk will be replaced with a number.
  • In most cases you'll want to start from the top of the notebook and work your way down, running each cell in turn. Later cells might depend on the results of earlier ones.
  • To edit a code cell, just click on it and type stuff. Remember to run the cell once you've finished editing.

Setting things up

First we'll import the packages we need.

In [11]:
# Import the libraries we need
import os

import folium
import pandas as pd
import requests
from dotenv import load_dotenv
from folium.plugins import HeatMapWithTime
from IPython.display import display
from tqdm.auto import tqdm

load_dotenv()
Out[11]:
True

You need an API key to get data from Trove. Insert your key below.

In [12]:
# Insert your Trove API key
API_KEY = "YOUR API KEY"

# Use api key value from environment variables if it is available
if os.getenv("TROVE_API_KEY"):
    API_KEY = os.getenv("TROVE_API_KEY")
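
If you'd rather not paste your key into the notebook, you can save it in a file named .env in the same directory, and the load_dotenv() call above will load it automatically. The file just needs a single line (replace the placeholder with your actual key):

TROVE_API_KEY=YOUR API KEY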

Set up some default parameters for our API query.

In [13]:
# Set up default parameters for our API query
params = {
    "category": "newspaper",
    "l-artType": "newspaper",
    "encoding": "json",
    "facet": "title",
    "n": 0,
}

headers = {"X-API-KEY": API_KEY}
API_URL = "http://api.trove.nla.gov.au/v3/result"
In [14]:
def format_facets(data):
    """
    Extract and normalise the facet data
    """
    # Check to make sure we have results
    try:
        facets = data["category"][0]["facets"]["facet"][0]["term"]
    except (TypeError, KeyError):
        # No results!
        raise
    else:
        # Convert to DataFrame
        df = pd.DataFrame(facets)
        # Select the columns we want
        df = df[["search", "count"]]
        # Rename the columns
        df.columns = ["title_id", "total"]
        # Make sure both columns are numeric
        df["title_id"] = df["title_id"].astype("Int64")
        df["total"] = df["total"].astype("Int64")
    return df


def prepare_data(data):
    """
    Reformat the facet data, merge with locations, and then generate a list of locations.
    """
    # Check for results
    try:
        df = format_facets(data)
    except (TypeError, KeyError):
        # If there are no results, just use an empty list
        hm_data = []
    else:
        # Merge facets data with geolocated list of titles
        df_located = pd.merge(df, locations, on="title_id", how="left")
        # Group results by place and calculate the total results for each
        # (numeric_only avoids errors if the locations data includes text columns)
        df_totals = df_located.groupby(["place", "latitude", "longitude"]).sum(numeric_only=True)
        hm_data = []
        for place in df_totals.index:
            # Get the total
            total = int(df_totals.loc[place]["total"])
            # Add the coordinates of the place to the list of locations as many times as there are articles
            hm_data += [[place[1], place[2]]] * total
    return hm_data


# Get the geolocated titles data
locations = pd.read_csv(
    "data/trove-newspaper-titles-locations.csv", dtype={"title_id": "int64"}
)
# Only keep the first instance of each title
locations.drop_duplicates(subset=["title_id"], keep="first", inplace=True)
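
If you want to check what format_facets() does, you can feed it a hand-made response. The nested structure below mirrors the path unpacked in the function; the title ids and counts are invented:

# A mocked API response (structure as unpacked in format_facets; values invented)
sample_data = {
    "category": [
        {
            "facets": {
                "facet": [
                    {
                        "term": [
                            {"search": "11", "count": 42},
                            {"search": "35", "count": 7},
                        ]
                    }
                ]
            }
        }
    ]
}

# Should return a two-row DataFrame with title_id and total columns
format_facets(sample_data)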

Construct your search

This is where you set your search keywords. Change the value of params['q'] in the cell below to anything you might enter in the Trove simple search box. Don't include a date range, as we'll be handling that separately. For example:

params['q'] = 'weather AND wragge'

params['q'] = '"Clement Wragge"'

params['q'] = 'text:"White Australia Policy"'

You can also limit the results to specific categories. To only search for articles, include this line:

params['l-category'] = 'Article'

In [15]:
# Enter your search parameters
# This can be anything you'd enter in the Trove simple search box
params["q"] = 'text:"White Australia"'

# Remove the "#" symbol from the line below to limit the results to the article category
# params['l-category'] = 'Article'

Set your date range

In this example we'll use years as our time interval. We could easily change this to months, or even individual days for a fine-grained analysis.

In [16]:
start_year = 1880
end_year = 1950
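
If you wanted a finer-grained series, you could loop over months as well as years. Here's a sketch, assuming the l-month facet works as documented for newspapers (it needs l-year and l-decade to be set as well); the function name is just for illustration:

# Sketch of a month-by-month harvest (function name is illustrative only)
def harvest_by_month(start_year, end_year):
    series = []
    index = []
    for year in range(start_year, end_year + 1):
        for month in range(1, 13):
            params["l-decade"] = str(year)[:3]
            params["l-year"] = year
            params["l-month"] = month
            response = requests.get(API_URL, params=params, headers=headers)
            series.append(prepare_data(response.json()))
            # Label each dataset with a YYYY-MM string
            index.append(f"{year}-{month:02}")
    return series, index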

Get the data from Trove

We need to make an API request for each year in our date range, so we'll construct a loop.

The cell below generates two lists. The first, hm_series, is a list containing the data from each API request. The second, time_index, is a list of the years we're getting data for. Obviously these two lists should be the same length — one dataset for each year.

In [ ]:
hm_series = []
time_index = []
for year in tqdm(range(start_year, end_year + 1)):
    time_index.append(year)
    # The l-year facet has to be applied together with l-decade,
    # so get the decade (the first three digits of the year)
    decade = str(year)[:3]
    params["l-decade"] = decade
    params["l-year"] = year
    response = requests.get(API_URL, params=params, headers=headers)
    data = response.json()
    hm_data = prepare_data(data)
    hm_series.append(hm_data)
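
Seventy-odd years means seventy-odd API requests, so it's worth saving the harvested data before you start experimenting with the map. A minimal sketch that writes both lists to a JSON file (the filename is just an example):

import json

# Save the harvested data so the loop doesn't need to be re-run
# (default=float converts any numpy values the json module can't serialise)
with open("hm_series.json", "w") as f:
    json.dump({"series": hm_series, "index": time_index}, f, default=float)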

Make an animated heatmap

To create an animated heatmap, we just need to feed HeatMapWithTime the hm_series data and the time_index list.

In [18]:
# Create the map
m = folium.Map(location=[-30, 135], zoom_start=4)

# Add the heatmap data!
HeatMapWithTime(hm_series, index=time_index, auto_play=True).add_to(m)
Out[18]:
<folium.plugins.heat_map_withtime.HeatMapWithTime at 0x7761d71a03d0>

Search for "White Australia" from 1880 to 1950¶

In [19]:
# <-- Click the run icon
display(m)
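
If the map doesn't display in the notebook, or you want a version you can share, you can save it as a standalone HTML file (the filename is just an example):

m.save("heatmap.html")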

Created by Tim Sherratt for the GLAM Workbench.
Support this project by becoming a GitHub sponsor.