Map Trove newspaper results by place of publication¶
In another notebook, I explored some ways in which you could map Trove newspaper results using the state facet. In this notebook we'll go a bit deeper and map the actual locations in which the newspapers returned by our search results were published.
To do this, we'll use the title facet. This returns a list of all the newspapers in our results, and the number of matching articles in each.
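For reference, here's a rough sketch of the bit of the facet data we'll be working with. This is illustrative only: the real response wraps these terms in more layers and includes extra fields, but the two fields shown here are the ones the reformatting code below relies on.
# Illustrative fragment only. Each newspaper in the results appears as a facet
# "term", with the title's numeric id in "search" and the number of matching
# articles in "count".
facet_terms = [
    {"search": "16", "count": 5773},
    {"search": "840", "count": 4857},
]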
You can use this notebook to visualise your own search queries; just edit the search parameters where indicated.
If you haven't used one of these notebooks before, they're basically web pages in which you can write, edit, and run live code. They're meant to encourage experimentation, so don't feel nervous. Just try running a few cells and see what happens!
Some tips:
- Code cells have boxes around them.
- To run a code cell click on the cell and then hit Shift+Enter. The Shift+Enter combo will also move you to the next cell, so it's a quick way to work through the notebook.
- While a cell is running, a * appears in the square brackets next to the cell. Once the cell has finished running, the asterisk will be replaced with a number.
- In most cases you'll want to start from the top of the notebook and work your way down, running each cell in turn. Later cells might depend on the results of earlier ones.
- To edit a code cell, just click on it and type stuff. Remember to run the cell once you've finished editing.
Setting things up¶
First we'll import the packages we need.
# Import the libraries we need
import os
import altair as alt
import folium
import pandas as pd
import requests
from dotenv import load_dotenv
from folium.plugins import HeatMap, MarkerCluster
load_dotenv()
True
You need an API key to get data from Trove. Insert your key below.
# Insert your Trove API key
API_KEY = "YOUR API KEY"
# Use the API key value from environment variables if it is available
if os.getenv("TROVE_API_KEY"):
    API_KEY = os.getenv("TROVE_API_KEY")
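The load_dotenv() call above means you can also keep your key out of the notebook entirely: create a plain text file named .env in the same directory as this notebook and it will be read automatically. The file just needs a single line like this (replacing the placeholder with your own key):
TROVE_API_KEY=yourkeyhere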
Set up some default parameters for our API query.
# Set up default parameters for our API query
# <-- Click the run icon
params = {
    "category": "newspaper",  # search the newspapers & gazettes category
    "l-artType": "newspaper",  # limit the results to newspapers (not gazettes)
    "encoding": "json",  # ask for JSON rather than XML
    "facet": "title",  # group the results by newspaper title
    "n": 0,  # we don't need any articles, just the facet counts
}
headers = {"X-API-KEY": API_KEY}
API_URL = "https://api.trove.nla.gov.au/v3/result"
Construct your search¶
This is where you set your search keywords. Change 'weather AND wragge date:[* TO 1954]' in the cell below to anything you might enter in the Trove simple search box. For example:
params['q'] = 'weather AND wragge'
params['q'] = '"Clement Wragge"'
params['q'] = 'text:"White Australia Policy"'
params['q'] = 'weather AND date:[1890-01-01T00:00:00Z TO 1920-12-11T00:00:00Z]'
You can also limit the results to specific categories. To only search for articles, include this line:
params['l-category'] = 'Article'
# Enter your search parameters
# This can be anything you'd enter in the Trove simple search box
params["q"] = "weather AND wragge date:[* TO 1954]"
# Remove the "#" symbol from the line below to limit the results to the article category
# params['l-category'] = 'Article'
# <-- Click the run icon
response = requests.get(API_URL, params=params, headers=headers)
data = response.json()
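If something has gone wrong at this point (a mistyped API key, for example), the cells below will fail in confusing ways, so it can be worth checking that the request actually succeeded. A minimal check:
# Raise an error now if Trove returned an HTTP error code
# (for example, if the API key wasn't accepted)
response.raise_for_status()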
Reformat the results¶
# <-- Click the run icon
def format_facets(data):
    # Get the facet terms from the JSON response
    facets = data["category"][0]["facets"]["facet"][0]["term"]
    # Convert to a dataframe
    df = pd.DataFrame(facets)
    # We only need the title id and the number of matching articles
    df = df[["search", "count"]]
    # Rename the columns
    df.columns = ["title_id", "total"]
    # Make sure the values are integers
    df["title_id"] = df["title_id"].astype("Int64")
    df["total"] = df["total"].astype("Int64")
    return df
df = format_facets(data)
df.head()
|  | title_id | total |
|---|---|---|
| 0 | 16 | 5773 |
| 1 | 840 | 4857 |
| 2 | 10 | 2178 |
| 3 | 35 | 1745 |
| 4 | 508 | 1688 |
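At this point df has one row for each newspaper title that matched our search. A couple of quick sanity checks you might find useful:
# How many different newspapers had matching articles?
print(len(df))
# How many matching articles were there across all newspapers?
print(df["total"].sum())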
Load location data¶
I've previously created a CSV file that provides geolocated places of publication for newspapers in Trove. Some newspapers are associated with multiple places (for example, a cluster of nearby country towns), so the CSV file can contain multiple rows for a single newspaper title. Note also that any newspapers added to Trove since I last harvested the locations in April 2018 will be missing from the data.
We're going to merge the facets data with my geolocated titles file, matching on the title_id. We'll only take the first matching row from the geolocated data.
# Get the geolocated data
locations = pd.read_csv(
"data/trove-newspaper-titles-locations.csv", dtype={"title_id": "Int64"}
)
# Only keep the first instance of each title
locations.drop_duplicates(subset=["title_id"], keep="first", inplace=True)
# Merge the facets and the geolocated data
df_located = pd.merge(df, locations, on="title_id", how="left")
df_located.head()
|  | title_id | total | newspaper_title | state | place_id | place | latitude | longitude |
|---|---|---|---|---|---|---|---|---|
| 0 | 16 | 5773 | The Brisbane Courier (Qld. : 1864 - 1933) | QLD | QLD4555 | Brisbane | -27.467848 | 153.028013 |
| 1 | 840 | 4857 | The Telegraph (Brisbane, Qld. : 1872 - 1947) | QLD | QLD4555 | Brisbane | -27.467848 | 153.028013 |
| 2 | 10 | 2178 | The Mercury (Hobart, Tas. : 1860 - 1954) | TAS | TAS00752 | Hobart | -42.880001 | 147.320007 |
| 3 | 35 | 1745 | The Sydney Morning Herald (NSW : 1842 - 1954) | NSW | NSW79218 | Sydney | -33.873200 | 151.209600 |
| 4 | 508 | 1688 | Evening News (Sydney, NSW : 1869 - 1931) | NSW | NSW79218 | Sydney | -33.873200 | 151.209600 |
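Because this is a 'left' merge, any titles that aren't in the locations file (such as newspapers added to Trove after April 2018) end up with empty coordinates, and they'll be silently left out when we group by place below. If you want to know how much is missing, something like this should work:
# Count the titles that didn't match anything in the locations file
unlocated = df_located["latitude"].isna()
print(f"{unlocated.sum()} of {len(df_located)} titles have no location data")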
Display top 20 newspapers¶
Now that we have titles for our newspaper facets, let's chart the top twenty.
alt.Chart(df_located[:20]).mark_bar().encode(
    y=alt.Y(
        "newspaper_title:N", sort=df_located["newspaper_title"][:20].tolist(), title=""
    ),
    x=alt.X("total:Q", title="Number of articles"),
)
Map places of publication¶
More than one newspaper can be associated with a place, so rather than map individual newspapers, we'll group them by place.
# Group newspapers by place
df_places = df_located.groupby(["place", "latitude", "longitude"])
Here's the fun part. We'll create a map, then we'll loop through the places, getting the total number of articles from all the grouped newspapers.
m = folium.Map(location=[-30, 135], zoom_start=4)
# We'll cluster the markers for better readability
marker_cluster = MarkerCluster().add_to(m)
for place, group in df_places:
    # Get the total articles from the grouped titles
    total = group["total"].sum()
    # Turn all the grouped title_ids into a string that we can use in a Trove search url
    titles = group["title_id"].astype("str").str.cat(sep="&l-title=")
    # Create the content of the marker popup -- includes a search link back to Trove!
    html = '<b>{}</b><br><a target="_blank" href="https://trove.nla.gov.au/newspaper/result?q={}&l-title={}&l-category={}">{} articles</a>'.format(
        place[0], params["q"], titles, params.get("l-category", ""), total
    )
    # Add the marker to the map
    folium.Marker([place[1], place[2]], popup=html).add_to(marker_cluster)
m
Play around with the map. Note the link on the total number of articles in the pop-ups — it should open Trove and find the matching articles!
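Folium maps display inline in the notebook, but you can also save one as a standalone HTML file to share or embed elsewhere. The filename here is just an example:
# Save the map as a self-contained HTML file
m.save("trove-places.html")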
Make a heatmap¶
The map above is great for browsing, but doesn't give much of a sense of the number of results in each place. Let's try creating a heatmap instead.
To populate a heatmap we just need a list of coordinates — one set of coordinates for each article.
# Get the total number of articles for each place
df_totals = df_places.sum()
locations = []
# Loop through the places
for place in df_totals.index:
    # Get the total
    total = df_totals.loc[place]["total"]
    # Add the coordinates of the place to the list of locations as many times as there are articles
    locations += [[place[1], place[2]]] * total
# Create another map
m2 = folium.Map(location=[-30, 135], zoom_start=4)
# Add the heatmap data!
HeatMap(locations).add_to(m2)
m2
That's looking pretty interesting. Hmmmm, it would be nice if we could animate this through time, but we'd need more data. Perhaps a future notebook topic?
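For what it's worth, folium does include a HeatMapWithTime plugin that animates a series of heatmap frames, so the missing piece is really the data: you'd need to harvest a separate set of coordinates for each time period. Here's a minimal sketch of how the plugin fits together, using made-up coordinates rather than real harvested data:
from folium.plugins import HeatMapWithTime

# Hypothetical data: one list of [latitude, longitude] pairs per time period
coords_by_year = [
    [[-27.47, 153.03], [-33.87, 151.21]],  # e.g. articles from 1900
    [[-42.88, 147.32], [-33.87, 151.21]],  # e.g. articles from 1901
]
m3 = folium.Map(location=[-30, 135], zoom_start=4)
# One heatmap frame per entry in coords_by_year, labelled by the index values
HeatMapWithTime(coords_by_year, index=["1900", "1901"]).add_to(m3)
m3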
Created by Tim Sherratt for the GLAM Workbench.
Support this project by becoming a GitHub sponsor.