Beyond the copyright cliff of death¶
Most of the newspaper articles on Trove were published before 1955, but there are some from the later period. Let's find out how many, and which newspapers they were published in.
import os
from datetime import datetime
import pandas as pd
import requests
from dotenv import load_dotenv
from IPython.display import FileLink, display
load_dotenv()
True
# Insert your Trove API key
API_KEY = "YOUR API KEY"
# Use api key value from environment variables if it is available
if os.getenv("TROVE_API_KEY"):
    API_KEY = os.getenv("TROVE_API_KEY")
Search for articles published after 1954¶
First we're going to run a date query to find all the articles published after 1954. But instead of looking at the articles themselves, we're going to get the title facet – this will tell us the number of articles for each newspaper.
params = {
"q": "date:[1955 TO *]", # date range query
"category": "newspaper",
"l-artType": "newspaper",
"facet": "title", # get the newspaper facets
"encoding": "json",
"n": 0, # no articles thanks
"key": API_KEY,
}
headers = {"X-API-KEY": API_KEY}
# Make our API request
response = requests.get(
"https://api.trove.nla.gov.au/v3/result", params=params, headers=headers
)
data = response.json()
# Get the facet data
facets = data["category"][0]["facets"]["facet"][0]["term"]
# Convert to a dataframe
df_articles = pd.DataFrame(facets)
# Get rid of some columns
df_articles = df_articles[["count", "search"]]
# Rename columns
df_articles.columns = ["number_of_articles", "id"]
# Change id to string, so we can merge on it later
df_articles["id"] = df_articles["id"].astype("str")
# Preview results
df_articles.head()
| | number_of_articles | id |
|---|---|---|
| 0 | 2567488 | 11 |
| 1 | 573658 | 1685 |
| 2 | 489896 | 370 |
| 3 | 417472 | 1376 |
| 4 | 263618 | 1694 |
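The facet counts are all we need to answer the "how many" question. If you'd like a rough overall total of post-1954 articles, you can just sum the counts – the cast to integers below is a precaution in case the values have come through as strings rather than numbers.

# Rough total of post-1954 articles across all newspapers
# (cast to int first in case the counts arrived as strings)
df_articles["number_of_articles"].astype("int").sum()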
Match the facets with newspapers¶
As you can see from the data above, the title facet only gives us the identifier for a newspaper, not its title or date range. To get more information about each newspaper, we're going to get a list of newspapers from the Trove API and then merge the two datasets.
# Get ALL the newspapers
response = requests.get(
"https://api.trove.nla.gov.au/v3/newspaper/titles",
params={"encoding": "json"},
headers=headers,
)
newspapers_data = response.json()
newspapers = newspapers_data["newspaper"]
# Convert to a dataframe
df_newspapers = pd.DataFrame(newspapers)
# Merge the two dataframes by doing a left join on the 'id' column
df_newspapers_post54 = pd.merge(df_articles, df_newspapers, how="left", on="id")
df_newspapers_post54.head()
| | number_of_articles | id | title | state | issn | troveUrl | startDate | endDate |
|---|---|---|---|---|---|---|---|---|
| 0 | 2567488 | 11 | The Canberra Times (ACT : 1926 - 1995) | ACT | 01576925 | https://nla.gov.au/nla.news-title11 | 1926-09-03 | 1995-12-31 |
| 1 | 573658 | 1685 | The Australian Jewish News (Melbourne, Vic. : ... | Victoria | NDP00187 | https://nla.gov.au/nla.news-title1685 | 1935-05-24 | 1999-12-24 |
| 2 | 489896 | 370 | Port Lincoln Times (SA : 1927 - 1988; 1992 - 2... | South Australia | 13215272 | https://nla.gov.au/nla.news-title370 | 1927-08-05 | 2002-12-31 |
| 3 | 417472 | 1376 | Papua New Guinea Post-Courier (Port Moresby : ... | International | 22087427 | https://nla.gov.au/nla.news-title1376 | 1969-06-30 | 1981-06-30 |
| 4 | 263618 | 1694 | The Australian Jewish Times (Sydney, NSW : 195... | New South Wales | NDP00196 | https://nla.gov.au/nla.news-title1694 | 1953-10-16 | 1990-04-06 |
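Because we did a left join, any facet id that isn't in the newspapers list would end up as a row with missing title and date values. A quick check confirms whether that's happened:

# Count facet rows that didn't match a newspaper record
df_newspapers_post54["title"].isna().sum()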
Results¶
# How many newspapers?
df_newspapers_post54.shape[0]
119
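You could also break the post-1954 articles down by state to see where they're concentrated. The snippet below is just one way of doing this; as before, the counts are cast to integers in case they're stored as strings.

# Number of post-1954 articles per state, largest first
# (cast the counts to int in case they're strings)
df_newspapers_post54.astype({"number_of_articles": "int"}).groupby("state")[
    "number_of_articles"
].sum().sort_values(ascending=False)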
# Reorder columns and save as CSV
csv_file = f"newspapers_post_54_{datetime.now().strftime('%Y%m%d')}.csv"
df_newspapers_post54[
    [
        "title",
        "state",
        "id",
        "startDate",
        "endDate",
        "issn",
        "number_of_articles",
        "troveUrl",
    ]
].to_csv(csv_file, index=False)
# Display a link for easy download
display(FileLink(csv_file))
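If you come back to this dataset later, you can load the saved CSV straight back into a dataframe – remember that the filename includes the date on which it was created.

# Reload the saved CSV for later use
df_saved = pd.read_csv(csv_file)
df_saved.head()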
Created by Tim Sherratt for the GLAM Workbench.
Support this project by becoming a GitHub sponsor.