Corrections of OCRd text in Trove's newspapers¶

The full text of newspaper articles in Trove is extracted from page images using Optical Character Recognition (OCR). The accuracy of the OCR process is influenced by a range of factors including the font and the quality of the images. Many errors slip through. Volunteers have done a remarkable job in correcting these errors, but it's a huge task. This notebook explores the scale of OCR correction in Trove.

There are two ways of getting data about OCR corrections using the Trove API. To get aggregate data you can include has:corrections in your query to limit the results to articles that have at least one OCR correction.
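
For example, here's a minimal sketch of such a query (assuming the requests package and a valid Trove API key; the same request is built up more carefully below):

import requests

# Limit a newspaper search to articles with at least one correction.
# Setting n=0 means we just get the total, not the records themselves.
response = requests.get(
    "https://api.trove.nla.gov.au/v3/result",
    params={
        "q": "has:corrections",
        "category": "newspaper",
        "encoding": "json",
        "n": 0,
    },
    headers={"X-API-KEY": "YOUR API KEY"},
)
print(response.json()["category"][0]["records"]["total"])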

To get information about corrections made to individual articles in your results, you can add the reclevel=full parameter; each article record will then include a count of corrections and details of the most recent correction. For example, note the correctionCount and lastCorrection values in the record below:

{
    "article": {
        "id": "41697877",
        "url": "/newspaper/41697877",
        "heading": "WRAGGE AND WEATHER CYCLES.",
        "category": "Article",
        "title": {
            "id": "101",
            "value": "Western Mail (Perth, WA : 1885 - 1954)"
        },
        "date": "1922-11-23",
        "page": 4,
        "pageSequence": 4,
        "troveUrl": "https://trove.nla.gov.au/ndp/del/article/41697877",
        "illustrated": "N",
        "wordCount": 1054,
        "correctionCount": 1,
        "listCount": 0,
        "tagCount": 0,
        "commentCount": 0,
        "lastCorrection": {
            "by": "*anon*",
            "lastupdated": "2016-09-12T07:08:57Z"
        },
        "identifier": "https://nla.gov.au/nla.news-article41697877",
        "trovePageUrl": "https://trove.nla.gov.au/ndp/del/page/3522839",
        "pdf": "https://trove.nla.gov.au/ndp/imageservice/nla.news-page3522839/print"
    }
}
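
Here's a sketch of the sort of request that returns records like the one above (again assuming the requests package and an API key; the query itself is hypothetical, and article records are assumed to sit under category[0]['records']['article'] as in other v3 search responses):

import requests

# Ask for full-level records so each article includes correctionCount
# and, where the article has been corrected, lastCorrection.
data = requests.get(
    "https://api.trove.nla.gov.au/v3/result",
    params={
        "q": "wragge weather cycles",  # hypothetical query for illustration
        "category": "newspaper",
        "reclevel": "full",
        "encoding": "json",
        "n": 1,
    },
    headers={"X-API-KEY": "YOUR API KEY"},
).json()

article = data["category"][0]["records"]["article"][0]
print(article.get("correctionCount", 0))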

Setting things up¶

In [3]:
import os
from datetime import datetime
from operator import itemgetter  # used for sorting

import altair as alt
import pandas as pd  # makes manipulating the data easier
import requests
from dotenv import load_dotenv
from IPython.display import FileLink, clear_output, display
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
from tqdm.auto import tqdm

# Make sure data directory exists
os.makedirs("data", exist_ok=True)

# Create a session that will automatically retry on server errors
s = requests.Session()
retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
s.mount("http://", HTTPAdapter(max_retries=retries))
s.mount("https://", HTTPAdapter(max_retries=retries))

load_dotenv()
Out[3]:
True
In [4]:
# Insert your Trove API key
API_KEY = "YOUR API KEY"

# Use api key value from environment variables if it is available
if os.getenv("TROVE_API_KEY"):
    API_KEY = os.getenv("TROVE_API_KEY")
In [5]:
# Basic parameters for Trove API
params = {
    "facet": "year",  # Get the data aggregated by year.
    "category": "newspaper",
    "l-artType": "newspaper",
    "encoding": "json",
    "n": 0,  # We don't need any records, just the facets!
}

headers = {"X-API-KEY": API_KEY}
In [6]:
def get_results(params):
    """
    Get JSON response data from the Trove API.
    Parameters:
        params
    Returns:
        JSON formatted response data from Trove API
    """
    response = s.get(
        "https://api.trove.nla.gov.au/v3/result",
        params=params,
        headers=headers,
        timeout=30,
    )
    response.raise_for_status()
    # print(response.url) # This shows us the url that's sent to the API
    data = response.json()
    return data

How many newspaper articles have corrections?¶

Let's find out what proportion of newspaper articles have at least one OCR correction.

First we'll get the total number of newspaper articles in Trove.

In [7]:
# We don't need to set the q parameter here -- leaving it out returns everything
# (setting it to a single space, params["q"] = " ", has the same effect)

# Get the data from the API
data = get_results(params)

# Extract the total number of results
total = int(data["category"][0]["records"]["total"])
print("{:,}".format(total))
249,805,769

Now we'll set the q parameter to has:corrections to limit the results to newspaper articles that have at least one correction.

In [8]:
# Set the q parameter to 'has:corrections' to limit results to articles with corrections
params["q"] = "has:corrections"

# Get the data from the API
data = get_results(params)

# Extract the total number of results
corrected = int(data["category"][0]["records"]["total"])
print("{:,}".format(corrected))
15,947,858

Calculate the proportion of articles with corrections.

In [9]:
print("{:.2%} of articles have at least one correction".format(corrected / total))
6.38% of articles have at least one correction

You might be thinking that these figures don't seem to match the number of corrections by individuals displayed on the digitised newspapers home page. Remember that these figures show the number of articles that include corrections, while the individual scores show the number of lines corrected by each volunteer.

Number of corrections by year¶

In [10]:
def get_facets(data):
    """
    Loop through facets in Trove API response, saving terms and counts.
    Parameters:
        data  - JSON formatted response data from Trove API
    Returns:
        A list of dictionaries containing: 'term', 'total_results'
    """
    facets = []
    try:
        # The facets are buried a fair way down in the results
        # Note that if you ask for more than one facet, you'll have to use the facet's 'name' value to find the one you want (see the helper sketched below)
        # In this case there's only one facet, so we can just grab the list of terms (which are in fact the results by year)
        for term in data["category"][0]["facets"]["facet"][0]["term"]:

            # Get the year and the number of results, converting the count to an integer, before adding to our results
            facets.append({"term": term["search"], "total_results": int(term["count"])})

        # Sort facets by year
        facets.sort(key=itemgetter("term"))
    except TypeError:
        pass
    return facets

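# Note: this notebook only ever asks for one facet at a time. If you request
# several facets, you can pick out the one you want by checking each facet's
# 'name' value -- a minimal (hypothetical) helper along these lines:
def get_named_facet(data, name):
    """Return the list of terms for the facet with the given name (eg 'year')."""
    for facet in data["category"][0]["facets"]["facet"]:
        if facet.get("name") == name:
            return facet["term"]
    return []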

def get_facet_data(params, start_decade=180, end_decade=201):
    """
    Loop through the decades from 'start_decade' to 'end_decade',
    getting the number of search results for each year from the year facet.
    Combine all the results into a single list.
    Parameters:
        params - parameters to send to the API
        start_decade - first decade to harvest, in Trove's format (eg 180 for the 1800s)
        end_decade - last decade to harvest, in Trove's format (eg 201 for the 2010s)
    Returns:
        A list of dictionaries containing 'year', 'total_results' for the complete
        period between the start and end decades.
    """
    # Create a list to hold the facets data
    facet_data = []

    # Loop through the decades
    for decade in tqdm(range(start_decade, end_decade + 1)):

        # print(params)
        # Avoid confusion by copying the params before we change anything.
        search_params = params.copy()

        # Add decade value to params
        search_params["l-decade"] = decade

        # Get the data from the API
        data = get_results(search_params)

        # Get the facets from the data and add to facets_data
        facet_data += get_facets(data)

    # Remove the progress bar (you can also set leave=False in tqdm, but that still leaves white space in Jupyter Lab)
    clear_output()
    return facet_data
In [11]:
facet_data = get_facet_data(params)
In [12]:
# Convert our data to a dataframe called df
df = pd.DataFrame(facet_data)
In [13]:
df.head()
Out[13]:
term total_results
0 1803 526
1 1804 619
2 1805 430
3 1806 367
4 1807 134

So which year has the most corrections?

In [14]:
df.loc[df["total_results"].idxmax()]
Out[14]:
term               1915
total_results    311043
Name: 112, dtype: object

The fact that there are more corrections in newspaper articles from 1915 might make you think that people have been particularly motivated to correct articles relating to WWI. But if you look at the total number of articles per year, you'll see that more articles have been digitised from 1915 than from most other years! The raw number of corrections is probably not very useful, so let's look instead at the proportion of articles from each year that have at least one correction.

To do that we'll re-harvest the facet data, but this time with a blank, or empty search, to get the total number of articles available from each year.

In [15]:
# Reset the 'q' parameter
# Use an empty search to get ALL THE ARTICLES
params["q"] = ""

# Get facet data for all articles
all_facet_data = get_facet_data(params)
In [16]:
# Convert the results to a dataframe
df_total = pd.DataFrame(all_facet_data)

Now we'll merge the yearly counts of articles with corrections with the total number of articles per year. Then we'll calculate the proportion with corrections.

In [17]:
def merge_df_with_total(df, df_total, how="left"):
    """
    Merge dataframes containing search results with the total number of articles by year.
    This is a join (left by default) on the 'term' (year) column. The total number of articles
    will be added as a column to the existing results.
    Once merged, do some reorganisation and calculate the proportion of search results.
    Parameters:
        df - the search results in a dataframe
        df_total - total number of articles per year in a dataframe
    Returns:
        A dataframe with the following columns - 'term', 'total_results', 'total_articles',
        'proportion' (plus any other columns that are in the search results dataframe).
    """
    # Merge the two dataframes, joining on the 'term' (year) column
    df_merged = pd.merge(df, df_total, how=how, on="term")

    # Rename the columns for convenience
    df_merged.rename(
        {"total_results_y": "total_articles"}, inplace=True, axis="columns"
    )
    df_merged.rename({"total_results_x": "total_results"}, inplace=True, axis="columns")

    # Set blank values to zero to avoid problems
    df_merged["total_results"] = df_merged["total_results"].fillna(0).astype(int)

    # Calculate proportion by dividing the search results by the total articles
    df_merged["proportion"] = df_merged["total_results"] / df_merged["total_articles"]
    return df_merged
In [18]:
# Merge the search results with the total articles
df_merged = merge_df_with_total(df, df_total)
df_merged.head()
Out[18]:
term total_results total_articles proportion
0 1803 526 526 1.0
1 1804 619 619 1.0
2 1805 430 430 1.0
3 1806 367 367 1.0
4 1807 134 134 1.0

Let's visualise the results, showing both the number of articles with corrections each year, and the proportion of articles each year with corrections.

In [19]:
# Number of articles with corrections
chart1 = (
    alt.Chart(df_merged)
    .mark_line(point=True)
    .encode(
        x=alt.X("term:Q", axis=alt.Axis(format="c", title="Year")),
        y=alt.Y(
            "total_results:Q",
            axis=alt.Axis(format=",d", title="Number of articles with corrections"),
        ),
        tooltip=[
            alt.Tooltip("term:Q", title="Year"),
            alt.Tooltip("total_results:Q", title="Articles", format=","),
        ],
    )
    .properties(width=700, height=250)
)

# Proportion of articles with corrections
chart2 = (
    alt.Chart(df_merged)
    .mark_line(point=True, color="red")
    .encode(
        x=alt.X("term:Q", axis=alt.Axis(format="c", title="Year")),
        # This time we're showing the proportion (formatted as a percentage) on the Y axis
        y=alt.Y(
            "proportion:Q",
            axis=alt.Axis(format="%", title="Proportion of articles with corrections"),
        ),
        tooltip=[
            alt.Tooltip("term:Q", title="Year"),
            alt.Tooltip("proportion:Q", title="Proportion", format="%"),
        ],
        # Make the charts different colors
        color=alt.value("orange"),
    )
    .properties(width=700, height=250)
)

# This is a shorthand way of stacking the charts on top of each other
chart1 & chart2
Out[19]:

This is really interesting – it seems there's been a deliberate effort to get the earliest newspapers corrected.

In [20]:
df_merged.to_csv(
    f"corrections_by_year_{datetime.now().strftime('%Y%m%d')}.csv", index=False
)

Number of corrections by category¶

Let's see how the number of corrections varies across categories. This time we'll use the category facet instead of year.

In [21]:
params["q"] = "has:corrections"
params["facet"] = "category"
In [22]:
data = get_results(params)
facets = []
for term in data["category"][0]["facets"]["facet"][0]["term"]:
    # Get the category and the number of results, converting the count to an integer, before adding to our results
    facets.append({"term": term["search"], "total_results": int(term["count"])})
df_categories = pd.DataFrame(facets)
In [23]:
df_categories.head()
Out[23]:
term total_results
0 Article 12294574
1 Advertising 1545286
2 Family Notices 1497995
3 Detailed Lists, Results, Guides 627036
4 Humour 17354

Once again, the raw numbers are probably not all that useful, so let's get the total number of articles in each category and calculate the proportion that have at least one correction.

In [24]:
# Blank query
params["q"] = ""
data = get_results(params)
facets = []
for term in data["category"][0]["facets"]["facet"][0]["term"]:
    # Get the category and the number of results, converting the count to an integer, before adding to our results
    facets.append({"term": term["search"], "total_results": int(term["count"])})
df_total_categories = pd.DataFrame(facets)

We'll merge the corrections-by-category data with the total articles per category and calculate the proportion.

In [25]:
df_categories_merged = merge_df_with_total(df_categories, df_total_categories)
df_categories_merged
Out[25]:
term total_results total_articles proportion
0 Article 12294574 173079270 0.071034
1 Advertising 1545286 46938500 0.032922
2 Family Notices 1497995 2031311 0.737452
3 Detailed Lists, Results, Guides 627036 27772615 0.022577
4 Humour 17354 37777 0.459380
5 Obituaries 14991 16434 0.912194
6 Literature 14240 37759 0.377129
7 News 12148 16137 0.752804
8 Law, Courts, And Crime 10024 14076 0.712134
9 Sport And Games 7729 20689 0.373580
10 Letters 5471 14659 0.373218
11 Editorial 3639 18026 0.201875
12 Arts And Culture 3329 4688 0.710111
13 Reviews 2450 3364 0.728300
14 Shipping Notices 2448 3266 0.749541
15 Commerce And Business 2344 5802 0.403999
16 Puzzles 2148 51364 0.041819
17 Weather 1701 17226 0.098746
18 Classified Advertisements And Notices 1486 1657 0.896801
19 Official Appointments And Notices 1283 1303 0.984651
20 Display Advertisement 465 504 0.922619

A lot of the categories have been added recently and don't contain many articles. Some of these have a very high proportion of articles with corrections – 'Obituaries' for example. This suggests users are systematically categorising and correcting certain types of article.

Let's focus on the main categories by filtering out those with fewer than 30,000 articles.

In [26]:
df_categories_filtered = df_categories_merged.loc[
    df_categories_merged["total_articles"] > 30000
]
df_categories_filtered
Out[26]:
term total_results total_articles proportion
0 Article 12294574 173079270 0.071034
1 Advertising 1545286 46938500 0.032922
2 Family Notices 1497995 2031311 0.737452
3 Detailed Lists, Results, Guides 627036 27772615 0.022577
4 Humour 17354 37777 0.459380
6 Literature 14240 37759 0.377129
16 Puzzles 2148 51364 0.041819

And now we can visualise the results.

In [27]:
cat_chart1 = (
    alt.Chart(df_categories_filtered)
    .mark_bar()
    .encode(
        x=alt.X("term:N", title="Category"),
        y=alt.Y("total_results:Q", title="Articles with corrections"),
    )
)

cat_chart2 = (
    alt.Chart(df_categories_filtered)
    .mark_bar()
    .encode(
        x=alt.X("term:N", title="Category"),
        y=alt.Y(
            "proportion:Q",
            axis=alt.Axis(format="%", title="Proportion of articles with corrections"),
        ),
        color=alt.value("orange"),
    )
)

cat_chart1 | cat_chart2
Out[27]:
In [28]:
df_categories_merged.to_csv(
    f"corrections_by_category_{datetime.now().strftime('%Y%m%d')}.csv", index=False
)

As we can see, the rate of corrections is much higher in the 'Family Notices' category than any other. This probably reflects the work of family historians and others searching for, and correcting, articles containing particular names.

Number of corrections by newspaper¶

How do rates of correction vary across newspapers? We can use the title facet to find out.

In [29]:
params["q"] = "has:corrections"
params["facet"] = "title"
In [30]:
data = get_results(params)
facets = []
for term in data["category"][0]["facets"]["facet"][0]["term"]:
    # Get the newspaper id and the number of results, converting the count to an integer, before adding to our results
    facets.append({"term": term["search"], "total_results": int(term["count"])})
df_newspapers = pd.DataFrame(facets)
In [31]:
df_newspapers.head()
Out[31]:
term total_results
0 35 914913
1 13 837317
2 11 419359
3 16 375474
4 809 349130

Once again we'll calculate the proportion of articles corrected for each newspaper by getting the total number of articles for each newspaper on Trove.

In [32]:
params["q"] = ""
In [33]:
data = get_results(params)
facets = []
for term in data["category"][0]["facets"]["facet"][0]["term"]:
    # Get the newspaper id and the number of results, converting the count to an integer, before adding to our results
    facets.append({"term": term["search"], "total_results": int(term["count"])})
df_newspapers_total = pd.DataFrame(facets)
In [34]:
df_newspapers_merged = merge_df_with_total(
    df_newspapers, df_newspapers_total, how="right"
)
In [35]:
df_newspapers_merged.sort_values(by="proportion", ascending=False, inplace=True)
df_newspapers_merged.rename(columns={"term": "id"}, inplace=True)
In [36]:
df_newspapers_merged.head()
Out[36]:
id total_results total_articles proportion
1811 729 6 6 1.0
1810 1000 6 6 1.0
1793 810 24 24 1.0
1729 624 119 119 1.0
1795 918 22 22 1.0

The title facet only gives us the id number for each newspaper, not its title. Let's get all the titles and then merge them with the facet data.

In [37]:
# Get all the newspaper titles
title_params = {
    "encoding": "json",
}

title_data = s.get(
    "https://api.trove.nla.gov.au/v3/newspaper/titles",
    params=title_params,
    headers=headers,
).json()
In [38]:
titles = []
for newspaper in title_data["newspaper"]:
    titles.append({"title": newspaper["title"], "id": newspaper["id"]})
df_titles = pd.DataFrame(titles)
In [39]:
df_titles.head()
Out[39]:
title id
0 Canberra Community News (ACT : 1925 - 1927) 166
1 Canberra Illustrated: A Quarterly Magazine (AC... 165
2 Federal Capital Pioneer (Canberra, ACT : 1924 ... 69
3 Good Neighbour (ACT : 1950 - 1969) 871
4 Student Notes/Canberra University College Stud... 665
In [40]:
df_titles.shape
Out[40]:
(1812, 2)

Now we can merge the newspaper titles with the facet data using the id to link the two datasets.

In [41]:
df_newspapers_with_titles = (
    pd.merge(df_titles, df_newspapers_merged, how="left", on="id")
    .fillna(0)
    .sort_values(by="proportion", ascending=False)
)
In [42]:
# Convert the totals back to integers
df_newspapers_with_titles[["total_results", "total_articles"]] = (
    df_newspapers_with_titles[["total_results", "total_articles"]].astype(int)
)

Now we can display the newspapers with the highest rates of correction. Remember that a proportion of 1.00 means that every available article has at least one correction.

In [43]:
df_newspapers_with_titles[:25]
Out[43]:
title id total_results total_articles proportion
434 The Newcastle Argus and District Advertiser (N... 513 30 30 1.0
475 The Satirist and Sporting Chronicle (Sydney, N... 1028 286 286 1.0
530 The True Sun and New South Wales Independent P... 1038 20 20 1.0
572 Upper Hunter Courier (Murrurundi, NSW : 1871) 810 24 24 1.0
524 The Temora Telegraph and Mining Advocate (NSW ... 729 6 6 1.0
581 Weekly Observer (Sydney, NSW : 1833) 1490 13 13 1.0
319 The Branxton Advocate: Greta and Rothbury Reco... 686 53 53 1.0
621 Logan and Albert Advocate (Qld. : 1893 - 1900) 842 84 84 1.0
364 The Enterprise (Katoomba, NSW : 1913) 918 22 22 1.0
597 Moonta Herald and Northern Territory Gazette (... 118 56 56 1.0
168 Justice (Narrabri, NSW : 1891) 885 45 45 1.0
140 Goulburn Chronicle and Southern Advertiser (NS... 415 81 81 1.0
161 Intelligence (Bowral, NSW : 1884) 624 119 119 1.0
428 The Mountain Daily (Katoomba, NSW : 1919 - 1920) 919 21 21 1.0
1424 Williamstown Trade Circular (Vic. : 1855 - 1856) 213 32 32 1.0
240 Society (Sydney, NSW : 1887) 1042 21 21 1.0
909 Tasmanian and Port Dalrymple Advertiser (Launc... 273 193 193 1.0
933 The Derwent Star and Van Diemen's Land Intelli... 1046 12 12 1.0
895 Hobart Town Gazette and Van Diemen's Land Adve... 5 1556 1556 1.0
806 Suedaustralische Zeitung (Adelaide, SA : 1850 ... 314 47 47 1.0
28 The Australian Abo Call (National : 1938) 51 78 78 1.0
217 Party (Sydney, NSW : 1942) 1000 6 6 1.0
977 The Van Diemen's Land Gazette and General Adve... 1047 38 38 1.0
992 Alexandra and Yea Standard, Thornton, Gobur an... 154 21 21 1.0
1049 Elsternwick Leader and East Brighton, ... (Vic... 201 17 17 1.0

At the other end, we can see the newspapers with the lowest rates of correction. Note that some newspapers have no corrections at all.

In [44]:
df_newspapers_with_titles.sort_values(by="proportion")[:25]
Out[44]:
title id total_results total_articles proportion
1745 The Sruss-Sruss (Crawley, WA : 1931) 1755 0 64 0.000000
503 The Sunny Corner Silver Press and Miners' Advo... 1892 0 42 0.000000
1645 The Elswood Observer (Cottesloe, WA : 1931) 1792 0 269 0.000000
1685 The Maylands and Bayswater Chronicle (WA : 1905) 1805 0 145 0.000000
1461 Common Wealth (Perth, WA : 1933) 1806 0 48 0.000000
1260 The Australian Jewish Post (St. Kilda, Vic. : ... 1777 1 5065 0.000197
1315 The Jewish Post (Melbourne, Vic. : 1949 - 1966) 1776 17 55983 0.000304
721 Australijos Lietuvis = The Australian Lithuani... 1876 4 11187 0.000358
300 The Berry Register and Kangaroo Valley and Sou... 1889 7 16407 0.000427
183 Maitland Mercury (NSW : 1939 - 1955) 629 326 659172 0.000495
997 Australier Leben = Australian Life (Melbourne,... 1686 3 3816 0.000786
1471 Daily Commercial News and Shipping List (Perth... 1846 107 122062 0.000877
295 The Bega District News (NSW : 1923 - 1955) 1890 160 155447 0.001029
559 To Ethnico Vema = Greek National Tribune (Arnc... 1592 67 62861 0.001066
68 Broughton Creek Mail (Berry, NSW : 1880 - 188... 1887 26 22024 0.001181
321 The Broughton Creek Register, and Kangaroo Val... 1888 14 7057 0.001984
199 Musu Pastoge = Our Haven (Sydney, NSW : 1950 -... 1594 20 9060 0.002208
846 The Seasider (Christies Beach, SA : 1956 - 1963) 1875 22 9901 0.002222
1517 Hellenic Echo (Perth, WA : 1967 - 1968) 1389 1 448 0.002232
573 Vil'na Dumka = Free Thought (Sydney, NSW : 194... 1593 27 11607 0.002326
347 The Cronulla-Sutherland Advocate (NSW : 1927 -... 1748 48 19231 0.002496
166 Italo-Australian (Sydney, NSW : 1927 - 1940) 1595 100 38986 0.002565
748 Hills Messenger (Port Adelaide, SA : 1984 - 2011) 1891 317 123264 0.002572
1473 Dalgety's Review (Perth, WA : 1926 - 1948) 1760 182 65171 0.002793
107 Daily Mirror (Sydney, NSW : 1941 - 1955) 1852 14163 4791318 0.002956

We'll save the full list of newspapers as a CSV file, but first we'll fix up the column headings and add URLs for each title.

In [45]:
df_newspapers_with_titles_csv = df_newspapers_with_titles.copy()
df_newspapers_with_titles_csv.rename(
    {"total_results": "articles_with_corrections"}, axis=1, inplace=True
)
df_newspapers_with_titles_csv["percentage_with_corrections"] = (
    df_newspapers_with_titles_csv["proportion"] * 100
)
df_newspapers_with_titles_csv.sort_values(
    by=["percentage_with_corrections"], inplace=True
)

df_newspapers_with_titles_csv["title_url"] = df_newspapers_with_titles_csv["id"].apply(
    lambda x: f"http://nla.gov.au/nla.news-title{x}"
)

Now we'll save the data as a CSV file and display a link.

In [46]:
df_newspapers_with_titles_csv[
    [
        "id",
        "title",
        "title_url",
        "articles_with_corrections",
        "total_articles",
        "percentage_with_corrections",
    ]
].to_csv(f"corrections_by_title_{datetime.now().strftime('%Y%m%d')}.csv", index=False)

display(FileLink(f"titles_corrected_{datetime.now().strftime('%Y%m%d')}.csv"))
Path (titles_corrected_20240913.csv) doesn't exist. It may still be in the process of being generated, or you may have the incorrect path.

Neediest newspapers¶

Let's see if we can combine some guesses about OCR error rates with the correction data to find the newspapers most in need of help.

To make a guesstimate of error rates, we'll use the occurrence of 'tbe' – a common OCR misreading of 'the'. I don't know how valid this is, but it's a place to start!

In [47]:
# Search for 'tbe' to get an indication of errors by newspaper
# (the ~0 proximity modifier should force an exact match, so Trove doesn't
# match variant forms of the word)
params["q"] = 'text:"tbe"~0'
params["facet"] = "title"
In [48]:
data = get_results(params)
facets = []
for term in data["category"][0]["facets"]["facet"][0]["term"]:
    # Get the newspaper id and the number of results, converting the count to an integer, before adding to our results
    facets.append({"term": term["search"], "total_results": int(term["count"])})
df_errors = pd.DataFrame(facets)

Merge the error data with the total articles per newspaper to calculate the proportion.

In [49]:
df_errors_merged = merge_df_with_total(df_errors, df_newspapers_total, how="right")
df_errors_merged.sort_values(by="proportion", ascending=False, inplace=True)
df_errors_merged.rename(columns={"term": "id"}, inplace=True)
In [50]:
df_errors_merged.head()
Out[50]:
id total_results total_articles proportion
1340 1316 1994 2954 0.675017
1117 758 5226 8078 0.646942
782 1769 13334 23686 0.562949
879 927 9402 17227 0.545771
1653 1784 212 392 0.540816

Add the title names.

In [51]:
df_errors_with_titles = (
    pd.merge(df_titles, df_errors_merged, how="left", on="id")
    .fillna(0)
    .sort_values(by="proportion", ascending=False)
)

So this is a list of the newspapers with the highest rate of OCR error (by our rather dodgy measure).

In [52]:
df_errors_with_titles[:25]
Out[52]:
title id total_results total_articles proportion
545 The Weekly Advance (Granville, NSW : 1892 - 1893) 1316 1994 2954 0.675017
1047 Dunolly and Betbetshire Express and County of ... 758 5226 8078 0.646942
1377 The Tarrangower Times and Maldon and Newstead ... 1769 13334 23686 0.562949
1092 Hamilton Spectator and Grange District Adverti... 927 9402 17227 0.545771
57 Boggy Camp Tingha and Bora Creek (NSW : 1899) 1784 212 392 0.540816
681 The North Australian, Ipswich and General Adve... 262 6175 11527 0.535699
680 The North Australian (Brisbane, Qld. : 1863 - ... 264 2842 5314 0.534814
935 The Herald of Tasmania (Hobart, Tas. : 1845) 1741 26 50 0.520000
383 The Hay Standard and Advertiser for Balranald,... 725 21629 42068 0.514144
578 Wagga Wagga Express and Murrumbidgee District ... 382 7570 14833 0.510349
234 Robertson Advocate (NSW : 1894 - 1923) 530 36809 72383 0.508531
262 Temora Herald and Mining Journal (NSW : 1882 -... 728 635 1253 0.506784
256 Sydney Mail (NSW : 1860 - 1871) 697 24384 48535 0.502400
911 Tasmanian Morning Herald (Hobart, Tas. : 1865 ... 865 5108 10290 0.496404
189 Molong Argus (NSW : 1896 - 1921) 424 51864 104984 0.494018
906 Morning Star and Commercial Advertiser (Hobart... 1242 828 1703 0.486201
1191 Port Phillip Gazette (Vic. : 1851) 1139 238 491 0.484725
917 Telegraph (Hobart Town, Tas. : 1867) 1250 67 140 0.478571
350 The Cumberland Free Press (Parramatta, NSW : 1... 724 6186 13247 0.466974
944 The Hobart Town Herald (Tas. : 1845) 1740 26 57 0.456140
980 Trumpeter General (Hobart, Tas. : 1833 - 1834) 869 672 1482 0.453441
439 The News, Shoalhaven and Southern Coast Distri... 1588 2449 5495 0.445678
930 The Cornwall Chronicle (Launceston, Tas. : 183... 170 72098 163791 0.440183
1028 Chronicle, South Yarra Gazette, Toorak Times a... 847 1637 3720 0.440054
955 The Mount Lyell Standard and Strahan Gazette (... 1251 36323 83363 0.435721

And those with the lowest rate of errors. Note the number of non-English newspapers in this list – of course our measure of accuracy fails completely in newspapers that don't use the word 'the'!

In [53]:
df_errors_with_titles[-25:]
Out[53]:
title id total_results total_articles proportion
837 The Port Adelaide Post Shipping Gazette, Farme... 719 0 18 0.0
1282 The Chinese Advertiser (Ballarat, Vic. : 1856) 706 0 15 0.0
1697 The Mount Margaret Mercury (WA : 1897) 1641 0 24 0.0
773 Port Augusta and Stirling Illustrated News (SA... 1478 0 125 0.0
1517 Hellenic Echo (Perth, WA : 1967 - 1968) 1389 0 448 0.0
1200 Progress (North Fitzroy, Vic. : 1889 - 1890) 1574 0 254 0.0
1260 The Australian Jewish Post (St. Kilda, Vic. : ... 1777 0 5065 0.0
1456 Chung Wah News (Perth, WA : 1981 - 1987) 1383 0 860 0.0
1296 The Elsternwick Leader and Caulfield and Balac... 200 0 47 0.0
831 The Northern Districts Courier (North Adelaide... 1711 0 885 0.0
1461 Common Wealth (Perth, WA : 1933) 1806 0 48 0.0
364 The Enterprise (Katoomba, NSW : 1913) 918 0 22 0.0
1773 The West Australian Times (Perth, WA : 1863 - ... 27 0 762 0.0
388 The Hospital Saturday News (Katoomba, NSW : 1930) 915 0 54 0.0
1484 Echo : Polski Tygodnik Niezalezny (Perth, WA :... 1384 0 2601 0.0
1727 The Possum (Fremantle, WA : 1890) 1201 0 105 0.0
1735 The Southern Cross (Perth, WA : 1893) 1660 0 59 0.0
1480 Der Australische Spiegel = The Australian Mirr... 1385 0 1455 0.0
1789 Vesnik (Perth, WA : 1975 - 1994) 1382 0 881 0.0
813 The Citizen (Port Adelaide, SA : 1938-1940) 1305 0 1284 0.0
338 The Chronicle (Katoomba, NSW : 1929) 914 0 476 0.0
1745 The Sruss-Sruss (Crawley, WA : 1931) 1755 0 64 0.0
1790 Victoria Park News (WA : 1949 - 1950) 1757 0 1170 0.0
2 Federal Capital Pioneer (Canberra, ACT : 1924 ... 69 0 545 0.0
1 Canberra Illustrated: A Quarterly Magazine (AC... 165 0 57 0.0

Now let's merge the error data with the correction data.

In [54]:
corrections_errors_merged_df = pd.merge(
    df_newspapers_with_titles, df_errors_with_titles, how="left", on="id"
)
In [55]:
corrections_errors_merged_df.head()
Out[55]:
title_x id total_results_x total_articles_x proportion_x title_y total_results_y total_articles_y proportion_y
0 The Newcastle Argus and District Advertiser (N... 513 30 30 1.0 The Newcastle Argus and District Advertiser (N... 3 30 0.100000
1 The Satirist and Sporting Chronicle (Sydney, N... 1028 286 286 1.0 The Satirist and Sporting Chronicle (Sydney, N... 0 286 0.000000
2 The True Sun and New South Wales Independent P... 1038 20 20 1.0 The True Sun and New South Wales Independent P... 0 20 0.000000
3 Upper Hunter Courier (Murrurundi, NSW : 1871) 810 24 24 1.0 Upper Hunter Courier (Murrurundi, NSW : 1871) 5 24 0.208333
4 The Temora Telegraph and Mining Advocate (NSW ... 729 6 6 1.0 The Temora Telegraph and Mining Advocate (NSW ... 0 6 0.000000
In [56]:
corrections_errors_merged_df["proportion_uncorrected"] = corrections_errors_merged_df[
    "proportion_x"
].apply(lambda x: 1 - x)
corrections_errors_merged_df.rename(
    columns={
        "title_x": "title",
        "proportion_x": "proportion_corrected",
        "proportion_y": "proportion_with_errors",
    },
    inplace=True,
)
corrections_errors_merged_df.sort_values(
    by=["proportion_with_errors", "proportion_uncorrected"],
    ascending=False,
    inplace=True,
)

So, for what it's worth, here's a list of the neediest newspapers – those with high error rates and low correction rates! As I've said, this is a pretty dodgy method, but interesting nonetheless.

In [57]:
corrections_errors_merged_df[
    ["title", "proportion_with_errors", "proportion_uncorrected"]
][:25]
Out[57]:
title proportion_with_errors proportion_uncorrected
1194 The Weekly Advance (Granville, NSW : 1892 - 1893) 0.675017 0.961408
668 Dunolly and Betbetshire Express and County of ... 0.646942 0.919906
1479 The Tarrangower Times and Maldon and Newstead ... 0.562949 0.977793
387 Hamilton Spectator and Grange District Adverti... 0.545771 0.862251
1592 Boggy Camp Tingha and Bora Creek (NSW : 1899) 0.540816 0.984694
208 The North Australian, Ipswich and General Adve... 0.535699 0.731153
280 The North Australian (Brisbane, Qld. : 1863 - ... 0.534814 0.801468
1525 The Herald of Tasmania (Hobart, Tas. : 1845) 0.520000 0.980000
1097 The Hay Standard and Advertiser for Balranald,... 0.514144 0.954669
515 Wagga Wagga Express and Murrumbidgee District ... 0.510349 0.895234
919 Robertson Advocate (NSW : 1894 - 1923) 0.508531 0.942708
542 Temora Herald and Mining Journal (NSW : 1882 -... 0.506784 0.901038
359 Sydney Mail (NSW : 1860 - 1871) 0.502400 0.851386
496 Tasmanian Morning Herald (Hobart, Tas. : 1865 ... 0.496404 0.891837
753 Molong Argus (NSW : 1896 - 1921) 0.494018 0.928865
139 Morning Star and Commercial Advertiser (Hobart... 0.486201 0.628890
191 Port Phillip Gazette (Vic. : 1851) 0.484725 0.712831
226 Telegraph (Hobart Town, Tas. : 1867) 0.478571 0.757143
394 The Cumberland Free Press (Parramatta, NSW : 1... 0.466974 0.864875
222 The Hobart Town Herald (Tas. : 1845) 0.456140 0.754386
121 Trumpeter General (Hobart, Tas. : 1833 - 1834) 0.453441 0.597841
986 The News, Shoalhaven and Southern Coast Distri... 0.445678 0.947225
276 The Cornwall Chronicle (Launceston, Tas. : 183... 0.440183 0.798548
925 Chronicle, South Yarra Gazette, Toorak Times a... 0.440054 0.943280
1503 The Mount Lyell Standard and Strahan Gazette (... 0.435721 0.978971

Created by Tim Sherratt for the GLAM Workbench.
Support this project by becoming a GitHub sponsor.