import random

import pandas as pd
import requests
from IPython.display import Image, display

# Endpoint that search queries are sent to.
api_url = "https://api.prov.vic.gov.au/search/query"

# A plain keyword search: the search term goes in the `q` parameter.
params = dict(q="ostrich")

# requests encodes `params` into the query string of the GET request;
# printing the final url shows exactly what was sent.
response = requests.get(api_url, params=params)
print(f"API request url: {response.url}")

# Decode the JSON body into Python data for the following cells.
data = response.json()

API request url: https://api.prov.vic.gov.au/search/query?q=ostrich

# `numFound` is the size of the whole result set, not just of this page.
num_found = data["response"]["numFound"]
print(f"There are {num_found:,} results.")

There are 10 results.

# List the title of every record delivered with this response.
docs = data["response"]["docs"]
for doc in docs:
    print(doc["title"])

D501 Head close-up of ostrich
D502 Head close-up of ostrich
110
1004/312 Marinda McOstrich Jaffray: Will; Grant of probate
1004/312 Marinda McOstrich Jaffray: Grant of probate
M OSTRICH
553055
M OSTRICH
MATT OSTRICHE
V332 [Daryl Somers and Ozzie Ostrich on the children's show 'Hey, Hey' It's Saturday']

# Show the first record in full so that all of its fields can be inspected.
# Left as a bare expression so the notebook displays the value.
data["response"]["docs"][0]

{'category': 'Item',
 'entity': 'Record',
 '_id': 'B7BE47C9-5613-11EB-BE8C-6757FF78D049',
 'timestamp': 1622643613,
 'identifier.PROV_ACM.id': 'VPRS 14517/P0001/5/155',
 'identifier.PID.id': 'B7BE47C9-5613-11EB-BE8C-6757FF78D049',
 'title': 'D501 Head close-up of ostrich',
 'consignment_id': 'P0001',
 'start_dt': '1753-01-01T00:00:00Z',
 'end_dt': '3000-12-31T00:00:00Z',
 'date_range': ['[1753 TO 3000]'],
 'date_range.not_described': ['[1753 TO 3000]'],
 'description.subject': ['OSTRICHES'],
 'description.aggregate': 'Subject : OSTRICHES',
 'presentation_text': 'Subject : OSTRICHES',
 'jurisdictional_coverage': ['Victoria'],
 'rights_statement': ['Open Public Records Act 1973'],
 'rights_status': ['Open'],
 'item_discrete': 'No',
 'format': 'Physical',
 'medium': ['Polyester Negative'],
 'location': ['North Melbourne', 'Online'],
 'access_restriction': 'No',
 'status': 'Published',
 'citation': 'VPRS 14517/P0001/5, D501',
 'citation_sort': '14517P00010000130000001550',
 'is_part_of_series.id': ['VPRS14517'],
 'is_part_of_series.title': ['Negatives of Photographs [Publications Branch]'],
 'series_id': '14517',
 'parents.ids': ['VPRS14517', 'D6BD4E47-F7E6-11E9-AE98-87DB74D2147D'],
 'parents.titles': ['Negatives of Photographs [Publications Branch]',
  '[Not Set]'],
 'agencies.titles': ['Education Department'],
 'agencies.ids': ['VA714'],
 'agencies.date_ranges': ['[1873 TO 1985]'],
 'resp_agency_title': ['Department of Education'],
 'resp_agency_title_facet': ['Department of Education'],
 'resp_agency_id': ['3098'],
 'is_part_of_item.PID': ['D6BD4E47-F7E6-11E9-AE98-87DB74D2147D'],
 'is_part_of_item.title': ['D332-D667'],
 'iiif-manifest': 'https://images.prov.vic.gov.au/manifests/B7/BE/47/C9/-5613-11EB-BE8C-6757FF78D049/images/manifest.json',
 'iiif-thumbnail': 'https://images.prov.vic.gov.au/loris/B7%2FBE%2F47%2FC9%2F-5613-11EB-BE8C-6757FF78D049%2Fimages%2F1%2Ffiles%2F14517-00013-D0501.tif/full/!200,200/0/default.jpg',
 'control_symbol_labels': ['Registration Number'],
 'control_symbol_values': ['D501'],
 'record_form': ['Photograph or Image'],
 'box_number_sort': [13],
 '_version_': 1816058689549762560}

api_url = "https://api.prov.vic.gov.au/search/query"

# A lone "*" is used as the query, so no search term narrows the results.
params = dict(q="*")
response = requests.get(api_url, params=params)
print(f"API request url: {response.url}")

# Report how many records the query matched in total.
data = response.json()
num_found = data["response"]["numFound"]
print(f"There are {num_found:,} results.")

API request url: https://api.prov.vic.gov.au/search/query?q=%2A
There are 10,147,135 results.

# Match everything ("*") and ask for facet counts on the `category` field.
params = {"q": "*", "facet": "true", "facet.field": "category"}
response = requests.get("https://api.prov.vic.gov.au/search/query", params=params)
data = response.json()

# The facet counts sit under facet_counts -> facet_fields -> <field name>.
facet_fields = data["facet_counts"]["facet_fields"]
values = facet_fields["category"]
print(values)

['Item', 6337827, 'Image', 3613751, 'relatedEntity', 151117, 'Consignment', 23771, 'Series', 17095, 'Agency', 3252, 'Function', 322]

# `values` is a flat list that alternates facet value and count:
# [value, count, value, count, ...]. Pair the even-indexed entries (values)
# with the odd-indexed entries (counts) rather than indexing by position,
# and keep only the facet values that actually occur.
facets = [
    {"category": category, "count": count}
    for category, count in zip(values[::2], values[1::2])
    if count > 0
]

# Left as a bare expression so the notebook renders the table.
pd.DataFrame(facets)

# `rows` sets how many records are requested in a single response.
params = dict(q="rabbits", rows=100)
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

# Compare the number of records delivered with the size of the result set.
docs_delivered = len(data["response"]["docs"])
total_found = data["response"]["numFound"]
print(f"This requests delivers {docs_delivered} of {total_found:,} results.")

https://api.prov.vic.gov.au/search/query?q=rabbits&rows=100
This requests delivers 100 of 365 results.

# Ask for one record only, with results ordered by title, ascending.
params = dict(q="rabbits", rows=1, sort="title asc")
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

# The single record delivered is the first one in that ordering.
first_doc = data["response"]["docs"][0]
print(first_doc["title"])

https://api.prov.vic.gov.au/search/query?q=rabbits&rows=1&sort=title+asc
'55/37/5

# Same search as before, but ordered by title in descending order.
params = dict(q="rabbits", rows=1, sort="title desc")
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

# The single record delivered is the first one in that ordering.
first_doc = data["response"]["docs"][0]
print(first_doc["title"])

https://api.prov.vic.gov.au/search/query?q=rabbits&rows=1&sort=title+desc
Wire Netting Advances Files [SAMPLE ONLY RETAINED]

# `fl` takes a comma-separated list of the fields to include in each record.
params = dict(q="rabbits", rows=1, fl="_id,title")
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

# Print the whole record to show which fields came back.
first_doc = data["response"]["docs"][0]
print(first_doc)

https://api.prov.vic.gov.au/search/query?q=rabbits&rows=1&fl=_id%2Ctitle
{'_id': '3589932B-F1AF-11E9-AE98-C70783C3C724', 'title': "Rabbit Inspector's Reports"}

# Both requests in this cell go to the same endpoint.
select_url = "https://api.prov.vic.gov.au/search/select"

# Records that have a value in `iiif-manifest` and whose record form is
# "Photograph or Image"; one record per request is enough.
params = dict(
    q='iiif-manifest:[* TO *] AND record_form:"Photograph or Image"',
    rows=1,
)

# First request: find out how many records match the query.
response = requests.get(select_url, params=params)
data = response.json()
total_results = data["response"]["numFound"]

# Pick a random offset somewhere inside the result set.
params["start"] = random.randrange(total_results)

# Second request: fetch the one record sitting at that offset.
response = requests.get(select_url, params=params)
data = response.json()
item = data["response"]["docs"][0]

# Show the record's title together with its thumbnail image.
print(item["title"])
display(Image(url=item["iiif-thumbnail"]))

D642 Coarse cloddy soil

# Harvest every Item matching "rabbits", one page of results at a time.
#
# The category restriction is written into the query itself, using the same
# `field:value` syntax as the other field filters in this notebook. Sent as
# a separate `category` URL parameter it appears to be ignored: the harvest
# then returns as many records as the unfiltered "rabbits" search.
params = {
    "q": "rabbits AND category:Item",
    "rows": 100,  # number of records requested per page
}

start = 0
harvested_results = []

# Keep requesting pages until one comes back empty
while True:
    # Offset of the first record wanted in this page
    params["start"] = start
    # The timeout stops a stalled connection from hanging the harvest
    response = requests.get(api_url, params=params, timeout=60)
    # Stop on an HTTP error rather than trying to parse an error response
    response.raise_for_status()
    data = response.json()
    results = data["response"]["docs"]
    # Number of records returned by the current request
    num_docs = len(results)
    # An empty page means there is nothing left to harvest
    if num_docs == 0:
        break
    # Add this page to the harvested results
    harvested_results += results
    # Move the offset forward by the number of records actually received
    start += num_docs

print(f"Harvested {len(harvested_results)} results.")

Harvested 365 results.

# Two words with no operator between them.
params = dict(q="murray river")
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

num_found = data["response"]["numFound"]
print(f"There are {num_found:,} results.")

https://api.prov.vic.gov.au/search/query?q=murray+river
There are 64,746 results.

# The same two words joined with an explicit OR operator.
params = dict(q="murray OR river")
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

num_found = data["response"]["numFound"]
print(f"There are {num_found:,} results.")

https://api.prov.vic.gov.au/search/query?q=murray+OR+river
There are 64,746 results.

# Joining the words with AND asks for records that match both of them.
params = dict(q="murray AND river")
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

num_found = data["response"]["numFound"]
print(f"There are {num_found:,} results.")

https://api.prov.vic.gov.au/search/query?q=murray+AND+river
There are 2,584 results.

# Double quotes inside the query string make it a phrase search.
params = dict(q='"murray river"')
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

num_found = data["response"]["numFound"]
print(f"There are {num_found:,} results.")

https://api.prov.vic.gov.au/search/query?q=%22murray+river%22
There are 1,746 results.

# Phrase search for "gold mining"; gives a baseline count for the phrase.
params = dict(q='"gold mining"')
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

num_found = data["response"]["numFound"]
print(f"There are {num_found:,} results.")

https://api.prov.vic.gov.au/search/query?q=%22gold+mining%22
There are 3,910 results.

# In Solr/Lucene query syntax a "~10" after a quoted phrase makes it a
# proximity search: the words may be up to 10 positions apart.
params = dict(q='"gold mining"~10')
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

num_found = data["response"]["numFound"]
print(f"There are {num_found:,} results.")

https://api.prov.vic.gov.au/search/query?q=%22gold+mining%22~10
There are 4,563 results.

# `field:value` syntax: only records whose `category` field is "Item".
params = dict(q="category:Item")
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

result_set = data["response"]
print(f"\nThere are {result_set['numFound']:,} results.\n")

# Titles of the records delivered with this response.
for doc in result_set["docs"]:
    print(doc["title"])

https://api.prov.vic.gov.au/search/query?q=category%3AItem

There are 6,337,827 results.

211/374 Leslie A Lamb: Will; Grant of probate
215/936 Ellen Cahill: Will; Grant of probate
215/981 Florence M Lovegrove: Will; Grant of probate
211/107 Amelia Hawking: Will; Grant of probate
215/980 William F Finchett: Will; Grant of probate
215/979 George Wilson: Will; Grant of probate
211/102 Bernard F Cragen: Will; Grant of probate
211/221 Jonathan Coulson: Will; Grant of probate
215/978 William E S Ockenden: Will; Grant of probate
215/959 Otto Holst: Will; Grant of probate

# Two field filters joined with OR: records in either category.
params = dict(q="category:Item OR category:Image")
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

num_found = data["response"]["numFound"]
print(f"\nThere are {num_found:,} results.\n")

https://api.prov.vic.gov.au/search/query?q=category%3AItem+OR+category%3AImage

There are 9,951,578 results.

# Filters on two different fields joined with AND: both must match.
params = dict(q="record_form:Volume AND location:Ballarat")
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

result_set = data["response"]
print(f"\nThere are {result_set['numFound']:,} results.\n")

# Titles of the records delivered with this response.
for doc in result_set["docs"]:
    print(doc["title"])

https://api.prov.vic.gov.au/search/query?q=record_form%3AVolume+AND+location%3ABallarat

There are 6,976 results.

1966 - 1873
Book 46, 16.04.1920 - 17.11.1920,
Book 47, 17.11.1920 - 29.07.1921,
Book 48, 01.08.1921 - 28.02.1922,
Book 51, 13.04.1923 - 29.09.1923,
Book 52, 01.10.1923 - 24.04.1924,
Book 53, 28.04.1924 - 02.10.1924,
Book 54, 08.10.1924 - 27.03.1925,
Book 55, 27.03.1925 - 25.09.1925,
Book 56, 25.09.1925 - 08.03.1926,

# Range query on `start_dt`: from 1 January 1920 to 31 December 1949.
params = dict(q="start_dt:[1920-01-01 TO 1949-12-31]")
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

num_found = data["response"]["numFound"]
print(f"\nThere are {num_found:,} results.\n")

https://api.prov.vic.gov.au/search/query?q=start_dt%3A%5B1920-01-01+TO+1949-12-31%5D

There are 1,076,668 results.

# A "*" as the upper bound leaves the end of the range open.
params = dict(q="start_dt:[1920-01-01 TO *]")
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

num_found = data["response"]["numFound"]
print(f"\nThere are {num_found:,} results.\n")

https://api.prov.vic.gov.au/search/query?q=start_dt%3A%5B1920-01-01+TO+%2A%5D

There are 3,808,984 results.

# A fully open range, [* TO *], matches records that have any value in the
# field -- here, records with a IIIF manifest.
params = dict(q="iiif-manifest:[* TO *]")
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

num_found = data["response"]["numFound"]
print(f"\nThere are {num_found:,} results.\n")

https://api.prov.vic.gov.au/search/query?q=iiif-manifest%3A%5B%2A+TO+%2A%5D

There are 925,524 results.

# Print the manifest url stored on the first record of the last response.
first_doc = data["response"]["docs"][0]
print(first_doc["iiif-manifest"])

https://images.prov.vic.gov.au/manifests/0135/5021/14/images/manifest.json

# Count the records whose `category` is "Image".
params = dict(q="category:Image")
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

num_found = data["response"]["numFound"]
print(f"\nThere are {num_found:,} results.\n")

https://api.prov.vic.gov.au/search/query?q=category%3AImage

There are 3,613,751 results.

# Records that have any value in the `iiif-thumbnail` field.
params = dict(q="iiif-thumbnail:[* TO *]")
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

num_found = data["response"]["numFound"]
print(f"\nThere are {num_found:,} results.\n")

https://api.prov.vic.gov.au/search/query?q=iiif-thumbnail%3A%5B%2A+TO+%2A%5D

There are 4,547,296 results.

# Display the thumbnail of the first record in the last response.
# Left as a bare expression so the notebook renders the image.
Image(url=data["response"]["docs"][0]["iiif-thumbnail"])

# Brackets group the clauses: Items that have a thumbnail, or any Image.
params = dict(q="(iiif-thumbnail:[* TO *] AND category:Item) OR (category:Image)")
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

num_found = data["response"]["numFound"]
print(f"\nThere are {num_found:,} results.\n")

https://api.prov.vic.gov.au/search/query?q=%28iiif-thumbnail%3A%5B%2A+TO+%2A%5D+AND+category%3AItem%29+OR+%28category%3AImage%29

There are 4,547,296 results.

# Records that have any value in the `family_name` field.
params = dict(q="family_name:[* TO *]")
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

num_found = data["response"]["numFound"]
print(f"\nThere are {num_found:,} results.\n")

https://api.prov.vic.gov.au/search/query?q=family_name%3A%5B%2A+TO+%2A%5D

There are 6,097,812 results.

# Look up one series by its identifier. The identifier contains a space, so
# it is quoted; the category clause restricts the match to Series records.
params = dict(q='identifier.PROV_ACM.id:"VPRS 13" AND category:Series')
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

result_set = data["response"]
print(f"\nThere are {result_set['numFound']:,} results.\n")

# Show the first matching record in full.
display(result_set["docs"][0])

https://api.prov.vic.gov.au/search/query?q=identifier.PROV_ACM.id%3A%22VPRS+13%22+AND+category%3ASeries

There are 1 results.

{'category': 'Series',
 'entity': 'Record',
 '_id': '13912344-F1A4-11E9-AE98-91984FD5C262',
 'timestamp': 1714524468,
 'identifier.PROV_ACM.id': 'VPRS 13',
 'series_id': '13',
 'citation': 'VPRS 13',
 'citation_sort': '00013',
 'identifier.PID.id': '13912344-F1A4-11E9-AE98-91984FD5C262',
 'title': 'Inwards Shipping Index [Refer to Microfilm Copy VPRS 3504]',
 'date_range': ['1900'],
 'start_dt': '1900',
 'start_dt_qual': '?',
 'end_dt': '1900',
 'how_to_use': ['** Further research is required to determine the exact purpose and context of this series **<br/><br/>This series comprises an alphabetical index to shipping arrivals at Victorian ports. Monitoring of shipping arrivals for customs and immigration purposes was undertaken by the Victorian Government from 1839 until responsibility for these functions passed to the Commonwealth Government in 1901 and 1924 respectively.<br/><br/>Ships have been entered in lexicographical (ie. strict alphabetical) order. Each arrival of the ship is then listed in chronological order by date of arrival.<br/><br/>Entries in the volumes include the following details:<br/>name of vessel<br/>tonnage<br/>master<br/>port of embarkation<br/>date of arrival.<br/><br/>This Index covers the period 1839 to 1900. It is assumed that the Index must have been compiled sometime around or after 1900 in order that the correct alphabetical order could be determined. Evidence in the Index suggests that it was compiled from an existing index.<br/><br/>For the period 1901 to 1924 consult VPRS 3503 which is a microfilm copy of a self-indexing chronological record of ship arrivals. Note that the ships listed in VPRS 3503 are not in strict alphabetical order.<br/>'],
 'resp_agency_title': ['Department of Transport (known as Ministry of Transport 1951 to 1992)'],
 'resp_agency_title_facet': ['Department of Transport (known as Ministry of Transport 1951 to 1992)'],
 'resp_agency_id': ['673'],
 'format': 'Physical',
 'rights_status': ['Open'],
 'location': ['North Melbourne'],
 'contents.date_range': ['[1839 TO 1900]'],
 'contents.start_dt': [1839],
 'contents.end_dt': [1900],
 'series_in_custody.date_range': ['1900'],
 'series_in_custody.start_dt': [1900],
 'series_in_custody.end_dt': [1900],
 'responsible_agents.resp_agency_id': [673],
 'responsible_agents.title': ['Department of Transport (known as Ministry of Transport 1951 to 1992)'],
 'responsible_agents.date_ranges': ['[1983 TO 1996]'],
 'responsible_agents.start_dt': [1983],
 'responsible_agents.end_dt': [1996],
 'creating_agents.creating_agency_id': [606],
 'creating_agents.title': ['Department of Trade and Customs'],
 'creating_agents.date_ranges': ['1900'],
 'creating_agents.start_dt': [1900],
 'creating_agents.end_dt': [1900],
 'status': 'Published',
 '_version_': 1816062577834196992}

# Look up a single Item by the value of its `_id` field.
params = dict(q='_id:"B7BE47C9-5613-11EB-BE8C-6757FF78D049" AND category:Item')
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

num_found = data["response"]["numFound"]
print(f"\nThere are {num_found:,} results.\n")

https://api.prov.vic.gov.au/search/query?q=_id%3A%22B7BE47C9-5613-11EB-BE8C-6757FF78D049%22+AND+category%3AItem

There are 1 results.

# relatedEntity records for entity VA473 whose relationship is
# "Primary responsible function"; up to 100 are requested in one response.
params = dict(
    q='category:relatedEntity AND entity_id:VA473 AND relationship:"Primary responsible function"',
    rows=100,
)
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

result_set = data["response"]
print(f"\nThere are {result_set['numFound']:,} functions.\n")

# Print the title carried by each relationship record.
for doc in result_set["docs"]:
    print(doc["title"])

https://api.prov.vic.gov.au/search/query?q=category%3ArelatedEntity+AND+entity_id%3AVA473+AND+relationship%3A%22Primary+responsible+function%22&rows=100

There are 21 functions.

Crown lands (public)
Crown lands (government)
Armed forces command
Education
Goldfields administration and mining
Library, State
Botanic gardens
Crown solicitor's services
Census and statistics
Finance
General superintendence
Police
Ports and harbours
Immigration (nineteenth century)
Postal services
Buildings, government (design and construction)
Roads and bridges
Health, public
Customs
Aboriginal affairs
Prisons and youth training centres

# Show the first relationship record in full to see which fields it carries.
# Left as a bare expression so the notebook displays the value.
data["response"]["docs"][0]

{'category': 'relatedEntity',
 'status': 'Published',
 '_id': 'VA473:VF309:2690:primaryresponsibilityfor',
 'timestamp': 1614239338,
 'entity_id': 'VA473',
 'related_entity_id': 'VF309',
 'sort_id': 309,
 'title': 'Crown lands (public)',
 'relationship': 'Primary responsible function',
 'relationship_date_range': ['[1839 TO 1851]'],
 'relationship_start_dt': 1839,
 'relationship_end_dt': 1851,
 '_version_': 1816039025116446725}

# relatedEntity records for entity VF309 whose relationship is
# "Primary responsible agency"; up to 100 are requested in one response.
params = dict(
    q='category:relatedEntity AND entity_id:VF309 AND relationship:"Primary responsible agency"',
    rows=100,
)
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

result_set = data["response"]
print(f"\nThere are {result_set['numFound']:,} agencies.\n")

# Print the title carried by each relationship record.
for doc in result_set["docs"]:
    print(doc["title"])

https://api.prov.vic.gov.au/search/query?q=category%3ArelatedEntity+AND+entity_id%3AVF309+AND+relationship%3A%22Primary+responsible+agency%22&rows=100

There are 14 agencies.

Superintendent, Port Phillip District
Department of Conservation and Natural Resources
Department of Crown Lands and Survey, Geelong Division
Department of Conservation, Forests and Lands
Department of Environment and Primary Industries
Department of Conservation and Environment
Police Magistrate Port Phillip District
Department of Natural Resources and the Environment
Department of Sustainability and Environment
Colonial Secretary's Office
Crown Lands Department
Department of Environment, Land, Water and Planning
Department of Energy, Environment and Climate Action 
Department of Crown Lands and Survey

# Items filtered by the bare series number held in `series_id`.
params = dict(q="category:Item AND series_id:460")
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

num_found = data["response"]["numFound"]
print(f"\nThere are {num_found:,} results.\n")

https://api.prov.vic.gov.au/search/query?q=category%3AItem+AND+series_id%3A460

There are 74,409 results.

# Items filtered through `is_part_of_series.id`, which takes the series
# identifier with its "VPRS" prefix.
params = dict(q="category:Item AND is_part_of_series.id:VPRS460")
response = requests.get(api_url, params=params)
print(response.url)
data = response.json()

num_found = data["response"]["numFound"]
print(f"\nThere are {num_found:,} results.\n")

https://api.prov.vic.gov.au/search/query?q=category%3AItem+AND+is_part_of_series.id%3AVPRS460

There are 74,409 results.

Getting started with the PROV API

A simple API request

Using an 'empty' query to get everything

The different types of entities in API results

Identifiers and links

Functions, agencies, and series

Items

Images

Search facets

Controlling the way results are delivered

Retrieving a random result

Harvest a complete set of results

Constructing queries

Boolean operators

Text searches

Filter results by using fields

Filter by date

Find digitised records

Find records about people

Find an individual record

Items

Images

Series

Agencies

Functions

	category	count
0	Item	6337827
1	Image	3613751
2	relatedEntity	151117
3	Consignment	23771
4	Series	17095
5	Agency	3252
6	Function	322

Getting started with the PROV API

A simple API request

Using an 'empty' query to get everything

The different types of entities in API results

Identifiers and links

Functions, agencies, and series

Items

Images

Search facets

Controlling the way results are delivered

Retrieving a random result

Harvest a complete set of results

Constructing queries

Boolean operators

Text searches

Filter results by using fields

Filter by date

Find digitised records

Find records about people

Find an individual record

Find related entities

Items

Images

Series

Agencies

Functions

Filter by related entities