Skip to content

Commit 1f423cd

Browse files
committed
add schema-org annotations to html item encoding
resolves #718
1 parent ef2c511 commit 1f423cd

File tree

2 files changed

+119
-1
lines changed

2 files changed

+119
-1
lines changed

pycsw/ogc/api/records.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -897,6 +897,8 @@ def item(self, headers_, args, collection, item):
897897
if headers_['Content-Type'] == 'text/html':
898898
response['title'] = self.config['metadata']['identification']['title']
899899
response['collection'] = collection
900+
response['schema-org'] = record2json(record, self.config['server']['url'],
901+
collection, 'schema-org')
900902

901903
if 'json' in headers_['Content-Type']:
902904
headers_['Content-Type'] = 'application/geo+json'
@@ -1390,9 +1392,121 @@ def record2json(record, url, collection, mode='ogcapi-records'):
13901392

13911393
record_dict['properties']['start_datetime'] = start_date
13921394
record_dict['properties']['end_datetime'] = end_date
1395+
elif mode == 'schema-org':
1396+
schema_org = record_dict['properties'].copy()
1397+
schema_org.update({
1398+
'@context':"http://schema.org/",
1399+
'@type': f"schema:{type_iso2schema_org(record_dict['properties'].get('type',''))}",
1400+
'@id':(f"{url}/collections/{collection}/items/{record_dict['id']}")
1401+
})
1402+
schema_org.pop('title','')
1403+
schema_org.pop('type','')
1404+
schema_org['name'] = schema_org.pop('title', None)
1405+
if record.links:
1406+
print('has links')
1407+
schema_org['distribution'] = []
1408+
for link in jsonify_links(record.links):
1409+
schema_org['distribution'].append({
1410+
'@type':'schema:DataDownload',
1411+
'contentUrl':link.get('url',''),
1412+
'name': link.get('name',''),
1413+
'description': link.get('description',''),
1414+
'encodingFormat': link.get('type',link.get('protocol',''))
1415+
})
1416+
schema_org['keywords'] = []
1417+
for t in schema_org.pop('themes',[]):
1418+
for c in t.get('concepts',[]):
1419+
schema_org['keywords'].append(c.get('url') or c.get('id'))
1420+
schema_org['inLanguage'] = schema_org.pop('language', None)
1421+
schema_org['dateModified'] = schema_org.pop('updated', None)
1422+
schema_org['dateCreated'] = schema_org.pop('created', None)
1423+
schema_org['datePublished'] = schema_org.pop('published', None)
1424+
schema_org['encodingFormat'] = [f.get('name') for f in schema_org.pop('formats', [])]
1425+
for c in record_dict['properties'].get('contacts',{}):
1426+
role = role_iso2schema_org(next(iter(c.get('roles',[])), 'contact'))
1427+
if role not in schema_org.keys():
1428+
schema_org[role] = []
1429+
if 'name' in c.keys():
1430+
schema_org[role].append({
1431+
'@type':'schema:Person',
1432+
'familyName': c.get('name',''),
1433+
'url': next(iter(c.get('links',[])), None).get('href',{}).get('url',''),
1434+
'email': next(iter(c.get('emails',[])), None).get('value',''),
1435+
'address': next(iter(c.get('addresses',[])), None).get('value',''),
1436+
'telephone': next(iter(c.get('phones',[])), None).get('value',''),
1437+
'affiliation': c.get('organization','')
1438+
})
1439+
else:
1440+
schema_org[role].append({
1441+
'@type': 'schema:Organization',
1442+
'name': c.get('organization',''),
1443+
'url': [l.get('href',{}).get('url','') for l in next(iter(c.get('links',[])), None)],
1444+
'email': next(iter(c.get('emails',[])), None).get('value',''),
1445+
'address': next(iter(c.get('addresses',[])), None).get('value',''),
1446+
'telephone': next(iter(c.get('phones',[])), None).get('value',''),
1447+
})
1448+
schema_org.pop('contacts',None)
1449+
record_dict = schema_org
13931450

13941451
return record_dict
13951452

1453+
# Lookup table from lower-cased resource type codes to schema.org type names.
# Keys MUST be lower-case: the lookup key is lower-cased before matching.
# NOTE(review): 'Software', 'Sensor', 'Workflow' and 'Award' should be
# confirmed against the schema.org vocabulary; they are kept unchanged here.
_TYPE_TO_SCHEMA_ORG = {
    "dataset": "Dataset",
    "nongeographicdataset": "Dataset",
    "service": "WebAPI",
    "series": "Series",
    "collection": "Series",
    "software": "SoftwareApplication",
    "model": "ProductModel",
    "document": "DigitalDocument",
    "image": "ImageObject",  # from dcmi
    "text": "DigitalDocument",
    "video": "VideoObject",
    "sound": "AudioObject",
    "party": "Organization",
    "place": "Place",
    "event": "Event",
    "journalarticle": "ScholarlyArticle",  # from Datacite
    "audiovisual": "AudioObject",
    "award": "Award",
    "book": "Book",
    "bookchapter": "Chapter",
    "computationalnotebook": "Software",
    "conferencepaper": "ScholarlyArticle",
    "conferenceproceeding": "ScholarlyArticle",
    "datapaper": "ScholarlyArticle",
    "dissertation": "DigitalDocument",
    "instrument": "Sensor",
    "journal": "Periodical",
    "outputmanagementplan": "DigitalDocument",
    "peerreview": "Review",
    "preprint": "ScholarlyArticle",
    "project": "Project",
    "report": "DigitalDocument",
    "standard": "DigitalDocument",
    "studyregistration": "DigitalDocument",
    "workflow": "Workflow",
}


def type_iso2schema_org(tp):
    """
    Map a resource type code to a schema.org type name

    Only the segment after the last ``/`` is considered, so both bare codes
    (``dataset``) and URI-style values (``http://.../Image``) are accepted.
    Matching is case-insensitive.

    :param tp: `str` of resource type (bare code or ``/``-separated URI)

    :returns: `str` of schema.org type name, ``'Thing'`` when the type is
              missing, not a string, or not in the lookup table
    """

    # A missing or non-string type cannot be split; use the generic fallback
    # instead of raising AttributeError.
    if not isinstance(tp, str):
        return 'Thing'

    code = tp.split('/').pop().lower()

    return _TYPE_TO_SCHEMA_ORG.get(code, 'Thing')
1493+
1494+
# Lookup table from lower-cased contact role codes to schema.org property
# names. Keys MUST be lower-case: the lookup key is lower-cased before
# matching.
_ROLE_TO_SCHEMA_ORG = {
    "custodian": "maintainer",
    "funder": "funder",
    "resourceprovider": "provider",
    "author": "author",
    "processor": "contributor",
    "owner": "copyrightHolder",
    "originator": "creator",
    "distributor": "publisher",
    "publisher": "publisher",
    "user": "contributor",
    "pointofcontact": "contributor",
}


def role_iso2schema_org(rl):
    """
    Map a contact role code to a schema.org property name

    Only the segment after the last ``/`` is considered, so both bare codes
    (``pointOfContact``) and URI-style values are accepted. Matching is
    case-insensitive.

    :param rl: `str` of contact role (bare code or ``/``-separated URI)

    :returns: `str` of schema.org property name, ``'contributor'`` when the
              role is missing, not a string, or not in the lookup table
    """

    # A missing or non-string role cannot be split; use the generic fallback
    # instead of raising AttributeError.
    if not isinstance(rl, str):
        return 'contributor'

    code = rl.split('/').pop().lower()

    return _ROLE_TO_SCHEMA_ORG.get(code, 'contributor')
13961510

13971511
def build_anytext(name, value):
13981512
"""

pycsw/ogc/api/templates/item.html

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@
99
height: 350px;
1010
}
1111
</style>
12-
12+
{% if data['schema-org'] %}
13+
<script type="application/ld+json">
14+
{{ data['schema-org'] | to_json }}
15+
</script>
16+
{% endif %}
1317
{% endblock %}
1418

1519
{% block crumbs %}

0 commit comments

Comments
 (0)