mirror of
https://github.com/sjlongland/tornado-news.git
synced 2025-09-13 10:03:14 +10:00
Try to handle different feed styles
Feeds differ in what they provide: some give the entire post as HTML (e.g. Gentoo Planet), some give only a summary in HTML (ABC News), and some give only a plain-text summary (as ABC News did previously). This commit tries to support a few of these formats gracefully before giving up and just displaying "Summary not available".
This commit is contained in:
parent
876cbd1c64
commit
d1acc132d2
@ -71,14 +71,17 @@ class FeedEntry(object):
|
||||
"""
|
||||
content = None
|
||||
|
||||
if entry.get('content'):
|
||||
html_content = filter(lambda c : 'html' in c['type'], entry['content'])
|
||||
content = ''.join([c['value'] for c in html_content])
|
||||
for field in ('content', 'description'):
|
||||
content = entry.get(field)
|
||||
if content:
|
||||
if not isinstance(content, str):
|
||||
html_content = filter(lambda c : 'html' in c['type'], content)
|
||||
content = ''.join([c['value'] for c in html_content])
|
||||
break
|
||||
|
||||
# If the content is empty or not present, then use summary.
|
||||
# ABC news gives plain text (not HTML) content.
|
||||
if not content:
|
||||
content = entry['summary']
|
||||
content = entry.get('summary', 'Summary not available')
|
||||
|
||||
try:
|
||||
entry_id = entry['id']
|
||||
|
Loading…
Reference in New Issue
Block a user