mirror of
https://github.com/sjlongland/tornado-news.git
synced 2025-09-13 10:03:14 +10:00
Handle entries that have no HTML content.
Some sites, notably ABC News, provide just plain-text content or a practically identical HTML summary, with the result that neither appears in the feed. This commit checks the output of the HTML content extraction and falls back to the entry's summary if the extraction comes up empty.
This commit is contained in:
parent
9acacf2a96
commit
0a7497fba6
@ -69,10 +69,15 @@ class FeedEntry(object):
|
||||
Parse the feedparser-generated entry dict and return a FeedEntry
|
||||
object from it.
|
||||
"""
|
||||
if 'content' in entry:
|
||||
content = None
|
||||
|
||||
if entry.get('content'):
|
||||
html_content = filter(lambda c : 'html' in c['type'], entry['content'])
|
||||
content = ''.join([c['value'] for c in html_content])
|
||||
else:
|
||||
|
||||
# If the content is empty or not present, then use summary.
|
||||
# ABC news gives plain text (not HTML) content.
|
||||
if not content:
|
||||
content = entry['summary']
|
||||
|
||||
try:
|
||||
|
Loading…
Reference in New Issue
Block a user