From 0a7497fba6586f8b944317bf325cf7fb1a73b5fc Mon Sep 17 00:00:00 2001
From: Stuart Longland <me@vk4msl.id.au>
Date: Sun, 15 Oct 2017 08:32:18 +1000
Subject: [PATCH] Handle entries that have no HTML content.

Some sites, notably ABC News, provide just plain-text content, or an
HTML summary that is practically identical to it, resulting in neither
appearing in the feed.  This checks the output of the HTML content
extraction and falls back to the summary if the extraction comes up empty.
---
 tornadonews/tornadonews.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tornadonews/tornadonews.py b/tornadonews/tornadonews.py
index 6fd815d..f7ea612 100644
--- a/tornadonews/tornadonews.py
+++ b/tornadonews/tornadonews.py
@@ -69,10 +69,15 @@ class FeedEntry(object):
         Parse the feedparser-generated entry dict and return a FeedEntry
         object from it.
         """
-        if 'content' in entry:
+        content = None
+
+        if entry.get('content'):
             html_content = filter(lambda c : 'html' in c['type'], entry['content'])
             content = ''.join([c['value'] for c in html_content])
-        else:
+
+        # If the content is empty or not present, then use summary.
+        # ABC News gives plain text (not HTML) content.
+        if not content:
             content = entry['summary']
 
         try: