From 2de969742bf62bb6c080893dc010b1eef2504b50 Mon Sep 17 00:00:00 2001
From: Stuart Longland
Date: Fri, 2 Feb 2018 12:07:21 +1000
Subject: [PATCH] crawler: Flag users based on projects per minute.

A real human can't publish many projects in a minute. These spambots
seem to have hundreds after an hour.
---
 hadsh/crawler/crawler.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/hadsh/crawler/crawler.py b/hadsh/crawler/crawler.py
index 58e8311..1878972 100644
--- a/hadsh/crawler/crawler.py
+++ b/hadsh/crawler/crawler.py
@@ -264,6 +264,14 @@ class Crawler(object):
                     # Next page
                     pg_idx = link_res['page'] + 1
 
+                # Flag a high average publishing rate; accounts under 5 minutes old are skipped.
+                age = (datetime.datetime.now(tz=pytz.utc) - user.created).total_seconds()
+                if (age > 300.0) and ((user_data['projects'] / (age / 60.0)) > 5):
+                    # More than 5 projects a minute, averaged over the account's age.
+                    self._log.debug('User %s [#%d] has %d projects in %d seconds',
+                            user.screen_name, user.user_id, user_data['projects'], age)
+                    match = True
+
                 # Record the user information
                 detail = self._db.query(UserDetail).get(user_data['id'])
                 if detail is None: