671 lines
16 KiB
Python
671 lines
16 KiB
Python
from functools import partial
|
|
from tornado.gen import coroutine, Return
|
|
|
|
import base64
|
|
|
|
|
|
class Row(object):
|
|
"""
|
|
Raw row class, simplifies manipulation of tables.
|
|
"""
|
|
def __init__(self, db, row):
|
|
for column in self._COLUMNS_:
|
|
if not hasattr(self, column):
|
|
setattr(self.__class__, column,
|
|
property(\
|
|
partial(\
|
|
lambda c, s : s._get_col(c), column),
|
|
partial(\
|
|
lambda c, s, val : s._set_col(c, val), column),
|
|
partial(\
|
|
lambda c, s : s._del_col(c), column)))
|
|
self._data = dict(zip(self._COLUMNS_, row))
|
|
self._dirty = {}
|
|
self._db = db
|
|
|
|
|
|
@classmethod
|
|
@coroutine
|
|
def fetch(cls, db, where, *args, single=False):
|
|
rows = yield db.query(
|
|
'''
|
|
SELECT
|
|
%(columns)s
|
|
FROM
|
|
"%(table)s"
|
|
WHERE
|
|
%(where)s
|
|
''' % {
|
|
'columns': ', '.join(cls._COLUMNS_),
|
|
'table': cls._TABLE_,
|
|
'where': where
|
|
}, *args)
|
|
res = [
|
|
cls(db, row) for row in rows
|
|
]
|
|
|
|
if single:
|
|
try:
|
|
raise Return(res[0])
|
|
except IndexError:
|
|
raise Return(None)
|
|
else:
|
|
raise Return(res)
|
|
|
|
@coroutine
|
|
def refresh(self):
|
|
# Exclude primary keys from the column list
|
|
data_columns = list(filter(
|
|
lambda c : c not in set(self._PRIMARY_KEYS_),
|
|
self._COLUMNS_))
|
|
# Determine the "where" criteria
|
|
where_str = ' AND '.join([
|
|
'(%s = %%s)' % (col,)
|
|
for col in self._PRIMARY_KEYS_
|
|
])
|
|
where_args = tuple([
|
|
self._get_col(c) for c in self._PRIMARY_KEYS_
|
|
])
|
|
|
|
rows = yield self._db.query(
|
|
'''
|
|
SELECT
|
|
%(columns)s
|
|
FROM
|
|
"%(table)s"
|
|
WHERE
|
|
%(where)s
|
|
''' % {
|
|
'columns': ', '.join(data_columns),
|
|
'table': self._TABLE_,
|
|
'where': where_str
|
|
}, *where_args)
|
|
if len(rows) != 1:
|
|
raise ValueError(
|
|
'Row no longer in database or '\
|
|
'primary keys ambiguous')
|
|
self._data.update(dict(zip(data_columns, rows[0])))
|
|
|
|
@coroutine
|
|
def commit(self):
|
|
try:
|
|
dirty_data = self._dirty.copy()
|
|
self._dirty = {}
|
|
dirty_columns = list(dirty_data.keys())
|
|
dirty_args = [dirty_data[c] for c in dirty_columns]
|
|
where_args = [self._data[c] for c in self._PRIMARY_KEYS_]
|
|
|
|
sql = '''UPDATE
|
|
"%(table)s"
|
|
SET
|
|
%(updates)s
|
|
WHERE
|
|
%(criteria)s''' % {
|
|
'table': self._TABLE_,
|
|
'updates': ', '.join([
|
|
'"%s"=%%s' % (c,)
|
|
for c in dirty_columns
|
|
]),
|
|
'criteria': ' AND '.join([
|
|
'("%s"=%%s)' % (c,)
|
|
for c in self._PRIMARY_KEYS_
|
|
])
|
|
}
|
|
|
|
yield self._db.query(sql,
|
|
*tuple(dirty_args + where_args),
|
|
commit=True)
|
|
self._data.update(dirty_data)
|
|
except:
|
|
dirty_data.update(self._dirty)
|
|
self._dirty = dirty_data
|
|
raise
|
|
|
|
def _get_col(self, column):
|
|
return self._dirty.get(column, self._data[column])
|
|
|
|
def _set_col(self, column, value):
|
|
self._dirty[column] = value
|
|
|
|
def _del_col(self, column, value):
|
|
self._dirty.pop(column, None)
|
|
|
|
|
|
class User(Row):
|
|
"""
|
|
All recognised Hackaday.io users, including legitmate ones.
|
|
"""
|
|
|
|
# Table
|
|
_TABLE_ = 'user'
|
|
|
|
# Primary key
|
|
_PRIMARY_KEYS_ = ['user_id']
|
|
|
|
# Columns
|
|
_COLUMNS_ = [
|
|
'user_id',
|
|
'screen_name',
|
|
'url',
|
|
'avatar_id',
|
|
'created',
|
|
'had_created',
|
|
'last_update'
|
|
]
|
|
|
|
def __repr__(self):
|
|
return 'User(user_id=%r, screen_name=%r)' \
|
|
% (self.user_id, self.screen_name)
|
|
|
|
@coroutine
|
|
def get_groups(self):
|
|
"""
|
|
Return a set of groups linked to this user.
|
|
"""
|
|
groups = yield self._db.query(
|
|
'''
|
|
SELECT
|
|
g.name
|
|
FROM
|
|
"group" g,
|
|
"user_group_assoc" uga
|
|
WHERE
|
|
g.group_id=uga.group_id
|
|
AND
|
|
uga.user_id=%s
|
|
''', self.user_id)
|
|
raise Return(set([
|
|
name for (name,) in groups
|
|
]))
|
|
|
|
@coroutine
|
|
def set_groups(self, groups):
|
|
"""
|
|
Set the list of groups linked to this user.
|
|
"""
|
|
yield self.add_groups(groups)
|
|
yield self.mask_groups(groups)
|
|
|
|
@coroutine
|
|
def add_groups(self, groups):
|
|
# Add the new groups
|
|
yield self._db.query('''
|
|
INSERT INTO "user_group_assoc"
|
|
(user_id, group_id)
|
|
SELECT
|
|
%s, group_id
|
|
FROM
|
|
"group"
|
|
WHERE
|
|
name IN %s
|
|
ON CONFLICT DO NOTHING
|
|
''', self.user_id, tuple(groups))
|
|
|
|
@coroutine
|
|
def rm_groups(self, groups):
|
|
# Remove groups listed listed
|
|
yield self._db.query('''
|
|
DELETE FROM "user_group_assoc"
|
|
WHERE
|
|
user_id=%S
|
|
AND
|
|
group_id IN (
|
|
SELECT
|
|
%s, group_id
|
|
FROM
|
|
"group"
|
|
WHERE
|
|
name IN %s
|
|
)
|
|
''', self.user_id, tuple(groups))
|
|
|
|
@coroutine
|
|
def mask_groups(self, groups):
|
|
# Remove groups not listed
|
|
yield self._db.query('''
|
|
DELETE FROM "user_group_assoc"
|
|
WHERE
|
|
user_id=%S
|
|
AND
|
|
group_id NOT IN (
|
|
SELECT
|
|
%s, group_id
|
|
FROM
|
|
"group"
|
|
WHERE
|
|
name IN %s
|
|
)
|
|
''', self.user_id, tuple(groups))
|
|
|
|
@coroutine
|
|
def get_detail(self):
|
|
raise Return((yield UserDetail.fetch(self._db,
|
|
'user_id=%s LIMIT 1', self.user_id,
|
|
single=True)))
|
|
|
|
@coroutine
|
|
def get_deferral(self):
|
|
raise Return((yield DeferredUser.fetch(self._db,
|
|
'user_id=%s LIMIT 1', self.user_id,
|
|
single=True)))
|
|
|
|
|
|
class Account(Row):
|
|
_TABLE_ = 'account'
|
|
_PRIMARY_KEYS_ = ['user_id']
|
|
_COLUMNS_ = [
|
|
'user_id',
|
|
'name',
|
|
'hashedpassword',
|
|
'changenextlogin'
|
|
]
|
|
|
|
@coroutine
|
|
def get_user(self):
|
|
raise Return((yield User.fetch(self._db,
|
|
'user_id=%s LIMIT 1', self.user_id,
|
|
single=True)))
|
|
|
|
|
|
class DeferredUser(Row):
|
|
"""
|
|
Object to represent when a user account has been added but inspection
|
|
has been deferred.
|
|
"""
|
|
|
|
_TABLE_ = 'deferred_user'
|
|
_PRIMARY_KEYS_ = ['user_id']
|
|
_COLUMNS_ = [
|
|
'user_id',
|
|
'inspect_time',
|
|
'inspections'
|
|
]
|
|
|
|
|
|
class Group(Row):
|
|
"""
|
|
Groups used for classifying users.
|
|
"""
|
|
|
|
_TABLE_ = 'group'
|
|
_PRIMARY_KEYS_ = ['group_id']
|
|
_COLUMNS_ = [
|
|
'group_id',
|
|
'name'
|
|
]
|
|
|
|
def __repr__(self):
|
|
return 'Group(group_id=%r, name=%r)' \
|
|
% (self.group_id, self.name)
|
|
|
|
|
|
class Session(Row):
|
|
"""
|
|
Session token storage. The session ID will be emitted to the user, so
|
|
we use a UUID field to make the value unguessable.
|
|
"""
|
|
_TABLE_ = 'session'
|
|
_PRIMARY_KEYS_ = ['session_id']
|
|
_COLUMNS_ = [
|
|
'session_id',
|
|
'user_id',
|
|
'expiry_date'
|
|
]
|
|
|
|
def __repr__(self):
|
|
return 'Session(user=%r)' \
|
|
% (self.user)
|
|
|
|
@coroutine
|
|
def get_user(self):
|
|
raise Return((yield User.fetch(self._db,
|
|
'user_id=%s LIMIT 1', self.user_id,
|
|
single=True)))
|
|
|
|
|
|
class UserDetail(Row):
|
|
"""
|
|
Detail on 'suspect' users. Only users that have been identified as
|
|
possibly spammy by the search algorithm, or users that have been flagged
|
|
as spammy by logged-in users, appear here.
|
|
"""
|
|
_TABLE_ = 'user_detail'
|
|
_PRIMARY_KEYS_ = ['user_id']
|
|
_COLUMNS_ = [
|
|
'user_id',
|
|
'about_me',
|
|
'who_am_i',
|
|
'what_i_would_like_to_do',
|
|
'location',
|
|
'projects'
|
|
]
|
|
|
|
|
|
class UserLink(Row):
|
|
"""
|
|
Links attached to 'suspect' users.
|
|
"""
|
|
_TABLE_ = 'user_link'
|
|
_PRIMARY_KEYS_ = ['user_id', 'url']
|
|
_COLUMNS_ = [
|
|
'user_id',
|
|
'title',
|
|
'url'
|
|
]
|
|
|
|
def __repr__(self):
|
|
return 'UserLink(user_id=%r, title=%r, url=%r)' \
|
|
% (self.user_id, self.title, self.url)
|
|
|
|
|
|
class Avatar(Row):
|
|
"""
|
|
A cache of users' avatars, as some share the same image.
|
|
"""
|
|
_TABLE_ = 'avatar'
|
|
_PRIMARY_KEYS_ = ['avatar_id']
|
|
_COLUMNS_ = [
|
|
'avatar_id',
|
|
'url',
|
|
'avatar',
|
|
'avatar_type'
|
|
]
|
|
|
|
@coroutine
|
|
def get_hashes(self):
|
|
hashes = yield AvatarHash.fetch(self._db,
|
|
'''
|
|
hash_id IN (
|
|
SELECT
|
|
hash_id
|
|
FROM
|
|
"avatar_hash_assoc"
|
|
WHERE
|
|
avatar_id=%s
|
|
)''', self.avatar_id)
|
|
raise Return(dict([
|
|
(h.hashalgo, h) for h in hashes
|
|
]))
|
|
|
|
|
|
class AvatarHash(Row):
|
|
"""
|
|
A hash of users' avatars and their scores.
|
|
"""
|
|
_TABLE_ = 'avatar_hash'
|
|
_PRIMARY_KEYS_ = ['hash_id']
|
|
_COLUMNS_ = [
|
|
'hash_id',
|
|
'hashalgo',
|
|
'hashdata'
|
|
]
|
|
|
|
@property
|
|
def hashstr(self):
|
|
return base64.a85encode(self.hashdata)
|
|
|
|
def __repr__(self):
|
|
return '<%s #%d %s %s>' % (
|
|
self.__class__.__name__,
|
|
self.hash_id,
|
|
self.hashalgo,
|
|
self.hashstr)
|
|
|
|
|
|
class Tag(Row):
|
|
"""
|
|
A list of tags seen applied to users' accounts.
|
|
"""
|
|
_TABLE_ = 'tag'
|
|
_PRIMARY_KEYS_ = ['tag_id']
|
|
_COLUMNS_ = [
|
|
'tag_id',
|
|
'tag'
|
|
]
|
|
|
|
|
|
class NewestUserPageRefresh(Row):
|
|
"""
|
|
A record of when each page of the "newst users" list was last refreshed.
|
|
"""
|
|
_TABLE_ = 'newest_user_page_refresh'
|
|
_PRIMARY_KEYS_ = ['page_num']
|
|
_COLUMNS_ = [
|
|
'page_num',
|
|
'refresh_date'
|
|
]
|
|
|
|
|
|
class NewUser(Row):
|
|
"""
|
|
A record of a new user that is to be inspected.
|
|
"""
|
|
_TABLE_ = 'new_user'
|
|
_PRIMARY_KEYS_ = ['user_id']
|
|
_COLUMNS_ = [
|
|
'user_id'
|
|
]
|
|
|
|
|
|
class Hostname(Row):
|
|
"""
|
|
A hostname that appears in the links of user profiles.
|
|
"""
|
|
_TABLE_ = 'hostname'
|
|
_PRIMARY_KEYS_ = ['hostname_id']
|
|
_COLUMNS_ = [
|
|
'hostname_id',
|
|
'hostname',
|
|
'score',
|
|
'count'
|
|
]
|
|
|
|
def __repr__(self):
|
|
return 'Hostname(hostname_id=%r, hostname=%r, score=%r, count=%r)' \
|
|
% (self.hostname_id, self.hostname, self.score, self.count)
|
|
|
|
|
|
class Word(Row):
|
|
"""
|
|
A single word in the vocabulary of the Hackaday.io community.
|
|
"""
|
|
_TABLE_ = 'word'
|
|
_PRIMARY_KEYS_ = ['word_id']
|
|
_COLUMNS_ = [
|
|
'word_id',
|
|
'word',
|
|
'score',
|
|
'count'
|
|
]
|
|
|
|
def __repr__(self):
|
|
return 'Word(word_id=%r, word=%r, score=%r, count=%r)' \
|
|
% (self.word_id, self.word, self.score, self.count)
|
|
|
|
|
|
class WordAdjacent(Row):
|
|
"""
|
|
How often two words appear next to each other.
|
|
"""
|
|
_TABLE_ = 'word_adjacent'
|
|
_PRIMARY_KEYS_ = ['proceeding_id', 'following_id']
|
|
_COLUMNS_ = [
|
|
'proceeding_id',
|
|
'following_id',
|
|
'score',
|
|
'count'
|
|
]
|
|
|
|
def __repr__(self):
|
|
return 'WordAdjacent(proceeding=%r, following=%r, score=%r, count=%r)' \
|
|
% (self.proceeding_id, self.following_id, self.score, self.count)
|
|
|
|
|
|
class UserWord(Row):
|
|
"""
|
|
Words used by a given user
|
|
"""
|
|
_TABLE_ = 'user_word'
|
|
_PRIMARY_KEYS_ = ['user_id', 'word_id']
|
|
_COLUMNS_ = [
|
|
'user_id',
|
|
'word_id',
|
|
'count'
|
|
]
|
|
|
|
def __repr__(self):
|
|
return 'UserWord(user=%r, word=%r, count=%r)' \
|
|
% (self.user_id, self.word_id, self.count)
|
|
|
|
|
|
class UserHostname(Row):
|
|
"""
|
|
Hostnames used by a given user
|
|
"""
|
|
_TABLE_ = 'user_hostname'
|
|
_PRIMARY_KEYS_ = ['user_id', 'hostname_id']
|
|
_COLUMNS_ = [
|
|
'user_id',
|
|
'hostname_id',
|
|
'count'
|
|
]
|
|
|
|
def __repr__(self):
|
|
return 'UserHostname(user=%r, hostname=%r, count=%r)' \
|
|
% (self.user_id, self.hostname_id, self.count)
|
|
|
|
|
|
class UserWordAdjacent(Row):
|
|
"""
|
|
Adjacent words used by a given user
|
|
"""
|
|
_TABLE_ = 'user_word_adjacent'
|
|
_PRIMARY_KEYS_ = [
|
|
'user_id',
|
|
'proceeding_id',
|
|
'following_id'
|
|
]
|
|
_COLUMNS_ = [
|
|
'user_id',
|
|
'proceeding_id',
|
|
'following_id',
|
|
'count'
|
|
]
|
|
|
|
def __repr__(self):
|
|
return 'UserWordAdjacent(user=%r, proceeding=%r, '\
|
|
'following=%r, count=%r)' \
|
|
% (self.user_id, self.proceeding_id,
|
|
self.following_id,
|
|
self.count)
|
|
|
|
|
|
class UserToken(Row):
|
|
"""
|
|
Suspect tokens found in regular expression search.
|
|
"""
|
|
_TABLE_ = 'user_token'
|
|
_PRIMARY_KEYS_ = ['user_id', 'token']
|
|
_COLUMNS_ = [
|
|
'user_id',
|
|
'token',
|
|
'count'
|
|
]
|
|
|
|
|
|
class Trait(Row):
|
|
"""
|
|
Traits are characteristics which are common to subsets of users.
|
|
"""
|
|
_TABLE_ = 'trait'
|
|
_PRIMARY_KEYS_ = ['trait_id']
|
|
_COLUMNS_ = [
|
|
'trait_id',
|
|
'trait_class',
|
|
'trait_type',
|
|
'score',
|
|
'count',
|
|
'weight'
|
|
]
|
|
|
|
|
|
class TraitInstance(Row):
|
|
"""
|
|
Instances of particular traits (e.g. if the trait is a 'word',
|
|
this may be the score for a particular word).
|
|
"""
|
|
_TABLE_ = 'trait_instance'
|
|
_PRIMARY_KEYS_ = ['trait_inst_id']
|
|
_COLUMNS_ = [
|
|
'trait_inst_id',
|
|
'trait_id',
|
|
'score',
|
|
'count'
|
|
]
|
|
|
|
@property
|
|
def instance(self):
|
|
return 't%di%d' % (self.trait_id, self.trait_inst_id)
|
|
|
|
|
|
class TraitInstanceString(TraitInstance):
|
|
"""
|
|
Trait instance that is described by a string.
|
|
"""
|
|
_COLUMNS_ = TraitInstance._COLUMNS_ + [
|
|
'trait_string'
|
|
]
|
|
|
|
@property
|
|
def instance(self):
|
|
return self.trait_string
|
|
|
|
|
|
class TraitInstanceAvatarHash(TraitInstance):
|
|
"""
|
|
Trait instance that references an avatar hash.
|
|
"""
|
|
_COLUMNS_ = TraitInstance._COLUMNS_ + [
|
|
'trait_hash_id'
|
|
]
|
|
|
|
@property
|
|
def instance(self):
|
|
return self.trait_hash_id
|
|
|
|
|
|
class TraitInstancePair(TraitInstance):
|
|
"""
|
|
Instances of two other traits that appear together.
|
|
"""
|
|
_COLUMNS_ = TraitInstance._COLUMNS_ + [
|
|
'prev_id',
|
|
'next_id'
|
|
]
|
|
|
|
|
|
class UserTrait(Row):
|
|
"""
|
|
Traits linked to a particular user.
|
|
"""
|
|
_TABLE_ = 'user_trait'
|
|
_PRIMARY_KEYS_ = ['user_id', 'trait_id']
|
|
_COLUMNS_ = [
|
|
'user_id',
|
|
'trait_id',
|
|
'count'
|
|
]
|
|
|
|
|
|
class UserTraitInstance(Row):
|
|
"""
|
|
Trait instances linked to a particular user.
|
|
"""
|
|
_TABLE_ = 'user_trait_instance'
|
|
_PRIMARY_KEYS_ = ['user_id', 'trait_inst_id']
|
|
_COLUMNS_ = [
|
|
'user_id',
|
|
'trait_inst_id',
|
|
'count'
|
|
]
|