示例#1
0
 def parse_result(self, rev):
     """Turn one result row into the item dict handed back to the caller.

     Reads ``cl_timestamp``, ``rc_namespace``, ``rc_title`` and, only when
     the namespace id is missing from ``NAMESPACE_MAP``, ``rc_cur_id``.

     Returns a dict with ``date``, ``url``, ``title``, ``category``,
     ``created_at`` and ``meta`` (``id`` / ``timestamp``) entries.
     """
     # cl_timestamp has to expose isoformat(); the 'Z' is appended verbatim.
     stamp = rev['cl_timestamp'].isoformat() + 'Z'
     ns_id = rev['rc_namespace']
     prefix = NAMESPACE_MAP.get(ns_id)
     # A prefix is only prepended when it is non-empty and the id is > 0.
     if prefix and ns_id > 0:
         page_title = prefix + ':' + rev['rc_title']
     else:
         page_title = rev['rc_title']
     if prefix is None:
         # The namespace is unknown, so address the page by curid rather
         # than by a title we cannot prefix correctly.
         link = 'https://%s/w/index.php?curid=%s' % (self.wiki,
                                                      rev['rc_cur_id'])
     else:
         link = 'https://%s/wiki/%s' % (self.wiki,
                                        page_title.replace(' ', '_'))
     return {
         'date': stamp,
         'url': link,
         'title': page_title.replace('_', ' '),
         'category': self.category,
         'created_at': stamp,
         'meta': {
             'id': url_to_uuid5(link),
             'timestamp': iso8601_to_epoch(stamp),
         },
     }
示例#2
0
 def parse_result(self, rev):
     """Convert one ``rc_*`` row into the item dict for a single change.

     ``rev`` is modified in place: a falsy ``rc_new_len`` or ``rc_old_len``
     is overwritten with ``0`` before the size difference is computed.

     Returns a dict with ``date``, ``url``, ``user``, ``size``, ``comment``,
     ``title``, ``created_at`` and ``meta`` (``id`` / ``timestamp``) entries.
     """
     parsed = datetime.datetime.strptime(rev['rc_timestamp'], '%Y%m%d%H%M%S')
     stamp = parsed.isoformat() + 'Z'
     # Normalise missing lengths so the subtraction below cannot fail.
     for length_key in ('rc_new_len', 'rc_old_len'):
         if not rev[length_key]:
             rev[length_key] = 0
     diff_url = 'https://%s/w/index.php?diff=%s&oldid=%s' % (
         self.wiki, int(rev['rc_this_oldid']), int(rev['rc_last_oldid']))
     item = {
         'date': stamp,
         'url': diff_url,
         'user': rev['rc_user_text'],
         'size': rev['rc_new_len'] - rev['rc_old_len'],
         'comment': rev['rc_comment'],
         # Titles are shown with spaces instead of underscores.
         'title': rev['rc_title'].replace('_', ' '),
         'created_at': stamp,
     }
     item['meta'] = {
         'id': url_to_uuid5(diff_url),
         'timestamp': iso8601_to_epoch(stamp),
     }
     return item
示例#3
0
 def parse_result(self, rev):
     """Convert one ``rc_*`` row into a hashtag item dict.

     ``rev`` must provide ``rc_timestamp`` (``%Y%m%d%H%M%S`` string),
     ``rc_comment``, ``rc_this_oldid``, ``rc_last_oldid``, ``rc_user_text``,
     ``rc_new_len``, ``rc_old_len`` and ``rc_title``.

     Returns a dict with ``raw_tags``, ``input_hashtag``,
     ``return_hashtags``, ``date``, ``url``, ``user``, ``size``,
     ``comment``, ``title``, ``created_at`` and ``meta`` entries.
     """
     date = datetime.datetime.strptime(rev['rc_timestamp'], '%Y%m%d%H%M%S')
     date = date.isoformat() + 'Z'
     tags = find_hashtags(rev['rc_comment'])
     ret = {
         'raw_tags': tags,
         'input_hashtag': self.tag,
         'return_hashtags': ' '.join(tags),
         'date': date,
         'url': 'https://%s/w/index.php?diff=%s&oldid=%s' %
                (self.wiki,
                 int(rev['rc_this_oldid']),
                 int(rev['rc_last_oldid'])),
         'user': rev['rc_user_text'],
         # Either length may be None; count it as 0 instead of raising
         # TypeError on the subtraction. The row itself is left untouched.
         'size': (rev['rc_new_len'] or 0) - (rev['rc_old_len'] or 0),
         'comment': rev['rc_comment'],
         'title': rev['rc_title'],
     }
     ret['created_at'] = date
     ret['meta'] = {
         'id': url_to_uuid5(ret['url']),
         'timestamp': iso8601_to_epoch(date),
     }
     return ret
示例#4
0
 def parse_result(self, result):
     """Build the ``created_at`` / ``meta`` item for ``result``.

     ``meta['id']`` is derived from ``result['user']`` and ``created_at``
     is ``result['date']`` unchanged. ``meta['timestamp']`` reflects the
     moment this method runs, not a value taken from ``result``.
     """
     # The trailing 'Z' labels the string as UTC, so the clock has to be
     # read in UTC. datetime.now() returns local time and would mislabel
     # the value on any host whose timezone is not UTC.
     timestamp = "{:%Y-%m-%dT%H:%M:%SZ}".format(datetime.datetime.utcnow())
     meta_id = url_to_uuid5(result['user'])
     created_at = result['date']
     ts = iso8601_to_epoch(timestamp)
     return {'created_at': created_at,
             'meta': {'id': meta_id, 'timestamp': ts}}
示例#5
0
 def parse_result(self, result):
     """Reduce ``result`` to its ``created_at`` value plus a ``meta`` block.

     ``meta['id']`` comes from ``result['url']``; both ``created_at`` and
     ``meta['timestamp']`` are taken from ``result['date']``.
     """
     item_id = url_to_uuid5(result['url'])
     when = result['date']
     meta = {'id': item_id, 'timestamp': iso8601_to_epoch(when)}
     return {'created_at': when, 'meta': meta}
示例#6
0
 def parse_entry(self, entry):
     """Parse a single feed entry into an IFTTT trigger item.

     Returns a dict with ``created_at``, ``entry_id``, ``url`` and ``meta``
     (``id`` / ``timestamp``). ``url`` is ``entry.id`` exactly as received.
     """
     # Entry IDs sometimes arrive with an http scheme. Normalising to https
     # before hashing keeps one entry from producing multiple UUIDs.
     canonical_id = entry.id.replace('http:', 'https:')
     entry_uuid = url_to_uuid5(canonical_id)
     published = entry.published_parsed
     created = utc_to_iso8601(published)
     epoch = utc_to_epoch(published)
     item = {
         'created_at': created,
         'entry_id': entry_uuid,
         'url': entry.id,
     }
     item['meta'] = {'id': entry_uuid, 'timestamp': epoch}
     return item
示例#7
0
 def parse_entry(self, entry):
     """Parse a single feed entry into an IFTTT trigger item.

     The item carries ``created_at``, ``entry_id``, ``url`` (the raw
     ``entry.id``) and a ``meta`` dict holding ``id`` and ``timestamp``.
     """
     # The same entry can show up with an http or an https ID. Hash the
     # https form only, so both spellings map to a single UUID.
     uuid = url_to_uuid5(entry.id.replace('http:', 'https:'))
     published_at = entry.published_parsed
     iso_stamp = utc_to_iso8601(published_at)
     meta = {'id': uuid, 'timestamp': utc_to_epoch(published_at)}
     return {
         'created_at': iso_stamp,
         'entry_id': uuid,
         'url': entry.id,
         'meta': meta,
     }
示例#8
0
 def parse_result(self, rev):
     """Convert one ``rc_*`` row into the item dict for a single change.

     ``rev`` must provide ``rc_timestamp`` (``%Y%m%d%H%M%S`` string),
     ``rc_this_oldid``, ``rc_last_oldid``, ``rc_user_text``, ``rc_new_len``,
     ``rc_old_len``, ``rc_comment`` and ``rc_title``.

     Returns a dict with ``date``, ``url``, ``user``, ``size``, ``comment``,
     ``title``, ``created_at`` and ``meta`` (``id`` / ``timestamp``) entries.
     """
     date = datetime.datetime.strptime(rev['rc_timestamp'], '%Y%m%d%H%M%S')
     date = date.isoformat() + 'Z'
     # Either length may be None; count it as 0 instead of raising
     # TypeError on the subtraction. The row itself is left untouched.
     size = (rev['rc_new_len'] or 0) - (rev['rc_old_len'] or 0)
     ret = {'date': date,
            'url': 'https://%s/w/index.php?diff=%s&oldid=%s' %
                   (self.wiki,
                    int(rev['rc_this_oldid']),
                    int(rev['rc_last_oldid'])),
            'user': rev['rc_user_text'],
            'size': size,
            'comment': rev['rc_comment'],
            'title': rev['rc_title']}
     ret['created_at'] = date
     ret['meta'] = {'id': url_to_uuid5(ret['url']),
                    'timestamp': iso8601_to_epoch(date)}
     return ret
示例#9
0
 def parse_result(self, rev):
     """Build the item dict for one row of results.

     The title gets a ``<namespace>:`` prefix when ``NAMESPACE_MAP`` has a
     non-empty name for a positive ``rc_namespace``. The URL is a
     ``/wiki/<title>`` link when the namespace id is in ``NAMESPACE_MAP``,
     and a ``curid`` link otherwise.

     Returns a dict with ``date``, ``url``, ``title``, ``category``,
     ``created_at`` and ``meta`` (``id`` / ``timestamp``) entries.
     """
     when = rev['cl_timestamp'].isoformat() + 'Z'
     namespace_id = rev['rc_namespace']
     ns_name = NAMESPACE_MAP.get(namespace_id)
     full_title = (ns_name + ':' + rev['rc_title']
                   if ns_name and namespace_id > 0
                   else rev['rc_title'])
     if ns_name is None:
         # Unknown namespace: fall back to the page id for the link.
         page_url = 'https://%s/w/index.php?curid=%s' % (
             self.wiki, rev['rc_cur_id'])
     else:
         page_url = 'https://%s/wiki/%s' % (
             self.wiki, full_title.replace(' ', '_'))
     item = {
         'date': when,
         'url': page_url,
         'title': full_title.replace('_', ' '),
         'category': self.category,
         'created_at': when,
     }
     item['meta'] = {
         'id': url_to_uuid5(page_url),
         'timestamp': iso8601_to_epoch(when),
     }
     return item
示例#10
0
 def parse_result(self, page):
     """Map one trending ``page`` record onto the item dict.

     The URL is built from ``page['title']`` with spaces turned into
     underscores. ``date`` and ``since`` are the first 19 characters of
     ``page['updated']`` / ``page['start']`` with ``'Z'`` appended.
     ``thumbURL`` falls back to ``DEFAULT_IMAGE`` when the thumbnail lookup
     raises ``KeyError``.
     """
     slug = page['title'].replace(' ', '_')
     link = "https://en.wikipedia.org/wiki/%s?referrer=ifttt-trending" % (
         slug,)
     last_update = page['updated'][:19] + 'Z'
     try:
         thumbnail = page['thumbnail']['source']
     except KeyError:
         thumbnail = DEFAULT_IMAGE
     item = {'thumbURL': thumbnail}
     item['bias'] = page['bias']
     item['tags'] = page['tags']
     item['title'] = page['title']
     item['url'] = link
     item['score'] = page['trendiness']
     item['date'] = last_update
     item['since'] = page['start'][:19] + 'Z'
     item['edits'] = page['edits']
     # Only the number of contributors is reported, not the list itself.
     item['editors'] = len(page['contributors'])
     item['meta'] = {
         'id': url_to_uuid5(link),
         'timestamp': iso8601_to_epoch(last_update),
     }
     return item
示例#11
0
 def parse_result(self, page):
     """Map one trending ``page`` record onto the item dict.

     ``url`` points at the English Wikipedia article for ``page['title']``
     (spaces replaced by underscores). ``thumbURL`` is the page thumbnail
     source, or the hard-coded Wikipedia logo URL when that lookup raises
     ``KeyError``.
     """
     article = page['title'].replace(' ', '_')
     page_url = "https://en.wikipedia.org/wiki/%s" % (article,)
     # Keep the first 19 characters of the timestamp and mark it with 'Z'.
     updated_at = page['updated'][:19] + 'Z'
     # Start from the fallback image and override it when a thumbnail exists.
     thumb = 'https://upload.wikimedia.org/wikipedia/commons/6/63/Wikipedia-logo.png'
     try:
         thumb = page['thumbnail']['source']
     except KeyError:
         pass
     return {
         'thumbURL': thumb,
         'bias': page['bias'],
         'tags': page['tags'],
         'title': page['title'],
         'url': page_url,
         'score': page['trendiness'],
         'date': updated_at,
         'since': page['start'][:19] + 'Z',
         'edits': page['edits'],
         'editors': len(page['contributors']),
         'meta': {
             'id': url_to_uuid5(page_url),
             'timestamp': iso8601_to_epoch(updated_at),
         },
     }
示例#12
0
 def parse_result(self, page):
     """Map one trending ``page`` record onto the item dict.

     ``bias``, ``tags``, ``title`` and ``edits`` are copied through as-is.
     ``score`` is ``page['trendiness']`` and ``editors`` is the length of
     ``page['contributors']``. ``thumbURL`` uses ``DEFAULT_IMAGE`` when the
     thumbnail lookup raises ``KeyError``.
     """
     title_slug = page['title'].replace(' ', '_')
     target = "https://en.wikipedia.org/wiki/%s?referrer=ifttt-trending" % (
         title_slug,)
     refreshed = page['updated'][:19] + 'Z'
     try:
         image = page['thumbnail']['source']
     except KeyError:
         image = DEFAULT_IMAGE
     item = {'thumbURL': image}
     for field in ('bias', 'tags', 'title'):
         item[field] = page[field]
     item['url'] = target
     item['score'] = page['trendiness']
     item['date'] = refreshed
     item['since'] = page['start'][:19] + 'Z'
     item['edits'] = page['edits']
     item['editors'] = len(page['contributors'])
     item['meta'] = {
         'id': url_to_uuid5(target),
         'timestamp': iso8601_to_epoch(refreshed),
     }
     return item
示例#13
0
 def parse_result(self, result):
     """Return the ``created_at`` / ``meta`` item for ``result``.

     ``meta['id']`` is computed from ``result['url']``; ``created_at`` and
     ``meta['timestamp']`` both come from ``result['date']``.
     """
     uuid = url_to_uuid5(result['url'])
     stamp = result['date']
     item = {'created_at': stamp}
     item['meta'] = {'id': uuid, 'timestamp': iso8601_to_epoch(stamp)}
     return item