示例#1
0
def test_main_server_unexpected_behavior(monkeypatch, requests_mock, caplog):
    """An empty JSON list from the server makes ``album.main`` raise and log.

    ``album.validate_input`` is stubbed to return ``MOCK_ALBUM_ID`` and the
    album URL is mocked to answer with ``[]``.  ``album.main`` must then raise
    ``RuntimeError`` matching ``SERVER_ERROR_MESSAGE`` and leave exactly one
    ERROR record carrying that message on the root logger.
    """
    monkeypatch.setattr("album.validate_input", lambda: MOCK_ALBUM_ID)
    album_url = URL.format(MOCK_ALBUM_ID)
    requests_mock.get(album_url, json=[])

    with pytest.raises(RuntimeError, match=SERVER_ERROR_MESSAGE):
        album.main()

    expected_records = [("root", logging.ERROR, SERVER_ERROR_MESSAGE)]
    assert caplog.record_tuples == expected_records
示例#2
0
def test_main_expected_behavior_with_loop(monkeypatch, requests_mock, capsys):
    """``album.main`` prints ``MOCK_OUTPUT`` when the first input is ``-1``.

    ``album.validate_input`` is replaced by a stub that hands out ``-1`` on
    its first call and ``MOCK_ALBUM_ID`` on its second; only the URL for
    ``MOCK_ALBUM_ID`` is mocked.  Presumably ``-1`` makes ``main`` ask for
    input again -- confirm against ``album.main``.
    """
    queued_inputs = [-1, MOCK_ALBUM_ID]

    def fake_validate_input():
        # Hand out the queued values in order, one per call.
        return queued_inputs.pop(0)

    monkeypatch.setattr("album.validate_input", fake_validate_input)
    requests_mock.get(URL.format(MOCK_ALBUM_ID), json=MOCK_JSON)

    album.main()

    printed = capsys.readouterr().out
    assert printed == MOCK_OUTPUT
示例#3
0
 def get_unique_tweets(self, data_dict):
     # TODO: Implement filter to check if Tweet text starts with 'RT'
     """
     :param data_dict:
     :return:
     """
     flag = False
     try:
         text = data_dict['text'].encode('ascii', 'ignore').lower()
         # Check for 'retweeted_status' in metadata field to determine
         # if tweet is a retweet (1st check)
         if 'retweeted_status' not in data_dict:
             url_match = URL.match(text)
             # Check if link contains url
             if url_match:
                 match_group = url_match.group()
                 if len(self.key_list) > 0:
                     if any(match_group in item for item in self.key_list):
                         flag = True
                     if flag is False:
                         data_dict['text'] = match_group
                         print "Inserted text: " + data_dict['text'] + '\n'
                         self.key_list.append(match_group)
                         sid = SentimentIntensityAnalyzer()
                         ss = sid.polarity_scores(text)
                         print ss['compound']
                         score = ss['compound']
                         if score < 0:
                             score += (3 * score)
                         for w in GOOGLE:
                             if w in text and self.google_price >= 0:
                                 self.google_price = score
                                 self.google_text = text
                         for w in MICROSOFT:
                             if w in text and self.microsoft_price >= 0:
                                 self.microsoft_price = score
                                 self.microsoft_text = text
                         for w in FACEBOOK:
                             if w in text and self.facebook_price >= 0:
                                 self.facebook_price = score
                                 self.facebook_text = text
                         p.trigger('test_channel', 'my_event',
                                   {'google': self.google_price,
                                    'microsoft': self.microsoft_price,
                                    'facebook': self.facebook_price})
                         p.trigger('tweet_channel', 'my_event',
                                   {
                                       'google_text': self.google_text,
                                       'microsoft_text': self.microsoft_text,
                                       'facebook_text' : self.facebook_text
                                   })
                         self.google_price = 0
                         self.microsoft_price = 0
                         self.facebook_price = 0
                 else:
                     self.key_list.append(url_match.group())
     except TypeError, e:
         print >> sys.stderr, e
         self.log_error(str(e))
示例#4
0
 def get_unique_tweets(self, data_dict):
     # TODO: Implement filter to check if Tweet text starts with 'RT'
     """
     :param data_dict:
     :return:
     """
     flag = False
     try:
         text = data_dict['text'].encode('ascii', 'ignore').lower()
         # Check for 'retweeted_status' in metadata field to determine
         # if tweet is a retweet (1st check)
         if 'retweeted_status' not in data_dict:
             print "Number of tweets in collection: " + \
                   str(self.stream_filter.collection.count())
             url_match = URL.match(text)
             # Check if link contains url
             if url_match:
                 match_group = url_match.group()
                 if len(self.key_list) > 0:
                     if any(match_group in item for item in self.key_list):
                         flag = True
                     if flag is False:
                         data_dict['text'] = match_group
                         print "Inserted text: " + data_dict['text'] + '\n'
                         self.key_list.append(match_group)
                         self.stream_filter.collection.insert(data_dict)
                         if self.wtf is True:
                             if os.path.isfile(self.filename):
                                 with open(self.filename, 'a') as outfile:
                                     json.dump(data_dict['text'], outfile)
                                     outfile.write('\n')
                             else:
                                 with open(self.filename, 'w') as outfile:
                                     json.dump(data_dict['text'], outfile)
                                     outfile.write('\n')
                 else:
                     self.key_list.append(url_match.group())
             else:
                 print "Inserted text: " + text
                 self.stream_filter.collection.insert(data_dict)
     except TypeError, e:
         print >> sys.stderr, e
         self.log_error(str(e))
示例#5
0
def run():
    """Collect user-agent strings for every entry of ``OS_LIST``.

    For each OS, listing pages are fetched starting at ``START_PAGE``.  The
    text of every link inside a ``td`` with class ``useragent`` is appended
    to ``ua_list`` (defined outside this function).  Paging for an OS stops
    after a page that yields fewer than 50 entries, or once the next page
    number would exceed ``END_PAGE``; the first page is always fetched.
    """
    for os_name in OS_LIST:
        page = START_PAGE
        keep_paging = True

        while keep_paging:
            # Build the listing URL for this OS and page.
            url = URL.format(os=os_name, page=page)
            # Issue the GET request.
            response = requests.get(url)
            print(f'Fetch: {url}')
            document = html.fromstring(response.content)
            user_agents = document.xpath('//td[@class="useragent"]/a/text()')
            ua_list.extend(user_agents)

            # Continue only after a full page and while within the page limit.
            page += 1
            keep_paging = len(user_agents) >= 50 and page <= END_PAGE
def make_req(kind, i):
    """Create the grequests GET request for episode ``i`` of ``kind``.

    The URL comes from ``URL`` formatted with ``kind`` and ``episode``; the
    callback is whatever ``set_meta`` returns for ``{"kind": kind, "i": i}``.

    NOTE(review): another ``make_req`` with a different signature follows
    this one; if both live in the same module the later one rebinds the name.
    """
    target_url = URL.format(kind=kind, episode=i)
    meta = {"kind": kind, "i": i}
    return grequests.get(target_url, callback=set_meta(meta))
def make_req(season, episode, act):
    """Create the grequests GET request for one act of an episode.

    The URL comes from ``URL`` formatted with ``season``, ``episode`` and
    ``act``; the callback is whatever ``set_meta`` returns for a dict
    holding those same three values.
    """
    target_url = URL.format(season=season, episode=episode, act=act)
    meta = {"season": season, "episode": episode, "act": act}
    return grequests.get(target_url, callback=set_meta(meta))