def get_single_article(url):
    """Scrape one Sina article page and store it in the ``news`` collection.

    The page at ``url`` is downloaded as UTF-8; the text of ``#artibody``
    (with the ``#left_hzh_ad`` node removed) becomes the article content.

    Returns:
        ``False`` when the page has no ``.main-title`` text, ``True``
        otherwise -- both when a new document was inserted and when a
        document for this URL was already stored.
    """
    page = PyQuery(url=url, encoding="utf-8")

    body = page('#artibody')
    # Drop the embedded '#left_hzh_ad' node (presumably an ad) before
    # extracting the text.
    body.remove('#left_hzh_ad')
    content = body.text()

    title = page('.main-title').text()
    if not title:
        # Without a title the page is not treated as an article.
        return False
    source = page('.source').text()

    news = Mongo().news
    already_stored = news.find_one({'spider_from': 'sina', 'url': url})
    if not already_stored:
        news.insert({
            'type': 'articles',
            'created_at': int(time.time()),
            'author': '',
            'spider_from': 'sina',
            'source': source,
            'source_id': -1,
            'title': title,
            'content': content,
            'url': url,
            'images': [],
            'keywords': [],
            'has_send': 0
        })
    return True
def get_data(token_name, url, api_url):
    """Fetch repository statistics and upsert them into the ``github`` collection.

    Args:
        token_name: Token the repository belongs to; part of the lookup key.
        url: Repository URL stored as ``github_url``; part of the lookup key.
        api_url: API endpoint that is queried for the statistics.

    Returns:
        ``None``. Nothing is stored when the response lacks ``forks_count``.
    """
    collection = Mongo().github

    # NOTE(review): the OAuth client id/secret are hard-coded and sent as
    # query parameters -- consider moving them to configuration.
    request_url = '{}?client_id={}&client_secret={}'.format(
        api_url,
        'dcc3734066251548c999',
        '89d90ad41f32b18d2ed689cb21875b75e88a2d82')
    payload = requests.get(request_url).json()
    if 'forks_count' not in payload:
        # TODO record error result
        return

    existing = collection.find_one({
        'token_name': token_name,
        'github_url': url
    })
    stats = {
        'token_name': token_name,
        'github_url': url,
        'star': payload['stargazers_count'],
        'fork': payload['forks_count'],
        'watch': payload['subscribers_count'],
        'spider_time': time.time(),
        'update_time': payload['updated_at'],
        'create_time': payload['created_at']
    }
    if not existing:
        collection.insert(stats)
        return
    # Refresh the stored document in place so its other fields are kept.
    existing.update(stats)
    collection.save(existing)
def get_erc_transaction():
    """Refresh the ``transaction`` value of tracked tokens listed on Etherscan.

    Walks the first three pages of the Etherscan token list (the original
    intent was "take the top 150"). For every row whose token symbol is in
    ``key_words`` the value returned by ``get_single_erc_transaction`` is
    stored under ``transaction`` in the ``token`` collection, updating the
    existing document for that ``token_name`` or inserting a new one.

    A failure while processing one token is reported by printing its
    contract address; the remaining tokens are still processed.
    """
    collection = Mongo().token
    # Take the top 150: request each listing page by number. Previously the
    # page counter was never put into the URL, so page 1 was fetched 3 times.
    for page in range(1, 4):
        list_page = PyQuery(url='https://etherscan.io/tokens?p={}'.format(page))
        for row in list_page('tbody')('tr').items():
            link = row('h5')('a')
            words = re.findall(r'\w+', link.text())
            link_target = link.attr('href')
            if not words or not link_target:
                # Row without a usable token link (e.g. header/filler row).
                continue
            # The last word of the link text is used as the token symbol.
            token_name = words[-1].lower()
            if token_name not in key_words:
                continue
            href = 'https://etherscan.io' + link_target
            contract_address = href.split('/')[-1]
            try:
                transaction = get_single_erc_transaction(contract_address)
                db_result = collection.find_one({'token_name': token_name})
                if db_result:
                    db_result.update({
                        'transaction': transaction
                    })
                    collection.save(db_result)
                else:
                    collection.insert({
                        'token_name': token_name,
                        'transaction': transaction
                    })
            except Exception:
                # Best effort per token; Exception (not a bare except) so
                # Ctrl-C / SystemExit still stop the crawl.
                print(contract_address)
def get_google_trend(key, token_id):
    """Fetch the Google Trends timeline for ``key`` and store it.

    Args:
        key: Search keyword; also stored as ``token_name`` on insert.
        token_id: Identifier used to look up / create the document in the
            ``google_trends`` collection.

    Returns:
        ``True`` after the document has been updated or inserted.

    Errors from the HTTP request, JSON decoding or a missing
    ``default.timelineData`` key propagate to the caller.
    """
    # socks.set_default_proxy(socks.SOCKS5, "127.0.0.1", 1086)
    # temp_socket = socket.socket
    # socket.socket = socks.socksocket
    token, search_time = get_google_token(key)
    headers = {
        'host': 'trends.google.com',
        # Header names were misspelled ('User_Agent', 'Referfer'), so the
        # intended User-Agent and Referer were never actually sent.
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36',
        'Referer': ('https://trends.google.com/trends/explore?q=' + key).encode('utf-8'),
        'x-client-data': 'CJa2yQEIo7bJAQjBtskBCKmdygEIqKPKAQ=='
    }
    request_url = 'https://trends.google.com/trends/api/widgetdata/multiline?hl=zh-CN&tz=-480&req=%7B%22time%22:%22{}%22,%22resolution%22:%22DAY%22,%22locale%22:%22zh-CN%22,%22comparisonItem%22:%5B%7B%22geo%22:%7B%7D,%22complexKeywordsRestriction%22:%7B%22keyword%22:%5B%7B%22type%22:%22BROAD%22,%22value%22:%22{}%22%7D%5D%7D%7D%5D,%22requestOptions%22:%7B%22property%22:%22%22,%22backend%22:%22IZG%22,%22category%22:0%7D%7D&token={}&tz=-480'.format(
        search_time, key, token)
    # The first five characters of the body are dropped before decoding
    # (presumably Google's non-JSON guard prefix -- TODO confirm).
    result = requests.get(request_url, headers=headers).text[5:]
    result = json.loads(result)
    data = result['default']['timelineData']
    # socket.socket = temp_socket

    collection = Mongo().google_trends
    db_result = collection.find_one({
        'token_id': token_id
    })
    if db_result:
        db_result.update({
            'trends': data
        })
        collection.save(db_result)
        return True
    collection.insert({
        'token_id': token_id,
        'token_name': key,
        'trends': data
    })
    return True
def send_single_token_github(token_id, token_name):
    """Push the stored GitHub stats of one token to the sync service.

    Looks up ``token_name`` in the ``github`` collection; when a document
    exists, its stats are POSTed to ``conf['sync']['host'] +
    conf['sync']['git_update']`` and the decoded JSON reply is printed.
    Nothing happens when no document is found.
    """
    record = Mongo().github.find_one({
        'token_name': token_name,
    })
    if not record:
        return

    payload = {
        "token_id": token_id,
        'url': record['github_url'],
        'star': record['star'],
        'fork': record['fork'],
        'user_count': record['watch'],
        # 'code_hot' is sent with the same value as 'star'.
        'code_hot': record['star']
    }
    endpoint = conf['sync']['host'] + conf['sync']['git_update']
    response = requests.post(endpoint, data=payload)
    print(response.json())
def get_btc_holders():
    """Store recent points of the blockchain.info wallet-users chart.

    Requests the ``my-wallet-n-users`` chart as JSON and, on HTTP 200, takes
    the slice ``[-5:-1]`` of its ``values`` (four points; the final point is
    excluded). Each point not yet present in the ``token_address``
    collection for ``token_name == 'btc'`` and that ``time`` is inserted,
    with ``x`` saved as ``time`` and ``y`` as ``address``.
    """
    collection = Mongo().token_address
    response = requests.get('https://api.blockchain.info/charts/my-wallet-n-users?format=json')
    if response.status_code != 200:
        return

    recent_points = response.json()['values'][-5:-1]
    for point in recent_points:
        known = collection.find_one({
            'token_name': 'btc',
            'time': point['x']
        })
        if known:
            # Already recorded for this timestamp.
            continue
        collection.insert({
            'token_name': 'btc',
            'time': point['x'],
            'address': point['y']
        })
def get_user_info(token_name, username, token_id):
    """Fetch a Twitter profile and upsert it into the ``twitter`` collection.

    The raw profile JSON returned for ``username`` is tagged with
    ``token_name``, ``user_name`` and ``token_id`` and then merged into the
    existing document for (``token_id``, ``user_name``) or inserted as a
    new one. A ``TweepError`` raised anywhere in the process is ignored.
    """
    try:
        collection = Mongo().twitter
        profile = api.get_user(screen_name=username)._json
        profile['token_name'] = token_name
        profile['user_name'] = username
        profile['token_id'] = token_id

        existing = collection.find_one({
            "token_id": token_id,
            "user_name": username
        })
        if not existing:
            collection.insert(profile)
        else:
            existing.update(profile)
            collection.save(existing)
    except TweepError:
        # Deliberately best-effort: API failures leave the stored data as is.
        pass
def sync_test_token_github():
    """Send stored GitHub stats of every test token to the test environment.

    For each token from ``get_test_tokens()`` the ``github`` collection is
    queried by the lower-cased ``ticker``. Tokens with a stored document get
    their stats printed and POSTed to the hard-coded test host combined with
    ``conf['sync']['git_update']``; the decoded JSON reply is printed.
    """
    tokens = get_test_tokens()
    collection = Mongo().github
    for token in tokens:
        record = collection.find_one({
            'token_name': token['ticker'].lower(),
        })
        if not record:
            continue

        payload = {
            "token_id": token['token_id'],
            'url': record['github_url'],
            'star': record['star'],
            'fork': record['fork'],
            'user_count': record['watch'],
            # 'code_hot' is sent with the same value as 'star'.
            'code_hot': record['star']
        }
        print('send test environment github')
        print(payload)
        endpoint = 'http://47.52.103.240:18189' + conf['sync']['git_update']
        response = requests.post(endpoint, data=payload)
        print(response.json())
def send_test_token_info():
    """Send stored transaction/holder figures of test tokens to the test host.

    For each token from ``get_test_tokens()`` the ``token`` collection is
    queried by the lower-cased ``ticker``; tokens without a document are
    skipped. Missing fields are sent as ``0``.

    Sending is best effort: a failure for one token is printed and the loop
    continues with the next token.
    """
    collection = Mongo().token
    tokens = get_test_tokens()
    for token in tokens:
        token_name = token['ticker'].lower()
        db_result = collection.find_one({
            'token_name': token_name
        })
        if not db_result:
            continue
        data = {
            'token_id': token['token_id'],
            'transaction': db_result.get('transaction', 0),
            'holders': db_result.get('address', 0),
            'holders_increase': db_result.get('address_increase', 0)
        }
        try:
            requests.post('http://47.52.103.240:18189' + conf['sync']['token_info'], data)
        except Exception as exc:
            # Keep going on failure, but do not hide it; catching Exception
            # (not a bare except) lets Ctrl-C / SystemExit stop the run.
            print('send test token info failed: {} {!r}'.format(token_name, exc))
def get_eth_holders():
    """Store recent rows of the Etherscan address chart CSV.

    Downloads the chart as CSV and, on HTTP 200, splits the body on
    ``'\\r\\n'`` and keeps the slice ``[-5:-1]`` (this drops whatever
    follows the last line break). For each kept row, double quotes are
    removed and columns 1 and 2 are parsed as integers -- presumably a
    timestamp and an address count. Rows whose ``time`` is not yet stored
    for ``token_name == 'eth'`` in ``token_address`` are inserted.
    """
    collection = Mongo().token_address
    response = requests.get('https://etherscan.io/chart/address?output=csv')
    if response.status_code != 200:
        return

    recent_rows = response.text.split('\r\n')[-5:-1]
    for row in recent_rows:
        fields = row.replace('"', '').split(',')
        address_time = int(fields[1])
        address = int(fields[2])

        known = collection.find_one({
            'token_name': 'eth',
            'time': address_time
        })
        if known:
            # Already recorded for this timestamp.
            continue
        collection.insert({
            'token_name': 'eth',
            'time': address_time,
            'address': address
        })
def get_transaction():
    """Scrape transaction figures from blocktivity.info, then from Etherscan.

    For every ``.font_size_row`` row the lower-cased link text of the third
    cell is used as ``token_name`` and the digits of the fourth cell as the
    ``transaction`` value, which is upserted into the ``token`` collection.
    Rows whose transaction cell contains no digits are skipped.

    Finally ``get_erc_transaction()`` is called to refresh ERC tokens.
    """
    collection = Mongo().token
    dom = PyQuery(url='http://www.blocktivity.info/')
    for row in dom('.font_size_row').items():
        cells = row('td')
        token_name = cells.eq(2)('a').text().lower()
        # Keep only the digits (drops thousands separators and the like).
        digits = ''.join(filter(str.isdigit, cells.eq(3).text()))
        if not digits:
            # No numeric value in this row; previously int('') raised
            # ValueError here and aborted the whole scrape.
            continue
        transaction = int(digits)

        db_result = collection.find_one({'token_name': token_name})
        if db_result:
            db_result.update({
                'transaction': transaction
            })
            collection.save(db_result)
        else:
            collection.insert({
                'token_name': token_name,
                'transaction': transaction
            })
    get_erc_transaction()
def statistic_tokens_address():
    """Recompute and store address statistics for every known token.

    For each token from ``get_tokens()`` the lower-cased ``ticker`` is passed
    to ``statistic_token_address``, which yields ``(code, address,
    increase)``. A falsy ``code`` resets both figures to ``0``. The figures
    are written to ``address`` / ``address_increase`` of the token's
    document in the ``token`` collection, creating it when absent.
    """
    collection = Mongo().token
    for token in get_tokens():
        token_name = token['ticker'].lower()
        ok, address, increase = statistic_token_address(token_name)
        if not ok:
            address, increase = 0, 0

        existing = collection.find_one({'token_name': token_name})
        if not existing:
            collection.insert({
                'token_name': token_name,
                'address': address,
                'address_increase': increase
            })
            continue
        existing['address'] = address
        existing['address_increase'] = increase
        collection.save(existing)