class Okex_data(object):
    """Fetch the OKEx ticker snapshot and store it in ``okex_tickers``."""

    def __init__(self):
        """Open the MySQL connection and set the API base URL and headers."""
        self.db = Mysqldb(config.MySqlHost, config.MySqlUser,
                          config.MySqlPasswd, config.MySqlDb,
                          config.MySqlPort)
        self.base_url = 'https://www.okex.com/api/v1/'
        self.headers = {
            "Content-type": "application/x-www-form-urlencoded",
        }

    @staticmethod
    def _ticker_insert_sql(data_time, ticker):
        """Return the INSERT statement for one entry of the ``tickers`` list.

        ``ticker`` must provide the keys ``symbol``, ``high``, ``low``,
        ``last``, ``sell``, ``buy`` and ``vol``.
        """
        # NOTE(review): values from the remote API are interpolated straight
        # into SQL. If Mysqldb can take bound parameters, switch to them.
        numbers = ",".join(
            str(ticker[key])
            for key in ('high', 'low', 'last', 'sell', 'buy', 'vol'))
        return ("INSERT INTO okex_tickers (date_time, currency_pair, high, low, last, sell, buy, vol)"
                "VALUES (" + str(data_time) + ",'" + str(ticker['symbol'])
                + "'," + numbers + ");")

    def tickers(self):
        """Download ``tickers.do`` and insert one row per returned ticker.

        A failure of the batch insert is reported on stdout and swallowed,
        so a scheduled caller keeps running.
        """
        request_url = self.base_url + 'tickers.do'
        res_json = common_fun.get_url_json(request_url, self.headers)
        data_time = res_json['date']
        insert_list = [
            self._ticker_insert_sql(data_time, ticker)
            for ticker in res_json['tickers']
        ]
        try:
            self.db.execute_list(insert_list)
        except Exception:
            # Show the last statement as a sample. The guard matters: with an
            # empty ticker list there is no statement to show.
            if insert_list:
                print(insert_list[-1])
            print('insert_list tickers err data_time = ', data_time)
class Bittrex(object):
    """Fetch Bittrex market summaries and store them in ``bittrex_tickers``."""

    def __init__(self):
        """Set the public API base URL and open the MySQL connection."""
        self.base_url = 'https://bittrex.com/api/v1.1/public/'
        self.db = Mysqldb(config.MySqlHost, config.MySqlUser,
                          config.MySqlPasswd, config.MySqlDb,
                          config.MySqlPort)

    @staticmethod
    def _sql_value(value):
        """Render a numeric field; ``None`` (JSON null) becomes SQL NULL."""
        return 'NULL' if value is None else str(value)

    @classmethod
    def _ticker_insert_sql(cls, data_time, ticker):
        """Return the INSERT statement for one entry of the ``result`` list.

        ``MarketName`` has the form ``A-B`` and is stored reversed as
        ``B_A``.
        """
        market_name = ticker['MarketName'].split('-')
        token_name = market_name[1] + '_' + market_name[0]
        # NOTE(review): values from the remote API are interpolated straight
        # into SQL. If Mysqldb can take bound parameters, switch to them.
        numbers = ",".join(
            cls._sql_value(ticker[key])
            for key in ('High', 'Low', 'Last', 'Ask', 'Bid', 'Volume'))
        return ("INSERT INTO bittrex_tickers (date_time, currency_pair, high, low, last, sell, buy, vol)"
                "VALUES (" + str(data_time) + ",'" + token_name + "',"
                + numbers + ");")

    def getmarketsummaries(self):
        """Download ``getmarketsummaries`` and insert one row per market.

        Rows are stamped with the local fetch time. A failure of the batch
        insert is reported on stdout and swallowed.
        """
        request_url = self.base_url + 'getmarketsummaries'
        res_json = common_fun.get_url_json(request_url)
        data_time = int(time.time())
        insert_list = [
            self._ticker_insert_sql(data_time, ticker)
            for ticker in res_json['result']
        ]
        try:
            self.db.execute_list(insert_list)
        except Exception:
            # Show the last statement as a sample. The guard matters: with an
            # empty result list there is no statement to show.
            if insert_list:
                print(insert_list[-1])
            print('insert_list tickers err data_time = ', data_time)
class Bian_data(object):
    """Fetch Binance 24h tickers and store them in ``bian_tickers``."""

    def __init__(self):
        """Open the MySQL connection and set the URL, quote assets and headers."""
        self.db = Mysqldb(config.MySqlHost, config.MySqlUser,
                          config.MySqlPasswd, config.MySqlDb,
                          config.MySqlPort)
        self.base_url = 'https://api.binance.com'
        # Quote assets used to split a concatenated symbol into BASE_QUOTE.
        self.quot_asset = ['BTC', 'ETH', 'USDT', 'BNB']
        self.headers = {
            "Content-type": "application/x-www-form-urlencoded",
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36'
        }

    def _currency_pair(self, symbol):
        """Split ``symbol`` into ``BASE_QUOTE`` using ``self.quot_asset``.

        Returns ``''`` when the symbol ends with none of the known quote
        assets.
        """
        for quot in self.quot_asset:
            if symbol.endswith(quot):
                # Split at the suffix only. str.replace would rewrite every
                # occurrence, turning 'BTCBBTC' into '_BTCB_BTC'.
                return symbol[:len(symbol) - len(quot)] + '_' + quot
        return ''

    def _ticker_insert_sql(self, data_time, ticker):
        """Return the INSERT statement for one element of the 24hr response."""
        # NOTE(review): values from the remote API are interpolated straight
        # into SQL. If Mysqldb can take bound parameters, switch to them.
        numbers = ",".join(
            str(ticker[key])
            for key in ('highPrice', 'lowPrice', 'lastPrice', 'askPrice',
                        'bidPrice', 'volume'))
        return ("INSERT INTO bian_tickers (date_time, currency_pair, high, low, last, sell, buy, vol)"
                "VALUES (" + str(data_time) + ",'"
                + self._currency_pair(ticker['symbol']) + "'," + numbers
                + ");")

    def tickers(self):
        """Download ``/api/v1/ticker/24hr`` and insert one row per symbol.

        Rows are stamped with the local fetch time. A failure of the batch
        insert is reported on stdout and swallowed.
        """
        request_url = self.base_url + '/api/v1/ticker/24hr'
        res_json = common_fun.get_url_json(request_url, self.headers)
        data_time = int(time.time())
        insert_list = [
            self._ticker_insert_sql(data_time, currencys)
            for currencys in res_json
        ]
        try:
            self.db.execute_list(insert_list)
        except Exception:
            # Show the last statement as a sample. The guard matters: with an
            # empty response there is no statement to show.
            if insert_list:
                print(insert_list[-1])
            print('insert_list tickers err data_time = ', data_time)
class HistorWeather():
    """Download historical weather data per county and store it in MySQL.

    Call ``city_index`` first to fill ``county_index``, then
    ``get_weather_data`` to fetch, parse and insert the monthly data files.
    """

    def __init__(self):
        """Open the MySQL connection and create the empty working state."""
        # Maps the third field of a county entry to the entry's first field
        # (quotes stripped); the mapped value is used in the data URLs.
        self.county_index = {}
        self.db = Mysqldb(config.MySqlHost, config.MySqlUser,
                          config.MySqlPasswd, config.MySqlDb,
                          config.MySqlPort)
        # INSERT statements waiting to be flushed by updata_to_mysql().
        self.insert_list = []

    def city_index(self):
        """Build ``county_index`` from the site's city selection script.

        The resulting keys and their count are also dumped to ``citys.txt``.
        """
        url = 'http://tianqi.2345.com/js/citySelectData.js'
        res_str = common_fun.get_url_text(url, 'error.log')
        new_str = res_str.split('var provqx=new Array();')[1]
        # Strip the "provqx[N]=" assignment prefixes and the array brackets.
        for i in range(10, 44):
            new_str = new_str.replace('provqx[' + str(i) + ']=', '')
        new_str = new_str.replace('\n', '').replace('[\'', '').replace(
            '\']', '')
        for province in new_str.split('\r'):
            for citys in province.split(','):
                for county in citys.split('|'):
                    county_str_list = re.split('[- ]', county)
                    try:
                        first = county_str_list[0].replace('\'', '')
                        # Keep an entry only when its first and fourth
                        # fields agree.
                        if first == county_str_list[3].replace('\'', ''):
                            self.county_index[county_str_list[2]] = first
                    except IndexError:
                        # Entry has fewer than four fields; report and skip.
                        print(county_str_list)
        with open('citys.txt', 'w', encoding='utf-8') as f:
            f.write(str(self.county_index.keys()))
            f.write(str(len(self.county_index.keys())))

    def make_weather_url(self, key, year, month):
        """Return the URL of the monthly data file for one county.

        Months before March 2016 use a flat layout with an unpadded month.
        Later months live in a ``YYYYMM/`` directory, with the month
        zero-padded below 10.

        Raises:
            KeyError: if ``key`` is not in ``county_index``.
        """
        county = self.county_index[key]
        if year < 2016 or (year == 2016 and month < 3):
            weather_date = str(year) + str(month)
            return config.weather_js_base + county + '_' + weather_date + '.js'
        weather_date = str(year) + ('0' if month < 10 else '') + str(month)
        return (config.weather_js_base + weather_date + '/' + county + '_'
                + weather_date + '.js')

    def analyze_data(self, res_text):
        """Parse one monthly payload and queue an INSERT per day.

        ``res_text`` is expected to look like ``var weather_str={...};``
        with unquoted keys. Input that cannot be parsed is printed and
        ignored. Statements are appended to ``self.insert_list``.
        """
        try:
            res_text = res_text.replace('var weather_str=', '').replace(
                ',{}', '')
            res_text = res_text[0:-1]
            # Quote the bare keys so the text becomes a Python dict literal.
            res_dic = re.sub(r'(?!={|, )(\w*):', r'"\1":', res_text)
            # NOTE(review): eval() runs text downloaded from a remote
            # server. Consider ast.literal_eval once it is confirmed that
            # the payload is always a plain literal.
            res_dic = eval(res_dic)
        except Exception:
            print(res_text)
            # Nothing was parsed, so there is nothing to queue.
            return
        try:
            for tianqi in res_dic['tqInfo']:
                insert_str = "INSERT INTO weather_data (city_name, max_temperature, min_temperature, weather, wind_direction, wind_speed, date)"
                insert_str += "VALUES ('" + res_dic['city'] + "','" + str(
                    tianqi['bWendu']) + "','" + str(
                        tianqi['yWendu']) + "', '" + tianqi[
                            'tianqi'] + "', '" + tianqi[
                                'fengxiang'] + "','" + tianqi[
                                    'fengli'] + "','" + tianqi['ymd'] + "')"
                self.insert_list.append(insert_str)
        except Exception:
            print('dic err', res_text)

    def updata_to_mysql(self):
        """Flush the queued INSERT statements to MySQL.

        On failure the statements are appended to ``insert.txt`` for manual
        replay. The queue is emptied in both cases, so a failed batch is
        not re-sent and re-dumped by every later call.
        """
        if not self.insert_list:
            return
        try:
            self.db.execute_list(self.insert_list)
        except Exception:
            print('insert err, len(insert_list) = ', len(self.insert_list))
            with open('insert.txt', 'a+', encoding='utf-8') as f:
                for line in self.insert_list:
                    f.write(line)
                    f.write('\n')
        self.insert_list.clear()

    def get_weather_data(self):
        """Fetch, parse and store every county's data for 2011 through 2018.

        URLs that come back as 503 are collected and retried once after the
        main pass.
        """
        recordDate = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        print('start_time', recordDate)
        retry_url = []
        for key in self.county_index:
            for year in range(2011, 2019):
                time.sleep(1)
                for month in range(1, 13):
                    url = self.make_weather_url(key, year, month)
                    if url == '':
                        continue
                    res_text = common_fun.get_url_text(url, 'error.log')
                    if res_text == '':
                        continue
                    if res_text == 503:
                        retry_url.append({'key': key, 'url': url})
                        continue
                    self.analyze_data(res_text)
                self.updata_to_mysql()
        for retry_data in retry_url:
            print(retry_data)
            res_text = common_fun.get_url_text(retry_data['url'], 'error.log')
            # A second 503 is not a payload; give up on this URL.
            if res_text == '' or res_text == 503:
                continue
            self.analyze_data(res_text)
            self.updata_to_mysql()
        recordDate = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        print('end_time', recordDate)
class Erc20Data():
    """Scrape ERC-20 token statistics with headless Firefox into MySQL.

    The network is unreliable and fetches are often interrupted, so the
    work is split into resumable steps. In principle the data is refreshed
    once a day, with the script started at midnight:

    1. ``erc20_data_key`` creates one ``erc20_data`` row per token,
       identified by token id and timestamp and marked as not yet fetched.
    2. ``get_erc20_data`` and ``get_top100_hold`` repeatedly select the
       rows that are still unfetched and fill them in.
    """

    def __init__(self):
        """Open the MySQL connection."""
        self.db = Mysqldb(config.MySqlHost, config.MySqlUser,
                          config.MySqlPasswd, config.MySqlDb,
                          config.MySqlPort)

    def erc20_data_key(self):
        """Insert a pending ``erc20_data`` row for every token with a contract.

        All rows of one run share the same ``get_data_time``. A failed
        insert is reported on stdout and the remaining tokens are still
        processed.
        """
        recordDate = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        print('erc20_data_key', recordDate)
        timestamps = int(time.time())
        sel_str = "SELECT id, erc20_contract from token_base WHERE erc20_contract <> ''"
        res_sel = self.db.select(sel_str)
        for token_bace in res_sel:
            insert_str = "INSERT INTO erc20_data (token_id, get_data_time, top100_detail)"
            insert_str += "VALUES (" + str(
                token_bace[0]) + "," + str(timestamps) + "," + '0' + ")"
            try:
                self.db.insert(insert_str)
            except Exception:
                print(insert_str)
                print('INSERT err internet_data, token_id = ', token_bace[0])

    def open_driver(self):
        """Start a headless Firefox driver with a 20 second page-load timeout."""
        options = webdriver.FirefoxOptions()
        options.add_argument('--headless')
        driver = webdriver.Firefox(options=options)
        driver.set_page_load_timeout(20)
        driver.maximize_window()
        return driver

    @staticmethod
    def _close_driver(driver):
        """Close the browser window and stop the driver service."""
        try:
            driver.close()
        finally:
            driver.service.stop()

    def get_erc20_data(self):
        """Fill in holder and transaction counts for all pending rows.

        Loops until no row has ``token_hold_num`` or ``token_tx_num`` equal
        to -1, opening a fresh browser for every pass.
        """
        recordDate = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        print('get_erc20_data', recordDate)
        sel_str = "SELECT a.token_id, a.get_data_time, b.erc20_contract from erc20_data as a, token_base as b WHERE (a.token_hold_num = -1 or a.token_tx_num = -1)AND a.token_id = b.id"
        while True:
            res_sel = self.db.select(sel_str)
            print(len(res_sel))
            if len(res_sel) <= 0:
                break
            driver = self.open_driver()
            try:
                for token_bace in res_sel:
                    self._update_token_counts(driver, token_bace)
            finally:
                # Shut the browser down even if a pass is aborted, so
                # headless Firefox processes do not pile up.
                self._close_driver(driver)
        recordDate = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        print('get_erc20_data', recordDate)

    def _update_token_counts(self, driver, token_bace):
        """Scrape one token page and store its holder and transaction counts."""
        url = config.eth_token_url + token_bace[2]
        try:
            driver.get(url)
        except TimeoutException:
            # Use whatever has loaded so far.
            driver.execute_script('window.stop()')
        try:
            token_holders = driver.find_element_by_xpath(
                '//*[@id="ContentPlaceHolder1_divSummary"]/div[1]/table/tbody/tr[3]/td[2]'
            ).text
            # Keep only the first space-separated word of the cell text.
            token_holders = token_holders.strip().split(' ')[0]
            tx_num = driver.find_element_by_xpath(
                '//*[@id="totaltxns"]').text
        except Exception:
            print('xpath err', url)
            return
        # NOTE(review): scraped page text is interpolated straight into SQL.
        updata_str = "UPDATE erc20_data SET token_hold_num=" + token_holders + ", token_tx_num=" + str(
            tx_num)
        updata_str += " where token_id =" + str(
            token_bace[0]) + " and get_data_time=" + str(token_bace[1])
        try:
            self.db.update(updata_str)
        except Exception:
            print(updata_str)
            print('UPDATE err internet_data, token_id = ', token_bace[0])

    def get_top100_hold(self):
        """Store the top holders of every row with ``top100_detail = 0``.

        Works in batches of 30 rows, with a fresh browser per batch and a
        5 second pause between batches, until no pending row is left.
        """
        sel_str = "SELECT a.token_id, a.get_data_time, b.erc20_contract, a.token_hold_num from erc20_data as a, token_base as b WHERE top100_detail = 0 AND a.token_id = b.id LIMIT 30"
        while True:
            res_sel = self.db.select(sel_str)
            recordDate = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            print(len(res_sel), recordDate)
            if len(res_sel) <= 0:
                break
            driver = self.open_driver()
            try:
                for token_bace in res_sel:
                    self._store_top_holders(driver, token_bace)
            finally:
                # Shut the browser down even if the batch is aborted.
                self._close_driver(driver)
            time.sleep(5)
        recordDate = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        print('get_top100_hold', recordDate)

    @staticmethod
    def _holder_insert_statements(driver, token_bace, record_date):
        """Build INSERT statements for the holder rows currently displayed.

        Table rows whose cells cannot be read are skipped.
        """
        statements = []
        for element in driver.find_elements_by_xpath(
                '//*[@id="maintable"]/table/tbody/tr'):
            try:
                rank = element.find_element_by_xpath(
                    './td[1]').text.replace(',', '')
                address = element.find_element_by_xpath(
                    './td/span').text.replace(',', '')
                quantity = element.find_element_by_xpath(
                    './td[3]').text.replace(',', '')
                percentage = element.find_element_by_xpath(
                    './td[4]').text.replace(',', '').replace('%', '')
            except Exception:
                continue
            # NOTE(review): scraped page text is interpolated straight into
            # SQL.
            insert_str = "INSERT INTO hold_top_100 (token_id, get_data_time, rank, address, quantity, percentage, creat_at)"
            insert_str += "VALUES (" + str(token_bace[0]) + "," + str(
                token_bace[1]) + "," + str(rank) + ",'" + str(
                    address) + "'," + str(quantity) + "," + str(
                        percentage) + ",'" + record_date + "');"
            statements.append(insert_str)
        return statements

    def _store_top_holders(self, driver, token_bace):
        """Scrape up to two pages of holders for one token and store them.

        The row is marked ``top100_detail = 1`` only when exactly 100 rows,
        or exactly ``token_hold_num`` rows, were collected and inserted.
        """
        url = config.eth_token_url + token_bace[2]
        try:
            driver.get(url)
        except TimeoutException:
            # Use whatever has loaded so far.
            driver.execute_script('window.stop()')
        try:
            driver.find_element_by_xpath(
                '//*[@id="ContentPlaceHolder1_li_balances"]/a').click()
        except Exception:
            print('click err',
                  time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
            time.sleep(5)
            return
        try:
            driver.switch_to.alert.accept()
        except Exception:
            # Best effort: usually there is no alert to dismiss.
            pass
        driver.switch_to.frame('tokeholdersiframe')
        xpath_str = '//*[@id="maintable"]/table/tbody/tr[2]/td[1]'
        try:
            # Wait until the first data row shows rank 1.
            element_present = EC.text_to_be_present_in_element(
                (By.XPATH, xpath_str), '1')
            WebDriverWait(driver, 20, 1).until(element_present)
            recordDate = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            if len(
                    driver.find_elements_by_xpath(
                        '//*[@id="maintable"]/table/tbody/tr')) < 2:
                print('rank data len less')
                return
            insert_list = self._holder_insert_statements(
                driver, token_bace, recordDate)
            if token_bace[3] > 50:
                # More than 50 holders: go to the next page and wait until
                # its first data row shows rank 51.
                driver.find_element_by_xpath(
                    '//*[@id="PagingPanel"]/a[3]').click()
                element_present = EC.text_to_be_present_in_element(
                    (By.XPATH, xpath_str), '51')
                WebDriverWait(driver, 20, 1).until(element_present)
                insert_list.extend(
                    self._holder_insert_statements(driver, token_bace,
                                                   recordDate))
            if len(insert_list) == 100 or len(insert_list) == token_bace[3]:
                try:
                    self.db.execute_list(insert_list)
                except Exception:
                    print('INSERT err internet_data, >50 token_id = ',
                          token_bace[0])
                    return
                updata_str = "UPDATE erc20_data SET top100_detail=" + '1'
                updata_str += " where token_id =" + str(
                    token_bace[0]) + " and get_data_time=" + str(
                        token_bace[1])
                try:
                    self.db.update(updata_str)
                except Exception:
                    print(updata_str)
                    print('UPDATE err updata_str, token_id = ',
                          token_bace[0])
        except Exception:
            print('Timed out waiting for page to load. token_id:',
                  token_bace[0])