def import_price_data(self, ticker_list=None):
    """Load daily price rows for the given tickers from the database.

    Falls back to this instance's sector constituents when no explicit
    ticker list is supplied, parses the date column, and conforms the
    frame to a weekday standard (no date holes).

    :param ticker_list: optional list of yh_id tickers to restrict the query
    :return: DataFrame with date_, yh_id, price columns
    """
    tickers = ticker_list if ticker_list else self.__import_sector_constituents()
    ticker_sql = Misc().list_to_sql(tickers)
    db = Connect('homedev')
    condition = (
        f"where yh_id in ({ticker_sql}) and date_ >= '{self.data_start_date}'"
        f" and date_ <='{self.end_date}'"
    )
    df = db.import_from_db(table='d_price', columns='*', condition=condition)
    # parse the textual date_ column into real dates before conforming
    df = Dates().str_to_date(df, 'date_')
    return self.__conform_price_data(df)
def __init__(self):
    """Wire up the crawler manager, the DB connection, URL routes and
    template/static settings, then initialize the Tornado application."""
    self.mager = Mager()
    self.mager.init()
    self.conn = Connect()
    # NOTE(review): credentials and host are hard-coded — consider config.
    self.conn.connect('root', 'root', '172.17.0.4', 'ChinaNet')
    handlers = [
        (r"/get_result", ResultHandler, {'mager': self.mager}),
        (r"/nlp", NlpHandler),
        (r"/nlp_process", NlpProcessHandler, {'mager': self.mager}),
        (r"/", IndexHandler),
        (r"/index", IndexHandler),
        (r"/table", TableHandler),
        (r"/readlist", ReadListHandler, {'conn': self.conn}),
        (r"/readtable", ReadTableHandler, {'conn': self.conn}),
        (r"/getitem", GetItemHandler, {'conn': self.conn}),
        (r"/additem", AddItemHandler, {'conn': self.conn}),
        (r"/delitem", DelItemHandler, {'conn': self.conn}),
    ]
    here = os.path.dirname(__file__)
    settings = dict(
        template_path=os.path.join(here, "templates"),
        static_path=os.path.join(here, "static"),
        debug=True,
    )
    tornado.web.Application.__init__(self, handlers, **settings)
def start(**kwargs):
    """Run the full search pipeline for one SQL extraction.

    Pulls search arguments with the SQL passed as ``kwargs['sql']``,
    fans the searches and the HTML parsing out over worker pools, then
    writes the parsed rows into ``OUTPUT_TABLE``.

    :param kwargs: must contain a non-empty ``sql`` extraction query
    :raises ValueError: if no ``sql`` keyword is supplied
    """
    t = time.time()
    sql = kwargs.get('sql')
    if not sql:
        # ValueError is a subclass of Exception, so callers that caught
        # the previous bare Exception still work.
        raise ValueError('必须传入一个sql参数')
    args = extract(sql=sql)  # pull search arguments from the source DB
    print('-----start searching-----')
    html_list = multi_search(args_list=args, processes=2)  # fetch result pages
    print('-----start parsing-----')
    result = multi_parse(html_list, processes=4)  # parse HTML in parallel
    db = Connect('ORACLE')
    result = db.add_index(result)  # assign row ids before insert
    print('-----writing to database-----')
    db.write_sql(result, OUTPUT_TABLE)
    print('Finished:', time.time() - t)
def test_delete(self):
    """Deleting a random subset of users keeps the stored ranking
    consistent with a locally maintained mirror."""
    db = Connect().get_db()
    oid = db['blocks'].insert_one({}).inserted_id
    block = Block(oid, db)
    total = 100
    user_ids = []
    user_points = []
    for k in range(total):
        user_ids.append(k)
        user_points.append((k ** 2) % 23)
        block.add_user(str(user_ids[k]), int(user_points[k]))
    user_ids = [str(u) for u in user_ids]
    # randomly remove ~20% of users, mirroring each removal locally
    for k in range(total):
        if len(user_ids) <= k:
            break
        if np.random.uniform() < 0.2:
            victim = user_ids[k]
            user_ids.pop(k)
            user_points.pop(k)
            block.delete_user(victim)
    ranking = sorted(zip(user_points, user_ids), reverse=True)
    exp_points, exp_ids = zip(*ranking)
    expected = (list(exp_ids), list(exp_points))
    stored = block.get_users()
    actual = ([u['id'] for u in stored], [u['points'] for u in stored])
    self.assertEqual(expected, actual)
def test_add_user(self):
    """Users added with deterministic points come back ranked by points
    (descending), ties broken by id string comparison."""
    db = Connect().get_db()
    oid = db['blocks'].insert_one({}).inserted_id
    block = Block(oid, db)
    count = 100
    raw_ids = list(range(count))
    raw_points = [(k ** 2) % 23 for k in raw_ids]
    for k in range(count):
        block.add_user(str(raw_ids[k]), int(raw_points[k]))
    str_ids = [str(u) for u in raw_ids]
    ranking = sorted(zip(raw_points, str_ids), reverse=True)
    exp_points, exp_ids = zip(*ranking)
    expected = (list(exp_ids), list(exp_points))
    stored = block.get_users()
    actual = ([u['id'] for u in stored], [u['points'] for u in stored])
    self.assertEqual(expected, actual)
def __import_sector_constituents(self):
    """Fetch S&P 500 constituent tickers for this instance's sector.

    'ALL' selects every constituent; any other value filters on the
    sector column.

    :return: list of yh_id ticker strings
    """
    db = Connect('homedev')
    if self.sector == 'ALL':
        frame = db.import_from_db(table='sp500_constituents', columns='*')
    else:
        frame = db.import_from_db(
            table='sp500_constituents',
            columns='*',
            condition=f"where sector = '{self.sector}'",
        )
    return frame['yh_id'].to_list()
def test_combine(self):
    """Combining several blocks into the first one merges every user
    into a single, correctly ordered ranking."""
    db = Connect().get_db()
    n_blocks = 7
    block_oids = [db['blocks'].insert_one({}).inserted_id for _ in range(n_blocks)]
    all_ids = []
    all_points = []
    for oid in block_oids:
        n_each = np.random.randint(10, 15)
        ids = np.random.randint(low=0, high=10000, size=n_each)
        points = np.random.randint(low=0, high=10000, size=n_each)
        blk = Block(oid, db)
        for k in range(n_each):
            blk.add_user(str(ids[k]), int(points[k]))
        all_ids += list(ids)
        all_points += list(points)
    all_ids = [str(u) for u in all_ids]
    ranking = sorted(zip(all_points, all_ids), reverse=True)
    points, ids = (list(t) for t in zip(*ranking))
    target = Block(block_oids[0], db)
    # fold every other block into the first one
    for oid in block_oids[1:]:
        Block(oid, db).combine(block_oids[0])
    expected = (ids, points)
    stored = target.get_users()
    actual = ([u['id'] for u in stored], [u['points'] for u in stored])
    self.assertEqual(expected, actual)
def test_divide(self):
    """Dividing a block yields chunks of at most block_size users,
    preserving the global rank order across the produced blocks."""
    db = Connect().get_db()
    oid = db['blocks'].insert_one({}).inserted_id
    block = Block(oid, db)
    total = 53
    chunk = 13
    ids = np.random.randint(low=0, high=10000, size=total)
    points = np.random.randint(low=0, high=10000, size=total)
    for k in range(total):
        block.add_user(str(ids[k]), int(points[k]))
    # the original block keeps the first chunk; divide() returns the rest
    produced = [oid] + block.divide(chunk)
    ranking = sorted(zip(points, map(str, ids)), reverse=True)
    points, ids = (list(t) for t in zip(*ranking))
    expected = []
    actual = []
    for k in range(total // chunk + 1):
        lo = k * chunk
        hi = min((k + 1) * chunk, total)
        expected.append((ids[lo:hi], points[lo:hi]))
        stored = Block(produced[k], db).get_users()
        actual.append(([u['id'] for u in stored], [u['points'] for u in stored]))
    self.assertEqual(expected, actual)
def extract_args(sql):
    """Run *sql* against the Oracle source and return the resulting rows.

    The engine is now closed in a ``finally`` block so the connection is
    not leaked when ``read_sql`` raises (previously it was only closed on
    the success path).

    :param sql: query string to execute
    :return: whatever ``Connect.read_sql`` yields for the query
    """
    db = Connect('oracle')
    try:
        return db.read_sql(sql=sql)
    finally:
        # always release the connection, even on query failure
        db.engine.close()
def delete_all_db():
    """Drop every database on the client except the system ones
    ('admin' and 'local')."""
    client = Connect()
    system_dbs = ('admin', 'local')
    for name in client.get_db_names():
        if name in system_dbs:
            continue
        client.client.drop_database(name)
import pandas as pd
import json
import os
import time
from database import Connect

connection = Connect.get_connection()
db = connection.webvision


def import_content(filepath, symbol):
    """Replace the ``symbol`` collection with the rows of a CSV file.

    Reads the CSV (path resolved relative to this module), converts it
    to JSON records, drops the existing collection and bulk-inserts the
    fresh rows. Errors are reported to stdout instead of raised
    (best-effort import).

    :param filepath: CSV path relative to this module's directory
    :param symbol: name of the target collection
    """
    start_time = time.time()
    target = db[symbol]
    base_dir = os.path.dirname(__file__)
    csv_path = os.path.join(base_dir, filepath)
    try:
        print('yes it is called')
        frame = pd.read_csv(csv_path)
        records = json.loads(frame.to_json(orient='records'))
        db[symbol].drop()  # wipe the old contents before re-import
        target.insert_many(records)
        print("---------%s seconds" % (time.time() - start_time))
    except Exception as e:
        print(f'error: {e}')


# Example usage:
# filepath = 'csv/BTCUSDT-1m-data.csv'
# import_content(filepath, filepath.split('/')[1].split('-')[0])
# Flask entry point for the leaderboard service.
# NOTE(review): this chunk is truncated — the body of
# get_country_leaderboard continues beyond the visible span.
from flask import Flask
from flask import request
from database import Connect
from Block import Block
from bson import ObjectId
import pdb
import time
from flask import jsonify
from flask import Response
import json
import uuid

app = Flask(__name__)
database = Connect()  # shared database connection wrapper (project-local)
all_countries = 'all_countries'  # db name backing the global leaderboard
EPS = 1e-6
# presumably size presets for leaderboard blocks — TODO confirm with Block
sizes = {
    'small': 1000,
    'mid': 3000,
    'large': 10000
}


@app.route('/leaderboard', methods=['GET'])
def get_leaderboard():
    # Global leaderboard: delegates to the module-level __leaderboard
    # helper (defined elsewhere in this file) on the all-countries db.
    return __leaderboard(database.get_db(all_countries))


@app.route('/leaderboard/<string:country>', methods=['GET'])
def get_country_leaderboard(country):
# One-shot import script: reads billing/participant rows from the PTT
# MariaDB and prepares a Jinja2 template for generating output.
# NOTE(review): this chunk appears truncated — `f`, `p_dict`,
# `participante_number` and template `c` are prepared but not yet used
# in the visible span, and the fetchall loop body may continue beyond it.
import os
from datetime import datetime
from jinja2 import Environment, FileSystemLoader
from database import Connect, Query

template_search_dir = os.getcwd()
output_file_dir = os.getcwd() + "/output/"
# '%s' (epoch seconds) is a platform-dependent strftime directive
timestamp = datetime.utcnow().strftime('%s')
file_output_name = "importa_participante_" + timestamp
f = open(output_file_dir + file_output_name, 'a')  # output file, append mode
con = Connect(user='******', password='******', hostname='200.160.11.109', db='ptt')
cursor = con.mariadb().cursor()
q_cobranca = Query(['*'])
cursor.execute(q_cobranca.ptt_info_cobranca())
p_dict = dict()
participante_number = 0
template_env = Environment(loader=FileSystemLoader(template_search_dir))
c = template_env.get_template('template_insere_as')
for p in cursor.fetchall():
    # one extra cursor per row: fetch the participant's IX records
    # (p[0] is presumably the participant id — TODO confirm)
    c_participante = con.mariadb().cursor()
    c_participante.execute(q_cobranca.ptt_participante_ix(p[0]))
    ix_query_res = c_participante.fetchall()
    c_participante.close()