示例#1
0
    def import_price_data(self, ticker_list=None):
        '''
            Load rows from the 'd_price' table for a set of tickers and hand
            them to the price-conforming step (per the original notes:
            imputing and conforming to weekday-standard, no date holes).

            in:
                ticker_list: optional list of tickers; when it is empty or
                    None the sector constituents are looked up instead

            out:
            <DataFrame>
                date_, yh_id, price
        '''
        to_sql = Misc().list_to_sql
        tickers = ticker_list if ticker_list else self.__import_sector_constituents()
        ticker_sql = to_sql(tickers)

        db = Connect('homedev')
        # restrict to the requested tickers and the configured date window
        where_clause = (
            f"where yh_id in ({ticker_sql}) and date_ >= '{self.data_start_date}' and date_ <='{self.end_date}'"
        )
        raw_prices = db.import_from_db(table='d_price',
                                       columns='*',
                                       condition=where_clause)

        # format date_ col
        dated_prices = Dates().str_to_date(raw_prices, 'date_')

        return self.__conform_price_data(dated_prices)
示例#2
0
 def __init__(self):
     """Create the Mager and database connection, then register all routes.

     Handlers that need the Mager or the connection receive it through
     their initialisation dict; the remaining handlers take no arguments.
     """
     self.mager = Mager()
     self.mager.init()
     self.conn = Connect()
     self.conn.connect('root', 'root', '172.17.0.4', 'ChinaNet')

     # Each route gets its own kwargs dict, as in the original layout.
     handlers = [
         (r"/get_result", ResultHandler, {'mager': self.mager}),
         (r"/nlp", NlpHandler),
         (r"/nlp_process", NlpProcessHandler, {'mager': self.mager}),
         (r"/", IndexHandler),
         (r"/index", IndexHandler),
         (r"/table", TableHandler),
         (r"/readlist", ReadListHandler, {'conn': self.conn}),
         (r"/readtable", ReadTableHandler, {'conn': self.conn}),
         (r"/getitem", GetItemHandler, {'conn': self.conn}),
         (r"/additem", AddItemHandler, {'conn': self.conn}),
         (r"/delitem", DelItemHandler, {'conn': self.conn}),
     ]

     # Templates and static files live next to this module.
     settings = {
         'template_path': os.path.join(os.path.dirname(__file__), "templates"),
         'static_path': os.path.join(os.path.dirname(__file__), "static"),
         'debug': True,
     }
     tornado.web.Application.__init__(self, handlers, **settings)
示例#3
0
文件: run.py 项目: H2OSIR/spider
def start(**kwargs):
    """Run the whole pipeline: extract arguments, search, parse, store.

    Keyword Args:
        sql: query string passed to ``extract`` to obtain the list of search
            arguments. Required and must be non-empty.

    Raises:
        ValueError: if ``sql`` is missing or empty. ``ValueError`` is a
            subclass of ``Exception``, so callers catching ``Exception``
            are unaffected.
    """
    t = time.time()
    sql = kwargs.get('sql')
    if not sql:
        # Message text: "a sql argument must be passed in".
        raise ValueError('必须传入一个sql参数')

    args = extract(sql=sql)  # extract the search arguments

    print('-----start searching-----')
    html_list = multi_search(args_list=args, processes=2)  # run the searches

    print('-----start parsing-----')
    result = multi_parse(html_list, processes=4)  # parse the html with 4 workers

    db = Connect('ORACLE')
    result = db.add_index(result)

    print('-----writing to database-----')
    db.write_sql(result, OUTPUT_TABLE)  # insert into the database
    print('Finished:', time.time() - t)
示例#4
0
    def test_delete(self):
        """Deleting users must leave get_users() in descending (points, id) order.

        Adds 100 users, removes a random subset, then compares the block's
        contents with a ranking computed locally from the surviving entries.
        """
        db = Connect().get_db()
        block_id = db['blocks'].insert_one({}).inserted_id
        block = Block(block_id, db)

        n = 100
        ids = [str(i) for i in range(n)]
        points = [(i ** 2) % 23 for i in range(n)]
        for uid, score in zip(ids, points):
            block.add_user(uid, int(score))

        # Walk the indices while the lists shrink; stop once the index runs
        # past the end. Each visited slot is removed with probability 0.2.
        for idx in range(n):
            if idx >= len(ids):
                break
            if np.random.uniform() >= 0.2:
                continue
            removed = ids.pop(idx)
            points.pop(idx)
            block.delete_user(removed)

        ranked = sorted(zip(points, ids), reverse=True)
        t_points, t_ids = zip(*ranked)
        t_vals = (list(t_ids), list(t_points))

        users = block.get_users()
        p_vals = ([u['id'] for u in users], [u['points'] for u in users])

        self.assertEqual(t_vals, p_vals)
示例#5
0
    def test_add_user(self):
        """Users added to a block must come back in descending (points, id) order."""
        db = Connect().get_db()
        block_id = db['blocks'].insert_one({}).inserted_id
        block = Block(block_id, db)

        n = 100
        ids = [str(i) for i in range(n)]
        points = [(i ** 2) % 23 for i in range(n)]
        for uid, score in zip(ids, points):
            block.add_user(uid, int(score))

        # Expected ranking: highest points first, ties broken by id string.
        ranked = sorted(zip(points, ids), reverse=True)
        t_points, t_ids = zip(*ranked)
        t_vals = (list(t_ids), list(t_points))

        users = block.get_users()
        p_vals = ([u['id'] for u in users], [u['points'] for u in users])

        self.assertEqual(t_vals, p_vals)
示例#6
0
    def __import_sector_constituents(self):
        '''
            Read the 'sp500_constituents' table and return its yh_id column.
            When self.sector is 'ALL' no filter is applied; otherwise only
            rows of that sector are requested.

            out:
                <List>
        '''
        db = Connect('homedev')

        query = {'table': 'sp500_constituents', 'columns': '*'}
        if self.sector != 'ALL':
            query['condition'] = f"where sector = '{self.sector}'"

        constituents = db.import_from_db(**query)
        return constituents['yh_id'].to_list()
示例#7
0
    def test_combine(self):
        """Combining several blocks into the first must yield one ranked list.

        Fills 7 blocks with random users, combines blocks 1..6 into block 0,
        and checks block 0 against the locally sorted union of all users.
        """
        db = Connect().get_db()

        n = 7
        blocks = [db['blocks'].insert_one({}).inserted_id for _ in range(n)]

        all_ids = []
        all_points = []
        for block_id in blocks:
            n_each = np.random.randint(10, 15)

            ids = np.random.randint(low=0, high=10000, size=n_each)
            points = np.random.randint(low=0, high=10000, size=n_each)

            block = Block(block_id, db)
            for uid, score in zip(ids, points):
                block.add_user(str(uid), int(score))

            all_ids.extend(ids)
            all_points.extend(points)

        # Expected ranking: highest points first, ties broken by id string.
        ranked = sorted(zip(all_points, map(str, all_ids)), reverse=True)
        points, ids = (list(column) for column in zip(*ranked))

        b = Block(blocks[0], db)
        for other_id in blocks[1:]:
            Block(other_id, db).combine(blocks[0])

        t_vals = (ids, points)

        users = b.get_users()
        p_vals = ([u['id'] for u in users], [u['points'] for u in users])

        self.assertEqual(t_vals, p_vals)
示例#8
0
    def test_divide(self):
        """Dividing a block must split the ranked users into fixed-size chunks.

        Adds 53 random users, divides with a block size of 13, and checks
        that the original block plus the returned blocks hold consecutive
        slices of the locally sorted ranking.
        """
        db = Connect().get_db()
        oid = db['blocks'].insert_one({}).inserted_id
        block = Block(oid, db)

        n = 53
        block_size = 13

        ids = np.random.randint(low=0, high=10000, size=n)
        points = np.random.randint(low=0, high=10000, size=n)
        for uid, score in zip(ids, points):
            block.add_user(str(uid), int(score))

        # The original block comes first, followed by the ones divide() returned.
        new_ids = [oid] + block.divide(block_size)

        ranked = sorted(zip(points, map(str, ids)), reverse=True)
        points, ids = (list(column) for column in zip(*ranked))

        t_vals = []
        p_vals = []
        for i in range(n // block_size + 1):
            lo = i * block_size
            hi = min(lo + block_size, n)
            t_vals.append((ids[lo:hi], points[lo:hi]))

            users = Block(new_ids[i], db).get_users()
            p_vals.append(([u['id'] for u in users],
                           [u['points'] for u in users]))

        self.assertEqual(t_vals, p_vals)
示例#9
0
文件: args.py 项目: H2OSIR/spider
def extract_args(sql):
    """Extract args from the database by running `sql`.

    Args:
        sql: query passed unchanged to ``Connect.read_sql``.

    Returns:
        Whatever ``read_sql`` returns for the query.

    The engine is closed in a ``finally`` clause so it is released even
    when the query raises.
    """
    db = Connect('oracle')
    try:
        return db.read_sql(sql=sql)
    finally:
        db.engine.close()
示例#10
0
def delete_all_db():
    """Drop every database the connection lists, except 'admin' and 'local'."""
    conn = Connect()
    protected = ('admin', 'local')
    for name in conn.get_db_names():
        if name not in protected:
            conn.client.drop_database(name)
示例#11
0
import pandas as pd
import json
import os
import time

from database import Connect

# Connection obtained once, at import time, via Connect.get_connection().
connection = Connect.get_connection()
# `webvision` attribute of that connection; it is indexed by collection name
# below (presumably the database of that name -- confirm against database.Connect).
db = connection.webvision


def import_content(filepath, symbol):
    """Replace the `symbol` collection with the rows of a CSV file.

    Args:
        filepath: CSV path, resolved relative to this module's directory.
        symbol: name of the collection in `db` to drop and refill.

    Any exception raised while reading, dropping or inserting is caught and
    printed; nothing is returned.
    """
    started = time.time()
    target = db[symbol]
    csv_path = os.path.join(os.path.dirname(__file__), filepath)
    try:
        print('yes it is called')
        frame = pd.read_csv(csv_path)
        # Round-trip through JSON to get a list of plain record dicts.
        records = json.loads(frame.to_json(orient='records'))
        db[symbol].drop()
        target.insert_many(records)
        print("---------%s seconds" % (time.time() - started))
    except Exception as err:
        print(f'error: {err}')


# filepath = 'csv/BTCUSDT-1m-data.csv'
# import_content(filepath, filepath.split('/')[1].split('-')[0])
示例#12
0
from flask import Flask
from flask import request
from database import Connect
from Block import Block
from bson import ObjectId
import pdb
import time
from flask import jsonify
from flask import Response
import json
import uuid

# Flask application object; routes are registered on it with @app.route.
app = Flask(__name__)
# Shared Connect instance created once at import time.
database = Connect()
# Database name passed to database.get_db() for the global leaderboard.
all_countries = 'all_countries'
# Small numeric constant; presumably a float-comparison tolerance -- usage is
# not visible in this part of the file, TODO confirm.
EPS = 1e-6

# Named size presets; NOTE(review): what the numbers measure is not visible
# in this part of the file -- confirm against the code that reads `sizes`.
sizes = {
    'small': 1000,
    'mid': 3000,
    'large': 10000
}


@app.route('/leaderboard', methods=['GET'])
def get_leaderboard():
    """Handle GET /leaderboard using the 'all_countries' database."""
    global_db = database.get_db(all_countries)
    return __leaderboard(global_db)


@app.route('/leaderboard/<string:country>', methods=['GET'])
def get_country_leaderboard(country):
示例#13
0
import os
from datetime import datetime

from jinja2 import Environment, FileSystemLoader

from database import Connect, Query

# Templates are looked up in, and output is written under, the current
# working directory.
template_search_dir = os.getcwd()
output_file_dir = os.getcwd() + "/output/"
# Epoch seconds, used as a suffix for the output file name.
# strftime('%s') is a platform-specific extension (not available everywhere)
# and, applied to a naive utcnow() value, treats UTC wall time as local time,
# which skews the result by the UTC offset. timestamp() on a naive local
# datetime gives the real epoch portably.
timestamp = str(int(datetime.now().timestamp()))

file_output_name = "importa_participante_" + timestamp
# Opened in append mode and kept open for the rest of the script.
# NOTE(review): no close() is visible in this part of the file -- confirm.
f = open(output_file_dir + file_output_name, 'a')

con = Connect(user='******',
              password='******',
              hostname='200.160.11.109',
              db='ptt')
cursor = con.mariadb().cursor()
# Query built with a single '*' selector; the SQL itself comes from
# ptt_info_cobranca().
q_cobranca = Query(['*'])
cursor.execute(q_cobranca.ptt_info_cobranca())
p_dict = dict()
participante_number = 0

# Jinja2 environment rooted at the template search directory.
template_env = Environment(loader=FileSystemLoader(template_search_dir))
c = template_env.get_template('template_insere_as')

for p in cursor.fetchall():
    c_participante = con.mariadb().cursor()
    c_participante.execute(q_cobranca.ptt_participante_ix(p[0]))
    ix_query_res = c_participante.fetchall()
    c_participante.close()