Exemplo n.º 1
1
import re
import urllib2
from pandas import DataFrame
from sqlalchemy import create_engine


def get_link(url):
	link_exr = re.compile(r'<a.*?\s*href=\"(.*?)\".*?>(.*?)</a>')
	links = []
	
	# open web content
	f = urllib2.urlopen(url)
	content = f.read()
	
	# BeautifulSoup version: find every URL and save it to links
	# soup = BeautifulSoup(content, "lxml")
	# for a in soup.find_all('a', href=True):
	# 	if "detik.com" in a['href']:
	# 		if "http:" not in a['href']:
	# 			a['href'] = "http:" + a['href']
	# 		print "Found the URL:", a['href']
	# 		links.append(a['href'])
			
	# regex version: find every URL and save it to links
	for link in link_exr.findall(content):
		if "detik.com" in link[0]:
			link_detik = link[0]
			if "http:" not in link_detik:
				link_detik = "http:" + link_detik
			links.append(link_detik)
	
	# save to DataFrame
	df = DataFrame(links, columns=['detik url'])
	df = df.drop_duplicates()

	print df.head(0)
	# create engine and save to the MySQL database
	detik_db = create_engine("mysql://*****:*****@localhost/data_detik") 
	df.to_sql('url_detik', detik_db, if_exists='replace')
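
The snippet above is Python 2 (urllib2, print statements). For reference, a rough Python 3 sketch of the same flow — fetch a page, collect detik.com links, deduplicate, write them with to_sql — assuming requests, BeautifulSoup and SQLAlchemy are available and using a local SQLite file in place of the MySQL engine above:

import requests
from bs4 import BeautifulSoup
from pandas import DataFrame
from sqlalchemy import create_engine


def get_links_py3(url):
    # fetch the page and collect every detik.com link
    soup = BeautifulSoup(requests.get(url).text, "html.parser")
    links = []
    for a in soup.find_all("a", href=True):
        href = a["href"]
        if "detik.com" in href:
            if not href.startswith("http"):
                href = "http:" + href
            links.append(href)

    # deduplicate and store in a local SQLite table (stand-in for the MySQL engine above)
    df = DataFrame(links, columns=["detik_url"]).drop_duplicates()
    engine = create_engine("sqlite:///detik_links.db")
    df.to_sql("url_detik", engine, if_exists="replace", index=False)
    return df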
Exemplo n.º 2
1
class WriteSQLDtypes(object):

    goal_time = 0.2
    params = (['sqlalchemy', 'sqlite'],
              ['float', 'float_with_nan', 'string', 'bool', 'int', 'datetime'])
    param_names = ['connection', 'dtype']

    def setup(self, connection, dtype):
        N = 10000
        con = {'sqlalchemy': create_engine('sqlite:///:memory:'),
               'sqlite': sqlite3.connect(':memory:')}
        self.table_name = 'test_type'
        self.query_col = 'SELECT {} FROM {}'.format(dtype, self.table_name)
        self.con = con[connection]
        self.df = DataFrame({'float': np.random.randn(N),
                             'float_with_nan': np.random.randn(N),
                             'string': ['foo'] * N,
                             'bool': [True] * N,
                             'int': np.random.randint(0, N, size=N),
                             'datetime': date_range('2000-01-01',
                                                    periods=N,
                                                    freq='s')},
                            index=tm.makeStringIndex(N))
        self.df.loc[1000:3000, 'float_with_nan'] = np.nan
        self.df['datetime_string'] = self.df['datetime'].astype(str)
        self.df.to_sql(self.table_name, self.con, if_exists='replace')

    def time_to_sql_dataframe_column(self, connection, dtype):
        self.df[[dtype]].to_sql('test1', self.con, if_exists='replace')

    def time_read_sql_query_select_column(self, connection, dtype):
        read_sql_query(self.query_col, self.con)
Exemplo n.º 3
1
class ReadSQLTable(object):

    goal_time = 0.2

    def setup(self):
        N = 10000
        self.table_name = 'test'
        self.con = create_engine('sqlite:///:memory:')
        self.df = DataFrame({'float': np.random.randn(N),
                             'float_with_nan': np.random.randn(N),
                             'string': ['foo'] * N,
                             'bool': [True] * N,
                             'int': np.random.randint(0, N, size=N),
                             'datetime': date_range('2000-01-01',
                                                    periods=N,
                                                    freq='s')},
                            index=tm.makeStringIndex(N))
        self.df.loc[1000:3000, 'float_with_nan'] = np.nan
        self.df['datetime_string'] = self.df['datetime'].astype(str)
        self.df.to_sql(self.table_name, self.con, if_exists='replace')

    def time_read_sql_table_all(self):
        read_sql_table(self.table_name, self.con)

    def time_read_sql_table_parse_dates(self):
        read_sql_table(self.table_name, self.con, columns=['datetime_string'],
                       parse_dates=['datetime_string'])
Exemplo n.º 4
1
 def send_to_db(self):
     conn = sqlite3.connect('data2.sqlite', timeout=30)
     c = conn.cursor()
     df = DataFrame(self.__dict__.items(), index=self.__dict__.keys())
     df = df.drop(0,1)
     df = df.transpose()
     df = df.sort(axis=1)
     df.to_sql('earnings_calendar', conn, if_exists='append', index=False)
Exemplo n.º 5
1
def save_to_database(df: pd.DataFrame, table_name: str):
    con = mysql.connector.connect(
            host='ec2-34-245-208-245.eu-west-1.compute.amazonaws.com',
            database='pregnaware',
            user='******',
            password=os.environ['DB_PREGNAWARE_PWD'])

    df.to_sql(con=con, name=table_name, flavor='mysql', if_exists='append', index=False)
Exemplo n.º 6
0
    def to_mysql(self):

        try:

            df = DataFrame({'user_id':[self.user_id],
                            'user_name':[self.user_name],
                            'title':[self.title],
                            'detail': [self.detail],
                            'publish_time':[self.publish_time],
                            'device':[self.device],
                            'href':[self.href],
                            'repost_count':[self.repost_count],
                            'donate_count':[self.donate_count],
                            'comment_count':[self.comment_count]
                            },
                           columns=['user_id', 'user_name', 'title', 'detail',
                                    'publish_time', 'repost_count', 'donate_count',
                                    'comment_count', 'device', 'href'])
            print df

            try:
                sql_del = "delete from {table} where user_id='{user_id}' and detail='{detail}' and publish_time='{publish_time}'".format(
                        table = mysql_table_xueqiu_article,
                        user_id = self.user_id,
                        detail = self.detail,
                        publish_time = self.publish_time
                        )
                engine.execute(sql_del)
            except Exception,e:
                print 'delete error! ', str(e)

            df.to_sql(mysql_table_xueqiu_article, engine, if_exists='append', index=False)
            return True
Exemplo n.º 7
0
    def to_mysql(self):

        try:

            df = DataFrame({'user_id':[self.user_id],
                            'name':[self.name],
                            'sex': [self.sex],
                            'area':[self.area],
                            'stock_count':[self.stock_count],
                            'talk_count':[self.talk_count],
                            'fans_count':[self.fans_count],
                            'big_v_in_fans_count':0,
                            'follows_count':0,
                            'capacitys':[self.capacitys],
                            'summary':[self.summary],
                            'follow_search_time':'',
                            'update_time':[self.update_time]
                            },
                           columns=['user_id', 'name', 'sex', 'area', 'stock_count', 'talk_count',
                                    'fans_count', 'big_v_in_fans_count', 'follows_count', 'capacitys',
                                    'summary', 'follow_search_time', 'update_time'])
            print df
            df.to_sql(big_v_table_mysql, engine, if_exists='append', index=False)
            return True
        except Exception,e:
            print e
            return False
Exemplo n.º 8
0
def insert_co_exp_ids(profile_ids, modalities, db_con_1, db_con_2):
    """
    Scan high frequency type modalities
    and extract mutually existent exp ids.
    Save it to CSV file.
    """
    print('\twriting co_exp_ids for sensor data')
    for profile_id in profile_ids:
        high_interval_mods = filter(lambda x: info.MOD_FREQ_TYPE[x] == info.FREQ_HIGH, modalities)
        co_exp_ids = []
        for mod in high_interval_mods:
            exp_ids = loader.load_exp_ids(profile_id, mod, filtered=False, server_index=1, db_con=db_con_1, close=False)
            if len(exp_ids) > 0:
                co_exp_ids.append(pd.DataFrame([0] * len(exp_ids), index=exp_ids, columns=[mod]))
        co_exp_ids = pd.concat(co_exp_ids, axis=1)
        co_exp_ids = co_exp_ids.dropna()
        co_exp_ids = list(co_exp_ids.index)
        co_exp_ids.sort()

        done_ids = loader.load_co_exp_ids(profile_id, db_con=db_con_2, close=False)
        co_exp_ids = filter(lambda x: x not in done_ids, co_exp_ids)
        if len(co_exp_ids) == 0:
            print profile_id, "all co_exp_ids are already inserted!"
            continue

        df = DataFrame(co_exp_ids, columns=['expId'])
        df['profile_id'] = profile_id
        df.to_sql("co_exp_ids", db_con_2, flavor='mysql', if_exists='append', index=False)
        print('\t\t%s number of exp ids of user %s are successfully inserted!' % (len(df), profile_id))
Exemplo n.º 9
0
    def get_publish_articles(self):

        t1 = time.time()
        print 'begin query...'
        #sql = 'select distinct user_id from %s where user_id not in (select distinct user_id from %s)' % (big_v_table_mysql, archive_table_mysql)
        #df = pd.read_sql_query(sql, engine)
        #user_ids = df['user_id'].get_values()
        sql1 = 'select distinct user_id from %s where fans_count > 1000 and fans_count < 10001 ' % (big_v_table_mysql)
        sql2 = 'select distinct user_id from %s' % archive_table_mysql
        df1 = pd.read_sql_query(sql1, engine)
        df2 = pd.read_sql_query(sql2, engine)
        user_ids1 = df1['user_id'].get_values()
        user_ids2 = df2['user_id'].get_values()
        user_ids = [id for id in set(user_ids1).difference(user_ids2)]
        t2 = time.time()
        print 'query mysql by join cost:', t2-t1, 's'

        for user_id in user_ids:
            try:
                self.get_publish_articles_by_id(user_id)
            except Exception, e:
                se = Series([user_id, GetNowTime(), str(e)], index=['user_id', 'fail_time', 'fail_reason'])
                df = DataFrame(se).T
                df.to_sql(unfinish_arcticle_table_mysql, engine, if_exists='append', index=False)
                print e
Exemplo n.º 10
0
    def _big_v_in_fans_to_sql(self, followList, id):

        try:
            df = DataFrame({'user_id':followList,   # the users being followed
                            'fans_id':id            # the follower
                            }, columns=['user_id', 'fans_id'])
            print df[:10]
            df.to_sql(fans_in_big_v_table_mysql, engine, if_exists='append', index=False)

        except Exception,e:
            print e
Exemplo n.º 11
0
    def test_mixed_dtype_insert(self):
        # see GH6509
        s1 = Series(2**25 + 1,dtype=np.int32)
        s2 = Series(0.0,dtype=np.float32)
        df = DataFrame({'s1': s1, 's2': s2})

        # write and read again
        df.to_sql("test_read_write", self.conn, index=False)
        df2 = sql.read_table("test_read_write", self.conn)

        tm.assert_frame_equal(df, df2, check_dtype=False, check_exact=True)
Exemplo n.º 12
0
def insert_stat_types():
    mysql_con = mdb.connect(info.HOST_2, info.ID, info.PWD, info.DB_NAME_2)
    df = pd.read_sql("select * from info_statistics where statType = 'basic'", mysql_con, index_col='id')
    new_dict = {'statistics': [], 'statType': [], 'valueType': []}
    for i in range(10):
        for index, st_series in df.iterrows():
            new_dict['statistics'].append("cat%sdur%s" % (i, str(st_series['statistics'][0]).capitalize()
                                                          + st_series['statistics'][1:]))
            new_dict['statType'].append('valueDuration')
            new_dict['valueType'].append(st_series['valueType'])
    new_df = DataFrame(new_dict)
    new_df.to_sql("info_statistics", mysql_con, flavor='mysql', if_exists='append', index=False)
Exemplo n.º 13
0
    def df2db(self, df: pd.DataFrame, tab_name):
        """
        Upload a df to db
        :param df: df to upload
        :param tab_name: table name
        :return: None
        """

        self.execute("set hive.execution.engine = tez")
        self.execute("set tez.queue.name = sephora_internal")
        self.execute(
            "drop table if exists {table_name}".format(table_name=tab_name))
        df.to_sql(tab_name, self.engine, method='multi', index=False)
Exemplo n.º 14
0
def save_data(df: pd.DataFrame, database_filename: str) -> None:
    """
    export the result to a db
    input:
        df: a cleaned df
        database_filename: the file path of db

    output:
        None
    """
    engine = create_engine('sqlite:///'+database_filename)
    df.to_sql('message_table', engine, index=False, if_exists='replace')
    engine.dispose()
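
A minimal round-trip sketch of the pattern used in save_data — write with to_sql, read back with read_sql — using an in-memory SQLite engine as a stand-in for the database file:

import pandas as pd
from sqlalchemy import create_engine

engine = create_engine("sqlite:///:memory:")
messages = pd.DataFrame({"id": [1, 2], "message": ["help needed", "all clear"]})

# write the table, replacing it if it already exists
messages.to_sql("message_table", engine, index=False, if_exists="replace")

# read it back to confirm the round trip
print(pd.read_sql("SELECT * FROM message_table", engine))
engine.dispose()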
Exemplo n.º 15
0
    def export_table(
        self,
        df: pd.DataFrame,
        table,
        schema=None,
        if_exists="replace",
        method="multi",
        show_confirmation=True,
    ):
        with PostgresConnection(database_var=self.database_var) as connection:
            schema = schema or self.schema
            df = self.get_filtered_export(df,
                                          table,
                                          schema,
                                          connection,
                                          if_exists=if_exists)
            if df.shape[0] == 0:
                return
            df = pre_convert_data(df)
            df_dtype_dict = get_dataframe_dtypes(df)
            df = convert_dataframe_columns(df, df_dtype_dict)
            table_already_exists = check_table_exists(table, schema,
                                                      connection)
            dtype_param = convert_dtypes(
                dtype_dict=df_dtype_dict,
                from_dtype="dataframe_dtype",
                to_dtype="postgres_dtype",
            )

            # Attempting to overwrite mismatched data results in error
            if if_exists == "replace":
                connection.connection.execute(
                    f"drop table if exists {schema}.{table} cascade")
            if show_confirmation:
                print(f"Exporting {table} {df.shape} to {schema}", end="")
            start = time.time()
            df.to_sql(
                table,
                method=method,
                if_exists=if_exists,
                dtype=dtype_param,
                schema=schema,
                con=connection.connection,
                index=False,
            )
            end = time.time()
            elapsed_time = end - start
            if not table_already_exists:
                declare_primary_key(df, table, schema, connection)
            if show_confirmation:
                print(f" in {elapsed_time} seconds")
Exemplo n.º 16
0
def write_mysql(table_name: str, data: pd.DataFrame, dtype: dict = None):
    """写入MySQl数据库, 表格如果存在, 则新增数据"""
    try:
        data.to_sql(name=f'o_{table_name}',
                    con=RemoteMySQLConfig.engine,
                    if_exists='append',
                    index=0,
                    dtype=dtype)

        LOG.logger_font.debug(f"mysql write table {table_name} succeed!")
    except Exception as exc:
        LOG.logger_font.error(
            f"mysql write table {table_name} failed, error: {exc}.")
        raise Exception
Exemplo n.º 17
def cached_table_push(df: pd.DataFrame, tablename: str, **kwargs):
    """
    Save df to local cache file and replace the table in the database.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame to push
    tablename : str
        Name of the table in the database
    """
    cache_path = CACHE_PATH + '/' + tablename + '.pkl'
    df.to_pickle(cache_path)
    df.to_sql(tablename, DB_CONNECT_STRING, if_exists='replace', method='multi', **kwargs)
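
A hedged usage sketch of the cache-then-push idea in cached_table_push; CACHE_PATH and DB_CONNECT_STRING are stand-in values here, not the original module's settings:

import pandas as pd
from sqlalchemy import create_engine

# stand-ins for the module-level settings used above (assumptions, not the original values)
CACHE_PATH = "."
DB_CONNECT_STRING = "sqlite:///local.db"


def cached_table_push_demo(df: pd.DataFrame, tablename: str, **kwargs):
    """Same idea as cached_table_push: pickle a local copy, then replace the DB table."""
    df.to_pickle(CACHE_PATH + "/" + tablename + ".pkl")
    engine = create_engine(DB_CONNECT_STRING)
    df.to_sql(tablename, engine, if_exists="replace", method="multi", **kwargs)


cached_table_push_demo(pd.DataFrame({"x": [1, 2, 3]}), "prices", index=False)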
Exemplo n.º 18
0
def store_table(
    data_frame: pd.DataFrame,
    table_name: str,
    dtype: Optional[Mapping] = None,
):
    """Store a data frame in the DB.

    dtype is a dictionary of (column_name, column_type) column type can be:

    - 'boolean',
    - 'datetime',
    - 'double',
    - 'integer',
    - 'string'

    The function will use these to translate into (respectively)

    - sqlalchemy.Boolean()
    - sqlalchemy.DateTime()
    - sqlalchemy.Float()
    - sqlalchemy.BigInteger()
    - sqlalchemy.UnicodeText()

    :param data_frame: The data frame to store
    :param table_name: The name of the table in the DB
    :param dtype: dictionary with (column_name, data type) to force the storage
    of certain data types
    :return: Nothing. Side effect in the DB
    """
    # Check the length of the column names
    if any(len(cname) > sql.COLUMN_NAME_SIZE for cname in data_frame.columns):
        raise Exception(
            _('Column name is longer than {0} characters').format(
                sql.COLUMN_NAME_SIZE))

    if dtype is None:
        dtype = {}

    with cache.lock(table_name):
        # We overwrite the content and do not create an index
        data_frame.to_sql(
            table_name,
            OnTaskSharedState.engine,
            if_exists='replace',
            index=False,
            dtype={
                key: ontask_to_sqlalchemy[tvalue]
                for key, tvalue in dtype.items()
            },
        )
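
The ontask_to_sqlalchemy lookup referenced above is defined elsewhere in the project; a plausible minimal sketch of that mapping, and of how it feeds the dtype argument of to_sql, could look like this (the dictionary below is an assumption, not OnTask's actual definition):

import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine

# assumed stand-in for the ontask_to_sqlalchemy mapping described in the docstring
ontask_to_sqlalchemy = {
    'boolean': sqlalchemy.Boolean(),
    'datetime': sqlalchemy.DateTime(),
    'double': sqlalchemy.Float(),
    'integer': sqlalchemy.BigInteger(),
    'string': sqlalchemy.UnicodeText(),
}

df = pd.DataFrame({'age': [30, 41], 'name': ['ada', 'lin']})
dtype = {'age': 'integer', 'name': 'string'}

engine = create_engine('sqlite:///:memory:')
df.to_sql(
    'people',
    engine,
    if_exists='replace',
    index=False,
    dtype={key: ontask_to_sqlalchemy[tvalue] for key, tvalue in dtype.items()},
)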
Exemplo n.º 19
0
 def aggregated_prod(self):
     print('Starting Aggregation......')
     try:
         data = self.cnx.execute(
             "Select count(*),name from prod group by name").fetchall()
         df = DataFrame(data, columns=['count', 'name'])
         df.to_sql('aggregated_data',
                   self.cnx,
                   if_exists='replace',
                   index=False)
         print('Aggregation completed!!')
     except Exception as e:
         print(e)
         print('Aggregation failed!!')
Exemplo n.º 20
0
    def process_item(self, item, spider):
        global MYSQL_TableName
        poly = item['poly']
        if item['results']:
            results = item['results']
            rows = []
            for result in results:
                row = []
                keys1 = [
                    'name', 'province', 'city', 'area', 'address', 'telephone',
                    'uid', 'street_id', 'detail', 'detail_info', 'location'
                ]

                for key in keys1:
                    # d[key] = result.get(key)
                    row.append(result.get(key))

                keys2 = ['detail_url', 'tag', 'type']
                for key in keys2:
                    detail_info = result.get('detail_info')
                    if detail_info is None:
                        row.append(None)
                    else:
                        row.append(detail_info.get(key))
                keys3 = ['search_word', 'region', 'requests_url']

                for key in keys3:
                    row.append(item[key])
                rows.append([str(x) for x in row])
                print('Retrieved POI: %s' % row[0])

            df = DataFrame(rows, columns=keys1 + keys2 + keys3)
            # region_pinyin = ''.join(lazy_pinyin(item['region']))
            region_pinyin = str(item['region'])

            # Check whether each point lies inside the given poly region, using shapely's polygon.contains
            try:
                df['isin_region'] = df['location'].apply(
                    lambda x: poly.contains(
                        Point(float(eval(x)['lng']), float(eval(x)['lat']))))
            except Exception as e:
                logging.info(e)
                df['isin_region'] = 999

            if MYSQL_TableName == "":
                MYSQL_TableName = '{region}_bd_map_pois'.format(
                    region=region_pinyin)
            else:
                pass
            df.to_sql(MYSQL_TableName, engine, if_exists='append', index=False)
Exemplo n.º 21
0
    def update_table(self, table_name: str, dataframe: pd.DataFrame,
                     schema: str = None, **kwargs):
        """
        This method updates an existing table based on an action
        For reference https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html

        Args:
            table_name: Name of the table to update
            dataframe: A pandas dataframe 
            schema: defaults to current schema

        Kwargs:
            **if_exists**: {‘fail’, ‘replace’, ‘append’}, default ‘fail’
                How to behave if the table already exists.
                    fail: Raise a ValueError.
                    replace: Drop the table before inserting new values.
                    append: Insert new values to the existing table.

            **index**: bool, default True
                Write DataFrame index as a column. Uses index_label as the column name in the table.

            **index_label**: str or sequence, default None
                Column label for index column(s). If None is given (default) and index is True, then the index names
                are used. A sequence should be given if the DataFrame uses MultiIndex.

            **chunksize**: int, optional
                Specify the number of rows in each batch to be written at a time. By default, all rows will be written
                 at once.

            **dtype**: dict or scalar, optional
                Specifying the datatype for columns. If a dictionary is used, the keys should be the column names and
                the values should be the SQLAlchemy types or strings for the sqlite3 legacy mode. If a scalar is
                provided, it will be applied to all columns.

            **method**: {None, ‘multi’, callable}, optional
                Controls the SQL insertion clause used:
                    None : Uses standard SQL INSERT clause (one per row).
                    ‘multi’: Pass multiple values in a single INSERT clause.
                    callable with signature (pd_table, conn, keys, data_iter).
                    Details and a sample callable implementation can be found in the section insert method of
                    pandas documentation.

        Returns:
            None

        Raises:
            ValueError
                if if_exists is 'fail' which is default
        """
        dataframe.to_sql(table_name, self.engine, schema=schema, **kwargs)
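
A minimal sketch of the pass-through described in the docstring, calling to_sql directly with the same keyword arguments that update_table forwards (an in-memory SQLite engine stands in for self.engine):

import pandas as pd
from sqlalchemy import create_engine

engine = create_engine('sqlite:///:memory:')
df = pd.DataFrame({'id': range(5), 'value': [0.1, 0.2, 0.3, 0.4, 0.5]})

# the same keywords update_table forwards via **kwargs
df.to_sql(
    'measurements',
    engine,
    schema=None,          # defaults to the current schema
    if_exists='append',   # 'fail' (default), 'replace' or 'append'
    index=False,          # do not write the DataFrame index as a column
    chunksize=1000,       # rows per INSERT batch
    method='multi',       # multi-row INSERT statements
)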
Exemplo n.º 22
0
def table_write(table_name: str, df: pd.DataFrame):

    # Rename cols to snake_case
    df.columns = (pd.Series(
        df.columns).map(lambda col: re.sub("([A-Z])", lambda x: "_" + x.group(
            1).lower(), col).lstrip("_")).tolist())

    engine = create_engine("postgresql://{}:{}@{}:5432/{}".format(
        os.environ["POSTGRES_USER"],
        os.environ["POSTGRES_PASSWORD"],
        os.environ["POSTGRES_HOST"],
        os.environ["PROJECT_NAME"],
    ))
    df.to_sql(name=table_name, con=engine, if_exists="replace", index=True)
Exemplo n.º 23
0
def dfToTable(
    df: pd.DataFrame,
    table: str,
    db: str,
    ifExists: str = "replace",
    indexCols: list[str] | None = None,
) -> None:
    """Saves dataframe as table in sqlite3 database

    Args:

        df (pd.DataFrame) -- data to save

        table (str) -- table name

        db (str) -- database name (ending in .db)

        ifExists (str) -- pass-thru for pandas arg. "replace" (default), "append", "fail"

        indexCols (list of str) -- cols to be used as index. Defaults to None (no index).

    Returns nothing.
    """

    # Handle dtypes
    df = df.convert_dtypes()

    assert ifExists in ["replace", "append",
                        "fail"], f"Invalid ifExists: {ifExists}"

    # Handle index var
    if indexCols is not None:
        index_label = indexCols
        df.set_index(indexCols, drop=True, inplace=True, verify_integrity=True)
        index = True
    else:
        index_label = None
        index = False

    # Load table
    with sq.connect(db) as con:
        df.to_sql(
            name=table,
            con=con,
            if_exists=ifExists,
            method="multi",
            index=index,
            index_label=index_label,
            chunksize=1000,
        )
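
A short usage sketch for dfToTable under the assumptions above (the module is assumed to import sqlite3 as sq); the table contents are made up for illustration:

import pandas as pd

games = pd.DataFrame({
    "game_id": [101, 102, 103],
    "home_score": [88, 75, 91],
    "away_score": [80, 79, 85],
})

# replace the table, using game_id as the index column
dfToTable(games, table="games", db="stats.db", ifExists="replace", indexCols=["game_id"])

# append to another table with no index column
dfToTable(games, table="games_log", db="stats.db", ifExists="append")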
Exemplo n.º 24
0
    def to_mysql(self):

        try:

            df = DataFrame(
                {
                    "user_id": [self.user_id],
                    "user_name": [self.user_name],
                    "title": [self.title],
                    "detail": [self.detail],
                    "publish_time": [self.publish_time],
                    "href": [self.href],
                    "watch_count": [self.watch_count],
                    "repost_count": [self.repost_count],
                    "donate_count": [self.donate_count],
                    "comment_count": [self.comment_count],
                    #'is_top':[self.is_top],
                    #'is_repost':[self.is_repost],
                    #'repost_reason':[self.repost_reason
                    "device": [self.device],  # ]
                },
                columns=[
                    "user_id",
                    "user_name",
                    "title",
                    "detail",
                    "publish_time",
                    "href",
                    "watch_count",
                    "repost_count",
                    "donate_count",
                    "comment_count",
                    "device",
                ],
            )
            print df

            try:
                sql_del = "delete from {table} where user_id='{user_id}' and title='{title}' and publish_time='{publish_time}'".format(
                    table=mysql_table_licaishi_viewpoint,
                    user_id=self.user_id,
                    title=self.title,
                    publish_time=self.publish_time,
                )
                engine.execute(sql_del)
            except Exception, e:
                print "delete error! ", str(e)

            df.to_sql(mysql_table_licaishi_viewpoint, engine, if_exists="append", index=False)
            return True
Exemplo n.º 25
0
def insert_cols(table_name: str, df: pd.DataFrame):
    # Rename cols to snake_case
    df.columns = (pd.Series(
        df.columns).map(lambda col: re.sub("([A-Z])", lambda x: "_" + x.group(
            1).lower(), col).lstrip("_")).tolist())

    engine = create_engine("postgresql://{}:{}@{}:5432/{}".format(
        os.environ["POSTGRES_USER"],
        os.environ["POSTGRES_PASSWORD"],
        os.environ["POSTGRES_HOST"],
        os.environ["PROJECT_NAME"],
    ))
    df.to_sql(name=table_name + "_tmp",
              con=engine,
              if_exists="replace",
              index=True)

    dtype_mapper = {
        np.dtype("object"): "TEXT",
        np.dtype("int32"): "BIGINT",
        np.dtype("int64"): "BIGINT",
        np.dtype("uint8"): "BIGINT",
        np.dtype("float64"): "DOUBLE PRECISION",
    }

    queries = [
        """
        ALTER TABLE {0} DROP COLUMN IF EXISTS {1};
        ALTER TABLE {0} ADD COLUMN IF NOT EXISTS {1} {2};
        UPDATE {0} t1
        SET    {1} = t2.{1}
        FROM   {0}_tmp t2
        WHERE  t1.index = t2.index
        """.format(table_name, col_name, dtype)
        for col_name, dtype in df.dtypes.map(dtype_mapper).iteritems()
    ]

    with psycopg2.connect(
            user=os.environ["POSTGRES_USER"],
            password=os.environ["POSTGRES_PASSWORD"],
            host=os.environ["POSTGRES_HOST"],
            port=5432,
            database=os.environ["PROJECT_NAME"],
    ) as conn, conn.cursor() as cur:
        for query in queries:
            cur.execute(query)
        conn.commit()

        cur.execute("DROP TABLE {}_tmp;".format(table_name))
        conn.commit()
Exemplo n.º 26
0
def save_data(df: pd.DataFrame, database_filename: str) -> None:
    '''
    Saves the dataframe to a sqlite database so the results are persisted.

    Parameters
    ----------
    df : pd.DataFrame
        records to save
    database_filename : str
        database file name
    '''
    with sqlite3.connect(database_filename) as conn:
        df.to_sql("disturbing_tweets", conn, if_exists='replace')
        conn.commit()
Exemplo n.º 27
0
    def writeToDB(self, ticker: str, df: pd.DataFrame):
        ### 'date','open','high','low','close' to 'id', 'time','open','high','low','close'

        conn = self.engine.connect()

        ticker_id = self._getTickerId(ticker)

        df.rename(columns={'date': 'time'}, inplace=True)
        df['id'] = ticker_id

        df.to_sql(DBAccessor.DATA_TABLE,
                  index=False,
                  con=conn,
                  if_exists='append')
Exemplo n.º 28
0
def save_data(df: list, table: str):
    engine = get_engine(password='',
                        host='localhost',
                        port=5432,
                        database=constants.DATABASE)
    dfl = DataFrame(df)

    dfl.to_sql(table,
               con=engine,
               index=False,
               if_exists='append',
               chunksize=100)

    return True
Exemplo n.º 29
0
 def save_to_sql(self, df: pd.DataFrame) -> None:
     """Dump DataFrame to the (coinmarketcap table) coinmarketcap.db"""
     database_name = config.database_name.split('.')[0]
     try:
         with self.connection as conn:
             logger.info('Successfully connected to SQLite')
             self.cursor.execute(f'DROP TABLE IF EXISTS {database_name};')
             df.to_sql(database_name, conn)
     except sqlite3.Error as error:
         logger.error('Error with connection to sqlite: %s', error)
     finally:
         if self.connection:
             self.connection.close()
         logger.info('The SQLite connection is closed')
Exemplo n.º 30
0
 def __insert(self,
              df: pd.DataFrame,
              table_name: str,
              dtype: dict,
              chunksize: int = 1000,
              mode: str = 'replace') -> None:
     df.to_sql(name=table_name,
               con=self.__engine,
               index=False,
               schema=self.SCHEMA,
               if_exists=mode,
               dtype=dtype,
               chunksize=chunksize,
               method='multi')
Exemplo n.º 31
0
def upload_portfolio_valuation(df: pd.DataFrame):
    conn = sqlite3.connect(db)
    c = conn.cursor()

    checker = check_if_table_exists('PortfolioValuation')

    if not checker:
        df.to_sql('PortfolioValuation', conn)
    elif checker:
        c.execute('''drop table main.PortfolioValuation''')
        df.to_sql('PortfolioValuation', conn)

    conn.commit()
    conn.close()
Exemplo n.º 32
0
    def write_table(table: str, df: pd.DataFrame, mode='replace'):

        if len(df) > 0:
            engine = MysqlDAO.create_engine()
            df.to_sql(table,
                      con=engine,
                      if_exists=mode,
                      index=False,
                      chunksize=1000)

            with engine.begin() as conn:
                conn.execute(f'ALTER TABLE {table} ENGINE = MYISAM')

            engine.dispose()
Exemplo n.º 33
0
def write_mysql(
    table_name: str,
    data: pd.DataFrame,
):
    """写入MySQl数据库, 表格如果存在, 则新增数据"""
    try:
        data.to_sql(name=f'o_{table_name}',
                    con=RemoteMySQLConfig.engine,
                    if_exists='append',
                    index=0)
        logging.info(f"mysql write table {table_name} succeed!")
    except Exception as exc:
        logging.error(f"mysql write table {table_name} failed, error: {exc}.")
        raise Exception
Exemplo n.º 34
0
def sqlselect(table: pd.DataFrame, sql):
    if len(table.columns) == 0:
        return (pd.DataFrame(), [])

    with sqlite3.connect(":memory:",
                         detect_types=sqlite3.PARSE_DECLTYPES) as conn:
        table.to_sql("input", conn, index=False)

        with _deleting_cursor(conn.cursor()) as c:
            try:
                c.execute(sql)
            except sqlite3.DatabaseError as err:
                return None, _database_error_to_messages(err)
            except sqlite3.Warning as err:
                return None, _database_warning_to_messages(err)

            if c.description is None:
                return (
                    None,
                    [
                        i18n.trans(
                            "badValue.sql.commentedQuery",
                            "Your query did nothing. Did you accidentally comment it out?",
                        )
                    ],
                )

            colnames = [d[0] for d in c.description]

            dupdetect = set()
            for colname in colnames:
                if colname in dupdetect:
                    return (
                        None,
                        [
                            i18n.trans(
                                "badValue.sql.duplicateColumnName",
                                'Your query would produce two columns named {colname}. Please delete one or alias it with "AS".',
                                {"colname": colname},
                            )
                        ],
                    )
                dupdetect.add(colname)

            # Memory-inefficient: creates a Python object per value
            data = c.fetchall(
            )  # TODO benchmark c.arraysize=1000, =100000, etc.

    return pd.DataFrame.from_records(data, columns=colnames), []
Exemplo n.º 35
0
def set_events(events_df: pd.DataFrame, table_name: str):

    events_df = events_df.drop(['chip_plays', 'top_element_info'], axis=1)

    try:
        events_df.to_sql(name=table_name,
                         con=database.cnx,
                         if_exists='replace',
                         index=True)
    except ValueError as vx:
        print(vx)
    except Exception as ex:
        print(ex)
    else:
        print(table_name + " table created successfully.")
Exemplo n.º 36
def fetch_ohlcv(since):
    ohlcv = bitmex.fetch_ohlcv(symbol,
                               timeframe='1m',
                               since=since,
                               limit=LIMIT,
                               params={'partial': False})

    df = DataFrame(ohlcv, columns=cols)
    df['date'] = to_datetime(df['date'],
                             unit='ms',
                             utc=True,
                             infer_datetime_format=True)
    print(df)
    df.to_sql('ticks', con, if_exists='append', index=None)
    print("fetch done")
Exemplo n.º 37
0
def save_data(df: pd.DataFrame, database_filename: str) -> None:
    """
    Save the dataframe to a SQLite database
    Parameters
    df: The pandas.Dataframe to be written
    database_filename: The filename path for the database
    Returns
    None
    """
    engine = create_engine(
        'sqlite:///' + database_filename
    )  # https://docs.sqlalchemy.org/en/13/dialects/sqlite.html
    df.to_sql(
        'DisasterMessages', engine, if_exists="replace", index=False
    )  # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_sql.html
Exemplo n.º 38
0
 def writePandastoTable(self, df: pd.DataFrame, schema, tablename):
     """
     Writes the pandas DataFrame to a DB table. The write operation is append-only. If the table doesn't exist in the DB, it creates a new table and loads the data
     :param df: Source Data
     :param tablename: Target DB Table
     :param schema: Target DB Schema
     :return:
     """
     logging.info("Storing Dataframe data to " + schema + "." + tablename)
     print("Storing Dataframe data to " + schema + "." + tablename)
     df.to_sql(name=tablename,
               schema=schema,
               con=self._dbEngine,
               if_exists="append",
               index=False)
Exemplo n.º 39
0
 def get_one_stock_data_toDb_byEngine(self, stockId):
     '''
     Store the stock's data in the database, using pandas' to_sql convenience method
     '''
     try:
         df = ts.get_k_data(stockId)
         #SQLALCHEMY_DATABASE_URI = 'mysql+pymysql://.....'
         engine = create_engine(
             'mysql+pymysql://root:Root@[email protected]/stocksdb?charset=utf8'
         )
         # 存入数据库
         df.to_sql(stockId + '_KDay', engine, if_exists='replace')
     except:
         logging.error('one stock toDb:\n%s' % traceback.format_exc())
         pass
Exemplo n.º 40
0
def write_df(table: DeclarativeMeta, df: pd.DataFrame) -> None:
    """
    Writes the |df| to the |table|.

    The column headers on |df| must match the column names in |table|. All rows
    in |df| will be appended to |table|. If a row in |df| already exists in
    |table|, then that row will be skipped.
    """
    try:
        df.to_sql(table.__tablename__,
                  SQLAlchemyEngineManager.get_engine_for_schema_base(JailsBase),
                  if_exists='append',
                  index=False)
    except IntegrityError:
        _write_df_only_successful_rows(table, df)
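
The per-row fallback lives in _write_df_only_successful_rows; a rough, generic sketch of that pattern — retry row by row and skip rows that violate a constraint — might look like the following. This is an assumption about the approach, not the project's actual helper:

import pandas as pd
import sqlalchemy
from sqlalchemy.exc import IntegrityError


def append_skipping_duplicates(df: pd.DataFrame, table_name: str,
                               engine: sqlalchemy.engine.Engine) -> None:
    # fast path: one bulk append
    try:
        df.to_sql(table_name, engine, if_exists='append', index=False)
        return
    except IntegrityError:
        pass

    # slow path: insert one row at a time, skipping rows that already exist
    for _, row in df.iterrows():
        try:
            row.to_frame().T.to_sql(table_name, engine,
                                    if_exists='append', index=False)
        except IntegrityError:
            continue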
Exemplo n.º 41
0
 def create_mock_bq_table(
     self,
     dataset_id: str,
     table_id: str,
     mock_schema: MockTableSchema,
     mock_data: pd.DataFrame,
 ) -> None:
     postgres_table_name = self.register_bq_address(
         address=BigQueryAddress(dataset_id=dataset_id, table_id=table_id))
     mock_data.to_sql(
         name=postgres_table_name,
         con=self.postgres_engine,
         dtype=mock_schema.data_types,
         index=False,
     )
Exemplo n.º 42
0
 def probe(cls,
           conn: sqlite3.Connection,
           checkList: pd.DataFrame = None,
           **keyargs):
      '''
      Probe function: checks whether the record identified by the given primary-key values exists
      in the database; used by renew() to decide whether any data needs updating.
      Usage:
      (1) With keyword arguments as the query condition, check whether the corresponding content
          field exists in the database and return a bool,
          e.g. probe(conn, ts_code='000001.SZ', trade_date='20191010') -> True
      (2) With a DataFrame, each row is used as a query condition; the rows whose content field is
          missing from the database are returned as a new DataFrame.
          (Currently, if a row's condition matches several records, the data is considered missing
          as soon as one of them is null; TODO: add an option requiring all of them to be null
          before the row is reported as missing.)
      '''
     tablename = list(cls.mapper.keys())[0]
     reverse_mapper = {
         value: key
         for key, value in cls.mapper[tablename].items()
     }
     if (checkList is None):
         sql = "select 1 from {targetTable} where {clause} limit 1"
         clause = " "
         for field in keyargs.keys():
             db_field = reverse_mapper[field]
             clause += "{db_field}='{param}' and ".format(
                 db_field=db_field, param=keyargs[field])
         a1 = time.time()
         clause += "{nonkf} notnull".format(nonkf=cls.GET_NotKeyField())
         probesql = sql.format(targetTable=tablename, clause=clause)
         rst = conn.execute(probesql)
         return True if rst.fetchall().__len__() > 0 else False
     else:
         if (type(checkList) is not pd.DataFrame):
             raise ("checkList必须为pandas.DataFrame!")
         checkList.to_sql(name='temp_table',
                          con=conn,
                          if_exists='replace',
                          index=False)
         joinCondition = ''
         sql = 'select a.* from temp_table a left join {tablename} b on {joinCondition} where b.{contentField} isnull'
         for col in list(checkList.columns):
             joinCondition += 'a.{df_col} = b.{db_col} and'.format(
                 df_col=col, db_col=reverse_mapper[col])
         joinCondition = joinCondition[0:-4]
         probesql = sql.format(tablename=tablename,
                               joinCondition=joinCondition,
                               contentField=cls.GET_NotKeyField())
         rst = pd.read_sql_query(probesql, conn)
         return rst
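
The DataFrame branch above is essentially an anti-join through a temporary table; a self-contained sketch of that idea with sqlite3 and pandas (the table and column names are illustrative, not the class's mapper):

import sqlite3
import pandas as pd

conn = sqlite3.connect(':memory:')

# stored content table: (ts_code, trade_date) is the key, close is the content field
stored = pd.DataFrame({'ts_code': ['000001.SZ', '000002.SZ'],
                       'trade_date': ['20191010', '20191010'],
                       'close': [14.2, 27.5]})
stored.to_sql('daily', conn, index=False)

# rows whose presence we want to check
check_list = pd.DataFrame({'ts_code': ['000001.SZ', '000003.SZ'],
                           'trade_date': ['20191010', '20191010']})
check_list.to_sql('temp_table', conn, if_exists='replace', index=False)

# left join the check list against the stored data; a NULL content field marks a missing row
missing = pd.read_sql_query(
    "select a.* from temp_table a "
    "left join daily b on a.ts_code = b.ts_code and a.trade_date = b.trade_date "
    "where b.close isnull",
    conn,
)
print(missing)  # only the 000003.SZ row, which is not in the database yet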
Exemplo n.º 43
0
    def load_data(
        self,
        table_name: str,
        data: pd.DataFrame,
        database: str | None = None,
        if_exists: Literal['fail', 'replace', 'append'] = 'fail',
    ) -> None:
        """Load data from a dataframe to the backend.

        Parameters
        ----------
        table_name
            Name of the table in which to load data
        data
            Pandas DataFrame
        database
            Database in which the table exists
        if_exists
            What to do when data in `name` already exists

        Raises
        ------
        NotImplementedError
            Loading data to a table from a different database is not
            yet implemented
        """
        if database == self.current_database:
            # avoid fully qualified name
            database = None

        if database is not None:
            raise NotImplementedError(
                'Loading data to a table from a different database is not '
                'yet implemented')

        params = {}
        if self.has_attachment:
            # for database with attachment
            # see: https://github.com/ibis-project/ibis/issues/1930
            params['schema'] = self.current_database

        data.to_sql(
            table_name,
            con=self.con,
            index=False,
            if_exists=if_exists,
            **params,
        )
Exemplo n.º 44
0
def _downloadFqFactor(codes):
    factorDF = DataFrame()
    for code in codes:
        logging.info("Downloading %s fq factor."%code)
        df = ts.stock.trading._parase_fq_factor(code,'','')
        df.insert(0,"code",code,True)
        df = df.drop_duplicates('date').set_index('date')
        factorDF = pd.concat([factorDF, df])
        if conf.DEBUG:
            break

    logging.info("Deleting fq factor.")
    utils.executeSQL("delete from t_daily_fqFactor")
    logging.info("Saving fq factor.")
    factorDF.to_sql(name='t_daily_fqFactor',con=utils.getEngine(), if_exists="append",chunksize=20000)
    logging.info("Saved fq factor.")
Exemplo n.º 45
def test_aliased_views_with_computation():
    engine = sa.create_engine('sqlite:///:memory:')

    df_aaa = DataFrame({'x': [1, 2, 3, 2, 3],
                        'y': [2, 1, 2, 3, 1],
                        'z': [3, 3, 3, 1, 2]})
    df_bbb = DataFrame({'w': [1, 2, 3, 2, 3],
                        'x': [2, 1, 2, 3, 1],
                        'y': [3, 3, 3, 1, 2]})

    df_aaa.to_sql('aaa', engine)
    df_bbb.to_sql('bbb', engine)

    metadata = sa.MetaData(engine)
    metadata.reflect()

    sql_aaa = metadata.tables['aaa']
    sql_bbb = metadata.tables['bbb']

    L = Symbol('aaa', discover(df_aaa))
    R = Symbol('bbb', discover(df_bbb))

    expr = join(by(L.x, y_total=L.y.sum()),
                R)
    a = compute(expr, {L: df_aaa, R: df_bbb})
    b = compute(expr, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)

    expr2 = by(expr.w, count=expr.x.count(), total2=expr.y_total.sum())
    a = compute(expr2, {L: df_aaa, R: df_bbb})
    b = compute(expr2, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)

    expr3 = by(expr.x, count=expr.y_total.count())
    a = compute(expr3, {L: df_aaa, R: df_bbb})
    b = compute(expr3, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)

    expr4 = join(expr2, R)
    a = compute(expr4, {L: df_aaa, R: df_bbb})
    b = compute(expr4, {L: sql_aaa, R: sql_bbb})
    assert into(set, a) == into(set, b)

    """ # Takes a while
Exemplo n.º 46
0
def insert_data_types():
    mysql_con = mdb.connect(info.HOST_2, info.ID, info.PWD, info.DB_NAME_2)
    df = {'modality': [], 'field': [], 'variableType': [], 'permissionFree': [], 'category': [],
          'collectionFrequency': [], 'collectionDuration': [], 'sensitiveSensor': []}
    for mod in info.FREE_MODE_LIST:
        fields = info.MOD_FIELD_LIST[mod]
        for f in fields:
            df['modality'] += [mod]
            df['field'] += [f]
            df['variableType'] += [info.MOD_FIELD_TYPE[mod][f].split("_type")[0]]
            df['permissionFree'] += ['yes'] if f in info.PERMISSION_FREE[mod] else ['no']
            df['category'] += [info.MOD_CATEGORY[mod].split("Probe")[0]]
            df['collectionFrequency'] += [info.MOD_FREQ_TYPE[mod].split("_frequency")[0]]
            df['collectionDuration'] += [info.MOD_COL_TYPE[mod].split("_type")[0]]
            df['sensitiveSensor'] += ['yes'] if mod in info.SENSITIVE_MOD else ['no']

    df = DataFrame(df, columns=['modality', 'field', 'variableType', 'permissionFree', 'category',
                                'collectionFrequency', 'collectionDuration', 'sensitiveSensor'])
    df.to_sql("dataTypes", mysql_con, flavor='mysql', if_exists='append', index=False)
Exemplo n.º 47
0
def insert_from_cp(con):
    instCpStockCode = win32com.client.Dispatch("CpUtil.CpStockCode")
    instStockMst = win32com.client.Dispatch("dscbo1.StockMst")
    instCpCodeMgr = win32com.client.Dispatch("CpUtil.CpCodeMgr")

    code_data = {'CODE':[],
                 'NAME':[],
                 'TYPE':[],
                 'PER':[],
                 'BPS':[], #PRICE / BPS
                 'INDUSTRY_CODE':[],
                 'INDUSTRY':[]}

    total = instCpStockCode.GetCount()
    for i in range(0, total):
        code = instCpStockCode.GetData(CPSTOCKCODE_CODE, i)
        name = instCpStockCode.GetData(CPSTOCKCODE_NAME, i)
        code_data['CODE'].append(code)
        code_data['NAME'].append(name)
        
        instStockMst.SetInputValue(0, code)
        instStockMst.BlockRequest()
        type = instStockMst.GetHeaderValue(CPSTOCKMST_CATEGORY)
        per = instStockMst.GetHeaderValue(CPSTOCKMST_PER)
        bps = instStockMst.GetHeaderValue(CPSTOCKMST_BPS)
        industry_code = instStockMst.GetHeaderValue(CPSTOCKMST_INDUSTRY_CODE)
        industry_name = instCpCodeMgr.GetIndustryName(industry_code)

        code_data['TYPE'].append(type)
        code_data['PER'].append(per)
        code_data['BPS'].append(bps)
        code_data['INDUSTRY_CODE'].append(industry_code)
        code_data['INDUSTRY'].append(industry_name)

        get_logger().debug("{}/{} {} {} {} {} {}".format(i, total,code, name, type, per, bps))

    data = DataFrame(code_data)
    data.to_sql("CODE", con, if_exists='replace', chunksize=1000)
    get_logger().debug("{} 주식 종목 코드를 저장 하였습니다.".format(len(data)))
Exemplo n.º 48
0
    def _write(self, tablename, expected_dtypes, frame):
        if frame is None or frame.empty:
            # keeping the dtypes correct for empty frames is not easy
            frame = DataFrame(
                np.array([], dtype=list(expected_dtypes.items())),
            )
        else:
            if tablename == 'shares':
                pass
            else:
                if frozenset(frame.columns) != frozenset(expected_dtypes):
                    raise ValueError(
                        "Unexpected frame columns:\n"
                        "Expected Columns: %s\n"
                        "Received Columns: %s" % (
                            set(expected_dtypes),
                            frame.columns.tolist(),
                        )
                    )

            actual_dtypes = frame.dtypes
            for colname, expected in iteritems(expected_dtypes):
                actual = actual_dtypes[colname]
                if not issubdtype(actual, expected):
                    raise TypeError(
                        "Expected data of type {expected} for column"
                        " '{colname}', but got '{actual}'.".format(
                            expected=expected,
                            colname=colname,
                            actual=actual,
                        ),
                    )

        frame.to_sql(
            tablename,
            self.conn,
            if_exists='append',
            chunksize=50000,
        )
Exemplo n.º 49
0
class SQLDFTest(unittest.TestCase):

    def setUp(self):
        self.default_df = DataFrame(
            [["l1", 1, 2], ["l2", 3, 4], ["l3", 4, 5]], columns=["label", "c1", "c2"])
        self.default_env = {"a": 1, "df": self.default_df}
        self.default_udfs = {"udf1": lambda x: x}

        class udaf1(object):

            def __init__(self):
                self.count = 0

            def step(self, x):
                self.count += 1

            def finalize(self):
                return self.count
        self.default_udafs = {"udaf1": udaf1}

    def tearDown(self):
        pass

    def test_constructor_with_default(self):
        sqldf = SQLDF(self.default_env)
        self.assertEqual(isinstance(sqldf, SQLDF), True)
        self.assertEqual(sqldf.env, self.default_env)
        self.assertEqual(sqldf.inmemory, True)
        self.assertEqual(sqldf._dbname, ":memory:")
        self.assertEqual(sqldf.udfs, {})
        self.assertEqual(sqldf.udafs, {})
        self.assertEqual(isinstance(sqldf.conn, sqlite3.Connection), True)

    def test_constructor_with_assign(self):
        sqldf = SQLDF(
            self.default_env,
            inmemory=False,
            udfs=self.default_udfs,
            udafs=self.default_udafs)
        self.assertEqual(isinstance(sqldf, SQLDF), True)
        self.assertEqual(sqldf.env, self.default_env)
        self.assertEqual(sqldf.inmemory, False)
        self.assertEqual(sqldf._dbname, ".pysqldf.db")
        self.assertEqual(sqldf.udfs, self.default_udfs)
        self.assertEqual(sqldf.udafs, self.default_udafs)
        self.assertEqual(isinstance(sqldf.conn, sqlite3.Connection), True)

    def test_destructor_with_inmemory_db(self):
        sqldf = SQLDF(self.default_env)
        conn = sqldf.conn
        self.assertRaises(
            sqlite3.OperationalError,
            lambda: conn.execute("select * from tbl;"))
        sqldf = None  # destruct
        self.assertRaises(
            sqlite3.ProgrammingError,
            lambda: conn.execute("select * from tbl;"))

    def test_destructor_with_fs_db(self):
        sqldf = SQLDF(self.default_env, inmemory=False)
        conn = sqldf.conn
        self.assertRaises(
            sqlite3.OperationalError,
            lambda: conn.execute("select * from tbl;"))
        self.assertEqual(os.path.exists(".pysqldf.db"), True)
        sqldf = None  # destruct
        self.assertRaises(
            sqlite3.ProgrammingError,
            lambda: conn.execute("select * from tbl;"))
        self.assertEqual(os.path.exists(".pysqldf.db"), False)

    def test_execute_method(self):
        sqldf = SQLDF(self.default_env)
        query = "select * from df;"
        result = sqldf.execute(query)
        assert_frame_equal(result, self.default_df)
        # table deleted
        self.assertRaises(
            sqlite3.OperationalError,
            lambda: sqldf.conn.execute(query))

    def test_execute_method_returning_none(self):
        births = load_births()
        result = SQLDF(locals()).execute(
            "select a from births limit 10;")  # col a not exists
        self.assertEqual(result, None)

    def test_execute_method_with_table_not_found(self):
        sqldf = SQLDF(self.default_env)
        self.assertRaises(
            Exception,
            lambda: sqldf.execute("select * from notable"))
        # table deleted
        self.assertRaises(sqlite3.OperationalError,
                          lambda: sqldf.conn.execute("select * from df;"))

    def test_execute_method_with_query_error(self):
        sqldf = SQLDF(self.default_env)
        self.assertEqual(sqldf.execute("select a from df uuuuuu;"), None)
        # table deleted
        self.assertRaises(sqlite3.OperationalError,
                          lambda: sqldf.conn.execute("select * from df;"))

    def test_extract_table_names_method(self):
        sqldf = SQLDF(self.default_env)
        tablenames = {
            "select * from df;": ["df"],
            "select * from df": ["df"],
            "select * from _": ["_"],
            "select * from 11;": [],
            "select * from 1ab;": [],
            "select * from a-b;": [],
            "select * from a.b;": [],
            "select a;": [],
            "select * from (select * from subq_df) f;": ["subq_df"],
            "select * from df d1 inner join df2 d2 on d1.id = d2.id;": ["df", "df2"],
            "select a, b c from df where c in (select foo from df2 inner join df3 on df2.id = df3.id);": ["df", "df2", "df3"],
            "select * from df where a in (select a from (select c from df2 where c in (select a from df3 inner join df4 on df3.id = df4.id)));": ["df", "df2", "df3", "df4"]
        }
        for query, tablename in tablenames.items():
            self.assertEqual(
                set(sqldf._extract_table_names(query)), set(tablename))

    def test_ensure_data_frame_method_nested_list(self):
        data = [[1, 2, 3], [4, 5, 6]]
        result = SQLDF(locals())._ensure_data_frame(data, "df")
        self.assertEqual(len(result), 2)
        self.assertEqual(list(result.columns), ["c0", "c1", "c2"])
        self.assertEqual(list(result.index), [0, 1])

    def test_ensure_data_frame_method_list_of_tuple(self):
        data = [(1, 2, 3), (4, 5, 6)]
        result = SQLDF(locals())._ensure_data_frame(data, "df")
        self.assertEqual(len(result), 2)
        self.assertEqual(list(result.columns), ["c0", "c1", "c2"])
        self.assertEqual(list(result.index), [0, 1])

    def test_ensure_data_frame_method_nested_tuple(self):
        data = ((1, 2, 3), (4, 5, 6))
        sqldf = SQLDF(locals())
        self.assertRaises(
            Exception,
            lambda: sqldf._ensure_data_frame(
                data,
                "df"))

    def test_ensure_data_frame_method_tuple_of_list(self):
        data = ([1, 2, 3], [4, 5, 6])
        sqldf = SQLDF(locals())
        self.assertRaises(
            Exception,
            lambda: sqldf._ensure_data_frame(
                data,
                "df"))

    def test_ensure_data_frame_method_list_of_dict(self):
        data = [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}]
        result = SQLDF(locals())._ensure_data_frame(data, "df")
        self.assertEqual(len(result), 2)
        self.assertEqual(list(result.columns), ["a", "b", "c"])
        self.assertEqual(list(result.index), [0, 1])

    def test_write_table_method(self):
        df = DataFrame([[1, 2], [3, 4]], columns=["col1", "col2"])
        sqldf = SQLDF(locals())
        sqldf._write_table("tbl", df)
        # table created
        cursor = sqldf.conn.cursor()
        sq_type, name, tbl_name, rootpage, sql = list(
            cursor.execute("select * from sqlite_master where type='table';"))[0]
        self.assertEqual(name, "tbl")

    def test_write_table_method_col_with_left_bracket(self):
        df = DataFrame([[1]], columns=["col("])
        sqldf = SQLDF(locals())
        self.assertRaises(Exception, lambda: sqldf._write_table("tbl", df))

    def test_write_table_method_col_with_right_bracket(self):
        df = DataFrame([[1]], columns=["co)l"])
        sqldf = SQLDF(locals())
        self.assertRaises(Exception, lambda: sqldf._write_table("tbl", df))

    def test_write_table_method_garbage_table(self):
        df = [[1, 2], [3, [4]]]
        sqldf = SQLDF(locals())
        self.assertRaises(Exception, lambda: sqldf._write_table("tbl", df))
        # table destroyed
        cursor = sqldf.conn.cursor()
        tablemaster = list(cursor.execute("select * from sqlite_master where type='table';"))
        self.assertEqual(tablemaster, [])

    def test_del_table_method(self):
        sqldf = SQLDF(locals())
        cursor = sqldf.conn.cursor()
        # create table
        cursor.execute("create table deltbl(col);")
        sqldf._del_table(["deltbl"])
        self.assertEqual(
            list(
                cursor.execute("select * from sqlite_master where type='table';")),
            [])

    def test_del_table_method_not_exist_table(self):
        sqldf = SQLDF(locals())
        self.assertRaises(
            sqlite3.OperationalError,
            lambda: sqldf._del_table(
                ["deltblaaaaaaa"]))

    def test_set_udf_method(self):
        sqldf = SQLDF(locals())
        conn = sqldf.conn
        self.default_df.to_sql("df", conn)
        sqldf._set_udf(self.default_udfs)
        self.assertEqual(
            list(
                conn.execute("select udf1(label) from df;")), [
                ("l1",), ("l2",), ("l3",)])

    def test_set_udaf_method_with_agg_class(self):
        sqldf = SQLDF(locals())
        conn = sqldf.conn
        self.default_df.to_sql("df", conn)
        sqldf._set_udaf(self.default_udafs)
        self.assertEqual(
            list(
                conn.execute("select udaf1(label) from df;")), [
                (3,)])

    def test_set_udaf_method_with_agg_function(self):
        sqldf = SQLDF(locals())
        conn = sqldf.conn
        self.default_df.to_sql("df", conn)

        def agg_func(values):
            return len(values)
        sqldf._set_udaf({"mycount": agg_func})
        self.assertEqual(
            list(
                conn.execute("select mycount(label) from df;")), [
                (3,)])

    def test_udf(self):
        data = [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}]

        def ten(x):
            return 10
        result = SQLDF(locals(), udfs={"ten": ten}).execute(
            "SELECT ten(a) AS ten FROM data;")
        self.assertEqual(len(result), 2)
        self.assertEqual(list(result.columns), ["ten"])
        self.assertEqual(list(result.index), [0, 1])
        self.assertEqual(list(result["ten"]), [10, 10])

    def test_udaf(self):
        data = [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}]

        class mycount(object):

            def __init__(self):
                super(mycount, self).__init__()
                self.count = 0

            def step(self, x):
                self.count += x

            def finalize(self):
                return self.count
        result = SQLDF(locals(), udafs={"mycount": mycount}).execute(
            "select mycount(a) as mycount from data;")
        self.assertEqual(len(result), 1)
        self.assertEqual(list(result.columns), ["mycount"])
        self.assertEqual(list(result.index), [0])
        self.assertEqual(list(result["mycount"]), [1 + 4])

    def test_no_table(self):
        self.assertRaises(
            Exception, lambda: SQLDF(
                locals()).execute("select * from notable;"))

    def test_invalid_colname(self):
        data = [{"a": "valid", "(b)": "invalid"}]
        sqldf = SQLDF(locals())
        self.assertRaises(
            Exception,
            lambda: sqldf.execute("select * from data;"))

    def test_db_in_fs(self):
        data = [{"a": 1, "b": 2, "c": 3}, {"a": 4, "b": 5, "c": 6}]
        sqldf = SQLDF(locals(), inmemory=False)
        self.assertEqual(os.path.exists(".pysqldf.db"), True)
        sqldf = None  # run GC
        self.assertEqual(os.path.exists(".pysqldf.db"), False)
Exemplo n.º 50
0
# In[163]:

df


# In[151]:

conn = lite.connect('UN_education.db')
cur = conn.cursor()


# In[164]:

cur.execute('DROP TABLE IF EXISTS school_years')
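# to_sql defaults to if_exists='fail', so dropping any existing table first lets this cell be re-run cleanly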
df.to_sql('school_years', conn)


# In[156]:

cur.execute('SELECT * FROM school_years')
for r in cur:
    print r


# In[169]:

print df.describe()


Exemplo n.º 51
0
# coding:UTF-8
Exemplo n.º 52
0
import dota2api

from pandas import DataFrame, Series
import pandas as pd
import numpy as np

import sqlalchemy

from settings import API_KEY
import settings
ACCOUNT_ID = 172282397

mysql_engine = sqlalchemy.create_engine("postgresql://{}@localhost/{}".format(settings.PSQL_USER,settings.PSQL_DB))
#mysql_engine = sqlalchemy.create_engine('mysql+pymysql://root@localhost/bp_helper_db?charset=utf8')
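# note: despite the variable name, the active engine above points at PostgreSQL (the MySQL URL is left commented out)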
connection = mysql_engine.connect()
api = dota2api.Initialise(API_KEY)

#hero_info
heroes = api.get_heroes(language='zh')["heroes"]
heroes_df = DataFrame(heroes)
heroes_df.to_sql('dota2_hero',mysql_engine,if_exists='replace')
#item_info
items = api.get_game_items(language='zh')["items"]
items_df = DataFrame(items)
items_df.to_sql('dota2_item',mysql_engine,if_exists='replace')
Exemplo n.º 53
0
def run():
    with sqlite3.connect("analyze.db") as con:
        cursor = con.cursor()

        code_magc = {'CODE':[],
                     'NAME':[]}
        code_bband= {'CODE':[],
                     'NAME':[]}
        code_macd = {'CODE':[],
                     'NAME':[]}

        code_arrange = {'CODE':[],
                        'NAME':[]}

        per_bps_pbr = {'CODE':[],
                       'NAME':[],
                       'OPEN':[],
                       'HIGH':[],
                       'LOW':[],
                       'CLOSE':[],
                       'VOLUME':[],
                       'PER':[],
                       'BPS':[],
                       'PBR':[],
                       'INDUSTRY_CODE':[],
                       'INDUSTRY':[]}

        for code, name in get_code_list():
            df = makeDataFrame( code )
            if len(df) == 0: continue

            # per,bps, pbr table
            per , bps = get_per_bps_with_code(code)
            open, high, low, close, volume = get_last_data_with_code(code)
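            # PBR (price-to-book ratio) = last close / BPS (book value per share); guard against a zero BPS below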
            if bps != 0:
                pbr = close / bps
            else:
                pbr = 0.0
            
            industry_code, industry_name = get_industry_with_code(code)

            per_bps_pbr['CODE'].append(code)
            per_bps_pbr['NAME'].append(name)
            per_bps_pbr['OPEN'].append(open)
            per_bps_pbr['HIGH'].append(high)
            per_bps_pbr['LOW'].append(low)
            per_bps_pbr['CLOSE'].append(close)
            per_bps_pbr['VOLUME'].append(volume)
            per_bps_pbr['PER'].append(per)
            per_bps_pbr['BPS'].append(bps)
            per_bps_pbr['PBR'].append(pbr)
            per_bps_pbr['INDUSTRY_CODE'].append(industry_code)
            per_bps_pbr['INDUSTRY'].append(industry_name)
            get_logger().debug("{} {} {} {} {} {} {} {} {} {}".format(code,name,open,high,low, close,volume,per,bps,pbr))

            arranged = isArrange( df, 60, 120 )
            if arranged == True and pbr <= 1:
                code_arrange['CODE'].append(code)
                code_arrange['NAME'].append(name)
                get_logger().debug("ARRANGE {}{}".format(code, name))
            
            res = isBBandSignal( df, 20 )
            if res == True and pbr <= 1:
                code_bband['CODE'].append(code)
                code_bband['NAME'].append(name)
                get_logger().debug("BBnad lower after up {}{}".format(code,name))

            res = isMAGoldCross( df, 20, 60 )
            if res == True and arranged == True and pbr <= 1:
                code_magc['CODE'].append(code)
                code_magc['NAME'].append(name)
                get_logger().debug("MA20, MA60 Golden Cross {}{}".format(code,name))

            res = isMACDSignal( df, 12, 26, 9)
            if res == True and arranged == True and pbr <= 1:
                code_macd['CODE'].append(code)
                code_macd['NAME'].append(name)
                get_logger().debug("MACD sig {}{}".format(code,name))

        magc = DataFrame(code_magc)
        bband   = DataFrame(code_bband)
        macd = DataFrame(code_macd)
        
        magc.to_sql("MAGC", con, if_exists='replace', chunksize=1000)
        get_logger().debug("MAGC {} saved.".format(len(magc)))
        bband.to_sql("BBAND", con, if_exists='replace', chunksize=1000)
        get_logger().debug("BBAND {} saved.".format(len(bband)))
        macd.to_sql("MACD", con, if_exists='replace', chunksize=1000)
        get_logger().debug("MACD {} saved.".format(len(macd)))

        arrange = DataFrame(code_arrange)
        arrange.to_sql("ARRANGE", con, if_exists='replace', chunksize=1000)
        get_logger().debug("ARRANGE {} saved.".format(len(arrange)))

        per_bps_pbr_df = DataFrame(per_bps_pbr)
        per_bps_pbr_df.to_sql("BPS", con, if_exists='replace', chunksize=1000)
        get_logger().debug("BPS {} saved.".format(len(per_bps_pbr_df)))
Exemplo n.º 54
0
import sqlite3
import pandas as pd
import pandas.io.sql as sql
from pandas import Series, DataFrame

df = DataFrame({'A': [1,2,3]})
#df["id"] = df.index

con = sqlite3.connect("test.db")

df.to_sql("test", con)

Exemplo n.º 55
0
if not (args.apps or args.tests or args.extra or args.conv):
    benches = apps
else:
    if args.apps:
        benches.extend(apps)
    if args.tests:
        benches.extend(tests)
    if args.conv:
        benches.extend(conv)
    if args.extra:
        benches.extend(args.extra)

benches = filter(lambda a: a not in disabled, benches)
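# under Python 2 filter() returns a list here; Python 3 would need list(filter(...))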

print 'Loading:\n  ' + '\n  '.join(benches)

res = DataFrame()

for app in benches:
    try:
        res = res.append(ingest(app))
    except Exception:  # e.g. IOError when a benchmark file is missing or malformed
        print 'Skipping missing or malformed: '+app
    #except:
    #    print '\n\nFailed on',app,'\n'
    #    raise

db = create_engine('sqlite:///benchmarks.db')
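# to_sql defaults to if_exists='fail', so re-running against an existing benchmarks table raises unless if_exists is set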
res.to_sql('benchmarks', db)
res.to_csv('benchmarks.csv')
Exemplo n.º 56
0
if __name__ == '__main__':
    db_name = sys.argv[1]
    db_user = sys.argv[2]
    db_pass = sys.argv[3]

    # Load data for each page
    con = db.connect('localhost',db_user,db_pass,db_name)
    query = 'SELECT rev_user, rev_ip, page_id, page_namespace, name FROM revision ' + \
        'INNER JOIN page ON page.page_id=revision.rev_page ' + \
        'INNER JOIN namespaces ON page.page_namespace=namespaces.code'
    data     = read_sql(query, con)
    con.close()

    # Map IP addresses to country codes
    data['country'] = data['rev_ip'].apply(lambda ip: getCountryCode(ip))
    data['country'] = data['country'].fillna('Unknown')

    # Create country contributions for each page
    con = db.connect('localhost',db_user,db_pass,db_name)
    for page_id, page_revs in data.groupby('page_id'):
        nRevs = len(page_revs)
        cRevs = page_revs.groupby('country').size() / nRevs
    
        # Insert into country_contrib
        # Values: page_id, country (cRevs.keys()), contributions(cRevs.keys())
        df = DataFrame(cRevs, columns=['contribution'] )
        df['page_id'] = page_id
        df.to_sql(con=con, name='country_contrib', if_exists='append', flavor='mysql')
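        # flavor='mysql' uses pandas' legacy (non-SQLAlchemy) writer; newer pandas versions dropped this argument in favor of SQLAlchemy engines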
    
    con.close()
Exemplo n.º 57
0
def run():
    global code
    global sell_point

    with sqlite3.connect("backtesting.db") as con:
        cursor = con.cursor()
        
        backtesting_save_data = { 'CODE':[],
                                  'NAME':[],
                                  'STRATEGY':[],
                                  'SELL_PRICE_RATIO':[],
                                  'PORTFOLIO_VALUE':[],
                                  'OPEN':[],
                                  'HIGH':[],
                                  'LOW':[],
                                  'CLOSE':[],
                                  'VOLUME':[],
                                  'PER':[],
                                  'BPS':[],
                                  'PBR':[]}

        for strategys in STRATEGY:
            for code, name in get_code_list_from_analyze(strategys):
                per , bps = get_per_bps_with_code(code)

                open, high, low, close, volume = get_last_data_with_code(code)
                if bps != 0:
                    pbr = close / bps
                else:
                    pbr = 0.0

                get_logger().debug("code : {}. name:{} strategy:{} start".format(code,name,strategys))
                
                last_portfolio = 0
                data = makeBacktestingDataFrame(code)
                for point in SELL_PRICE_RATIO:
                    sell_point = point
                    if strategys == 'MAGC':
                        algo = TradingAlgorithm(capital_base=10000000, initialize=initialize_magc, handle_data=handle_data_magc, identifiers=[code]  )
                        results = algo.run(data)
                    elif strategys == 'MACD':
                        algo = TradingAlgorithm(capital_base=10000000, initialize=initialize_macd, handle_data=handle_data_macd, identifiers=[code]  )
                        results = algo.run(data)
                    elif strategys == 'BBAND':
                        algo = TradingAlgorithm(capital_base=10000000, initialize=initialize_bband, handle_data=handle_data_bband, identifiers=[code]  )
                        results = algo.run(data)
                    
                    portfolio = results['portfolio_value'][-1]
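                    # keep only the best-performing sell point per code/strategy: pop the previous row before appending the better one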
                    if last_portfolio < portfolio:
                        if last_portfolio != 0 :
                            backtesting_save_data['CODE'].pop()
                            backtesting_save_data['NAME'].pop()
                            backtesting_save_data['STRATEGY'].pop()
                            backtesting_save_data['SELL_PRICE_RATIO'].pop()
                            backtesting_save_data['PORTFOLIO_VALUE'].pop()
                            backtesting_save_data['OPEN'].pop()
                            backtesting_save_data['HIGH'].pop()
                            backtesting_save_data['LOW'].pop()
                            backtesting_save_data['CLOSE'].pop()
                            backtesting_save_data['VOLUME'].pop()
                            backtesting_save_data['PER'].pop()
                            backtesting_save_data['BPS'].pop()
                            backtesting_save_data['PBR'].pop()

                        backtesting_save_data['CODE'].append(code)
                        backtesting_save_data['NAME'].append(name)
                        backtesting_save_data['STRATEGY'].append(strategys)
                        backtesting_save_data['SELL_PRICE_RATIO'].append('{}'.format(point))
                        backtesting_save_data['PORTFOLIO_VALUE'].append(portfolio)
                        backtesting_save_data['OPEN'].append(open)
                        backtesting_save_data['HIGH'].append(high)
                        backtesting_save_data['LOW'].append(low)
                        backtesting_save_data['CLOSE'].append(close)
                        backtesting_save_data['VOLUME'].append(volume)
                        backtesting_save_data['PER'].append(per)
                        backtesting_save_data['BPS'].append(bps)
                        backtesting_save_data['PBR'].append(pbr)
                        last_portfolio = portfolio

        backtesting_save_df = DataFrame(backtesting_save_data)
               
        backtesting_save_df.to_sql('BACK', con, if_exists='replace', chunksize=1000)
        get_logger().debug("code : {}. name:{} strategy:{} end".format(code,name,strategys))
Exemplo n.º 58
0
def load_audit_logs_into_postgres(options):
    session = ValkfleetConnector().db
    engine = WarehouseConnector().db

    log.info('Loading audit points for {start} to {stop} (batches of {n})'.format(
        start=options.start,
        stop=NOW,
        n=options.batch_size)
    )

    # Avoid loading data twice: grab the latest timestamp in the database
    result = engine.execute(sql.statements[1])
    resume_from = options.start

    if result.rowcount:
        last_timestamp = list(result)[0][0] + timedelta(milliseconds=1)
        if last_timestamp > options.start:
            resume_from = last_timestamp
            log.info('Resuming at %s', resume_from)

    cursor = None
    more = True
    batch_counter = 0
    batch_max_timestamp = None

    while more:
        url = ENDPOINT + '?filter=timestamp gt {start}&orderby=timestamp&batch_size={batch_size}'.format(
            start=resume_from.isoformat(),
            batch_size=options.batch_size
        )
        if cursor:
            url += '&cursor=%s' % cursor

        log.info('Requesting %s', url)
        response = session.get(url)
        json = response.json()

        if 'error' not in json.keys():
            records = json['items']
            more = json['more']
            cursor = json['cursor']
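            # the endpoint paginates: pass the returned cursor token back on the next request until 'more' is False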

            def add_foreign_key(records_):
                for record in records_:
                    if record['event'] != 'acceptRoute-clicked':
                        record.update({'delivery_uuid': record['metadata']['delivery']})
                        del record['metadata']
                        yield record

            fc_records = list(add_foreign_key(records))
            batch = DataFrame().from_records(fc_records)

            if batch.empty:
                log.warning('Batch %s is empty', batch_counter)

            batch.to_sql(TABLE, engine,
                         schema=SCHEMA,
                         if_exists='append',
                         index=False)

            # The time columns are actually strings
            batch_min_timestamp = batch['timestamp'].min()[:19]
            batch_max_timestamp = batch['timestamp'].max()[:19]

            batch_counter += 1
            kwargs = dict(schema=SCHEMA,
                          table=TABLE,
                          n=batch_counter,
                          records=batch.shape[0],
                          fields=batch.shape[1],
                          min=batch_min_timestamp,
                          max=batch_max_timestamp)
            log.info('Loaded batch {n} into table {schema}.{table} '
                     '({records} records, {fields} fields):'
                     '{min} to {max}'.format(**kwargs))

        else:
            message = 'Lost the cursor on batch {n} ({time}): {status} {error}'.format(
                n=batch_counter,
                time=batch_max_timestamp,
                status=response.status_code,
                error=response.json()
            )
            log.error(message, exc_info=True)
            exit(message)

    log.info('Finished loading tracks for %s to %s', options.start, NOW)
Exemplo n.º 59
0
def run():
    with sqlite3.connect("price.db") as con:
        cursor = con.cursor()

        def _make_long_date(date):
            return date.year * 10000 + date.month * 100 + date.day

        for code, name in get_code_list():

            table_name = code
            
            recent_date = get_last_update_date(table_name)
            if recent_date is None: recent_date = START_DATE

            if recent_date.date() == datetime.now().date(): continue

            if recent_date.weekday() == 4 and datetime.now() - recent_date < timedelta(hours=64): continue

            start_date = (recent_date + timedelta(days=1))
            end_date = datetime.now()

            if datetime.now().hour < 16: end_date = datetime.now() - timedelta(days=1)

            if start_date > end_date: continue

            instStockChart = win32com.client.Dispatch("CpSysDib.StockChart")

            instStockChart.SetInputValue(CPSTOCKCHART_REQ_CODE, code)
            instStockChart.SetInputValue(CPSTOCKCHART_REQ_DATE_OR_COUNT, CPSTOCKCHART_REQ_PARAM_DATE)

            instStockChart.SetInputValue(CPSTOCKCHART_REQ_END_DATE,_make_long_date(end_date))
            instStockChart.SetInputValue(CPSTOCKCHART_REQ_START_DATE,_make_long_date(start_date))

            instStockChart.SetInputValue(CPSTOCKCHART_REQ_FIELD, 
                                            [CPSTOCKCHART_REQ_PARAM_FIELD_DATE,
                                            CPSTOCKCHART_REQ_PARAM_FIELD_OPEN,
                                            CPSTOCKCHART_REQ_PARAM_FIELD_HIGH,
                                            CPSTOCKCHART_REQ_PARAM_FIELD_LOW,
                                            CPSTOCKCHART_REQ_PARAM_FIELD_CLOSE,
                                            CPSTOCKCHART_REQ_PARAM_FIELD_VOLUME])
            instStockChart.SetInputValue(CPSTOCKCHART_REQ_TYPE, CPSTOCKCHART_REQ_TYPE_PARAM_DAY)
            instStockChart.SetInputValue(CPSTOCKCHART_REQ_ADJ, CPSTOCKCHART_REQ_ADJ_PARAM_ADJUST)

            instStockChart.BlockRequest()

            numData = instStockChart.GetHeaderValue(CPSTOCKCHART_RES_DATA_COUNT)
            price_data = {'DATE':[],
                          'OPEN':[],
                          'HIGH':[],
                          'LOW':[],
                          'CLOSE':[],
                          'VOLUME':[]}

            # Cybos Plus returns the most recent data first, so iterate in reverse to append rows in chronological order
            for i in reversed(range(numData)):
                
                long_date = instStockChart.GetDataValue(0, i)
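                # dates arrive as YYYYMMDD integers (see _make_long_date above), so split out year/month/day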
                year = int(long_date / 10000)
                month = int(long_date / 100) % 100
                day = long_date % 100
                dateval = datetime(year, month, day, 0, 0, 0)

                open = instStockChart.GetDataValue(1, i)
                high = instStockChart.GetDataValue(2, i)
                low = instStockChart.GetDataValue(3, i)
                close = instStockChart.GetDataValue(4, i)
                volume = instStockChart.GetDataValue(5, i)

                price_data['DATE'].append(dateval)
                price_data['OPEN'].append(open)
                price_data['HIGH'].append(high)
                price_data['LOW'].append(low)
                price_data['CLOSE'].append(close)
                price_data['VOLUME'].append(volume)

            price = DataFrame(price_data)
            price.to_sql(table_name, con, if_exists='append', chunksize=1000)
            get_logger().debug("{} 종목의 {}데이터를 저장 하였습니다.".format(code, len(price)))

            # remove old data: once a table exceeds 600 rows, drop its oldest row
            row = cursor.execute("SELECT COUNT(*) FROM '{}'".format(table_name)).fetchone()
            if row[0] > 600:
                row = cursor.execute("DELETE FROM '{}' WHERE DATE = (SELECT MIN(DATE) FROM '{}')".format(table_name, table_name))
    for name, params in tables.items():
        new_df = DataFrame()
        print(' -> Clean up {} dataframe'.format(name))
        for year in Years:
            # clean up the table
            clean_func = params[0]
            tmp_df = clean_func(year)

            if new_df.empty:
                new_df = tmp_df
            else:
                new_df = new_df.append(tmp_df, ignore_index=(params[1] == 'index'))

        print(' -> Load {} dataframe to SQL database'.format(name))

        if options.replace:
            if_exists = 'replace'
        else:
            if_exists = 'append'

        sql_options = {'con':sqlEngine, 'if_exists':if_exists,
                'index':True, 'chunksize':100}

        new_df.to_sql(name=name, **sql_options)

        # We want the index to be a primary key to speed things up among other reasons.
        sqlEngine.execute('''ALTER TABLE {}
                ADD PRIMARY KEY (`{}`)'''.format(name,params[1]))