Example #1
    def test_to_sql_index_label(self):
        temp_frame = DataFrame({'col1': range(4)})

        # no index name, defaults to 'index'
        sql.to_sql(temp_frame, 'test_index_label', self.conn)
        frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
        self.assertEqual(frame.columns[0], 'index')

        # specifying index_label
        sql.to_sql(temp_frame, 'test_index_label', self.conn,
                   if_exists='replace', index_label='other_label')
        frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
        self.assertEqual(frame.columns[0], 'other_label',
                         "Specified index_label not written to database")

        # using the index name
        temp_frame.index.name = 'index_name'
        sql.to_sql(temp_frame, 'test_index_label', self.conn,
                   if_exists='replace')
        frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
        self.assertEqual(frame.columns[0], 'index_name',
                         "Index name not written to database")

        # has index name, but specifying index_label
        sql.to_sql(temp_frame, 'test_index_label', self.conn,
                   if_exists='replace', index_label='other_label')
        frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
        self.assertEqual(frame.columns[0], 'other_label',
                         "Specified index_label not written to database")
Example #2
    def test_date_parsing(self):
        # Test date parsing in read_sql_query
        # No Parsing
        df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
                                flavor='sqlite')
        self.assertFalse(
            issubclass(df.DateCol.dtype.type, np.datetime64),
            "DateCol loaded with incorrect type")

        df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
                                flavor='sqlite', parse_dates=['DateCol'])
        self.assertTrue(
            issubclass(df.DateCol.dtype.type, np.datetime64),
            "DateCol loaded with incorrect type")

        df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
                                flavor='sqlite',
                                parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'})
        self.assertTrue(
            issubclass(df.DateCol.dtype.type, np.datetime64),
            "DateCol loaded with incorrect type")

        df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
                                flavor='sqlite', parse_dates=['IntDateCol'])

        self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64),
                        "IntDateCol loaded with incorrect type")

        df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
                                flavor='sqlite', parse_dates={'IntDateCol': 's'})

        self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64),
                        "IntDateCol loaded with incorrect type")
Example #3
File: reader.py Project: olegarch/hodim
def read():
    pd.set_option('display.encoding','utf-8')
    
    engine = create_engine('mysql+mysqldb://scrapyuser:[email protected]:3306/testdb?charset=utf8', echo=True)
    df = sql.read_sql_query("SELECT url, rooms, floor, totfloors, m2, kitchenm2, restm2, "
                            "wc, walls, ceilings, rennovation, builtdate, heating, "
                            "water, balcony, security, "
                            "x(location) as lat, y(location) as lon, price "
                            "FROM realestate "
                            #"WHERE security IS NOT NULL "
                            #"LIMIT 10"
                            ,engine)
    print df.shape
    
    df.loc[(df.balcony == u'есть')|(df.balcony == u'да'),'balcony'] = 1
    df.loc[df.balcony == u'нет','balcony'] = -1
    df.loc[df.balcony.isnull(),'balcony'] = 0

    #df.loc[(df.security == u'есть')|(df.security == u'да'),'security'] = 1
    #df.loc[df.security.isnull(),'security'] = 0

    # walls: панельный (panel) - 1; кирпичный (brick) - 2; монолит (monolith) - 3
    #df.loc[(df.walls == u'панельный')|(df.security == u'да'),'security'] = 1
    
    print df
    df.to_csv('data.csv', sep=',', encoding='utf-8', index=True)
Example #4
    def test_sql_open_close(self):
        """
        Test if the IO in the database still works if the connection
        is closed between the writing and reading (as in many real
        situations).
        """

        self._load_test2_data()

        with tm.ensure_clean() as name:

            conn = self.connect(name)

            sql.to_sql(
                self.test_frame2,
                "test_frame2_legacy",
                conn,
                flavor="sqlite",
                index=False,
            )

            conn.close()
            conn = self.connect(name)

            result = sql.read_sql_query(
                "SELECT * FROM test_frame2_legacy;",
                conn,
                flavor="sqlite",
            )

            conn.close()

        tm.assert_frame_equal(self.test_frame2, result)
Example #5
def get_stock_info():
    '''
            Extract stock information from companyclassified. This table is updated
            daily to obtain the latest data, including stock code, listing date,
            P/E ratio and other information.
    '''
    sql_str="SELECT *  FROM  stock_company.`company_basic_info`";
    rs=sql.read_sql_query(sql=sql_str, con=conn, index_col='code', coerce_float=True)
    return rs
Example #6
def getStockCodeListForStockHolder(reportDate):
    df_ap = ts.get_stock_basics()    
    mysql_conn  = pyodbc.connect(conn_info,charset='utf8')
    sql ="select distinct code from stock_holder_info t where date(t.report_date) = '"+reportDate+"';"
    df_exist =  psql.read_sql_query(sql, mysql_conn)
    df_result = df_ap[~df_ap.index.isin(df_exist.code)]
    mysql_conn.close() 
    return list(df_result.index)
Example #7
def get_case_data(db):
    return sql.read_sql_query(select([Case.name,
                                      Case.winning_side,
                                      Case.facts,
                                      Case.dec_type,
                                      Case.dec_date,
                                      Case.id,
                                      Case.scdb_id]), con=db.engine, index_col='id')
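Examples like this pass a SQLAlchemy Core selectable rather than a raw SQL string; read_sql_query accepts either. A minimal, self-contained sketch of the idea, assuming SQLAlchemy 1.4+ (where select() takes columns positionally instead of the select([...]) list form used above) and an in-memory SQLite engine with a hypothetical cases table:

import pandas as pd
from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine, select

engine = create_engine("sqlite:///:memory:")
metadata = MetaData()
cases = Table("cases", metadata,
              Column("id", Integer, primary_key=True),
              Column("name", String),
              Column("winning_side", String))
metadata.create_all(engine)
with engine.begin() as conn:
    conn.execute(cases.insert(), [{"id": 1, "name": "a", "winning_side": "x"},
                                  {"id": 2, "name": "b", "winning_side": "y"}])

# read_sql_query accepts the selectable directly and binds it against the engine
df = pd.read_sql_query(select(cases.c.name, cases.c.winning_side, cases.c.id),
                       con=engine, index_col="id")
print(df)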
Example #8
def getStockCodeListForHistTran(start_date):
    df_ap = ts.get_stock_basics()    
    mysql_conn  = pyodbc.connect(conn_info,charset='utf8')
    sql ="select distinct code from his_trans t where t.tran_date > '"+start_date+"'"
    df_exist =  psql.read_sql_query(sql, mysql_conn)
    df_result = df_ap[~df_ap.index.isin(df_exist.code)]
    mysql_conn.close() 
    return list(df_result.index)
Example #9
File: equity.py Project: garyjoy/pyswing
    def dataFrame(self):

        connection = sqlite3.connect(pyswing.database.pySwingDatabase)
        query = "select * from Equities where Code = '%s'" % (self._tickerCode)
        equityData = read_sql_query(query, connection, 'Date')
        connection.close()

        return equityData
Example #10
    def calculateExitValues(self):

        Logger.log(logging.INFO, "Calculating Exit Values", {"scope":__name__, "Rule":self._tableName, "code":self._tickerCode})

        connection = sqlite3.connect(pyswing.database.pySwingDatabase)

        self._selectBuyQuery = "select e.Date as Date, e.Date as TradeDate, e.Code, e.Open, e.Close, e.High, e.Low, x.Type, x.ExitValue, x.NumberOfDays, x.ExitDetail from Equities e left join '%s' x on e.Date = x.MatchDate and e.Code = x.Code and x.Type = 'Buy' where e.Code = '%s' and x.ExitValue is NULL" % (self._tableName, self._tickerCode)
        self._buyExitValueDataFrame = read_sql_query(self._selectBuyQuery, connection, "Date")

        numberOfRows = self._buyExitValueDataFrame.shape[0]
        for i in range(0, numberOfRows):
            self.calculateExitValueForBuy(i, numberOfRows - i)

        self._buyExitValueDataFrame.drop('Open', axis=1, inplace=True)
        self._buyExitValueDataFrame.drop('Close', axis=1, inplace=True)
        self._buyExitValueDataFrame.drop('High', axis=1, inplace=True)
        self._buyExitValueDataFrame.drop('Low', axis=1, inplace=True)
        self._buyExitValueDataFrame['MatchDate'] = self._buyExitValueDataFrame['TradeDate'].shift(1)
        self._buyExitValueDataFrame.drop('TradeDate', axis=1, inplace=True)

        newRecords = self._buyExitValueDataFrame.query("Type=='Buy'")
        connection.executemany(self._insertQuery, newRecords.to_records(index=True))
        connection.commit()

        self._selectSellQuery = "select e.Date as Date, e.Date as TradeDate, e.Code, e.Open, e.Close, e.High, e.Low, x.Type, x.ExitValue, x.NumberOfDays, x.ExitDetail from Equities e left join '%s' x on e.Date = x.MatchDate and e.Code = x.Code and x.Type = 'Sell' where e.Code = '%s' and x.ExitValue is NULL" % (self._tableName, self._tickerCode)
        self._sellExitValueDataFrame = read_sql_query(self._selectSellQuery, connection, "Date")

        numberOfRows = self._sellExitValueDataFrame.shape[0]
        for i in range(0, numberOfRows):
            self.calculateExitValueForSell(i, numberOfRows - i)

        self._sellExitValueDataFrame.drop('Open', axis=1, inplace=True)
        self._sellExitValueDataFrame.drop('Close', axis=1, inplace=True)
        self._sellExitValueDataFrame.drop('High', axis=1, inplace=True)
        self._sellExitValueDataFrame.drop('Low', axis=1, inplace=True)
        self._sellExitValueDataFrame['MatchDate'] = self._sellExitValueDataFrame['TradeDate'].shift(1)
        self._sellExitValueDataFrame.drop('TradeDate', axis=1, inplace=True)

        newRecords = self._sellExitValueDataFrame.query("Type=='Sell'")
        connection.executemany(self._insertQuery, newRecords.to_records(index=True))
        connection.commit()

        connection.close()
Example #11
    def test_read_sql_delegate(self):
        iris_frame1 = sql.read_sql_query(
            "SELECT * FROM iris", self.conn, flavor=self.flavor)
        iris_frame2 = sql.read_sql(
            "SELECT * FROM iris", self.conn, flavor=self.flavor)
        tm.assert_frame_equal(iris_frame1, iris_frame2,
                              "read_sql and read_sql_query have not the same"
                              " result with a query")

        self.assertRaises(ValueError, sql.read_sql, 'iris', self.conn,
                          flavor=self.flavor)
Example #12
    def test_read_sql_delegate(self):
        iris_frame1 = sql.read_sql_query(
            "SELECT * FROM iris", self.conn)
        iris_frame2 = sql.read_sql(
            "SELECT * FROM iris", self.conn)
        tm.assert_frame_equal(iris_frame1, iris_frame2,
                              "read_sql and read_sql_query have not the same"
                              " result with a query")

        iris_frame1 = sql.read_sql_table('iris', self.conn)
        iris_frame2 = sql.read_sql('iris', self.conn)
        tm.assert_frame_equal(iris_frame1, iris_frame2)
Example #13
    def test_date_and_index(self):
        # Test case where same column appears in parse_date and index_col

        df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
                                flavor='sqlite', index_col='DateCol',
                                parse_dates=['DateCol', 'IntDateCol'])

        self.assertTrue(issubclass(df.index.dtype.type, np.datetime64),
                        "DateCol loaded with incorrect type")

        self.assertTrue(issubclass(df.IntDateCol.dtype.type, np.datetime64),
                        "IntDateCol loaded with incorrect type")
Example #14
def to_df(queryset):
    """
    :param queryset: django.db.models.query.QuerySet
    :return: pandas.core.frame.DataFrame
    """
    try:
        query, params = queryset.query.sql_with_params()
    except EmptyResultSet:
        # Occurs when Django tries to create an expression for a
        # query which will certainly be empty
        # e.g. Book.objects.filter(author__in=[])
        return pd.DataFrame()
    return read_sql_query(query, connection, params=params)
Example #15
def get_data(code,excel):
    conn=pymysql.connect(**config)
    sql_st="select Date as date,Open as open,High as high,Close as close,Low as low,Amount as volume, \
      AmountPrice as amount,Resumption as factor from StockPrice where Date>'2005-01-03' and\
      Date<'2016-07-15' and ShortID={} order by Date desc;".format(code)
    df1=ts.get_h_data(code=code,start='2005-01-04',end='2016-07-04',autype='hfq',drop_factor=False)
    df2=sql.read_sql_query(sql_st,conn,index_col='date')
    df3=df1-df2
    df=df3[(abs(df3.open)>=0.01)|(abs(df3.close)>=0.01)|(abs(df3.low)>=0.01)|(abs(df3.volume)>=0.01)|(abs(df3.amount)>=0.01)]
    if not df.empty:
        # Add a column to df containing the stock code
        df.insert(len(df.columns), 'code_name', value=int(code))
        df.to_excel(excel,sheet_name=code)
Example #16
    def test_roundtrip(self):
        sql.to_sql(self.test_frame1, 'test_frame_roundtrip',
                   con=self.conn, flavor='sqlite')
        result = sql.read_sql_query(
            'SELECT * FROM test_frame_roundtrip',
            con=self.conn,
            flavor='sqlite')

        # HACK!
        result.index = self.test_frame1.index
        result.set_index('level_0', inplace=True)
        result.index.astype(int)
        result.index.name = None
        tm.assert_frame_equal(result, self.test_frame1)
Example #17
    def test_to_sql_index_label_multiindex(self):
        temp_frame = DataFrame({'col1': range(4)},
            index=MultiIndex.from_product([('A0', 'A1'), ('B0', 'B1')]))

        # no index name, defaults to 'level_0' and 'level_1'
        sql.to_sql(temp_frame, 'test_index_label', self.conn)
        frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
        self.assertEqual(frame.columns[0], 'level_0')
        self.assertEqual(frame.columns[1], 'level_1')

        # specifying index_label
        sql.to_sql(temp_frame, 'test_index_label', self.conn,
                   if_exists='replace', index_label=['A', 'B'])
        frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
        self.assertEqual(frame.columns[:2].tolist(), ['A', 'B'],
                         "Specified index_labels not written to database")

        # using the index name
        temp_frame.index.names = ['A', 'B']
        sql.to_sql(temp_frame, 'test_index_label', self.conn,
                   if_exists='replace')
        frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
        self.assertEqual(frame.columns[:2].tolist(), ['A', 'B'],
                         "Index names not written to database")

        # has index name, but specifying index_label
        sql.to_sql(temp_frame, 'test_index_label', self.conn,
                   if_exists='replace', index_label=['C', 'D'])
        frame = sql.read_sql_query('SELECT * FROM test_index_label', self.conn)
        self.assertEqual(frame.columns[:2].tolist(), ['C', 'D'],
                         "Specified index_labels not written to database")

        # wrong length of index_label
        self.assertRaises(ValueError, sql.to_sql, temp_frame,
                          'test_index_label', self.conn, if_exists='replace',
                          index_label='C')
Example #18
    def analyse(self):

        # Logger.log(logging.INFO, "Analyse Strategy", {"scope":__name__, "Rule 1":self._rule1, "Rule 2":self._rule2, "Rule 3":self._rule3, "Type":self._type})

        connection = sqlite3.connect(pyswing.database.pySwingDatabase)
        query = self.analyseStrategySql % (self._rule1, self._rule2, self._rule3, self._exit, self._type)
        self._strategyData = read_sql_query(query, connection, 'Date')
        self._strategyData['ExitValueAfterCosts'] = self._strategyData['ExitValue'] - 0.2
        connection.close()

        exitValueDataFrame = self._strategyData.ix[:,'ExitValueAfterCosts']

        mean = exitValueDataFrame.mean()
        median = exitValueDataFrame.median()
        sum = exitValueDataFrame.sum()
        count = exitValueDataFrame.count()

        tradesPerYear = count / 10
        sharpeRatio = sqrt(tradesPerYear) * exitValueDataFrame.mean() / exitValueDataFrame.std()

        self._strategyData["Sum"] = expanding_sum(exitValueDataFrame)
        self._strategyData["Max"] = expanding_max(self._strategyData["Sum"])
        self._strategyData["Min"] = expanding_min(self._strategyData["Sum"])
        self._strategyData["DD"] = self._strategyData["Max"] - self._strategyData["Min"]

        runningSum = expanding_sum(exitValueDataFrame)
        max2here = expanding_max(runningSum)
        dd2here = runningSum - max2here
        drawDown = dd2here.min()

        Logger.log(logging.INFO, "Analysing Strategy", {"scope":__name__, "Rule 1":self._rule1, "Rule 2":self._rule2, "Rule 3":self._rule3, "Exit":self._exit, "Type":self._type, "Mean":str(mean), "Median":str(median), "Sum":str(sum), "Count":str(count), "SharpeRatio":str(sharpeRatio), "DrawDown":str(drawDown)})

        connection = sqlite3.connect(pyswing.database.pySwingDatabase)
        c = connection.cursor()

        deleteSql = self.deleteStrategySql % (pyswing.globals.pySwingStrategy, self._rule1, self._rule2, self._rule3, self._exit, self._type)
        c.executescript(deleteSql)
        connection.commit()

        insertSql = self.insertStrategySql % (pyswing.globals.pySwingStrategy, self._rule1, self._rule2, self._rule3, self._exit, self._type, str(mean), str(median), str(sum), str(count), str(sharpeRatio), str(drawDown))
        c.executescript(insertSql)
        connection.commit()

        c.close()
        connection.close()
Example #19
def get_dataframe_query_cmp_day(query, user, interval, start_date, end_date, switch_id):
    """
    Build the SQL query and return the DataFrame.
    """
    upd_query = sqlquery
    upd_query = upd_query.replace("#SECOND_INDEX#", "switch_id")
    upd_query = upd_query.replace("#USER_CONDITION#", condition_user(user))
    upd_query = upd_query.replace("#DATEDAY_FORMAT#", "extract(hour from dateday) as dateday")
    upd_query = upd_query.replace("#SWITCH_CONDITION#", condition_switch_id(switch_id))
    upd_query = upd_query.replace("#INTERVAL#", interval)
    upd_query = upd_query.replace("#COUNTRY_CONDITION#", "")
    params = {
        'start_date': start_date,
        'end_date': end_date,
    }
    # df = sql.read_sql_query(upd_query, connection, params=params, index_col=["dateday", "switch_id"])
    df = sql.read_sql_query(upd_query, connection, params=params)
    return df
Example #20
    def evaluateRule(self, tickerCode):
        """
        ?

        :param tickerCode:
        """

        self._tickerCode = tickerCode

        start = self._getLatestDate()

        Logger.log(logging.INFO, "Evaluating Rule", {"scope":__name__, "Rule":self._ruleTableName, "code":self._tickerCode, "start":str(start)})

        self._restrictedSelectQuery = "%s where r.Code = '%s' and r.Date >= '%s'" % (self._selectQuery, self._tickerCode, start)

        connection = sqlite3.connect(pyswing.database.pySwingDatabase)

        self._ruleData = read_sql_query(self._restrictedSelectQuery, connection, 'Date')

        self._ruleData['LastCrosser'] = self._ruleData['Crosser'].shift(1)
        self._ruleData['LastCrossee'] = self._ruleData['Crossee'].shift(1)

        try:
            self._ruleData['Match'] = (self._ruleData['Crosser'] > self._ruleData['Crossee']) & (self._ruleData['LastCrossee'] > self._ruleData['LastCrosser'])
            self._ruleData['Match'] = self._ruleData['Match'].astype(float)
        except TypeError as e:
            # NOTE:  Throws "TypeError: unorderable types: float() > NoneType()" if there is no data (e.g. SMA_200 for datasets smaller than 200)
            Logger.log(logging.ERROR, "Error Evaluating Rule", {"scope": __name__, "Rule":self._ruleTableName, "exception": str(e)})
            self._ruleData['Match'] = 0.0
            self._ruleData['Match'] = self._ruleData['Match'].astype(float)

        self._ruleData.drop('Crosser', axis=1, inplace=True)
        self._ruleData.drop('Crossee', axis=1, inplace=True)
        self._ruleData.drop('LastCrosser', axis=1, inplace=True)
        self._ruleData.drop('LastCrossee', axis=1, inplace=True)

        newRecords = self._ruleData.query("Date > '%s'" % (str(start)))

        connection.executemany(self._insertQuery, newRecords.to_records(index=True))
        connection.commit()

        connection.close()
Example #21
    def __init__(self):
        """
        Class Constructor.
        """

        tableName = "Indicator_ADI"

        self._insertQuery = "insert or replace into %s (Date, ADI, ADI_SUM, ADI_ROC, ADI_EMA) values (?,?,?,?,?)" % (tableName)
        self._selectQuery = "SELECT Date, SUM(CASE WHEN Close > Open THEN 1 ELSE 0 END) - SUM(CASE WHEN Close < Open THEN 1 ELSE 0 END) as ADI FROM Equities group by Date"

        connection = sqlite3.connect(pyswing.database.pySwingDatabase)

        self._indicatorDataFrame = read_sql_query(self._selectQuery, connection, 'Date')

        self._indicatorDataFrame['ADI'] = self._indicatorDataFrame['ADI'].astype(float)

        self._indicatorDataFrame['ADI_SUM'] = self._indicatorDataFrame['ADI'].cumsum()
        self._indicatorDataFrame['ADI_ROC'] = abstract.ROC(self._indicatorDataFrame, timeperiod=5, price='ADI_SUM')
        self._indicatorDataFrame['ADI_EMA'] = abstract.EMA(self._indicatorDataFrame, timeperiod=5, price='ADI')

        self._tableName = "Indicator_ADI"
Example #22
    def executeQuery(self, query=None, throughReload=0, DK=None, DC=None):

        self.connectDB()
        query = self.queryTextEdit.toPlainText()

        if query is None:
            query = str(self.queryTextEdit.toPlainText())
        # try:
        self.pandas = psql.read_sql_query(query, self.cnxn)
        # except Exception:
        #    self.setInfo(('Query failed:', str('')))
        #    df = pd.DataFrame()

        self.data = convert_dataframe_to_orange(self.pandas)

        self.send("Data", self.data)
        self.send("Pandas", self.pandas)
        self.setInfo(("Query returned", "Read " + str(len(self.data)) + " examples!"))
        self.send("Feature Definitions", self.data.domain)
        self.setMeta()
        self.lastQuery = query
Example #23
def get_transcript_data(db):
    turn_query = select([Turn, Advocacy.side, Argument.date,
                         Argument.case_id]).where(and_(Section.id == Turn.section_id,
                                 Advocacy.id == Section.advocacy_id,
                                 Advocate.id == Advocacy.advocate_id,
                                 Argument.id == Section.argument_id))
   
    turn_data = sql.read_sql_query(turn_query, con=db.engine, index_col='id')
                                   #parse_dates=['date'])
    turn_data.drop(['section_id', 'advocate_id'], inplace=True, axis=1)
    turn_data.columns = ['kind', 'turn_number', 'text', 'time_start', 'time_end',
                         'justice_id', 'side', 'date', 'case_id']   
    turn_data['length'] = np.abs(turn_data['time_end'] - turn_data['time_start'])
    turn_data.drop(['time_start', 'time_end', 'turn_number'], inplace=True, axis=1)
    turn_data['interrupted'] = turn_data['text'].str.endswith('--').astype(int)
    turn_data['interruption'] = turn_data['interrupted'].shift(1).fillna(False)
    #turn_data['gender'] = turn_data['gender'].apply(gender_encode)
    turn_data['choppiness'] = (turn_data['text'].str.count('--')>1).astype(int)
    turn_data['humor'] = turn_data['text'].str.contains(r'\[Laughter\]').astype(int)
    turn_data['question'] = turn_data['text'].str.contains(r'[?]').astype(int)
    return turn_data
Example #24
    def executeQuery(self, query=None, throughReload=0, DK=None, DC=None):

        self.connectDB()
        query = self.queryTextEdit.toPlainText()

        if query is None:
            query = str(self.queryTextEdit.toPlainText())
        # try:
        self.pandas = psql.read_sql_query(query, self.cnxn)
        # except Exception:
        #    self.setInfo(('Query failed:', str('')))
        #    df = pd.DataFrame()

        self.data = convert_dataframe_to_orange(self.pandas)

        self.send("Data", self.data)
        self.send("Pandas", self.pandas)
        self.setInfo(
            ('Query returned', 'Read ' + str(len(self.data)) + ' examples!'))
        self.send("Feature Definitions", self.data.domain)
        self.setMeta()
        self.lastQuery = query
Example #25
def sql_one(limit_num=50):
    """
	This version of the sql API allows us to set a number that we want to 
	limit our sample query to, it will also naively set the table result to 
	json (note that you'd want to probably have some more logic in a real 
	sitaution, but turning a pandas table into a json so that it can be used 
	as part of an API can be a powerful tool if you are using it properly. 
	This also demonstrates connection info. I may also show a twist on this 
	where we would also pass in the dbname as well.
	"""

    conn = ps2.connect(
    dbname = config.dbname, \
    host = config.host, \
    port = config.port, \
    user = config.user, \
    password = config.password)

    q1 = f'''SELECT * FROM film ORDER BY film_id DESC LIMIT {limit_num}'''
    data = sqlio.read_sql_query(q1, conn)
    print(data)
    return (data.to_json())
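A self-contained sketch of the same read-then-serialise pattern, assuming an in-memory SQLite database with a hypothetical film table in place of the PostgreSQL connection above:

import sqlite3
import pandas as pd

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE film (film_id INTEGER, title TEXT)")
conn.executemany("INSERT INTO film VALUES (?, ?)", [(1, "a"), (2, "b"), (3, "c")])

limit_num = 2
q1 = f"SELECT * FROM film ORDER BY film_id DESC LIMIT {limit_num}"
data = pd.read_sql_query(q1, conn)
print(data.to_json())  # the DataFrame serialised to JSON, ready to return from an API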
Example #26
def sql_total_activeppl_in_postcode(conn):

    sql_get_totalactiveppl_in_postcode = """
    with activepeople_activityaddress as
    (
    SELECT work_address, count(individual_id) as number_pplactive
    FROM """ + table_individual_by_id + """
    WHERE age_category_id != 0 --- young children, 0-3 yr old
    GROUP BY work_address
    ),
    activepeople_homeaddress as
    (
    SELECT home_address, count(individual_id) as number_pplliving
    FROM """ + table_individual_by_id + """
    WHERE age_category_id != 0 --- young children, 0-3 yr old
    GROUP BY home_address
    ),
    ppl_counting as 
    (
    SELECT a.address_id, a.sla_postcode, 
    CASE WHEN number_pplactive is NULL THEN 0 ELSE number_pplactive END AS number_pplactive, 
    CASE WHEN number_pplliving is NULL THEN 0 ELSE number_pplliving END AS number_pplliving
    FROM """ + table_sla_addresses + """ a
    LEFT JOIN activepeople_activityaddress b ON a.address_id = b.work_address
    LEFT JOIN activepeople_homeaddress c ON a.address_id = c.home_address
    ),
    address_total_counting as
    (
    SELECT address_id, sla_postcode, number_pplactive, number_pplliving, number_pplactive + number_pplliving as total_ppl 
    FROM ppl_counting
    )
    SELECT sla_postcode, sum(number_pplactive) as number_pplactive, sum(number_pplliving) as number_pplliving, sum(total_ppl) as total_ppl
    FROM address_total_counting
    GROUP BY sla_postcode
    """

    df_pplcount_inpostcode = sqlio.read_sql_query(
        sql_get_totalactiveppl_in_postcode, conn)
    return df_pplcount_inpostcode
Example #27
def correlation_funda_data_sellingprice_sellingtime():
    #start connection with database
    with open('db_login.txt', 'r') as myfile:
        data = myfile.read()
    conn = psycopg2.connect(data)
    cur = conn.cursor()

    #Create dataframe to select columns of housing_data
    housinginfo_sellingpricetime_table = "SELECT sellingPrice, fullDescription, houseType, categoryObject, yearOfBuilding, garden, parcelSurface, numberRooms, numberBathrooms, energylabelClass, surface, sellingtime FROM funda;"
    housinginfo_sellingpricetime = sqlio.read_sql_query(
        housinginfo_sellingpricetime_table, conn)

    #Look for correlations between columns housing_data and sellingprice and sellingtime
    print(housinginfo_sellingpricetime.corr(method='pearson'))
    '''
    Conclusions with regard to sellingprice: 1)garden+sellingprice=-0,258484 2)parcelSurface+sellingprice=0.076516 3)numberrooms+sellingprice=0.100043 
    4)numberbathrooms+sellingprice=0.069725 5)surface+sellingprice=0.580748 6)sellingtime+sellingprice=0.145279
    
    Conclusion with regard to sellingtime: 1)garden+sellingtime=0.145279 2)garden+sellingtime=-0.085790 3)parcelsurface+sellingtime=0.002927 
    4)numberrooms+sellingtime= 0.136939 5)numberbathrooms+sellingtime=-0.073602 6)surface+sellingtime=0.153849'''

    return print('Analysis successfully done')
Example #28
def correlation_crime_info():
    #Start connection with database
    with open('db_login.txt', 'r') as myfile:
        data = myfile.read()
    conn = psycopg2.connect(data)
    cur = conn.cursor()

    #Select municipality name, sellingprice, sellingtime and number of national monuments
    crime_info_sellingtime_and_price_table = "SELECT sellingPrice, MunicipalityCode, sellingtime, Number_of_registered_crimes FROM funda NATURAL LEFT JOIN zipcodes NATURAL LEFT JOIN crime_info;"
    crime_info_sellingtime_and_price = sqlio.read_sql_query(
        crime_info_sellingtime_and_price_table, conn)

    #Look for correlations between number of monuments (tourist info) and sellingprice and sellingtime
    print(crime_info_sellingtime_and_price.corr(method='pearson'))

    #Make changes to db persistent
    conn.commit()

    #End connection
    cur.close()
    conn.close()
    return print('Crime info analysis successfully done')
Example #29
    def getDelayedTrains(self, f, t):

        # Open Connection
        self.openConnection()

        query = """
            SELECT rid,tpl,ptd,dep_at,tpl_to,pta,arr_at FROM
                (SELECT rid,tpl,ptd,dep_at FROM nrch_livst_a51 
                WHERE tpl = '{1}'
                AND dep_at IS NOT NULL
                ) AS x
                JOIN
                (SELECT rid AS rid_to,tpl AS tpl_to,pta,arr_at FROM nrch_livst_a51 
                WHERE tpl = '{2}'
                AND arr_at IS NOT NULL
                ) AS y on x.rid = y.rid_to
            WHERE ptd < dep_at
            ORDER BY rid
            """.format(norwich_to_london, f, t)

        # Execute query and get results
        return sqlio.read_sql_query(query, self.connection)
Example #30
    def _query(self, sql, return_df=None):
        """
        Execute query and return results

        sql str sql to execute

        return list of sets
        """
        if return_df is not None:
            return_df=return_df
        else:
            return_df=self.return_df

        self._connect()
        with self.conn as conn:
            if return_df:
                return self._df_to_geodf(sqlio.read_sql_query(sql, conn))
            else:
                with conn.cursor() as curs:
                    curs.execute(sql)
                    results = curs.fetchall()
                    return results
Example #31
def fetch_data(anno):

    engine = create_engine('postgresql://*****:*****@localhost/geonode-imports')
    connection = engine.connect()

    from sqlalchemy.orm import sessionmaker, scoped_session
    conta_sql = "SELECT COUNT (*) FROM conflicts.gd_" + str(anno) + ";"
    Session = scoped_session(sessionmaker(bind=engine))
    s = Session()
    num_records = list(s.execute(conta_sql))[0][0]
    #print num_records

    stringa_sql = 'SELECT "SQLDATE","Actor1Code","GoldsteinScale" FROM conflicts.gd_' + str(anno) + ';'
    #stringa_sql = "SELECT * FROM sparc_wfp_areas;"
    #print stringa_sql

    df = psql.read_sql_query(stringa_sql, con=engine)
    #print df.columns.values
    #print df.describe()
    connection.close()

    return df
Example #32
def get_dataframe_query(query, user, interval, start_date, end_date, switch_id, country_id_list, second_index):
    """
    Build the SQL query and return the DataFrame.
    """
    upd_query = sqlquery
    upd_query = upd_query.replace("#SECOND_INDEX#", second_index)
    upd_query = upd_query.replace("#USER_CONDITION#", condition_user(user))
    upd_query = upd_query.replace("#DATEDAY_FORMAT#", "dateday AS dateday")
    upd_query = upd_query.replace("#SWITCH_CONDITION#", condition_switch_id(switch_id))
    upd_query = upd_query.replace("#INTERVAL#", interval)
    if country_id_list and len(country_id_list) > 0:
        select_country = ", ".join(str(int(l)) for l in country_id_list)
        upd_query = upd_query.replace("#COUNTRY_CONDITION#", "AND country_id IN (" + select_country + ")")
    else:
        upd_query = upd_query.replace("#COUNTRY_CONDITION#", "")

    params = {
        'start_date': start_date,
        'end_date': end_date,
    }
    df = sql.read_sql_query(upd_query, connection, params=params)
    return df
Example #33
    def evaluateRule(self, tickerCode):
        """
        ?.
        """

        self._tickerCode = tickerCode
        start = self._getLatestDate()

        Logger.log(logging.INFO, "Evaluating Rule", {"scope":__name__, "Rule":self._ruleTableName, "code":self._tickerCode, "start":str(start)})

        self._restrictedSelectQuery = "%s where t1.Code = '%s' and t1.Date > '%s'" % (self._selectQuery, self._tickerCode, start)

        connection = sqlite3.connect(pyswing.database.pySwingDatabase)

        self._ruleData = read_sql_query(self._restrictedSelectQuery, connection, 'Date')

        self._ruleData['Match'] = self._ruleData['Match'].astype(float)

        connection.executemany(self._insertQuery, self._ruleData.to_records(index=True))
        connection.commit()

        connection.close()
Example #34
def get_surficial_markers(host=None, from_memory=True):
    """
    Fetch the surficial marker list, either from the memory cache or from
    the sensor database.

    Args:
        host (str): Database host; defaults to the configured datadb resource.
        from_memory (bool): If True, return the cached "surficial_markers" entry.

    Returns:
        DataFrame with marker_id, marker_name and site_id.

    Raises:
        MySQLdb.OperationalError: Error in database connection.

    """
    mc = memory.get_handle()
    sc = memory.server_config()

    if from_memory:
        return mc.get("surficial_markers")

    if not host:
        print("Host defaults to datadb")
        host = sc["resource"]["datadb"]

    query = ("select m2.marker_id, m3.marker_name, m4.site_id from "
             "(select max(history_id) as history_id, "
             "marker_id from marker_history as m1 "
             "group by m1.marker_id "
             ") as m2 "
             "inner join marker_names as m3 "
             "on m2.history_id = m3.history_id "
             "inner join markers as m4 "
             "on m2.marker_id = m4.marker_id ")

    engine = dbio.connect(resource="sensor_data", conn_type=0)
    surficial_markers = psql.read_sql_query(query, engine)
    mc.set("surficial_markers", surficial_markers)

    return surficial_markers
Example #35
    def evaluateRule(self, tickerCode):
        """
        ?

        :param tickerCode:
        """

        self._tickerCode = tickerCode

        start = self._getLatestDate()

        Logger.log(logging.INFO, "Evaluating Rule", {"scope":__name__, "Rule":self._ruleTableName, "code":self._tickerCode, "start":str(start)})

        # We can't use self._getLatestDate() because we need data from before that date...
        self._restrictedSelectQuery = "%s where Code = '%s'" % (self._selectQuery, self._tickerCode)

        connection = sqlite3.connect(pyswing.database.pySwingDatabase)

        self._ruleData = read_sql_query(self._restrictedSelectQuery, connection, 'Date')

        self._ruleData['Relative'] = self._ruleData[self._indicatorColumn].shift(self._relativeIndex * -1)

        if self._comparison == Comparison.GreaterThan :
            self._ruleData['Match'] = self._ruleData[self._indicatorColumn] > self._multiplier * self._ruleData['Relative']
        else:
            self._ruleData['Match'] = self._ruleData[self._indicatorColumn] < self._multiplier * self._ruleData['Relative']

        self._ruleData['Match'] = self._ruleData['Match'].astype(float)

        self._ruleData.drop('Relative', axis=1, inplace=True)
        self._ruleData.drop(self._indicatorColumn, axis=1, inplace=True)

        newRecords = self._ruleData.query("Date > '%s'" % (str(start)))

        connection.executemany(self._insertQuery, newRecords.to_records(index=True))
        connection.commit()

        connection.close()
Example #36
def calculate_lead_changes():
    
    # Query the database
    the_data = sql.read_sql_query("SELECT game_id, current_score_home, current_score_away FROM mikes_db.ncaa_pxp_detail_2015 where bool_non_play_event not in ('1');", db)
    
    # Get a unique list of the games
    unique_game_list = the_data.loc[:, 'game_id'].unique()
    
    all_lead_chg_summaries = []
    
    for game in unique_game_list:
        the_game_id = str(game)
        
        # Subset the data
        the_data_subset = the_data[the_data.game_id == str(the_game_id)]
        
        # If positive, the home team is ahead
        the_data_subset['current_score_diff'] = the_data_subset['current_score_home'].astype(int) - the_data_subset['current_score_away'].astype(int)
        # Sign of the score difference (+1 home ahead, -1 away ahead, 0 tied)
        the_data_subset['current_score_sign'] = np.sign(the_data_subset['current_score_diff'])
        # Get the sign of the previous play
        the_data_subset['prev_score_sign'] = np.sign(the_data_subset['current_score_diff'].shift())
        # There will be an NaN at the beginning, give it a value of 0
        the_data_subset['prev_score_sign'] = the_data_subset['prev_score_sign'].fillna(0)
        # if the sign of the current play and the last play are the same, then there was no lead change, otherwise there was
        the_data_subset['lead_change_bool'] = np.where(the_data_subset['prev_score_sign'] == the_data_subset['current_score_sign'], 0, 1)
        
        nLeadChanges = the_data_subset['lead_change_bool'].sum()
        
        print [the_game_id, nLeadChanges]
        
        all_lead_chg_summaries.append([the_game_id, nLeadChanges])
    
    all_lead_chg_summaries = pandas.DataFrame(all_lead_chg_summaries) 
    all_lead_chg_summaries.to_csv('/home/mrhodes/Documents/Code/Eclipse_Workspaces/NCAABasketballAnalysis/Sample_score_Diff.csv')
        
        
    return the_data_subset
Example #37
def etl_data(db_data, pull_table, target_table, chunk_size):
    standardize = StandardScaler()
    offset = 0  # For counting the chunk
    chunk = chunk_size  # Set chunk variable

    # Connect to database for count of rows in table
    connection = pymysql.connect(**db_data)

    schema = db_data['db']

    # Connect to database for sqlalchemy -- mysql as a generic example
    engine_string = ("""mysql+pymysql://{}:{}@{}:{}/{}""").format(db_data['user'],db_data['password'],db_data['host'],db_data['port'],db_data['db'])

    with connection.cursor() as cursor:
        cursor.execute(("""SELECT COUNT(1) FROM {}.{}""").format(schema, pull_table))
        count = cursor.fetchone()
        row_count = count['COUNT(1)']
        print("Starting smoothing of " + str(row_count))
        try:
            print('Starting chunks...')
            while offset < row_count:
                data = sql.read_sql_query(("""SELECT * FROM {}.{} LIMIT {} OFFSET {}""").format(schema, pull_table, chunk, offset), connection)
                data = data.fillna(data.median())  # Fill NaN to avoid analysis issues - Median as example
                data.loc[:, data.dtypes != object] = standardize.fit_transform(data.loc[:, data.dtypes != object])
                try:
                    engine = create_engine(engine_string, echo=False)
                    # Warning: 'replace' will drop and recreate the table - read to_sql documentation
                    data.to_sql(name = target_table, con = engine, if_exists = 'replace', index = False)
                except Exception as ex:
                    print(ex)
                offset += chunk
                print("Up to " + str(offset) + "\tchunked rows transformed.")
                if offset >= row_count:
                    print("Done:\n Offset: " + str(offset) + "\nRow Count: " + str(row_count))
                    break
        except Exception as ex:
            print(ex)
        connection.close()
Example #38
def _load_table_config_file(executer_instance, cube_obj):
    """
    Load tables from config file.

    :param cube_obj: cubes object
    :return: tables dict with table name as key and DataFrame as value
    """
    tables = {}
    # just one facts table right now
    executer_instance.facts = cube_obj.facts[0].table_name

    db = MyDB(db_config_file_path=os.path.dirname(executer_instance.cube_path),
              db=executer_instance.cube)

    for dimension in cube_obj.dimensions:

        df = psql.read_sql_query("SELECT * FROM {0}".format(dimension.name),
                                 db.engine)
        # only certain columns
        if dimension.columns.keys():
            df = df[dimension.columns.keys()]

        # change table display name
        if dimension.displayName:
            table_name = dimension.displayName
        else:
            table_name = dimension.name

        # rename columns if value not None
        df.rename(columns=(dict(
            (k, v) for k, v in dimension.columns.items() if v)),
                  inplace=True)

        tables[table_name] = df[[
            col for col in df.columns if col.lower()[-2:] != 'id'
        ]]

    return tables
Example #39
def get_feature_info(connection):
    query = '''
    select a.id,name,description, category, b.is_categorical, b.lower_bound, b.upper_bound FROM features a, data_ranges b WHERE a.id=b.id;
    '''

    df = sqlio.read_sql_query(query, connection)

    feature_ids = df['id'].values
    feature_names = df['name'].values
    feature_desc = df['description'].values
    feature_categ = df['category'].values
    feature_type = df['is_categorical'].values
    feature_lb = df['lower_bound'].values
    feature_ub = df['upper_bound'].values

    feature_info = {}
    for i in range(len(feature_ids)):
        feature_info[feature_ids[i]] = [
            feature_names[i], feature_desc[i], feature_categ[i],
            feature_type[i], feature_lb[i], feature_ub[i]
        ]

    return feature_info
Example #40
    def get_glue_jobs_from_db(self):
        """
        Get all glue jobs from the 'jobs' table. Returns a pandas DataFrame and any error raised.
        """
        conn, cur = self.create_db_conn()
        sql_stmt = sq.select_from_jobs
        df, err = None, None
        try:
            cur.execute(sq.use_schema)
            df = sqlio.read_sql_query(sql_stmt, conn)
        except Exception as e:
            log.info("Error: select *")
            log.error(e)
            err = e
            # raise
        finally:
            if conn:
                cur.close()
                conn.close()
                log.info("successfully closed the db connection")

        # log.info("successfully executed function 'get_glue_jobs_from_db'")
        return df, err
Example #41
def recommend(movie_user_likes, cosine_sim):
    with psycopg2.connect(user="******",
                          password="******",
                          database="Movie Rec") as connection:
        cmd = '''SELECT * FROM "Everything";'''
        df = sqlio.read_sql_query(cmd, connection)

    # cosine_sim = np.load('D:\\Documents\\Python\\movieRec\\similarity.npy')

    def get_index_from_title(title):
        return df[df.Title == title].index.values[0]

    movie_index = get_index_from_title(movie_user_likes)
    similar_movies = list(enumerate(cosine_sim[movie_index]))
    sorted_similar_movies = sorted(similar_movies,
                                   key=lambda x: x[1],
                                   reverse=True)

    recommendations = [(df[df.index == i[0]]["Title"].values[0],
                        df[df.index == i[0]]["Image URL"].values[0],
                        df[df.index == i[0]]["IMDB ID"].values[0])
                       for i in sorted_similar_movies[1:6]]
    return recommendations
Example #42
    def test_sql_open_close(self):
        # Test if the IO in the database still works if the connection is closed
        # between the writing and reading (as in many real situations).

        self._load_test2_data()

        with tm.ensure_clean() as name:

            conn = self.connect(name)
            sql.to_sql(self.test_frame2,
                       "test_frame2_legacy",
                       conn,
                       flavor="sqlite",
                       index=False)
            conn.close()

            conn = self.connect(name)
            result = sql.read_sql_query("SELECT * FROM test_frame2_legacy;",
                                        conn,
                                        flavor="sqlite")
            conn.close()

        tm.assert_frame_equal(self.test_frame2, result)
Example #43
def ListDrugdruginteractions(dbcon, fout=None):
    sql = """\
SELECT
	ddi.id AS ddi_id,
	ddi.drug_class1,
	ddi.drug_class2,
	ddi.source_id,
	drug_class1.id drug_class_id1,
	drug_class1.source source1,
	drug_class1.is_group is_group1,
	drug_class2.id drug_class_id2,
	drug_class2.source source2,
	drug_class2.is_group is_group2
FROM
	ddi
JOIN drug_class drug_class1 ON drug_class1.name = ddi.drug_class1
JOIN drug_class drug_class2 ON drug_class2.name = ddi.drug_class2
"""
    logging.debug(f"SQL: {sql}")
    df = read_sql_query(sql, dbcon)
    if fout: df.to_csv(fout, "\t", index=False)
    logging.info(f"n_out: {df.shape[0]}")
    return df
Example #44
def fit_anom_sql(min_price=.15, min_quant=30, days_released=45):
    """
    A single SQL query to skip the filtering step when creating the dataframe
    :param min_price, min_quant, days_released: filter parameters
    :return: filtered dataframe
    """
    conn = pg2.connect(dbname='steam_capstone', host='localhost')
    query = (
        """
        select t_days_released.item_name, t_days_released.date as timestamp, t_days_released.price as median_sell_price 
        from (select *, count(*) over (partition by item_name order by date asc) as days_released from sales) as t_days_released
        inner join (select item_name 
                    from (select *, count(*) over (partition by item_name order by date asc) as days_released from sales) as t 
                    where days_released > %(days_released)s 
                    group by item_name 
                    having min(price) > %(min_price)s and min(quantity) > %(min_quant)s) as t_keep_items
        on t_days_released.item_name = t_keep_items.item_name
        where days_released > %(days_released)s
        order by t_days_released.item_name, timestamp;
        """)
    df = sqlio.read_sql_query(query, conn, parse_dates=['timestamp'],
                              params={'min_price': min_price, 'min_quant': min_quant+1, 'days_released': days_released})
    print_top(anom_consensus(df), n=10)
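The query above binds parameters with psycopg2's %(name)s placeholder style. The params mechanism is driver-specific; a self-contained sketch of the same idea against an in-memory SQLite database (which uses :name placeholders) with a hypothetical sales table:

import sqlite3
import pandas as pd

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE sales (item_name TEXT, date TEXT, price REAL, quantity INTEGER)")
conn.execute("INSERT INTO sales VALUES ('some item', '2019-01-01', 0.25, 40)")

# The params dict is passed through to the driver's execute() call
df = pd.read_sql_query(
    "SELECT * FROM sales WHERE price > :min_price AND quantity > :min_quant",
    conn,
    params={"min_price": 0.15, "min_quant": 30},
    parse_dates=["date"])
print(df)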
Example #45
def checkCobra(con, frameID, loadPref):
    """ compare matched fibres """

    #read from database
    comm = """select * from cobra_status where mcs_frame_id =""" + str(
        int(frameID)) + """ order by cobra_id"""
    data = sqlio.read_sql_query(comm, con)

    #read from file  (which was used to populate database)
    fps, sz = db.readFPS(frameID, loadPref)

    #all the entries here are numbers
    for key in fps:
        for i in range(sz[0]):

            #turn None into NaN for comparison
            if (fps[key][i] == None):
                fps[key][i] = np.nan

            #equal_nan so that un matched cobras are compared correctly
            if (not np.isclose(
                    np.float(fps[key][i]), data[key][i], equal_nan=True)):
                print(key, i, fps[key][i], data[key][i])
Example #46
def Greenblatt(날짜='2011-12-31', 기간구분='년간'):
    result = DataFrame()

    query = """
    SELECT A.날짜, A.기간구분, A.종목코드, C.종목명, B.종가, A.매출액, A.영업이익, A.당기순이익, A.자산총계, A.부채총계, A.자본총계, A.자본금, 
        A.부채비율, A.유보율, A.영업이익률, A.순이익률, A.ROA, A.ROE, A.EPS, A.BPS, A.DPS, A.PER, 1/A.PER as RPER, A.PBR, A.발행주식수, A.배당수익률, C.종목상태
    FROM 재무정보 A, (select 종목코드, 종가 from 일별주가 where 일자 = (select max(일자) from 일별주가 where 일자 <= '%s')) B, 종목코드 C
    WHERE 날짜='%s' and 기간구분='%s' and A.종목코드=B.종목코드 and A.종목코드=C.종목코드
    """ % (날짜, 날짜, 기간구분)

    conn = mysqlconn()
    df = pdsql.read_sql_query(query, con=conn)
    conn.close()

    df['rank1'] = df['ROA'].rank(ascending=False)
    df['rank2'] = df['RPER'].rank(ascending=False)
    df['ranksum'] = df['rank1'] + df['rank2']
    df['rank'] = df['ranksum'].rank(ascending=True)

    result = df.sort_values(['rank', 'rank1', 'rank2'],
                            ascending=[True, True, True])

    return result
Example #47
def get_data(start,end,stock_name):
    postgre_db = psycopg2.connect(dbname = postgresql_db_config.NAME,
                                    user = postgresql_db_config.USER,
                                    password = postgresql_db_config.PASSWORD,
                                    host = postgresql_db_config.HOST,
                                    port = postgresql_db_config.PORT)
    sql =f'''
    select * from public.stock_data_full where stock_name = '{stock_name}' order by time_stamp asc
    '''
    dat = sqlio.read_sql_query(sql, postgre_db)
    dat = dat.dropna()
    dat = dat.reset_index(drop=True)
    print(f"Now we are processing stock : {dat['stock_name'][0]}")
    features = dat[['open','volume','volume_obv','trend_macd','trend_macd_signal','trend_macd_diff','momentum_rsi','volume_vpt']]
    dataset = features.values
    data_mean = dataset.mean(axis=0)
    data_std = dataset.std(axis=0)
    dataset = (dataset-data_mean)/data_std
    if end == None:
        end = dataset.shape[0]
    if start == None:
        start = dataset.shape[0]-140
    return dataset[start:end]
Example #48
    def get_held_shares(self, sid):
        try:
            sql = "select trade_type,price,SUM(volume) from public.trade where session_id = '" + str(
                sid) + "' GROUP BY trade_type,price"
            db = self.get_connection()
            data = sqlio.read_sql_query(sql, db)
        except:
            print("failed to query held_stocks for sid: " + str(sid))
            raise
        finally:
            db.close()

        #1. Sum BUY's
        #2. Sum SELL's
        #3. Compute and return diff
        held_stocks = 0
        for trade in data.itertuples():
            _, t_type, price, vol = trade
            if t_type == "BUY":
                held_stocks += vol
            elif t_type == "SELL":
                held_stocks -= vol
        return held_stocks
Example #49
    def checkTableExists(self, table_name, connection):

        if not isinstance(table_name, str):

            raise Exception('Input(table_name) : not string')

        # ---------------------------------------------------------------------

        if not isinstance(connection, psycopg2.extensions.connection):

            raise Exception('Input(connection) : not valid psycopg2 connection')

        # ---------------------------------------------------------------------

        table_name = table_name.upper()

        table_name = table_name.strip()

        # ---------------------------------------------------------------------

        sql = \
            '''
            SELECT * FROM INFORMATION_SCHEMA.TABLES
            WHERE UPPER(TABLE_NAME) = '{}'
            '''.format(table_name)

        data = sqlio.read_sql_query(sql, connection)

        # ---------------------------------------------------------------------

        if data.empty:

            return False

        else:

            return True
Example #50
def paper_rank():
    print("start page ranking .....")

    dg = digraph()

    conn = sqlite3.connect(PM.db)
    qry = 'select p_citer,p_cited from reference'
    p_id = sql.read_sql_query(qry, conn)
    print(str(p_id.shape) + '<---------p_id')

    citer = p_id.p_citer.unique()
    p_id = p_id.dropna(axis=0)
    cited = p_id.p_cited.unique()
    nd = set(citer).union(set(cited))
    nd = list(nd)

    print('node is created .....')
    # add nodes
    nodes = np.array(nd).astype(np.int64)
    dg.add_nodes(nodes)
    print("add nodes finished .... ")
    # add edges

    edges = [
        x for x in zip(p_id['p_citer'].astype(np.int64),
                       p_id['p_cited'].astype(np.int64))
    ]
    for ed in edges:
        dg.add_edge(ed)
    print('add edges finished ....')

    pg = pagerank(dg, damping_factor=0.85, max_iterations=100, min_delta=1e-06)
    pprk = pd.DataFrame(pd.Series(pg))
    pprk.columns = ['pp_ranking']
    pprk.index.name = 'paper_index'
    pprk.to_csv(PM.paper_rank, sep=u'|', header=1, index=True)
    print(pprk[:2])
Example #51
def rankVideos():
    cursor, connection = connect()
    try:
        # retrieve video stats
        sql = "SELECT v_id, likes, dislikes, views FROM video;"
        df = sqlio.read_sql_query(sql, connection)
        connection = None

        # compute average views
        total_views = 0
        for i in range(0, len(df)):
            total_views += df.iat[i, 3]
        avg_views = total_views / len(df)

        # video ranking = [(likes-dislikes) / views]*log(views/avg_views)
        video_rankings = {}
        for i in range(0, len(df)):
            v_id = df.iat[i, 0]
            likes = df.iat[i, 1]
            dislikes = df.iat[i, 2]
            views = df.iat[i, 3]

            if views == 0:
                rank = 0
            else:
                rank = (
                    (likes - dislikes) / views) * math.log(views / avg_views)

            video_rankings[v_id] = rank

        return video_rankings

    except Exception as e:
        print("Exception in rank videos: ", e)

    finally:
        closeConnection(connection, cursor)
Example #52
def ListDiseases(dbcon, fout):
    sql = """
SELECT
	d.dtype,
	dt.description dtype_description,
	d.name diseaseName,
	d.ncats_name ncatsDiseaseName,
	d.did diseaseId,
	d.description diseaseDescription,
	d.reference,
	d.drug_name,
	d.source,
	COUNT(d.protein_id) n_target_associations
FROM
	disease d
	LEFT OUTER JOIN disease_type dt ON dt.name = d.dtype
GROUP BY
	d.dtype,
	dt.description,
	d.name,
	d.ncats_name,
	d.did,
	d.description,
	d.reference,
	d.drug_name,
	d.source
"""
    df = read_sql_query(sql, dbcon)
    if fout: df.to_csv(fout, "\t", index=False)
    logging.info(f"rows: {df.shape[0]}")
    logging.info(f"diseaseIDs: {df.diseaseId.nunique()}")
    logging.info(f"diseaseNames: {df.diseaseName.nunique()}")
    logging.info(f"ncatsDiseaseNames: {df.ncatsDiseaseName.nunique()}")
    for dtype in df.dtype.unique().tolist():
        logging.info(
            f"[{dtype}] diseaseIDs: {df[df.dtype==dtype].diseaseId.nunique()}")
    return df
Example #53
def __cut_duplicates(dataframe, table_name):
    """
    Pd.DataFrame -> pd.DataFrame
    
    Tests the data retrieved against the existing sql table, and drops duplicate keys
    """
    print('<<<Removing duplicates from data retrieved ...>>>')
    # Connect to the database
    conn = pg2.connect(database='news_summary',
                       user="******",
                       host='localhost',
                       password=config.passwords['postgresql'])
    # Pull table data for checking
    select_call = '''SELECT * FROM {}'''.format(table_name)
    existing_df = sqlio.read_sql_query(select_call, conn)

    # Combine to look for dupes
    full_df = existing_df.append(dataframe)
    full_df['duplicated'] = full_df.duplicated(
        ['headline', 'newssource', 'weblink'], keep=False)

    # Cut back to retrieved data, split into dupes & dedupes
    n = len(full_df)
    original_df = full_df[n - len(dataframe):n]

    unique_df = original_df.loc[original_df['duplicated'] == False]
    unique_df.drop(columns=['duplicated'], inplace=True)

    duplicates_df = original_df.loc[original_df['duplicated'] == True]
    duplicates_df.drop(columns=['duplicated'], inplace=True)

    new_stories_num = len(dataframe) - len(unique_df)
    print('Of {} stories retrieved in this call, {} were unique'.format(
        len(dataframe), new_stories_num))
    print('<<<Removing duplicates from data retrieved ... COMPLETE>>>')
    return (unique_df, duplicates_df)
Example #54
def tcrd_fetchdata_iter():
    sql = '''
SELECT
	target.id tcrdTargetId,
	target.name tcrdTargetName,
	target.fam tcrdTargetFamily,
	target.tdl TDL,
	target.ttype tcrdTargetType,
	target.idg idgList,
	protein.id tcrdProteinId,
	protein.sym tcrdGeneSymbol,
	protein.family tcrdProteinFamily,
	protein.geneid ncbiGeneId,
	protein.uniprot uniprotId,
	protein.up_version uniprotVersion,
	protein.chr,
	protein.description tcrdProteinDescription,
	protein.dtoid dtoId,
	protein.dtoclass dtoClass,
	protein.stringid ensemblProteinId
FROM
	target
JOIN
	t2tc ON t2tc.target_id = target.id
JOIN
	protein ON protein.id = t2tc.protein_id
'''
    dbcon = tcrd_Connect()
    df = read_sql_query(sql, dbcon)
    total = df.shape[0]
    logging.info("Targets: {}".format(total))
    NMAX = 10
    for i in range(total):
        #if i>NMAX: break
        target = df.iloc[i].to_dict()
        yield target
Example #55
def generate_table(chart_dropdown, year_slider):
    max_rows = 12
    year_filter = " WHERE date_part('year', agg_date) = " + str(year_slider)

    # Open a cursor to perform database operations
    cur = conn.cursor()

    if chart_dropdown == "select_T":
        sql = "select agg_date AS date, state, county_name as county, " + \
              "avg_value as avg_temperature, sum_mort as mortality_count from combined_mo " + year_filter + \
              " order by state, county, date limit 100;"

    if chart_dropdown == "select_stacoun":
        sql = "select * from stacoun limit 100;"

    dataframe = sqlio.read_sql_query(sql, conn)
    return html.Table([
        html.Thead(html.Tr([html.Th(col) for col in dataframe.columns])),
        html.Tbody([
            html.Tr(
                [html.Td(dataframe.iloc[i][col]) for col in dataframe.columns])
            for i in range(min(len(dataframe), max_rows))
        ])
    ])
Example #56
    def construct_star_schema(self, facts):
        """Construct star schema DataFrame from configuration file for excel
        client.

        :param facts:  Facts table name
        :return: star schema DataFrame
        """
        fusion = self.load_one_table(facts)
        for fact_key, dimension_and_key in self.cube_config["facts"][
                "keys"].items():
            if self.cube_config["source"].upper() == "CSV":
                file = os.path.join(self.cube_path,
                                    dimension_and_key.split(".")[0] + ".csv")
                # with extension or not
                if not os.path.isfile(file):
                    file.replace(".csv", "")
                df = pd.read_csv(file, sep=self.sep)
            else:
                df = psql.read_sql_query(
                    f"SELECT * FROM {dimension_and_key.split('.')[0]}",
                    self.sqla_engine,
                )

            for dimension in self.cube_config["dimensions"]:
                if dimension_and_key.split(".")[0] == dimension["name"]:
                    df.rename(columns=dimension["columns"], inplace=True)

            fusion = fusion.merge(
                df,
                left_on=fact_key,
                right_on=dimension_and_key.split(".")[1],
                how="left",
                # remove suffixe from dimension and keep the same column name for facts
                suffixes=("", "_y"),
            )
        return fusion
Example #57
def getDataAtDB(select_mun, select_dp, select_crime):
    conn = db.connect(host='localhost',
                      database='crimes',
                      user='******',
                      password='******',
                      port='5432')

    if select_dp == "Todos":
        sql_command = """
            SELECT o.datas, SUM(o.ocorrencia)
            FROM crime_ocorrencia o, crime_localizacao l
            WHERE o.id = l.id and l.municipio = '{}' and l.tipo = '{}'
            GROUP BY o.datas ORDER BY o.datas;
         """.format(select_mun, select_crime)
    else:
        sql_command = """
            SELECT o.datas, o.ocorrencia 
            FROM crime_ocorrencia o, crime_localizacao l 
            WHERE o.id = l.id and l.municipio = '{}' 
            and l.delegacia = '{}' and l.tipo = '{}'
        """.format(select_mun, select_dp, select_crime)
    dat = sqlio.read_sql_query(sql_command, conn)

    return dat
Example #58
import psycopg2 as pg
import numpy
import pandas.io.sql as psql
from sklearn.feature_extraction.text import CountVectorizer


# get connected to the database
connection = pg.connect(database="housing_data_server_development", host="localhost")

dataframe = psql.read_sql_query("SELECT id, description FROM listings", connection)
count_vectorizer = CountVectorizer()
counts = count_vectorizer.fit_transform(dataframe['description'].values)
print(dataframe.columns.values)
Example #59
import pandas as pd
import numpy as np

from sqlalchemy import create_engine
engine = create_engine('postgresql://*****:*****@10.0.0.56:5433/sharesprod')


import pandas.io.sql as psql
sql = "SELECT isin, TO_TIMESTAMP(min(tradetime)/1000) min,TO_TIMESTAMP(max(tradetime)/1000) max, count(*) FROM trade group by 1;"
data = psql.read_sql_query(sql, engine)

print data