示例#1
0
    def similarity(email):
        """Return a {member_email: euclidean_distance} record comparing *email*
        against every other retained member.

        Builds a square pairwise-distance matrix over the preprocessed member
        rows (indexed by email) and returns the row belonging to *email* as a
        plain dict.
        """
        # BUG FIX: the original called engine.connect() twice — once for the
        # read and once just to call .close() — closing a brand-new connection
        # and leaking the one actually used. Hold one connection and close it.
        conn = engine.connect()
        try:
            members = pd.read_sql_table('members', conn)
        finally:
            conn.close()

        # Drop zero-balance members, but never the member being scored.
        isZeroBalMem = members[(members['balance'] == 0) & (members['email'] != email)].index
        members = members.drop(isZeroBalMem)

        preprocessing = RecommendStockPreprocessing()
        refined_members = preprocessing.hook_process(members)

        # Remove churned members and low balances (again sparing *email*).
        isExitedMem = refined_members[refined_members['exited'] == 1].index
        refined_members = refined_members.drop(isExitedMem)
        isLessBalMem = refined_members[(refined_members['balance'] <= 2) & (refined_members['email'] != email)].index
        refined_members = refined_members.drop(isLessBalMem)

        refined_members.set_index(refined_members['email'], inplace=True)
        refined_members = refined_members.drop(['email'], axis=1)

        base_index = refined_members.index

        # Square matrix of pairwise euclidean distances, labelled by email.
        row_dist = pd.DataFrame(squareform(pdist(refined_members, metric='euclidean')),
                                columns=base_index, index=base_index)

        this_mem = row_dist[row_dist.index == email]
        this_mem = this_mem.reset_index(drop=True)

        return this_mem.to_dict(orient='records')[-1]
示例#2
0
    def dataframe(self):
        """Build the H1-2020 feature frame for ``self.ticker``.

        Joins the ticker's OHLCV history with the peer ticker's OHLC columns
        and the Korea covid daily counts, fills/interpolates gaps, writes the
        result to '<ticker>_dataset.csv' under ``self.path`` and returns it.
        """
        # History for the chosen ticker, 2020-01-01 .. 2020-06-30.
        df = pd.read_sql_table('korea_finance', engine.connect())
        df = df.loc[(df['ticker'] == self.ticker) & (df['date'] > '2019-12-31')
                    & (df['date'] < '2020-07-01')]
        df['date'] = pd.to_datetime(df['date'])
        df = df.drop(['ticker', 'id'], axis=1)
        df = df[df['date'].notna()].set_index('date')

        # Same window for the other ticker; its columns are prefixed with the
        # ticker symbol so both tickers' prices survive the join.
        tic = [t for t in self.tickers if t != self.ticker]
        df2 = pd.read_sql_table('korea_finance', engine.connect())
        df2 = df2.loc[(df2['ticker'] == tic[0]) & (df2['date'] > '2019-12-31')
                      & (df2['date'] < '2020-07-01')]
        # BUG FIX: convert 'date' to datetime exactly as done for df above;
        # with a plain string index the outer join against df's DatetimeIndex
        # would never align the rows.
        df2['date'] = pd.to_datetime(df2['date'])
        df2 = df2.rename(
            columns={
                'open': tic[0] + '_open',
                'close': tic[0] + '_close',
                'high': tic[0] + '_high',
                'low': tic[0] + '_low'
            })
        df2 = df2.drop(['id', 'ticker', 'volume'], axis=1)
        df2 = df2[df2['date'].notna()].set_index('date')

        # Daily Korea covid counts over the same window.
        covid_json = json.dumps(KoreaCovids.get()[0],
                                default=lambda x: x.__dict__)
        df3 = pd.read_json(covid_json)
        df3 = df3.loc[(df3['date'] > '2019-12-31')
                      & (df3['date'] < '2020-07-01')]
        df3 = df3.drop([
            'id', 'total_cases', 'total_deaths', 'seoul_cases', 'seoul_deaths'
        ],
                       axis=1)
        df3 = df3[df3['date'].notna()].set_index('date')

        # Combine everything. Covid columns default to 0 on non-reporting
        # days; remaining gaps (non-trading days) are interpolated.
        main_df = df.join(df2, how='outer')
        main_df = main_df.join(df3, how='outer')
        main_df[['ko_cases', 'ko_deaths', 'se_cases', 'se_deaths']] = main_df[[
            'ko_cases', 'ko_deaths', 'se_cases', 'se_deaths'
        ]].fillna(value=0)

        main_df = main_df.astype(float).interpolate()
        main_df = main_df.fillna(value=0)
        print(main_df)
        print(main_df.columns)

        output_file = self.ticker + '_dataset.csv'
        result = os.path.join(self.path, output_file)
        main_df.to_csv(result)
        return main_df
示例#3
0
    def get_data(self):
        """Load the members / KOSPI / NASDAQ tables and cache them on self.

        KOSPI prices are converted from KRW to USD so they are comparable
        with the NASDAQ prices.
        """
        members = pd.read_sql_table('members', engine.connect())
        kospis = pd.read_sql_table('korea_finance', engine.connect())
        nasdaqs = pd.read_sql_table('yahoo_finance', engine.connect())

        # Convert every KOSPI price column to USD at the 2020-10-30 exchange
        # rate. Vectorized column arithmetic replaces the original
        # per-element list comprehensions (same values, rounded to 4 places).
        KRW_PER_USD = 1129.16
        for col in ('open', 'close', 'high', 'low'):
            kospis[col] = (kospis[col].astype(float) / KRW_PER_USD).round(4)

        self.members = members
        self.kospis = kospis
        self.nasdaqs = nasdaqs
示例#4
0
    def similarity(email):
        """Return the euclidean-distance record of *email* against every
        active (non-exited, non-zero-balance) member, keyed by email."""
        raw_members = pd.read_sql_table('members', engine.connect())
        refined_members = RecommendStockPreprocessing().hook_process(raw_members)

        # Discard churned members, then members with an empty balance.
        refined_members = refined_members.drop(
            refined_members[refined_members['exited'] == 1].index)
        refined_members = refined_members.drop(
            refined_members[refined_members['balance'] == 0].index)
        print(len(refined_members))

        # Re-key the frame by email and drop the now-redundant column.
        refined_members.set_index(refined_members['email'], inplace=True)
        refined_members = refined_members.drop(['email'], axis=1)
        print(f'REFINED MEMBERS: \n{refined_members}')

        labels = refined_members.index

        # Full pairwise euclidean distance matrix, labelled by email.
        distances = squareform(pdist(refined_members, metric='euclidean'))
        row_dist = pd.DataFrame(distances, columns=labels, index=labels)

        target_row = row_dist[row_dist.index == email].reset_index(drop=True)

        return target_row.to_dict(orient='records')[-1]
示例#5
0
    def get_data(self):
        """Load and preprocess the members table, then split it into
        train / validation / test feature and label arrays on self."""
        raw = pd.read_sql_table('members', engine.connect())

        # Preprocess into a numeric matrix; the last column is the label.
        processed = MemberModelingDataPreprocessing().hook_process(raw)
        matrix = processed.to_numpy()

        feature_count = matrix.shape[1] - 1
        x = matrix[:, :feature_count]
        y = matrix[:, feature_count:]

        # 60% train; the remaining 40% is split again 60/40 into test and
        # validation.
        x_train, x_rest, y_train, y_rest = train_test_split(x,
                                                            y,
                                                            test_size=0.4)
        x_test, x_validation, y_test, y_validation = train_test_split(
            x_rest, y_rest, test_size=0.4)

        self.x_train = x_train
        self.x_validation = x_validation
        self.x_test = x_test
        self.y_train = y_train
        self.y_validation = y_validation
        self.y_test = y_test
示例#6
0
    def substitute(self):
        """Pair every retained member with every other member and feed each
        pair into the similarity model via self._feed_dict."""
        raw_members = pd.read_sql_table('members', engine.connect())
        refined = RecommendStockPreprocessing().hook_process(raw_members)

        # Remove members who have already churned.
        refined = refined.drop(refined[refined['exited'] == 1].index)
        print(f'REFINED MEMBERS: \n{refined}')

        # Re-key the frame by email and drop the redundant column.
        refined.set_index(refined['email'], inplace=True)
        refined = refined.drop(['email'], axis=1)
        print(f'REFINED MEMBERS AFTER EMAIL INDEXING: \n{refined}')

        cols = refined.columns

        for email in refined.index:
            # One-row frame for the current member; all remaining members.
            current = pd.DataFrame(refined.loc[email, cols]).T
            others = refined.loc[:, cols].drop(email, axis=0)

            for other_email in others.index:
                self._feed_dict = {
                    'this_member': current.loc[email, cols],
                    'target_member': others.loc[other_email, cols]
                }
                self.create_similarity_model()
示例#7
0
    def similarMembersTradings(sim_members, email):
        """Recommend stocks traded by similar members but not yet by *email*.

        sim_members: iterable of (member_email, probability) pairs.
        Returns a list of {'stock_ticker', 'stock_type', 'email'} dicts
        ordered by how many similar-member trades each ticker has (desc).
        """
        tradings = pd.read_sql_table('tradings', engine.connect())
        this_members_tradings = list(tradings[tradings['email'] == email]['stock_ticker'])

        # Base frame keeps the expected columns even when sim_members is empty.
        base = pd.DataFrame(columns=('id', 'email', 'stock_type', 'stock_ticker',
                                     'stock_qty', 'price', 'trading_date'))
        # PERF FIX: collect each member's trades and concatenate ONCE —
        # pd.concat inside the loop copied the accumulator every iteration
        # (quadratic). The probability element of each pair is unused.
        frames = [tradings[tradings['email'] == mem] for mem, _prob in sim_members]
        match_tradings = pd.concat([base] + frames)

        # Tickers ordered by trade count, most-traded first.
        stocks_size = list(match_tradings.groupby('stock_ticker').size()
                           .sort_values(ascending=False).index)
        return [{'stock_ticker': s,
                 'stock_type': str(match_tradings[match_tradings['stock_ticker'] == s]['stock_type'].unique()[0]),
                 'email': email}
                for s in stocks_size if s not in this_members_tradings]
示例#8
0
    def hook_process(self, member_data):
        """Preprocess member data into a model-ready feature frame.

        Accepts either a full members DataFrame or a single MemberDto (a
        newly registered member). A Dto is converted to a one-row frame and
        appended to the stored members table so the categorical/ordinal
        encodings below are computed consistently with the existing data.
        Returns the processed DataFrame — only the new member's row (index 0)
        when the input was a MemberDto.
        """
        this = self.filereader
        
        members = member_data
        this.train = members

        # A single new member arrives as a MemberDto: convert it to a
        # one-row DataFrame and append it to the existing members table.
        if isinstance(this.train, MemberDto):
            m = this.train
            _data = {'email': m.email, 'password': m.password, 'name': m.name, 'geography': m.geography, 'gender': m.gender, 'age': int(m.age), 'profile': m.profile, 
            'tenure': int(m.tenure), 'stock_qty': int(m.stock_qty), 'balance': float(m.balance), 'has_credit': int(m.has_credit), 'credit_score': int(m.credit_score), 'is_active_member': int(m.is_active_member),
                'estimated_salary': float(m.estimated_salary), 'role': m.role, 'probability_churn': float(m.probability_churn), 'exited': int(m.exited)}
            this.train = pd.DataFrame([_data])
            self.isNewMember = True 
            members_data = pd.read_sql_table('members', engine.connect())
            this.train = pd.concat([members_data, this.train], ignore_index=True)

        # isAdmin = this.train['email'] == '*****@*****.**'
        # this.train = this.train[~isAdmin]
        
        # Drop identifier / free-text columns that are not usable as
        # model features.
        this = self.drop_feature(this, 'email')
        this = self.drop_feature(this, 'password')
        this = self.drop_feature(this, 'name')
        this = self.drop_feature(this, 'profile')
        this = self.drop_feature(this, 'role')
        this = self.drop_feature(this, 'probability_churn')
        
        # Refine the data: encode categorical columns and bucket the
        # continuous ones into ordinal ranges ('age' is replaced by its
        # ordinal encoding and then dropped).
        this = self.geography_nominal(this)
        this = self.gender_nominal(this)
        this = self.age_ordinal(this)
        this = self.drop_feature(this, 'age')
        this = self.creditScore_ordinal(this)
        this = self.balance_ordinal(this)
        this = self.estimatedSalary_ordinal(this)

        # Correlation between customer churn and each column
        # (diagnostic only — intentionally disabled).
        # self.correlation_member_secession(this.train)

        # Move the label column to the end.
        this = self.columns_relocation(this)

        # For a new member, keep only the appended row and re-index it to 0.
        if self.isNewMember:
            this.train = this.train.tail(1)
            this.train.index = [0]
            print(f'EVERYTHING IS DONE: \n{this.train}')

        return this.train
    def create_dataframe(self):
        """Build the 2010-2020 training dataset for ``self.ticker``.

        Combines the ticker's OHLCV history (plus derived moving average and
        day-over-day volume/adjclose change rates), the peer NASDAQ ticker's
        prices, two KOSPI tickers' prices, a news-sentiment series and US
        covid counts into one date-indexed frame, saves it as
        '<ticker>_dataset.csv' under ``self.path`` and returns it.
        """
        #1. Bring history of the chosen ticker.
        df = pd.read_sql_table('yahoo_finance', engine.connect())
        df = df.loc[(df['ticker'] == self.ticker) & (df['date'] > '2009-12-31')
                    & (df['date'] < '2020-07-01')]
        df = df.drop(['ticker', 'id'], axis=1)

        #1-1. Derived features: 50-day moving average and day-over-day
        # increase rates of volume and adjusted close (first row's rates
        # are 0). (The unused `avg = []` local was removed.)
        df['moving_avg'] = df['adjclose'].rolling(window=50,
                                                  min_periods=0).mean()

        i = 1
        vol_rate = [0]
        adjclose_rate = [0]

        while i < len(df):
            vol_rate.append((df.iloc[i]['volume'] - df.iloc[i - 1]['volume']) /
                            df.iloc[i - 1]['volume'])
            adjclose_rate.append(
                (df.iloc[i]['adjclose'] - df.iloc[i - 1]['adjclose']) /
                df.iloc[i - 1]['adjclose'])
            i += 1

        df['increase_rate_vol'] = vol_rate
        df['increase_rate_adjclose'] = adjclose_rate

        #1-2. Bring history of the other NASDAQ ticker; prefix its columns
        # with the ticker symbol so they survive the join.
        df4 = pd.read_sql_table('yahoo_finance', engine.connect())
        tic = [t for t in self.tickers if t != self.ticker]
        df4 = df4.loc[(df4['ticker'] == tic[0]) & (df4['date'] > '2009-12-31')
                      & (df4['date'] < '2020-07-01')]
        df4 = df4.rename(
            columns={
                'open': tic[0] + '_open',
                'high': tic[0] + '_high',
                'low': tic[0] + '_low',
                'close': tic[0] + '_close',
                'adjclose': tic[0] + '_adjclose'
            })
        df4 = df4.drop(['ticker', 'id', 'volume'], axis=1)

        #1-3. KOSPI tickers (LG Chem & LG Innotek), joined into one frame.
        # BUG FIX: the table was also read once BEFORE the loop and that
        # result was immediately discarded by the read inside the loop; the
        # redundant query is removed.
        temp_df = pd.DataFrame()
        k_tickers = {
            'lgchem': '051910',
            'lginnotek': '011070'
        }  #LG Chem & LG Innotek
        for k_tic, v in k_tickers.items():
            df5 = pd.read_sql_table('korea_finance', engine.connect())

            df5['date'] = pd.to_datetime(df5['date'])
            df5 = df5.loc[(df5['ticker'] == v) & (df5['date'] > '2009-12-31') &
                          (df5['date'] < '2020-07-01')]
            df5 = df5.rename(
                columns={
                    'open': k_tic + '_open',
                    'close': k_tic + '_close',
                    'high': k_tic + '_high',
                    'low': k_tic + '_low'
                })
            df5 = df5.drop(['id', 'ticker', 'volume'], axis=1)
            df5 = df5[df5['date'].notna()].set_index('date')

            temp_df = temp_df.join(df5, how='outer')

        temp_df['date'] = temp_df.index

        #2. Bring news sentiment for the ticker (the two branches differed
        # only in the source table, so the drop is shared).
        if self.ticker == 'AAPL':
            sentiment_json = json.dumps(AppleSentiment.get()[0],
                                        default=lambda x: x.__dict__)
        else:
            sentiment_json = json.dumps(TeslaSentiment.get()[0],
                                        default=lambda x: x.__dict__)
        df2 = pd.read_json(sentiment_json)
        df2 = df2.drop(['id', 'pos', 'neg', 'neu'], axis=1)

        #3. Bring US covid status per day.
        covid_json = json.dumps(USCovids.get()[0],
                                default=lambda x: x.__dict__)
        df3 = pd.read_json(covid_json)
        df3 = df3.loc[(df3['date'] > '2019-12-31')
                      & (df3['date'] < '2020-07-01')]
        df3 = df3.drop(
            ['id', 'total_cases', 'total_deaths', 'ca_cases', 'ca_deaths'],
            axis=1)

        #4. Index every frame by date and combine all features.
        df = df[df['date'].notna()].set_index('date')
        df4 = df4[df4['date'].notna()].set_index('date')
        temp_df = temp_df[temp_df['date'].notna()].set_index('date')

        df2 = df2[df2['date'].notna()].set_index('date')
        df3 = df3[df3['date'].notna()].set_index('date')

        if self.ticker == 'AAPL':
            main_df = df.join(df4, how='outer')
        else:
            main_df = df.join(df4, how='inner')

        main_df = main_df.join(temp_df, how='outer')
        main_df = main_df.join(df2, how='outer')
        main_df = main_df.join(df3, how='outer')
        # Covid columns default to 0 before reporting began / on gap days.
        main_df[[
            'new_us_cases', 'new_us_deaths', 'new_ca_cases', 'new_ca_deaths'
        ]] = main_df[[
            'new_us_cases', 'new_us_deaths', 'new_ca_cases', 'new_ca_deaths'
        ]].fillna(value=0)

        #5. Fill NaN stock prices with interpolated values; anything left is 0.
        main_df = main_df.astype(float).interpolate()
        main_df = main_df.fillna(value=0)

        #6. Save to CSV file.
        output_file = self.ticker + '_dataset.csv'
        result = os.path.join(self.path, output_file)
        main_df.to_csv(result)

        print(main_df)
        return main_df
        '''
示例#10
0
    def predict(self):
        """Predict self.ticker's next price with the saved Keras checkpoint.

        Assembles one feature row from instance attributes (open/high/low,
        news-sentiment compound) plus the latest DB rows for the peer NASDAQ
        ticker, two KOSPI tickers and US covid counts, runs the model inside
        the module-level graph, and returns the price rounded to 2 decimals.
        """
        K.clear_session()

        checkpoint_path = os.path.join(self.path, 'checkpoint.h5')

        with graph.as_default():
            model = keras.models.load_model(checkpoint_path)
            model.summary()

            print('====================5===========================')

            ######### PREPARE FOR PREDICTION FEATURES ###############
            # Feature order must match the training dataset's column order —
            # presumably the CSV built by create_dataframe; TODO confirm.
            x = []
            x.append(self.open)
            x.append(self.high)
            x.append(self.low)
            x.append((self.high + self.low) / 2)  #'moving_avg'
            x.append(0.05)  #'increase_rate_vol' (fixed placeholder value)
            x.append(0.05)  #'increase_rate_adjclose' (fixed placeholder value)

            # The other NASDAQ stock's latest price row from the MariaDB.
            apple_df = pd.read_sql_table('yahoo_finance', engine.connect())
            op_tic = ['TSLA', 'AAPL']
            tic = [t for t in op_tic if t != self.ticker]
            apple_df = apple_df.loc[(apple_df['ticker'] == tic[0])].iloc[-1]

            x.append(apple_df['open'])
            x.append(apple_df['high'])
            x.append(apple_df['low'])
            x.append(apple_df['close'])
            x.append(apple_df['adjclose'])

            # The other KOSPI stocks' latest price rows from the MariaDB.
            KOSPI = {'lgchem': '051910', 'lginnotek': '011070'}

            for k_tic, v in KOSPI.items():
                kospi_df = pd.read_sql_table('korea_finance', engine.connect())
                kospi_df = kospi_df.loc[(kospi_df['ticker'] == v)].iloc[-1]
                x.append(float(kospi_df['open']))
                x.append(float(kospi_df['close']))
                x.append(float(kospi_df['high']))
                x.append(float(kospi_df['low']))

            # News-sentiment compound score.
            x.append(self.compound)

            # Append the latest US covid case/death counts.
            covid_json = json.dumps(USCovids.get()[0],
                                    default=lambda x: x.__dict__)
            covid_df = pd.read_json(covid_json)
            covid_df = covid_df.iloc[-1]

            x.append(covid_df['new_us_cases'])
            x.append(covid_df['new_us_deaths'])
            x.append(covid_df['new_ca_cases'])
            x.append(covid_df['new_ca_deaths'])

            # Single sample: shape (1, n_features).
            x = [x]

            # Normalize the data.
            # NOTE(review): the scaler is fitted on this single sample, so
            # every feature has min == max and scales to the same constant —
            # the model receives a constant input vector rather than values
            # scaled by the training distribution. The scaler should
            # presumably be fitted on the training data's statistics;
            # TODO confirm against the training pipeline.
            scaler = MinMaxScaler(feature_range=(0, 1))
            scaled = scaler.fit_transform(x)
            X = scaled.reshape((scaled.shape[0], 1, scaled.shape[1]))

            # Predict the (scaled) price, substitute it into feature slot 0,
            # and inverse-transform to map it back to an actual price.
            price = model.predict(X)
            scaled[0, 0] = price[0, 0]
            unscaled = scaler.inverse_transform(scaled)

        return round(unscaled[0, 0], 2)