# --- Imports -----------------------------------------------------------------
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.offline as py
import plotly.tools as tls
from kaggle.competitions import twosigmanews

# Enable inline Plotly rendering for the notebook session.
py.init_notebook_mode(connected=True)

# --- pandas display / warning configuration ----------------------------------
pd.options.mode.chained_assignment = None  # silence SettingWithCopy warnings
pd.options.display.max_columns = 999       # show (effectively) every column

# --- Load the competition data -----------------------------------------------
# The Two Sigma environment hands back the training data as a pair, unpacked
# here into ``mt_df`` and ``nt_df``.
env = twosigmanews.make_env()
mt_df, nt_df = env.get_training_data()

# --- Quick inspection of the market frame ------------------------------------
# The bare expressions below were notebook cell outputs; they are kept as-is
# so the evaluation sequence is unchanged (outside a notebook they display
# nothing).
mt_df.head()
print("We have {:,} market samples in the training dataset.".format(mt_df.shape[0]))
mt_df.dtypes
mt_df.isna().sum()
mt_df.nunique()

# --- Select one asset over a fixed window ------------------------------------
asset1Code = 'AAPL.O'
# Both date bounds are strict (exclusive) comparisons against 'time'.
is_asset1 = mt_df['assetCode'] == asset1Code
after_start = mt_df['time'] > '2015-01-01'
before_end = mt_df['time'] < '2017-01-01'
asset1_df = mt_df[is_asset1 & after_start & before_end]

# --- Build the Plotly trace: closing price against calendar date -------------
trace1_dates = asset1_df['time'].dt.strftime(date_format='%Y-%m-%d').values
trace1_close = asset1_df['close'].values
trace1 = go.Scatter(
    x=trace1_dates,
    y=trace1_close,
)
# --- Imports -----------------------------------------------------------------
import os
from itertools import chain

import lightgbm as lgb
import matplotlib.pyplot as plt

plt.style.use('fivethirtyeight')

# Notebook magic ``%matplotlib inline`` written as the equivalent Python call
# (the form nbconvert emits); it requires an IPython/Jupyter session.
get_ipython().run_line_magic('matplotlib', 'inline')

# Input data files are available in the "../input/" directory.
# Listing it shows which files the kernel can read.
print(os.listdir("../input"))

# --- Load the competition data -----------------------------------------------
env = twosigmanews.make_env()  # load env
# get_training_data() returns (market data, news data); fetch the pair once
# instead of calling it separately for each element.
df_market, df_news = env.get_training_data()

# Any results you write to the current directory are saved as output.
# Per-column count of missing values in the market data.
print(df_market.isnull().sum())


def fillMarketEmpty(df_market):
    """Fill missing values in the market-residual return columns.

    Each of the four ``returns*PrevMktres*`` columns has its missing entries
    replaced with the sentinel ``-9999.99``. Every column is filled from
    itself, so existing (non-missing) values are never altered.

    Args:
        df_market: Market DataFrame containing the columns
            ``returnsClosePrevMktres1``, ``returnsOpenPrevMktres1``,
            ``returnsClosePrevMktres10`` and ``returnsOpenPrevMktres10``.

    Returns:
        The same ``df_market`` object, modified in place.

    Raises:
        KeyError: If any of the four columns is absent.
    """
    fill_value = -9999.99
    residual_columns = (
        'returnsClosePrevMktres1',
        'returnsOpenPrevMktres1',
        'returnsClosePrevMktres10',
        'returnsOpenPrevMktres10',
    )
    for column in residual_columns:
        # Source and destination are the same column: the earlier version
        # filled 'returnsClosePrevMktres10' from the *Open* column, which
        # overwrote the close residuals with open residuals.
        df_market[column] = df_market[column].fillna(fill_value)
    return df_market