def demo(code='N225',
         name='日経平均株価',
         start='2014-01-01',
         days=240,
         csvfile=os.path.join(
             os.path.dirname(os.path.abspath(__file__)),
             '..', 'test', 'stock_N225.csv'),
         update=False):
    """Run two usage examples and return the result of the analysis run.

    First a TechnicalIndicators object is driven by hand on the CSV data
    (the last ten 'ret_index' rows are printed and the merged frame is
    saved with the 'demo_' prefix); then a full Analysis is built from the
    same arguments and executed.

    Returns:
        Whatever ``Analysis.run()`` returns.
    """
    # Example 1: handle a TechnicalIndicators object directly.
    file_io = FileIO()
    prices = file_io.read_from_csv(code, csvfile)
    indicators = TechnicalIndicators(prices)
    indicators.calc_ret_index()
    print(indicators.stock['ret_index'].tail(10))
    merged = file_io.merge_df(prices, indicators.stock)
    file_io.save_data(merged, code, 'demo_')

    # Example 2: run the analysis pipeline.
    # NOTE(review): `update` is accepted but never forwarded -- Analysis is
    # always created with update=True. Confirm this is intended.
    analysis = Analysis(
        code=code,
        name=name,
        start=start,
        days=days,
        csvfile=csvfile,
        update=True,
    )
    return analysis.run()
def __init__(self, path, depth):
    """Set up the decision-tree classifier and its helper objects.

    Args:
        path: Stored unchanged as ``self.file_path``.
        depth: Currently unused.
    """
    # NOTE(review): `depth` is ignored and the tree depth is fixed at 3 --
    # confirm whether max_depth was meant to come from `depth`.
    # PCAProcess and DrawChart helpers are currently disabled.
    self.clf = tree.DecisionTreeClassifier(max_depth=3)
    self.file_io = FileIO()
    self.test = Test()
    self.file_path = path
def __init__(
        self,
        id_path,
        cust_payment_path,
        cust_attr_path,
        product_attr_path,
        cust_path,
        cancel_path,
        contact_path,
        cti_path,
        register_type_path,
        status_path,
        stay_time_path,
        pv_sum_path,
        session_path,
        char_type):
    """Create the helper objects and load every input table.

    Each ``*_path`` argument is opened with
    ``FileIO.open_file_as_pandas(path, char_type)`` and the result is kept
    on the attribute named after the path (``id_path`` -> ``self.id`` ...).

    Args:
        char_type: Passed as second argument to every open call
            (presumably the file encoding -- confirm against FileIO).
    """
    self.file_io = FileIO()
    self.encode = CategoryEncode()
    self.count_rec = CountRecord()
    self.extract_col = ExtractColumns()

    # Open the files (same order as the parameters).
    load = self.file_io.open_file_as_pandas
    self.id = load(id_path, char_type)
    self.cust_payment = load(cust_payment_path, char_type)
    self.cust_attr = load(cust_attr_path, char_type)
    self.product_attr = load(product_attr_path, char_type)
    self.cust = load(cust_path, char_type)
    self.cancel = load(cancel_path, char_type)
    self.contact = load(contact_path, char_type)
    self.cti = load(cti_path, char_type)
    self.register_type = load(register_type_path, char_type)
    self.status = load(status_path, char_type)
    self.stay_time = load(stay_time_path, char_type)
    self.pv_sum = load(pv_sum_path, char_type)
    self.session = load(session_path, char_type)
def __init__(self, con_path, char_type):
    """Create the helper objects and load the file at ``con_path``.

    Args:
        con_path: Path of the file to open; also kept as ``self.con_path``.
        char_type: Second argument for ``FileIO.open_file_as_pandas``
            (presumably the file encoding -- confirm against FileIO).
    """
    # Initialisation.
    self.file_io = FileIO()
    self.extract_col = ExtractColumns()
    self.con_path = con_path

    # Open the file.
    opened = self.file_io.open_file_as_pandas(con_path, char_type)
    self.con = opened
def test_read_csv():
    """Check that FileIO.read_from_csv loads the bundled N225 price file.

    Reads 'stock_N225.csv' from this module's directory and compares the
    'Adj Close' value for 2015-03-20, rounded to two decimals, with the
    known value 19560.22.
    """
    io = FileIO()
    filename = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                            'stock_N225.csv')
    df = io.read_from_csv("N225", filename)
    # Label-based lookup via .loc: DataFrame.ix was removed in pandas 1.0.
    result = round(df.loc['2015-03-20', 'Adj Close'], 2)
    expected = 19560.22
    eq_(expected, result)
def main():
    """Command-line entry point: profile a trace file over time sections.

    Expected arguments (``sys.argv[1:]``)::

        <input> <output> <profiler> <start> <end> [<start> <end> ...]

    ``<profiler>`` is matched case-insensitively against ``CPU`` and
    ``FILE_IO``. Each ``(start, end)`` pair becomes a ``Section``;
    duplicate sections are dropped. If ``<input>`` ends with ``.zip`` the
    member named like the archive but with a ``.csv`` suffix is extracted
    and used as input.

    Raises:
        ValueError: wrong argument count, a non-numeric section bound, an
            invalid section (``start > end`` or ``start < 0``), or the
            expected CSV member missing from the archive.
    """
    args = sys.argv[1:]
    # Check the argument count: three fixed arguments plus complete
    # (start, end) pairs means the total is odd and at least 5.
    if len(args) < 5 or (len(args) & 1) != 1:
        raise ValueError(f'Invalid number of arguments: {len(args)}')

    f_in = args[0]
    f_out = args[1]
    prof = args[2].upper()

    sections = []
    for raw_start, raw_end in zip(args[3::2], args[4::2]):
        try:
            start = float(raw_start)
            end = float(raw_end)
        except ValueError as err:
            # Report the raw arguments: `start`/`end` may be unbound (or
            # left over from the previous pair) when the conversion fails.
            raise ValueError(
                f'Incorrect input: [{raw_start}, {raw_end}]') from err
        # Check that the interval is valid.
        if start > end or start < 0:
            raise ValueError(f'Incorrect section: [{start}, {end}]')
        # Skip duplicate intervals.
        sec = Section(start, end)
        if sec not in sections:
            sections.append(sec)

    # For .zip inputs: extract the same-named .csv member and use it.
    if f_in.endswith('.zip'):
        with zipfile.ZipFile(f_in, 'r') as zp:
            f_in = f_in[:-4] + '.csv'
            if f_in in zp.namelist():
                zp.extract(f_in)
            else:
                raise ValueError(f'File {f_in} was not found in archive')

    # NOTE(review): any other profiler name falls through silently.
    if prof == 'CPU':
        cpu = CPU(f_in, sections)
        cpu.filter()
        top_processes = cpu.get_top_processes()
        top_modules = cpu.get_top_modules()
        # Write the results to the output file.
        log_cpu(f_out, top_processes, top_modules)
    elif prof == 'FILE_IO':
        file_io = FileIO(f_in, sections)
        file_io.filter()
        top_durations = file_io.get_top_durations()
        top_sizes = file_io.get_top_sizes()
        # Write the results to the output file.
        log_file_io(f_out, top_durations, top_sizes)
def __init__(self, in_path, in_char, payment_path, out_char,
             cust_attr_path, product_attr_path):
    """Create the helper objects and remember the given settings.

    Every argument is stored unchanged on the attribute of the same name;
    no file is opened here.
    """
    # Helper objects.
    self.count_rec = CountRecord()
    self.file_io = FileIO()

    # Paths.
    self.in_path = in_path
    self.payment_path = payment_path
    self.cust_attr_path = cust_attr_path
    self.product_attr_path = product_attr_path

    # Character settings (presumably input/output encodings -- confirm).
    self.in_char = in_char
    self.out_char = out_char
def main(file_name):
    """Read a corpus from ``file_name``, clean it and start the UI.

    The cleaned word list feeds a TextGenerator, which is handed to a
    UserInterface whose ``runProgram`` is then called.
    """
    reader = FileIO(file_name)
    raw_corpus = reader.read()

    # List of every word in the corpus.
    cleanser = TextCleanser()
    words = cleanser.clean_text(raw_corpus)

    generator = TextGenerator(words)
    interface = UserInterface(generator)
    # Start the program.
    interface.runProgram()
def __init__(self):
    """Instantiate the regression model and all helper objects."""
    # Model and file access.
    self.lr = LinearRegression()
    self.file_io = FileIO()
    # PCAProcess and DrawChart helpers are currently disabled.

    # Test helpers.
    self.test = Test()
    self.individual = IndividualTest()

    # Scalers and NaN handling.
    self.sc = StandardScaler()
    self.ms = MinMaxScaler()
    self.drop_na = DropNaN()
def __init__(self, in_path, in_char, payment_path, out_char,
             cust_attr_path, target_attr_path, average_attr_path):
    """Create the helper objects and remember the given settings.

    Every argument is stored unchanged on the attribute of the same name;
    no file is opened here.
    """
    # Helper objects.
    self.count_rec = CountRecord()
    self.file_io = FileIO()

    # Paths.
    self.in_path = in_path
    self.payment_path = payment_path
    self.cust_attr_path = cust_attr_path
    self.target_attr_path = target_attr_path
    self.average_attr_path = average_attr_path

    # Character settings (presumably input/output encodings -- confirm).
    self.in_char = in_char
    self.out_char = out_char
def __init__(self, root_cat, depth, log_file, output_dir, root_dir):
    """Set up loggers, collaborators, seed state and file/directory names.

    Args:
        root_cat: Root category; stored and used as the first entry of
            ``self._categories``.
        depth: Stored as ``self._limit_depth``.
        log_file: Passed to every ``mylogger.get_logger`` call.
        output_dir: Given to FileIO and used as prefix of the output files.
        root_dir: Given to MorphemeTagger and used as prefix of the
            template file path.
    """
    def _debug_logger(owner):
        # One DEBUG-level logger per class, all sharing the same log file.
        return mylogger.get_logger(owner.__name__, log_file, mylogger.DEBUG)

    # Loggers.
    self._logger = _debug_logger(DistantExtractor)
    io_logger = _debug_logger(FileIO)
    wiki_logger = _debug_logger(WikipediaExtractor)
    morph_logger = _debug_logger(MorphemeTagger)

    # Collaborating instances.
    self._file_io = FileIO(output_dir, io_logger)
    self._wiki_extractor = WikipediaExtractor(wiki_logger, self._file_io)
    self._morpheme_tagger = MorphemeTagger(morph_logger, root_dir)

    # Arguments.
    self._root_cat = root_cat
    self._limit_depth = depth

    # TODO: later, support seeds for several classes by using a dict with
    # the name as key and the seeds (list) as value; the labeling part
    # already works that way.
    self._seed_name = 'Car'
    self._seeds = list()
    self._categories = [self._root_cat]

    # Directory names.
    self._seed_dir = 'seeds'
    self._unlabeled_dir = 'unlabeled_corpora'
    self._cleaned_dir = 'cleaned_corpora'
    self._mecab_dir = 'mecab_corpora'
    self._labeled_dir = 'labeled_corpora'
    self._train_dir = 'train_corpora'
    self._output = 'output'
    self._temp_dir = 'temp'

    # File paths.
    self._templatefile = '%s/templates/template' % root_dir
    self._trainfile = '%s/train.txt' % output_dir
    self._decodefile = '%s/decode.txt' % output_dir
    self._modelfile = '%s/model' % output_dir
    self._all_labeledfile = '%s/all_labeled.txt' % output_dir
def test_save_data():
    """Check that FileIO.save_data creates 'test_N225.csv'.

    The file is removed again once its existence has been asserted.
    """
    sample = testdata()
    file_io = FileIO()
    target = 'test_N225.csv'

    file_io.save_data(sample, "N225", "test_")

    eq_(True, os.path.exists(target))
    # Clean up the file written by save_data.
    if os.path.exists(target):
        os.remove(target)
def __init__(self):
    """Instantiate the helper objects and load the drop list.

    ``self.droplist`` holds the whitespace-stripped lines of
    'droplist.txt', read from the current working directory.
    """
    # LinearRegression, PCAProcess and DrawChart are currently disabled.
    self.file_io = FileIO()

    # Test helpers.
    self.test = Test()
    self.individual = IndividualTest()

    # Scalers and NaN handling.
    self.sc = StandardScaler()
    self.ms = MinMaxScaler()
    self.drop_na = DropNaN()

    # The attribute is assigned before the file is opened.
    self.droplist = []
    with open('droplist.txt') as drop_file:
        self.droplist = [entry.strip() for entry in drop_file]
import matplotlib.pyplot as plt #%matplotlib inline from matplotlib.colors import ListedColormap from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler from sklearn.datasets import make_moons, make_circles, make_classification from sklearn.neighbors import KNeighborsClassifier from sklearn.svm import SVC from sklearn.tree import DecisionTreeClassifier from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier from sklearn.naive_bayes import GaussianNB from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis h = .02 # step size in the mesh file_io = FileIO() inifile = configparser.ConfigParser() inifile.read('./config.ini', 'UTF-8') names = [ "Nearest Neighbors", "Linear SVM", "RBF SVM", "Decision Tree", "Random Forest", "AdaBoost", "Naive Bayes", "Linear Discriminant Analysis", "Quadratic Discriminant Analysis" ] classifiers = [ KNeighborsClassifier(3), SVC(kernel="linear", C=0.025), SVC(gamma=2, C=1), DecisionTreeClassifier(max_depth=5), RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1), AdaBoostClassifier(),
def run(self):
    """Load price data, compute indicators, classify/predict and plot.

    Data comes from ``self.csvfile`` when set (optionally extended with
    newer rows fetched via ``FileIO.read_data`` when ``self.update`` is
    true), otherwise entirely from ``FileIO.read_data``. Progress is
    reported with ``print``.

    Returns:
        The TechnicalIndicators object on success; None when the loaded
        data is empty or a ValueError/KeyError is raised during analysis.
    """
    io = FileIO()
    will_update = self.update

    if self.csvfile:
        stock_tse = io.read_from_csv(self.code, self.csvfile)
        msg = "".join([
            "Read data from csv: ", self.code,
            " Records: ", str(len(stock_tse))
        ])
        print(msg)

        if self.update and len(stock_tse) > 0:
            # Second entry of a two-period business-day range that starts
            # at the last stored date = first date to request.
            index = pd.date_range(start=stock_tse.index[-1],
                                  periods=2, freq='B')
            next_day = index[1]
            t = next_day.strftime('%Y-%m-%d')
            newdata = io.read_data(self.code, start=t, end=self.end)
            msg = "".join([
                "Read data from web: ", self.code,
                " New records: ", str(len(newdata))
            ])
            print(msg)
            if len(newdata) < 1:
                will_update = False
            else:
                # Positional access: DataFrame.ix was removed in pandas 1.0.
                print(newdata.iloc[-1, :])

            stock_tse = stock_tse.combine_first(newdata)
            io.save_data(stock_tse, self.code, 'stock_')
    else:
        stock_tse = io.read_data(self.code, start=self.start, end=self.end)
        msg = "".join([
            "Read data from web: ", self.code,
            " Records: ", str(len(stock_tse))
        ])
        print(msg)

    if stock_tse.empty:
        msg = "".join(["Data empty: ", self.code])
        print(msg)
        return None

    if not self.csvfile:
        io.save_data(stock_tse, self.code, 'stock_')

    try:
        stock_d = stock_tse.asfreq('B').dropna()[self.days:]

        ti = TechnicalIndicators(stock_d)

        # Every calc_* call is kept, including those whose return value is
        # overwritten: only the last result of each family is passed to
        # the plot, but the calls may also update `ti` itself.
        ti.calc_sma()
        ti.calc_sma(timeperiod=5)
        ti.calc_sma(timeperiod=25)
        ti.calc_sma(timeperiod=50)
        ti.calc_sma(timeperiod=75)
        ewma = ti.calc_ewma(span=5)
        ewma = ti.calc_ewma(span=25)
        ewma = ti.calc_ewma(span=50)
        ewma = ti.calc_ewma(span=75)
        bbands = ti.calc_bbands()
        sar = ti.calc_sar()
        draw = Draw(self.code, self.fullname)

        ret = ti.calc_ret_index()
        ti.calc_vol(ret['ret_index'])
        rsi = ti.calc_rsi(timeperiod=9)
        rsi = ti.calc_rsi(timeperiod=14)
        mfi = ti.calc_mfi()
        roc = ti.calc_roc(timeperiod=10)
        roc = ti.calc_roc(timeperiod=25)
        roc = ti.calc_roc(timeperiod=50)
        roc = ti.calc_roc(timeperiod=75)
        roc = ti.calc_roc(timeperiod=150)
        ti.calc_cci()
        ultosc = ti.calc_ultosc()
        stoch = ti.calc_stoch()
        ti.calc_stochf()
        ti.calc_macd()
        willr = ti.calc_willr()
        ti.calc_momentum(timeperiod=10)
        ti.calc_momentum(timeperiod=25)
        tr = ti.calc_tr()
        ti.calc_atr()
        ti.calc_natr()
        vr = ti.calc_volume_rate()

        ret_index = ti.stock['ret_index']
        last_row = ti.stock.index[-1]

        clf = Classifier(self.clffile)
        train_X, train_y = clf.train(ret_index, will_update)
        msg = "".join(["Train Records: ", str(len(train_y))])
        print(msg)
        clf_result = clf.classify(ret_index)[0]
        msg = "".join(["Classified: ", str(clf_result)])
        print(msg)
        # Write to the last row by label (replaces the removed `.ix`).
        ti.stock.loc[last_row, 'classified'] = clf_result

        reg = Regression(self.regfile, alpha=1, regression_type="Ridge")
        train_X, train_y = reg.train(ret_index, will_update)
        msg = "".join(["Train Records: ", str(len(train_y))])
        # This message used to be built and then overwritten unprinted.
        print(msg)
        base = ti.stock_raw['Adj Close'].iloc[0]
        reg_result = int(reg.predict(ret_index, base)[0])
        msg = "".join(["Predicted: ", str(reg_result)])
        print(msg)
        ti.stock.loc[last_row, 'predicted'] = reg_result

        if len(self.reference) > 0:
            ti.calc_rolling_corr(self.reference)
            ref = ti.stock['rolling_corr']
        else:
            ref = []

        io.save_data(io.merge_df(stock_d, ti.stock), self.code, 'ti_')

        draw.plot(stock_d, ewma, bbands, sar, rsi, roc, mfi, ultosc,
                  willr, stoch, tr, vr, clf_result, reg_result, ref,
                  axis=self.axis, complexity=self.complexity)

        return ti
    except (ValueError, KeyError):
        msg = "".join(["Error occured in ", self.code])
        print(msg)
        return None
def __init__(self):
    """Create the helper objects, the logger and register the session.

    The logger is built by ``set_logger`` from LOGGER_PATH and this
    class's LOGGER_FILE / LOGGER_LEVEL; the object returned by
    ``get_session()`` is handed to ``ktf.set_session``.
    """
    self.file_io = FileIO()
    self.visual = Visualization()
    self.logger = set_logger(
        LOGGER_PATH,
        self.LOGGER_FILE,
        self.LOGGER_LEVEL,
        __name__,
    )
    session = get_session()
    ktf.set_session(session)
def run(self):
    """Load price data, compute indicators, classify/predict and plot.

    Data comes from ``self.csvfile`` when set (optionally extended with
    newer rows fetched via ``FileIO.read_data`` when ``self.update`` is
    true), otherwise entirely from ``FileIO.read_data``. Progress is
    reported through ``self.logger``.

    Returns:
        The TechnicalIndicators object on success; None when the loaded
        data is empty or a ValueError/KeyError is raised during analysis.
    """
    io = FileIO()
    will_update = self.update

    self.logger.info("".join(["Start Analysis: ", self.code]))

    if self.csvfile:
        stock_tse = io.read_from_csv(self.code, self.csvfile)
        self.logger.info("".join([
            "Read data from csv: ", self.code,
            " Records: ", str(len(stock_tse))
        ]))

        if self.update and len(stock_tse) > 0:
            # Second entry of a two-period business-day range that starts
            # at the last stored date = first date to request.
            index = pd.date_range(start=stock_tse.index[-1],
                                  periods=2, freq='B')
            next_day = index[1]
            t = next_day.strftime('%Y-%m-%d')
            newdata = io.read_data(self.code, start=t, end=self.end)
            self.logger.info("".join([
                "Read data from web: ", self.code,
                " New records: ", str(len(newdata))
            ]))
            if len(newdata) < 1:
                will_update = False
            else:
                # Positional access: DataFrame.ix was removed in pandas 1.0.
                print(newdata.iloc[-1, :])

            stock_tse = stock_tse.combine_first(newdata)
            io.save_data(stock_tse, self.code, 'stock_')
    else:
        stock_tse = io.read_data(self.code, start=self.start, end=self.end)
        self.logger.info("".join([
            "Read data from web: ", self.code,
            " Records: ", str(len(stock_tse))
        ]))

    if stock_tse.empty:
        # Logger.warn is a deprecated alias (removed in Python 3.13).
        self.logger.warning("".join(["Data empty: ", self.code]))
        return None

    if not self.csvfile:
        io.save_data(stock_tse, self.code, 'stock_')

    try:
        stock_d = stock_tse.asfreq('B').dropna()[self.minus_days:]

        ti = TechnicalIndicators(stock_d)

        # Every calc_* call is kept, including those whose return value is
        # overwritten: only the last result of each family is passed to
        # the plot, but the calls may also update `ti` itself.
        ti.calc_sma()
        ti.calc_sma(timeperiod=5)
        ti.calc_sma(timeperiod=25)
        ti.calc_sma(timeperiod=50)
        ti.calc_sma(timeperiod=75)
        ti.calc_sma(timeperiod=200)
        ewma = ti.calc_ewma(span=5)
        ewma = ti.calc_ewma(span=25)
        ewma = ti.calc_ewma(span=50)
        ewma = ti.calc_ewma(span=75)
        ewma = ti.calc_ewma(span=200)
        bbands = ti.calc_bbands()
        sar = ti.calc_sar()
        draw = Draw(self.code, self.fullname)

        ret = ti.calc_ret_index()
        ti.calc_vol(ret['ret_index'])
        rsi = ti.calc_rsi(timeperiod=9)
        rsi = ti.calc_rsi(timeperiod=14)
        mfi = ti.calc_mfi()
        roc = ti.calc_roc(timeperiod=10)
        roc = ti.calc_roc(timeperiod=25)
        roc = ti.calc_roc(timeperiod=50)
        roc = ti.calc_roc(timeperiod=75)
        roc = ti.calc_roc(timeperiod=150)
        ti.calc_cci()
        ultosc = ti.calc_ultosc()
        stoch = ti.calc_stoch()
        ti.calc_stochf()
        ti.calc_macd()
        willr = ti.calc_willr()
        ti.calc_momentum(timeperiod=10)
        ti.calc_momentum(timeperiod=25)
        tr = ti.calc_tr()
        ti.calc_atr()
        ti.calc_natr()
        vr = ti.calc_volume_rate()

        ret_index = ti.stock['ret_index']
        last_row = ti.stock.index[-1]

        clf = Classifier(self.clffile)
        train_X, train_y = clf.train(ret_index, will_update)
        self.logger.info("".join(
            ["Classifier Train Records: ", str(len(train_y))]))
        clf_result = clf.classify(ret_index)[0]
        self.logger.info("".join(["Classified: ", str(clf_result)]))
        # Write to the last row by label (replaces the removed `.ix`).
        ti.stock.loc[last_row, 'classified'] = clf_result

        reg = Regression(self.regfile, alpha=1, regression_type="Ridge")
        train_X, train_y = reg.train(ret_index, will_update)
        self.logger.info("".join(
            ["Regression Train Records: ", str(len(train_y))]))
        base = ti.stock_raw['Adj Close'].iloc[0]
        reg_result = int(reg.predict(ret_index, base)[0])
        self.logger.info("".join(["Predicted: ", str(reg_result)]))
        ti.stock.loc[last_row, 'predicted'] = reg_result

        if will_update is True:
            io.save_data(io.merge_df(stock_d, ti.stock), self.code, 'ti_')

        # First positional argument for draw.plot after the data, chosen
        # from the size of the (negative) slice start.
        if self.minus_days < -300:
            _prefix = 'long'
        elif self.minus_days >= -60:
            _prefix = 'short'
        else:
            _prefix = 'chart'

        draw.plot(stock_d, _prefix, ewma, bbands, sar, rsi, roc, mfi,
                  ultosc, willr, stoch, tr, vr, clf_result, reg_result,
                  axis=self.axis, complexity=self.complexity)

        self.logger.info("".join(["Finish Analysis: ", self.code]))

        return ti
    except (ValueError, KeyError) as e:
        self.logger.error("".join(
            ["Error occured in ", self.code, " at analysis.py"]))
        self.logger.error("".join(['ErrorType: ', str(type(e))]))
        self.logger.error("".join(['ErrorMessage: ', str(e)]))
        return None
def test_FileIO_methods(self):
    """Check FileIO top-duration and top-size results on a fixture file.

    Writes 'file_io_test.csv' with one header line and the same eight
    event rows for each of the processes p1, p2 and p3, runs
    ``filter()`` over two sections and compares ``get_top_durations()``
    and ``get_top_sizes()`` with the expected per-section lists.
    """
    f_in = 'file_io_test.csv'
    sections = [Section(11, 15), Section(15, 17)]

    header = (
        'Process,Event Type,Event SubType,Thread,Start (s),End (s),'
        'Duration (µs),IRP,File Object,Size (B),File Path,Offset,Flags,'
        'Result,Count\n'
    )
    # Event rows without the leading process column; each process gets
    # the identical set, written in this order.
    event_rows = (
        'Create,,,"11,1","11,2","100000",,,0,C:\\Program Files...,0,...,...,1',
        'Create,,,"11,1","12,1","1000000",,,0,C:\\Program Files...,0,...,...,1',
        'Read,,,"13,1","14,2","1100000",,,1,C:\\ProgramData...,0,...,...,1',
        'Read,,,"13,1","14,2","1100000",,,2,C:\\Program Files...,0,...,...,1',
        'Read,,,"15,1","16,2","1100000",,,3,C:\\ProgramData...,0,...,...,1',
        'Read,,,"15,1","16,2","1100000",,,4,C:\\ProgramData...,0,...,...,1',
        'Write,,,"12,1","12,4","300000",,,5,C:\\ProgramData...,0,...,...,1',
        'Write,,,"15,1","15,4","300000",,,6,C:\\Program Files...,0,...,...,1',
    )

    with open(f_in, 'w') as f:
        f.write(header)
        for process in ('p1', 'p2', 'p3'):
            for row in event_rows:
                f.write(f'{process},{row}\n')

    file_io = FileIO(f_in, sections)
    file_io.filter()
    top_durations = file_io.get_top_durations()
    top_sizes = file_io.get_top_sizes()

    expected_top_durations = {
        Section(11, 15): [('p1', 300000), ('p2', 300000), ('p3', 300000)],
        Section(15, 17): [('p1', 300000), ('p2', 300000), ('p3', 300000)]
    }
    expected_top_sizes = {
        Section(11, 15): [('p1', 1), ('p2', 1), ('p3', 1)],
        Section(15, 17): [('p1', 7), ('p2', 7), ('p3', 7)]
    }

    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(top_durations, expected_top_durations)
    self.assertEqual(top_sizes, expected_top_sizes)
def __init__(self):
    """Create the three collaborators: scraper, file I/O and notifier."""
    scrapper = WebScrapper()
    file_io = FileIO()
    notifier = TelegramNotifier()

    self.scrapper = scrapper
    self.io = file_io
    self.notifier = notifier
def __init__(self, file=None, text=None):
    """Store the text to work on.

    Args:
        file: Passed to ``FileIO`` when ``text`` is falsy; the pieces
            returned by ``read_file()`` are joined into one string.
        text: Used as-is when truthy. An empty string counts as missing,
            so the file is read in that case too.
    """
    if text:
        self.text = text
    else:
        pieces = FileIO(file).read_file()
        self.text = ''.join(pieces)
def __init__(self):
    """Create the file I/O, PCA and chart-drawing helpers."""
    file_io = FileIO()
    pca = PrincipleComponentAnalysis()
    chart = DrawChart()

    self.file_io = file_io
    self.pca = pca
    self.chart = chart