def main(function, lookback, start, end, tickers, file, provider, plot_vs, plot_pct_levels, verbose):
    """Tool for analyzing and plotting market internals.

    <lookback>: integer lookback period
    <function>: available analysis methods
        'hilo': calculate the number of stocks at X-day highs/lows.
        'dma': calculate the number of stocks above/below a moving average.
    """
    context = Context(start, end, tickers, file, provider, verbose)
    df_list = context.data_frames
    click.echo("Fetching data for {:d} tickers".format(len(df_list)))

    plot_vs_df = None
    if plot_vs:
        plot_vs_df = context.data_provider.get_data([plot_vs],
                                                    from_date=context.start_date,
                                                    to_date=context.end_date)[0]
    if function == 'hilo':
        hilo_analysis(lookback, context.start_date, context.end_date,
                      df_list, plot_vs_df, plot_pct_levels.split(","))
    if function == 'dma':
        # Similar to SPXA50R http://stockcharts.com/h-sc/ui?s=$SPXA50R
        dma_analysis(lookback, context.start_date, context.end_date,
                     df_list, plot_vs_df, plot_pct_levels.split(","))

    if context.data_provider.errors > 0:
        logger.warning("Missing data for {:d} tickers.".format(context.data_provider.errors))
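# A minimal sketch of the 'hilo' idea described in the docstring above: for each
# trading day, count how many tickers closed at a new <lookback>-day high or low.
# This is an illustrative pandas version, not the project's hilo_analysis().
import pandas as pd

def count_highs_lows(close_series_list, lookback):
    """close_series_list: list of pd.Series of closes, one per ticker (hypothetical helper)."""
    highs = pd.DataFrame({i: c == c.rolling(lookback).max() for i, c in enumerate(close_series_list)})
    lows = pd.DataFrame({i: c == c.rolling(lookback).min() for i, c in enumerate(close_series_list)})
    return highs.sum(axis=1), lows.sum(axis=1)  # counts of new highs / new lows per day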
def main(argv):
    try:
        opts, args = getopt.getopt(argv, "c:", ["configure=", "help"])
    except getopt.GetoptError:
        usage()
        sys.exit()

    conf_path = os.path.join(os.path.abspath("."), 'configs/conf')
    for opt, arg in opts:
        if opt == '--help':
            usage()
            sys.exit()
        elif opt in ('-c', '--configure'):
            conf_path = arg
        else:
            usage()
            exit()

    def _exit_w_info(info):
        print('\n%s\n' % info)
        usage()
        exit()

    def _ok_conf(conf):
        def check_cfg(cfg):
            cpath = os.path.join(conf, cfg)
            return ((os.path.exists(cpath) and cpath) or
                    _exit_w_info('missing %s.' % cpath))
        return [check_cfg(cfg) for cfg in ('api.yaml', 'logging.yaml')]

    api_conf, logging_conf = _ok_conf(conf_path)
    app_conf = {}

    with codecs.open(logging_conf, 'r', 'utf-8') as logging_file:
        log_conf_dict = yaml.safe_load(logging_file)
        logfile_path = os.path.split(log_conf_dict['handlers']['file']['filename'])[0]
        if not os.path.exists(logfile_path):
            os.makedirs(logfile_path)
        logging.config.dictConfig(log_conf_dict)

    with codecs.open(api_conf, 'r', 'utf-8') as conff:
        app_conf.update(yaml.safe_load(conff))

    _log = logging.getLogger(__name__)

    # Import only after logging has been configured
    from mysqldal.sql_engine import sql_init
    from RESTFul_flask.app import app
    from RESTFul_flask.app import app_init
    from utils.context import Context

    context = Context()
    context.init(app_conf)
    app_init()
    sql_init()

    _log.debug('start from main.py')
    app.run(host="0.0.0.0", debug=True)
def main(function, start, end, tickers, file, provider, verbose):
    """Simple tool (based on https://github.com/pmorissette/ffn) for intermarket analysis.

    <function>: available analysis methods
        'average': display average combined returns
        'heat': display correlation heatmap
        'scatter': display scatter matrix
    """
    context = Context(start, end, tickers, file, provider, verbose)
    df_list = context.data_frames
    if len(df_list) < 1:
        click.echo("No dataframes. Exiting.")
        return

    closes = []
    for df in df_list:
        closes.append(df['Close'].rename(df['Ticker'][0]))

    if function == 'heat':
        g = ffn.GroupStats(*closes)
        g.plot_correlation()
        plt.show()
    elif function == 'scatter':
        g = ffn.GroupStats(*closes)
        axes = g.plot_scatter_matrix()
        plt.show()
    elif function == 'average':
        col = "Close"
        tickers = "Average: " + ", ".join([df['Ticker'][0] for df in df_list])
        rebased_merged = ffn.core.merge(*[ffn.core.rebase(c) for c in closes])
        average = pd.DataFrame(columns=[col])
        for index, row in rebased_merged.iterrows():
            average.at[index, col] = row.values.mean()  # DataFrame.set_value() is deprecated
        average = ta.add_ma(average, 200)
        average.plot()
        plt.title(tickers)
        plt.show()
    else:
        click.echo("{:s} not recognized".format(function))

    if context.data_provider.errors > 0:
        logger.warning("Missing data for {0} tickers.".format(context.data_provider.errors))
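# The 'average' branch above builds an equal-weighted line from rebased closes with a
# row-by-row loop; a vectorized sketch of the same computation (illustrative only,
# assuming the ffn package and a list of close Series as in the function above):
import ffn

def average_rebased(closes):
    rebased = ffn.core.merge(*[ffn.core.rebase(c) for c in closes])  # each series rebased to 100
    return rebased.mean(axis=1).to_frame("Close")                    # equal-weighted average per day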
        loss = loss_compute(out, batch.trg_y, batch.ntokens)
        total_loss += loss
        total_tokens += batch.ntokens
        tokens += batch.ntokens
        if i % 50 == 1:
            elapsed = time.time() - start
            ctx.logger.info("Epoch Step: %d Loss: %f Tokens per Sec: %f",
                            i, loss / batch.ntokens, tokens / elapsed)
            start = time.time()
            tokens = 0
    return total_loss / total_tokens


if __name__ == "__main__":
    # Train the simple copy task.
    ctx = Context(desc="Train")
    logger = ctx.logger

    vocab_size = 11  # V_Size
    criterion = LabelSmoothing(size=vocab_size, padding_idx=0, smoothing=0.0)
    model = build_model(ctx, vocab_size, vocab_size)
    logger.info(model)

    model_opt = NoamOpt(
        model.src_embed[0].d_model, 1, 400,
        torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))

    for epoch in range(100):
        logger.debug("Training Epoch %d", epoch)
        model.train()
        run_epoch(data_gen(vocab_size, 30, 20), model,
                  SimpleLossCompute(model.generator, criterion, model_opt),
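# For reference, a data_gen sketch for the copy task in the style of the Annotated
# Transformer (an assumption -- the generator actually used above may differ). It
# yields batches of random token ids where the target equals the source, and assumes
# the same Batch wrapper consumed by run_epoch.
import numpy as np
import torch

def data_gen(vocab_size, batch_size, nbatches, seq_len=10):
    for _ in range(nbatches):
        data = torch.from_numpy(np.random.randint(1, vocab_size, size=(batch_size, seq_len)))
        data[:, 0] = 1  # fixed start-of-sequence token
        yield Batch(data, data, 0)  # Batch is assumed to be defined alongside run_epoch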
""" synopsis: flask app. author: [email protected] (zhanghaoran) """ import logging import os from flask import Flask from flask import request from flask_cors import CORS from flask_restful import Api, Resource from utils.context import Context app = Flask(__name__) api = Api(app) context = Context() _log = logging.getLogger(__name__) class Hello(Resource): def get(self): print(request) print(dir(request)) print(request.url_root) print(context.conf_dict['woodenwerewolf']['url_root']) return {'hello': 'name'} def app_init(log_conf_dict): """ flask app 初始化
        hypothesises, attentions = [], []
        for i, (times, k) in enumerate(ks[:num_candidates]):
            hypothesis, attention = beam.get_hypothesis(times, k)
            hypothesises.append(hypothesis)
            attentions.append(attention)

        self.attentions = attentions
        self.hypothesises = [[token.item() for token in h] for h in hypothesises]
        hs = [self.postprocess(h) for h in self.hypothesises]
        return list(reversed(hs))


if __name__ == "__main__":
    context = Context(desc="Prediction")
    logger = context.logger

    logger.info('Constructing dictionaries...')
    source_dictionary = IndexDictionary.load(
        context.project_processed_dir, mode='source',
        vocabulary_size=context.vocabulary_size)
    target_dictionary = IndexDictionary.load(
        context.project_processed_dir, mode='target',
        vocabulary_size=context.vocabulary_size)

    logger.info('Building model...')
    model = build_model(context, source_dictionary.vocabulary_size,
                        target_dictionary.vocabulary_size)
# Default settings
import os

# Internal
from utils.context import Context


def _(name, default, cast=str):
    return cast(os.environ.get(name, default))


es = Context(host=_("ELASTIC_HOST", "localhost"),
             port=_("ELASTIC_PORT", 8080, int),
             index=_("ELASTIC_INDEX", "pravega_telemetry"))

pravega = Context(host=_("PRAVEGA_HOST", "localhost"),
                  port=_("PRAVEGA_PORT", 9090, int),
                  scope=_("PRAVEGA_SCOPE", "dell-scope9"),
                  stream=_("PRAVEGA_STREAM", "dell-stream9"),
                  group=_("PRAVEGA_READER_GROUP", "rg0"),
                  reader_id=_("PRAVEGA_READER_ID", "rdr"))
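# Usage sketch (assumption: this Context simply stores its keyword arguments as
# attributes, so downstream modules can read the settings directly):
es_url = "http://{}:{}/{}".format(es.host, es.port, es.index)  # hypothetical consumer
print(es_url)  # -> http://localhost:8080/pravega_telemetry with the defaults above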
def run():
    ctx = Context("Train_MultiGPU")
    logger = ctx.logger
    nums_batch = ctx.batch_size
    epochs = ctx.epochs

    # For data loading.
    from torchtext import data, datasets

    logger.info(f"Preparing dataset with batch size {nums_batch} ...")

    import spacy
    # !pip install torchtext spacy
    # !python -m spacy download en
    # !python -m spacy download de

    logger.info("Load en/de data from local ...")
    spacy_de = spacy.load('de', path=ctx.project_raw_dir)
    spacy_en = spacy.load('en', path=ctx.project_raw_dir)

    def tokenize_de(text):
        return [tok.text for tok in spacy_de.tokenizer(text)]

    # tokenize_en("I am a Chinese") --> ['I', 'am', 'a', 'Chinese']
    def tokenize_en(text):
        return [tok.text for tok in spacy_en.tokenizer(text)]

    # Preparing dataset
    logger.info("Build SRC and TGT Fields ...")
    BOS_WORD = '<s>'
    EOS_WORD = '</s>'
    BLANK_WORD = "<blank>"
    SRC = data.Field(tokenize=tokenize_de, pad_token=BLANK_WORD)
    TGT = data.Field(tokenize=tokenize_en, init_token=BOS_WORD,
                     eos_token=EOS_WORD, pad_token=BLANK_WORD)

    logger.info("Split datasets into train, val and test using SRC/TGT fields ...")
    MAX_LEN = 150
    # Split the dataset in the root path into train, val, and test datasets
    train, val, test = datasets.IWSLT.splits(
        exts=('.de', '.en'),       # A tuple containing the extension to path for each language.
        fields=(SRC, TGT),         # A tuple containing the fields that will be used for data in each language.
        root=ctx.project_raw_dir,  # Root dataset storage directory.
        filter_pred=lambda x: len(vars(x)['src']) <= MAX_LEN and len(vars(x)['trg']) <= MAX_LEN)

    logger.info("Build vocabularies for src and tgt ...")
    MIN_FREQ = 2
    SRC.build_vocab(train.src, min_freq=MIN_FREQ)
    TGT.build_vocab(train.trg, min_freq=MIN_FREQ)

    # GPUs to use
    devices = ctx.device_id  # [0, 1, 2, 3]
    pad_idx = TGT.vocab.stoi["<blank>"]

    logger.info("Build Model ...")
    model = build_model(ctx, len(SRC.vocab), len(TGT.vocab))
    model.cuda() if ctx.is_cuda else None

    # Print out log info for debug ...
    logger.info(model)

    criterion = LabelSmoothing(size=len(TGT.vocab), padding_idx=pad_idx, smoothing=0.1)
    criterion.cuda() if ctx.is_cuda else None

    logger.info("Generating Training and Validating Batch datasets ...")
    train_iter = MyIterator(train, batch_size=nums_batch, device=ctx.device, repeat=False,
                            sort_key=lambda x: (len(x.src), len(x.trg)),
                            batch_size_fn=batch_size_fn, train=True)
    logger.info(f"Training Dataset: epoch[{epochs}], iterations[{train_iter.iterations}], batch size [{nums_batch}]")
    valid_iter = MyIterator(val, batch_size=nums_batch, device=ctx.device, repeat=False,
                            sort_key=lambda x: (len(x.src), len(x.trg)),
                            batch_size_fn=batch_size_fn, train=False)
    logger.info(f"Validate Dataset: epoch[{epochs}], iterations[{valid_iter.iterations}], batch size [{nums_batch}]")

    if ctx.is_gpu_parallel:
        # Use multiple GPUs to train ...
        model_parallel = nn.DataParallel(model, device_ids=devices)
        loss_func = MultiGPULossCompute
    elif ctx.is_cuda:
        # Use a single GPU to train ...
        model_parallel = model
        loss_func = SimpleLossCompute
    else:
        # Use the CPU to train ...
        model_parallel = model
        loss_func = SimpleLossCompute

    logger.info("Training Process is beginning ...")

    # Train, or load the model from a checkpoint
    if True:
        model_opt = NoamOpt(model_size=model.src_embed[0].d_model,
                            factor=1,
                            warmup=2000,
                            optimizer=torch.optim.Adam(model.parameters(), lr=0,
                                                       betas=(0.9, 0.98), eps=1e-9))
        for epoch in range(epochs):
            # Set model to train mode
            model_parallel.train()
            run_epoch((rebatch(pad_idx, b) for b in train_iter),
                      model_parallel,
                      loss_func(model.generator, criterion, devices, opt=model_opt),
                      ctx)

            # Evaluate model
            model_parallel.eval()

            # Get loss
            loss = run_epoch((rebatch(pad_idx, b) for b in valid_iter),
                             model_parallel,
                             loss_func(model.generator, criterion, devices, opt=None),
                             ctx)
            logger.info("The loss is %f", loss)
    else:
        model = torch.load("iwslt.pt")

    logger.info("Training is over; evaluating the model ...")
    for i, batch in enumerate(valid_iter):
        src = batch.src.transpose(0, 1)[:1]
        src_mask = (src != SRC.vocab.stoi["<blank>"]).unsqueeze(-2)
        out = greedy_decode(model, src, src_mask, max_len=60, start_symbol=TGT.vocab.stoi["<s>"])

        print("Translation:", end="\t")
        for i in range(1, out.size(1)):
            sym = TGT.vocab.itos[out[0, i]]
            if sym == "</s>":
                break
            print(sym, end=" ")
        print()

        print("Target:", end="\t")
        for i in range(1, batch.trg.size(0)):
            sym = TGT.vocab.itos[batch.trg.data[i, 0]]
            if sym == "</s>":
                break
            print(sym, end=" ")
        print()
        break
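# For reference, a greedy_decode sketch in the style of the Annotated Transformer
# (an assumption -- the helper actually used above may differ). It assumes the model
# exposes encode/decode/generator and that subsequent_mask(size) builds the causal mask.
import torch

def greedy_decode(model, src, src_mask, max_len, start_symbol):
    memory = model.encode(src, src_mask)
    ys = torch.full((1, 1), start_symbol, dtype=src.dtype, device=src.device)
    for _ in range(max_len - 1):
        out = model.decode(memory, src_mask, ys,
                           subsequent_mask(ys.size(1)).type_as(src))
        prob = model.generator(out[:, -1])  # distribution over the next token
        next_word = prob.argmax(dim=1)      # greedy choice
        ys = torch.cat([ys, next_word.view(1, 1).type_as(src)], dim=1)
    return ys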
            smoothing_function=smoothing_function.method3)
        line = "{bleu_score}\t{source}\t{target}\t|\t{prediction}".format(
            bleu_score=sentence_bleu_score,
            source=source,
            target=target,
            prediction=prediction)
        file.write(line + '\n')

    return corpus_bleu(list_of_references, hypotheses,
                       smoothing_function=smoothing_function.method3)


if __name__ == "__main__":
    context = Context("Evaluation")
    logger = context.logger

    logger.info('Constructing dictionaries...')
    source_dictionary = IndexDictionary.load(
        context.project_processed_dir, mode='source',
        vocabulary_size=context.vocabulary_size)
    target_dictionary = IndexDictionary.load(
        context.project_processed_dir, mode='target',
        vocabulary_size=context.vocabulary_size)

    logger.info('Building model...')
    model = build_model(context, source_dictionary.vocabulary_size,
                        target_dictionary.vocabulary_size)
        if self.epoch > 0:
            self.logger.info("Saved model to {}".format(checkpoint_filepath))
            torch.save(self.model.state_dict(), checkpoint_filepath)

        self.history.append(save_state)

    def _elapsed_time(self):
        now = datetime.now()
        elapsed = now - self.start_time
        return str(elapsed).split('.')[0]  # remove milliseconds

    def postprocess(self):
        pass


if __name__ == "__main__":
    ctx = Context(desc="Learning-fix based on Transformer")
    logger = ctx.logger

    logger.info("Build Data Process Engine based on input parsed dataset ...")
    engine = DataProcessEngine(ctx)

    logger.info("Preparing dataset and building model for training ...")
    engine.preprocess(data_source_type="small")

    logger.info("Training and evaluating the model ...")
    engine.run(loss_func=SimpleLossComputeWithLablSmoothing, opt=get_std_opt(engine.model))

    logger.info("Testing and data clean-up ...")
    engine.postprocess()
    else:
        loss_function = TokenCrossEntropyLoss()

    accuracy_function = AccuracyMetric()

    if ctx.optimizer == 'Noam':
        optimizer = NoamOptimizer(model.parameters(), d_model=ctx.d_model)
    elif ctx.optimizer == 'Adam':
        optimizer = Adam(model.parameters(), lr=ctx.lr)
    else:
        raise NotImplementedError()

    logger.info('Start training...')
    trainer = TransformerTrainer(model=model,
                                 train_dataloader=train_dataloader,
                                 val_dataloader=val_dataloader,
                                 loss_function=loss_function,
                                 metric_function=accuracy_function,
                                 optimizer=optimizer,
                                 run_name=run_name,
                                 ctx=ctx)
    trainer.run(ctx.epochs)
    return trainer


if __name__ == '__main__':
    run_trainer_standalone(
        Context(desc="Train Example Project with GPU Resource!"))
import logging

from benchmarks.example.pipe import shared_tokens_generator, source_tokens_generator, target_tokens_generator
from utils.context import Context
from benchmarks.example.datasets import TranslationDataset, TokenizedTranslationDataset
from benchmarks.example.datasets import IndexedInputTargetTranslationDataset
from benchmarks.example.datasets import IndexedInputTargetTranslationDatasetOnTheFly, TranslationDatasetOnTheFly
from benchmarks.example.dictionaries import IndexDictionary

if __name__ == "__main__":
    context = Context(desc="dataset")
    logger = context.logger

    if logger.isEnabledFor(logging.DEBUG):
        # Prepare the raw train/val datasets: one file with a (src, tgt) pair per line
        # src-train.txt + tgt-train.txt --> raw-train.txt
        # src-val.txt + tgt-val.txt --> raw-val.txt
        logger.debug("The raw train and validate datasets are generating ...")
        TranslationDataset.prepare(context)

    # A list of training pairs: [(src, tgt), ..., (src, tgt)], built from raw-train.txt
    logger.info("The train dataset [(src, tgt), ..., (src, tgt)] is generating ...")
    translation_dataset = TranslationDataset(context, 'train')

    if logger.isEnabledFor(logging.DEBUG):
        # A list of training pairs: [(src, tgt), ..., (src, tgt)], built from src-train.txt, tgt-train.txt
        logger.debug("The train dataset [(src, tgt), ..., (src, tgt)] is generating on the fly ...")
        translation_dataset_on_the_fly = TranslationDatasetOnTheFly(context, 'train')
def seasonality_analysis(ticker, provider, start, end, plot_vs, plot_label, monthly, verbose):
    click.echo("Seasonality for {}".format(ticker))
    context = Context(start, end, ticker, None, provider, verbose)
    dataframes = context.data_frames
    if len(dataframes) == 0:
        click.echo("Found no data for {}. Exiting.".format(ticker))
        return
    else:
        df = dataframes[0]

    if monthly:
        rebased_dataframes = [df for df in seasonality.seasonality_monthly_returns(df)]
        fig = plt.figure()
        fig.suptitle("{0} monthly seasonality".format(ticker), fontsize=16)
        for i, data in enumerate(rebased_dataframes):
            ax = fig.add_subplot(4, 3, i + 1)
            ax.plot(data)
            ax.set_title(data.columns[0])
            # ax.set_xticks(data.index)
            ax.set_yticks([])
        plt.tight_layout()
        plt.show()
    else:
        seasonality_data = seasonality.seasonality_returns(df).apply(seasonality.rebase)
        fig, ax = plt.subplots()
        if plot_label == 'calendar':
            ax.plot(seasonality_data,
                    label="{0} {1}-{2}".format(ticker, df.index[0].year, df.index[-1].year))
            # months = mdates.MonthLocator()  # every month
            yearsFmt = mdates.DateFormatter('%b')
            ax.xaxis.set_major_formatter(yearsFmt)
            # ax.xaxis.set_minor_locator(months)
            days = mdates.DayLocator()
            ax.xaxis.set_minor_locator(days)
            fig.autofmt_xdate()
            title = "{0} Seasonality".format(ticker)
            if plot_vs:
                plot_vs_start = "{0}-01-01".format(datetime.datetime.now().year)
                plot_vs_end = datetime.datetime.strftime(datetime.datetime.now(), "%Y-%m-%d")
                plot_vs_df = context.data_provider.get_data([plot_vs], plot_vs_start, plot_vs_end)
                if len(plot_vs_df) == 0:
                    click.echo("No dataframe for {}. Exiting.".format(plot_vs))
                    return
                plot_vs_df_close = plot_vs_df[0]['Close']
                # Reindex the plot_vs data to the seasonality_data datetimes
                new_index = seasonality_data.index[0:len(plot_vs_df_close)]
                col = "{} {} to {}".format(plot_vs, plot_vs_start, plot_vs_df_close.index[-1].date())
                df = pd.DataFrame(plot_vs_df_close.values, index=new_index, columns=[col])
                df = seasonality.normalize(df).apply(seasonality.rebase)
                ax.plot(df, label=col)
                minorLocator = MultipleLocator(1)
                ax.xaxis.set_minor_locator(minorLocator)
                title = "{0} Seasonality vs {1}".format(ticker, plot_vs)
        elif plot_label == 'day':
            # days = range(1, len(plot_data) - 1)
            # plot_data_days = pd.DataFrame({ticker: plot_data[ticker].values}).reindex(days)
            seasonality_data_days = seasonality.trading_day_reindex(seasonality_data, ticker, ticker)
            seasonality_data_days = seasonality.normalize(seasonality_data_days).apply(seasonality.rebase_days)
            ax.plot(seasonality_data_days,
                    label="{0} {1}-{2}".format(ticker, df.index[0].year, df.index[-1].year))
            minorLocator = MultipleLocator(1)
            ax.xaxis.set_minor_locator(minorLocator)
            ax.set_xlabel("trading day")
            title = "{0} Seasonality".format(ticker)
            if plot_vs:
                plot_vs_start = "{0}-01-01".format(datetime.datetime.now().year)
                plot_vs_end = datetime.datetime.strftime(datetime.datetime.now(), "%Y-%m-%d")
                plot_vs_df = context.data_provider.get_data([plot_vs], plot_vs_start, plot_vs_end)
                if len(plot_vs_df) == 0:
                    click.echo("No dataframe for {}. Exiting.".format(plot_vs))
                    return
                plot_vs_df = plot_vs_df[0]
                df = seasonality.trading_day_reindex(plot_vs_df, ticker, 'Close')
                df = seasonality.normalize(df).apply(seasonality.rebase_days)
                col = "{} {} to {}".format(plot_vs, plot_vs_start, plot_vs_df.index[-1].date())
                ax.plot(df, label=col)
                title = "{0} Seasonality vs {1}".format(ticker, plot_vs)

        legend = ax.legend(loc='upper left', shadow=False, fontsize=12)
        plt.title(title)
        plt.show()