Example #1
def main(function, lookback, start, end, tickers, file, provider, plot_vs, plot_pct_levels, verbose):
    """
    Tool for analyzing and plotting market internals

    <lookback>: Integer to specify lookback period

    <function>: Available analysis methods

    'hilo': to calculate number of stocks at X-day highs/lows.

    'dma': calculate number of stocks below/above any moving average.

    """
    context = Context(start, end, tickers, file, provider, verbose)
    df_list = context.data_frames

    click.echo("Fetching data for {:d} tickers".format(len(df_list)))
    
    plot_vs_df = None
    if plot_vs:
        plot_vs_df = context.data_provider.get_data([plot_vs], from_date=context.start_date,
                                                    to_date=context.end_date)[0]
    if function == 'hilo':
        hilo_analysis(lookback, context.start_date, context.end_date, df_list, plot_vs_df, plot_pct_levels.split(","))

    if function == 'dma':
        # Similar to SPXA50R http://stockcharts.com/h-sc/ui?s=$SPXA50R
        dma_analysis(lookback, context.start_date, context.end_date, df_list, plot_vs_df, plot_pct_levels.split(","))

    if context.data_provider.errors > 0:
        logger.warning("Missing data for {:d} tickers.".format(context.data_provider.errors))
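
The docstring above describes 'dma' as counting how many tickers trade above or below a moving average (similar to $SPXA50R). A minimal pandas sketch of that idea, assuming each dataframe in df_list carries a 'Close' column as in this example; the helper name percent_above_ma is hypothetical and is not the project's dma_analysis:

def percent_above_ma(df_list, lookback=50):
    # Hypothetical sketch: percentage of tickers whose latest close sits above
    # their own `lookback`-day simple moving average.
    above, counted = 0, 0
    for df in df_list:
        closes = df['Close'].dropna()
        if len(closes) < lookback:
            continue  # not enough history for this ticker
        sma = closes.rolling(lookback).mean()
        counted += 1
        if closes.iloc[-1] > sma.iloc[-1]:
            above += 1
    return 100.0 * above / counted if counted else float('nan')
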
Example #2
File: main.py Project: zhourudong/scmdb
def main(argv):
    try:
        opts, args = getopt.getopt(argv, "c:", ["configure=", "help"])
    except getopt.GetoptError:
        usage()
        sys.exit()

    conf_path = os.path.join(os.path.abspath("."), 'configs/conf')
    for opt, arg in opts:
        if opt == '--help':
            usage()
            sys.exit()
        elif opt in ('-c', '--configure'):
            conf_path = arg
        else:
            usage()
            sys.exit()

    def _exit_w_info(info):
        print('\n%s\n' % info)
        usage()
        sys.exit()

    def _ok_conf(conf):
        def check_cfg(cfg):
            cpath = os.path.join(conf, cfg)
            return ((os.path.exists(cpath) and cpath)
                    or _exit_w_info('missing %s.' % cpath))

        return [check_cfg(cfg) for cfg in ('api.yaml', 'logging.yaml')]

    api_conf, logging_conf = _ok_conf(conf_path)
    app_conf = {}
    with codecs.open(logging_conf, 'r', 'utf-8') as logging_file:
        log_conf_dict = yaml.safe_load(logging_file)
        logfile_path = os.path.split(
            log_conf_dict['handlers']['file']['filename'])[0]
        if not os.path.exists(logfile_path):
            os.makedirs(logfile_path)
        logging.config.dictConfig(log_conf_dict)
    with codecs.open(api_conf, 'r', 'utf-8') as conff:
        app_conf.update(yaml.safe_load(conff))
    _log = logging.getLogger(__name__)

    # Import these only after logging has been configured
    from mysqldal.sql_engine import sql_init
    from RESTFul_flask.app import app
    from RESTFul_flask.app import app_init
    from utils.context import Context

    context = Context()
    context.init(app_conf)
    app_init()
    sql_init()
    _log.debug('start from main.py')
    app.run(host="0.0.0.0", debug=True)
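
The snippet expects conf_path to hold api.yaml and logging.yaml, reads handlers.file.filename from the logging config to pre-create the log directory, and then applies it with logging.config.dictConfig. A minimal sketch of the shape that logging config presumably has, written directly as a dict; the concrete formatter, handler, and file names here are assumptions:

import logging.config
import os

log_conf_dict = {
    'version': 1,
    'formatters': {
        'simple': {'format': '%(asctime)s %(name)s %(levelname)s %(message)s'},
    },
    'handlers': {
        'file': {  # main() looks up handlers -> file -> filename
            'class': 'logging.FileHandler',
            'filename': 'logs/scmdb.log',
            'formatter': 'simple',
        },
    },
    'root': {'level': 'DEBUG', 'handlers': ['file']},
}

# Same directory bootstrap as in main() above.
os.makedirs(os.path.split(log_conf_dict['handlers']['file']['filename'])[0], exist_ok=True)
logging.config.dictConfig(log_conf_dict)
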
Example #3
def main(function, start, end, tickers, file, provider, verbose):
    """Simple tool (based on https://github.com/pmorissette/ffn) for intermarket analysis.

    <function>: Available analysis methods:
    
    'average': display average combined returns

    'heat': display correlations heatmap

    'scatter': display scatter matrix

    """
    context = Context(start, end, tickers, file, provider, verbose)
    df_list = context.data_frames

    if len(df_list) < 1:
        click.echo("No dataframes. Exiting.")
        return

    closes = []
    for df in df_list:
        closes.append(df['Close'].rename(df['Ticker'][0]))

    if function == 'heat':
        g = ffn.GroupStats(*closes)
        g.plot_correlation()
        plt.show()

    elif function == 'scatter':
        g = ffn.GroupStats(*closes)
        axes = g.plot_scatter_matrix()
        plt.show()

    elif function == 'average':
        col = "Close"
        tickers = "Average: " + ", ".join([df['Ticker'][0] for df in df_list])
        rebased_merged = ffn.core.merge(*[ffn.core.rebase(c) for c in closes])

        average = pd.DataFrame(columns=[col])
        for index, row in rebased_merged.iterrows():
            average.at[index, col] = row.values.mean()  # .set_value() was removed in newer pandas

        average = ta.add_ma(average, 200)

        average.plot()
        plt.title(tickers)
        plt.show()

    else:
        click.echo("{:s} not recognized".format(function))

    if context.data_provider.errors > 0:
        logger.warning("Missing data for {0} tickers.".format(context.data_provider.errors))
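
The 'average' branch rebases every close series to a common starting level and then averages them date by date. The same result can be sketched with plain pandas, assuming the closes list built above and that ffn.core.rebase scales each series to start at 100; this also avoids the per-row loop:

import pandas as pd

def average_rebased(closes, base=100.0):
    # Sketch: rebase each close series to `base`, align on the date index,
    # then take the row-wise mean as a single 'Close' column.
    rebased = [series / series.iloc[0] * base for series in closes]
    merged = pd.concat(rebased, axis=1)
    return merged.mean(axis=1).to_frame(name='Close')
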
Example #4
        loss = loss_compute(out, batch.trg_y, batch.ntokens)
        total_loss += loss
        total_tokens += batch.ntokens
        tokens += batch.ntokens
        if i % 50 == 1:
            elapsed = time.time() - start
            ctx.logger.info("Epoch Step: %d Loss: %f Tokens per Sec: %f", i,
                            loss / batch.ntokens, tokens / elapsed)
            start = time.time()
            tokens = 0
    return total_loss / total_tokens


if __name__ == "__main__":
    # Train the simple copy task.
    ctx = Context(desc="Train")
    logger = ctx.logger
    vocab_size = 11  # V_Size
    criterion = LabelSmoothing(size=vocab_size, padding_idx=0, smoothing=0.0)
    model = build_model(ctx, vocab_size, vocab_size)
    logger.info(model)
    model_opt = NoamOpt(
        model.src_embed[0].d_model, 1, 400,
        torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98),
                         eps=1e-9))

    for epoch in range(100):
        logger.debug("Training Epoch %d", epoch)
        model.train()
        run_epoch(data_gen(vocab_size, 30, 20), model,
                  SimpleLossCompute(model.generator, criterion, model_opt),
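
NoamOpt above wraps Adam with the warmup-then-decay learning-rate schedule from "Attention Is All You Need"; presumably it computes lrate = factor * d_model^-0.5 * min(step^-0.5, step * warmup^-1.5). A small sketch of that rate function, with defaults mirroring the factor and warmup arguments passed above:

def noam_rate(step, d_model, factor=1, warmup=400):
    # Learning rate at `step` under the Noam schedule: linear warmup for
    # `warmup` steps, then decay proportional to step ** -0.5.
    return factor * (d_model ** -0.5) * min(step ** -0.5, step * warmup ** -1.5)

# e.g. with d_model=512 the rate peaks around step == warmup and decays afterwards:
# noam_rate(1, 512) < noam_rate(400, 512) > noam_rate(4000, 512)
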
Example #5
"""
synopsis: flask app.
author: [email protected] (zhanghaoran)
"""
import logging
import os
from flask import Flask
from flask import request
from flask_cors import CORS
from flask_restful import Api, Resource

from utils.context import Context

app = Flask(__name__)
api = Api(app)
context = Context()
_log = logging.getLogger(__name__)


class Hello(Resource):
    def get(self):
        print(request)
        print(dir(request))
        print(request.url_root)
        print(context.conf_dict['woodenwerewolf']['url_root'])
        return {'hello': 'name'}


def app_init(log_conf_dict):
    """
    flask app initialization
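
The Hello resource only becomes reachable once it is registered on the Api object, which this truncated snippet does not show. A hedged sketch of the usual flask_restful wiring; the '/hello' route is an assumption, not taken from the project:

# Hypothetical registration; the real project presumably does this during app_init().
api.add_resource(Hello, '/hello')

# After app.run(), GET /hello would return {"hello": "name"}:
#   curl http://localhost:5000/hello
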
Example #6
        hypothesises, attentions = [], []
        for i, (times, k) in enumerate(ks[:num_candidates]):
            hypothesis, attention = beam.get_hypothesis(times, k)
            hypothesises.append(hypothesis)
            attentions.append(attention)

        self.attentions = attentions
        self.hypothesises = [[token.item() for token in h]
                             for h in hypothesises]
        hs = [self.postprocess(h) for h in self.hypothesises]
        return list(reversed(hs))


if __name__ == "__main__":

    context = Context(desc="Prediction")
    logger = context.logger

    logger.info('Constructing dictionaries...')
    source_dictionary = IndexDictionary.load(
        context.project_processed_dir,
        mode='source',
        vocabulary_size=context.vocabulary_size)
    target_dictionary = IndexDictionary.load(
        context.project_processed_dir,
        mode='target',
        vocabulary_size=context.vocabulary_size)

    logger.info('Building model...')
    model = build_model(context, source_dictionary.vocabulary_size,
                        target_dictionary.vocabulary_size)
Example #7
# Default settings

import os
# Internal
from utils.context import Context


def _(name, default, cast=str):
    return cast(os.environ.get(name, default))


es = Context(host=_("ELASTIC_HOST", "localhost"),
             port=_("ELASTIC_PORT", 8080, int),
             index=_("ELASTIC_INDEX", "pravega_telemetry"))

pravega = Context(host=_("PRAVEGA_HOST", "localhost"),
                  port=_("PRAVEGA_PORT", 9090, int),
                  scope=_("PRAVEGA_SCOPE", "dell-scope9"),
                  stream=_("PRAVEGA_STREAM", "dell-stream9"),
                  group=_("PRAVEGA_READER_GROUP", "rg0"),
                  reader_id=_("PRAVEGA_READER_ID", "rdr"))
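
The `_` helper reads an environment variable, falls back to a default, and casts the result, so every setting can be overridden at deploy time. A quick usage sketch; the override value and the unset variable name are made up:

import os

os.environ["ELASTIC_PORT"] = "9200"                       # hypothetical override
assert _("ELASTIC_PORT", 8080, int) == 9200               # env value wins and is cast to int
assert _("SOME_UNSET_SETTING", "fallback") == "fallback"  # unset -> default, cast with str
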
Example #8
def run():
    ctx = Context("Train_MultiGPU")
    logger = ctx.logger
    nums_batch = ctx.batch_size
    epochs = ctx.epochs

    # For data loading.
    from torchtext import data, datasets
    logger.info(f"Preparing dataset with batch size ... ")
    import spacy

    # !pip install torchtext spacy
    # !python -m spacy download en
    # !python -m spacy download de

    logger.info("Load en/de data from local ...")
    spacy_de = spacy.load('de', path=ctx.project_raw_dir)
    spacy_en = spacy.load('en', path=ctx.project_raw_dir)


    def tokenize_de(text):
        return [tok.text for tok in spacy_de.tokenizer(text)]

    # tokenize_en("I am a Chinese")  --> ['I', 'am', 'a', 'Chinese']
    def tokenize_en(text):
        return [tok.text for tok in spacy_en.tokenizer(text)]

    # Preparing dataset
    logger.info("Build SRC and TGT Fields ...")
    BOS_WORD = '<s>'
    EOS_WORD = '</s>'
    BLANK_WORD = "<blank>"
    SRC = data.Field(tokenize=tokenize_de, pad_token=BLANK_WORD)
    TGT = data.Field(tokenize=tokenize_en, init_token=BOS_WORD,
                     eos_token=EOS_WORD, pad_token=BLANK_WORD)
    logger.info("Split datasets into train, val and test using SRC/TGT fields ...")
    MAX_LEN = 150
    # Split dataset in root path into train, val, and test dataset
    train, val, test = datasets.IWSLT.splits(
        exts=('.de', '.en'),  # A tuple containing the extension to path for each language.
        fields=(SRC, TGT),  # A tuple containing the fields that will be used for data in each language.
        root=ctx.project_raw_dir,  # Root dataset storage directory.
        filter_pred=lambda x: len(vars(x)['src']) <= MAX_LEN and len(vars(x)['trg']) <= MAX_LEN)

    logger.info("Build vocabularies for src and tgt ...")
    MIN_FREQ = 2
    SRC.build_vocab(train.src, min_freq=MIN_FREQ)
    TGT.build_vocab(train.trg, min_freq=MIN_FREQ)

    # GPUs to use
    devices = ctx.device_id   #  [0, 1, 2, 3]
    pad_idx = TGT.vocab.stoi["<blank>"]
    logger.info("Build Model ...")
    model = build_model(ctx, len(SRC.vocab), len(TGT.vocab))
    model.cuda() if ctx.is_cuda else None

    # Print out log info for debug ...
    logger.info(model)


    criterion = LabelSmoothing(size=len(TGT.vocab), padding_idx=pad_idx, smoothing=0.1)
    criterion.cuda() if ctx.is_cuda else None

    logger.info("Generating Training and Validating Batch datasets ...")
    train_iter = MyIterator(train, batch_size=nums_batch, device=ctx.device,
                            repeat=False, sort_key=lambda x: (len(x.src), len(x.trg)),
                            batch_size_fn=batch_size_fn, train=True)
    logger.info(f"Training Dataset: epoch[{epochs}], iterations[{train_iter.iterations}], batch size [{nums_batch}]")

    valid_iter = MyIterator(val, batch_size=nums_batch, device=ctx.device,
                            repeat=False, sort_key=lambda x: (len(x.src), len(x.trg)),
                            batch_size_fn=batch_size_fn, train=False)

    logger.info(f"Validate Dataset: epoch[{epochs}], iterations[{valid_iter.iterations}], batch size [{nums_batch}]")

    if ctx.is_gpu_parallel:
        # Using multiple GPU resource to train ...
        model_parallel = nn.DataParallel(model, device_ids=devices)
        loss_func = MultiGPULossCompute
    elif ctx.is_cuda:
        # Using Single GPU resource to train ...
        model_parallel = model
        loss_func = SimpleLossCompute
    else:
        # Using Single CPU resource to train ...
        model_parallel = model
        loss_func = SimpleLossCompute

    logger.info("Training Process is beginning ...")

    # Training or load model from checkpoint
    if True:
        model_opt = NoamOpt(model_size = model.src_embed[0].d_model,
                            factor = 1,
                            warmup = 2000,
                            optimizer = torch.optim.Adam(model.parameters(),
                                                         lr=0, betas=(0.9, 0.98), eps=1e-9))
        for epoch in range(epochs):
            # Set model in train
            model_parallel.train()
            run_epoch((rebatch(pad_idx, b) for b in train_iter),
                      model_parallel,
                      loss_func(model.generator, criterion, devices, opt=model_opt),
                      ctx)

            # Evaluation Model
            model_parallel.eval()

            # Get loss
            loss = run_epoch((rebatch(pad_idx, b) for b in valid_iter),
                             model_parallel,
                             loss_func(model.generator, criterion, devices, opt=None),
                             ctx)
            logger.info("The loss is %f", loss)
    else:
        model = torch.load("iwslt.pt")

    logger.info("Training is over and Evaluate Model  ...")
    for i, batch in enumerate(valid_iter):
        src = batch.src.transpose(0, 1)[:1]
        src_mask = (src != SRC.vocab.stoi["<blank>"]).unsqueeze(-2)
        out = greedy_decode(model, src, src_mask,
                            max_len=60, start_symbol=TGT.vocab.stoi["<s>"])
        print("Translation:", end="\t")
        for i in range(1, out.size(1)):
            sym = TGT.vocab.itos[out[0, i]]
            if sym == "</s>":
                break
            print(sym, end=" ")
        print()
        print("Target:", end="\t")
        for i in range(1, batch.trg.size(0)):
            sym = TGT.vocab.itos[batch.trg.data[i, 0]]
            if sym == "</s>": break
            print(sym, end=" ")
        print()
        break
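
MyIterator is driven by batch_size_fn, which in the Annotated-Transformer style setup measures batch size in padded tokens rather than in sentences so batches stay roughly uniform in compute cost. A sketch of that function under that assumption:

# Dynamic batching sketch: "size" is the padded token count of the batch so far.
global max_src_in_batch, max_tgt_in_batch

def batch_size_fn(new, count, sofar):
    # Return the padded token count if `new` were added as the `count`-th example.
    global max_src_in_batch, max_tgt_in_batch
    if count == 1:                                   # fresh batch: reset the running maxima
        max_src_in_batch, max_tgt_in_batch = 0, 0
    max_src_in_batch = max(max_src_in_batch, len(new.src))
    max_tgt_in_batch = max(max_tgt_in_batch, len(new.trg) + 2)   # +2 for <s> and </s>
    return max(count * max_src_in_batch, count * max_tgt_in_batch)
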
Example #9
                    smoothing_function=smoothing_function.method3)
                line = "{bleu_score}\t{source}\t{target}\t|\t{prediction}".format(
                    bleu_score=sentence_bleu_score,
                    source=source,
                    target=target,
                    prediction=prediction)
                file.write(line + '\n')

        return corpus_bleu(list_of_references,
                           hypotheses,
                           smoothing_function=smoothing_function.method3)


if __name__ == "__main__":

    context = Context("Evaluation")
    logger = context.logger

    logger.info('Constructing dictionaries...')
    source_dictionary = IndexDictionary.load(
        context.project_processed_dir,
        mode='source',
        vocabulary_size=context.vocabulary_size)
    target_dictionary = IndexDictionary.load(
        context.project_processed_dir,
        mode='target',
        vocabulary_size=context.vocabulary_size)

    logger.info('Building model...')
    model = build_model(context, source_dictionary.vocabulary_size,
                        target_dictionary.vocabulary_size)
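
The evaluation above scores predictions with NLTK's sentence_bleu/corpus_bleu and SmoothingFunction().method3. A small self-contained sketch of that scoring call; the sentences are made up:

from nltk.translate.bleu_score import SmoothingFunction, corpus_bleu, sentence_bleu

smoothing_function = SmoothingFunction()
reference = ['the', 'cat', 'sits', 'on', 'the', 'mat']
hypothesis = ['the', 'cat', 'sat', 'on', 'the', 'mat']

# sentence_bleu takes a list of reference token lists plus one hypothesis token list.
score = sentence_bleu([reference], hypothesis,
                      smoothing_function=smoothing_function.method3)

# corpus_bleu takes the same structure with one entry per sentence in the corpus.
corpus_score = corpus_bleu([[reference]], [hypothesis],
                           smoothing_function=smoothing_function.method3)
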
Example #10
        if self.epoch > 0:
            self.logger.info("Saved model to {}".format(checkpoint_filepath))
            torch.save(self.model.state_dict(), checkpoint_filepath)
            self.history.append(save_state)


    def _elapsed_time(self):
        now = datetime.now()
        elapsed = now - self.start_time
        return str(elapsed).split('.')[0]  # remove milliseconds

    def postprocess(self):
        pass


if __name__ == "__main__":
    ctx = Context(desc="Learning-fix based on Transformer")
    logger = ctx.logger

    logger.info("Build Data Process Engine based on input parsed dataset ...")
    engine = DataProcessEngine(ctx)

    logger.info("Preparing dataset and building model for training ...")
    engine.preprocess(data_source_type="small")

    logger.info("Training and evaluating the model ...")
    engine.run(loss_func=SimpleLossComputeWithLablSmoothing, opt=get_std_opt(engine.model))

    logger.info("Testing and data clean ...")
    engine.postprocess()
Example #11
    else:
        loss_function = TokenCrossEntropyLoss()

    accuracy_function = AccuracyMetric()

    if ctx.optimizer == 'Noam':
        optimizer = NoamOptimizer(model.parameters(), d_model=ctx.d_model)
    elif ctx.optimizer == 'Adam':
        optimizer = Adam(model.parameters(), lr=ctx.lr)
    else:
        raise NotImplementedError()

    logger.info('Start training...')
    trainer = TransformerTrainer(model=model,
                                 train_dataloader=train_dataloader,
                                 val_dataloader=val_dataloader,
                                 loss_function=loss_function,
                                 metric_function=accuracy_function,
                                 optimizer=optimizer,
                                 run_name=run_name,
                                 ctx=ctx)

    trainer.run(ctx.epochs)

    return trainer


if __name__ == '__main__':
    run_trainer_standalone(
        Context(desc="Train Example Project with GPU Resource!"))
Example #12
import logging
from benchmarks.example.pipe import shared_tokens_generator, source_tokens_generator, target_tokens_generator
from utils.context import Context
from benchmarks.example.datasets import TranslationDataset, TokenizedTranslationDataset
from benchmarks.example.datasets import IndexedInputTargetTranslationDataset
from benchmarks.example.datasets import IndexedInputTargetTranslationDatasetOnTheFly, TranslationDatasetOnTheFly
from benchmarks.example.dictionaries import IndexDictionary

if __name__ == "__main__":
    context = Context(desc="dataset")
    logger = context.logger

    if logger.isEnabledFor(logging.DEBUG):
        # Preparing Raw train/val dataset: a file of each line (src, tgt)
        # src-train.txt + tgt-train.txt --> raw-train.txt
        # src-val.txt + tgt-val.txt --> raw-val.txt
        logger.debug("The raw train and validate datasets are generating ...")
        TranslationDataset.prepare(context)

    # a list of train dataset: [(src, tgt), ..., (src, tgt)], build from raw-train.txt
    logger.info(
        "The train dataset [(src, tgt), ..., (src, tgt)] is generating ...")
    translation_dataset = TranslationDataset(context, 'train')

    if logger.isEnabledFor(logging.DEBUG):
        # a list of train dataset: [(src, tgt), ..., (src, tgt)], build from src-train.txt, tgt-train.txt
        logger.debug(
            "The train dataset [(src, tgt), ..., (src, tgt)] is generating on the fly ..."
        )
        translation_dataset_on_the_fly = TranslationDatasetOnTheFly(
            context, 'train')
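
The comments describe TranslationDataset.prepare as pairing src-train.txt with tgt-train.txt into a single raw-train.txt whose lines each hold an (src, tgt) pair. A rough sketch of that preparation step, assuming a tab-separated line format; the separator and exact file names are assumptions:

def merge_parallel_files(src_path, tgt_path, out_path, sep='\t'):
    # Sketch: pair up source and target lines into one combined file.
    with open(src_path, encoding='utf-8') as src_file, \
         open(tgt_path, encoding='utf-8') as tgt_file, \
         open(out_path, 'w', encoding='utf-8') as out_file:
        for src_line, tgt_line in zip(src_file, tgt_file):
            out_file.write(src_line.rstrip('\n') + sep + tgt_line.rstrip('\n') + '\n')

# merge_parallel_files('src-train.txt', 'tgt-train.txt', 'raw-train.txt')
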
Example #13
def seasonality_analysis(ticker, provider, start, end, plot_vs, plot_label,
                         monthly, verbose):
    click.echo("Seasonality for {}".format(ticker))

    context = Context(start, end, ticker, None, provider, verbose)
    dataframes = context.data_frames

    if len(dataframes) == 0:
        click.echo("Found no data for {}. Exiting.".format(ticker))
        return
    else:
        df = dataframes[0]

    if monthly:
        rebased_dataframes = [
            df for df in seasonality.seasonality_monthly_returns(df)
        ]

        fig = plt.figure()
        fig.suptitle("{0} monthly seasonality".format(ticker), fontsize=16)
        for i, data in enumerate(rebased_dataframes):
            ax = fig.add_subplot(4, 3, i + 1)
            ax.plot(data)
            ax.set_title(data.columns[0])
            # ax.set_xticks(data.index)
            ax.set_yticks([])

        plt.tight_layout()
        plt.show()

    else:
        seasonlity_data = seasonality.seasonality_returns(df).apply(
            seasonality.rebase)

        fig, ax = plt.subplots()
        if plot_label == 'calendar':
            ax.plot(seasonlity_data,
                    label="{0} {1}-{2}".format(ticker, df.index[0].year,
                                               df.index[-1].year))
            # months = mdates.MonthLocator()  # every month
            yearsFmt = mdates.DateFormatter('%b')
            ax.xaxis.set_major_formatter(yearsFmt)
            # ax.xaxis.set_minor_locator(months)
            days = mdates.DayLocator()
            ax.xaxis.set_minor_locator(days)
            fig.autofmt_xdate()
            title = "{0} Seasonality".format(ticker)

            if plot_vs:
                plot_vs_start = "{0}-01-01".format(
                    datetime.datetime.now().year)
                plot_vs_end = datetime.datetime.strftime(
                    datetime.datetime.now(), "%Y-%m-%d")
                plot_vs_df = context.data_provider.get_data([plot_vs],
                                                            plot_vs_start,
                                                            plot_vs_end)
                if len(plot_vs_df) == 0:
                    click.echo("No dataframe for {}. Exiting.".format(plot_vs))
                    return
                plot_vs_df_close = plot_vs_df[0]['Close']

                # Reindex the plot_vs data to the seasonality_data datetimes
                new_index = seasonlity_data.index[0:len(plot_vs_df_close)]
                col = "{} {} to {}".format(plot_vs, plot_vs_start,
                                           plot_vs_df_close.index[-1].date())
                df = pd.DataFrame(plot_vs_df_close.values,
                                  index=new_index,
                                  columns=[col])
                df = seasonality.normalize(df).apply(seasonality.rebase)

                ax.plot(df, label=col)

                minorLocator = MultipleLocator(1)
                ax.xaxis.set_minor_locator(minorLocator)
                title = "{0} Seasonality vs {1}".format(ticker, plot_vs)

        elif plot_label == 'day':
            # days = range(1,len(plot_data)-1)
            # plot_data_days = pd.DataFrame({ticker:plot_data[ticker].values}).reindex(days)

            seasonality_data_days = seasonality.trading_day_reindex(
                seasonlity_data, ticker, ticker)
            seasonality_data_days = seasonality.normalize(
                seasonality_data_days).apply(seasonality.rebase_days)
            ax.plot(seasonality_data_days,
                    label="{0} {1}-{2}".format(ticker, df.index[0].year,
                                               df.index[-1].year))

            minorLocator = MultipleLocator(1)
            ax.xaxis.set_minor_locator(minorLocator)
            ax.set_xlabel("trading day")
            title = "{0} Seasonality".format(ticker)

            if plot_vs:
                plot_vs_start = "{0}-01-01".format(
                    datetime.datetime.now().year)
                plot_vs_end = datetime.datetime.strftime(
                    datetime.datetime.now(), "%Y-%m-%d")
                plot_vs_df = context.data_provider.get_data([plot_vs],
                                                            plot_vs_start,
                                                            plot_vs_end)
                if len(plot_vs_df) == 0:
                    click.echo("No dataframe for {}. Exiting.".format(plot_vs))
                    return

                plot_vs_df = plot_vs_df[0]
                df = seasonality.trading_day_reindex(plot_vs_df, ticker,
                                                     'Close')
                df = seasonality.normalize(df).apply(seasonality.rebase_days)

                col = "{} {} to {}".format(plot_vs, plot_vs_start,
                                           plot_vs_df.index[-1].date())
                ax.plot(df, label=col)
                title = "{0} Seasonality vs {1}".format(ticker, plot_vs)

        legend = ax.legend(loc='upper left', shadow=False, fontsize=12)
        plt.title(title)
        plt.show()
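
Both plotting branches call seasonality.normalize(...).apply(seasonality.rebase) so every series starts from the same level before being drawn. A hypothetical sketch of what such a rebase helper could look like; the real seasonality module may differ:

def rebase(series):
    # Hypothetical sketch: scale a series so its first observation equals 1.0.
    return series / series.iloc[0]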