Example #1
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.safe_load(f))
    cache_dir = utils.get_cache_dir(config)
    os.makedirs(cache_dir, exist_ok=True)
    shutil.copyfile(
        os.path.expanduser(os.path.expandvars(config.get('cache',
                                                         'category'))),
        os.path.join(cache_dir, 'category'))
    category = utils.get_category(config)
    category_index = {name: i for i, name in enumerate(category)}
    datasets = config.get('cache', 'datasets').split()
    for phase in args.phase:
        path = os.path.join(cache_dir, phase) + '.pkl'
        logging.info('save cache file: ' + path)
        data = []
        for dataset in datasets:
            logging.info('load %s dataset' % dataset)
            module, func = dataset.rsplit('.', 1)
            module = importlib.import_module(module)
            func = getattr(module, func)
            data += func(config, path, category_index)
        if config.getboolean('cache', 'shuffle'):
            random.shuffle(data)
        with open(path, 'wb') as f:
            pickle.dump(data, f)
    logging.info('%s data are saved into %s' % (str(args.phase), cache_dir))
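Each entry in the cache.datasets option is a dotted path ("package.module.function"); the loop above splits it with rsplit('.', 1), imports the module, and calls the function as func(config, path, category_index), expecting a list of samples to extend the cache with. A minimal sketch of a loader that fits this contract (the module name, the cache.root option, and the CSV layout are assumptions for illustration, not part of the example):

import csv
import os

def load_csv(config, path, category_index):
    # `path` is the cache file being written by the caller; it is not needed
    # to enumerate the samples, so it is ignored here.
    data = []
    root = config.get('cache', 'root')  # hypothetical option pointing at the dataset
    with open(os.path.join(root, 'annotations.csv')) as f:
        for row in csv.DictReader(f):
            name = row['category']
            if name not in category_index:
                continue  # skip categories outside the configured list
            data.append({
                'path': os.path.join(root, row['filename']),
                'label': category_index[name],
            })
    return data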
Example #2
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.safe_load(f))
    cache_dir = utils.get_cache_dir(config)
    os.makedirs(cache_dir, exist_ok=True)
    mappers, _ = utils.get_dataset_mappers(config)
    for phase in args.phase:
        path = os.path.join(cache_dir, phase) + '.pkl'
        logging.info('save cache file: ' + path)
        data = []
        for dataset in mappers:
            logging.info('load %s dataset' % dataset)
            module, func = dataset.rsplit('.', 1)
            module = importlib.import_module(module)
            func = getattr(module, func)
            data += func(config, path, mappers[dataset])
        if config.getboolean('cache', 'shuffle'):
            random.shuffle(data)
        with open(path, 'wb') as f:
            pickle.dump(data, f)
    logging.info('%s data are saved into %s' % (str(args.phase), cache_dir))
Example #3
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.safe_load(f))
    cache_dir = utils.get_cache_dir(config)
    os.makedirs(cache_dir, exist_ok=True)
    shutil.copyfile(os.path.expanduser(os.path.expandvars(config.get('cache', 'category'))), os.path.join(cache_dir, 'category'))
    category = utils.get_category(config)
    category_index = {name: i for i, name in enumerate(category)}
    datasets = config.get('cache', 'datasets').split()
    for phase in args.phase:
        path = os.path.join(cache_dir, phase) + '.pkl'
        logging.info('save cache file: ' + path)
        data = []
        for dataset in datasets:
            logging.info('load %s dataset' % dataset)
            module, func = dataset.rsplit('.', 1)
            module = importlib.import_module(module)
            func = getattr(module, func)
            data += func(config, path, category_index)
        if config.getboolean('cache', 'shuffle'):
            random.shuffle(data)
        with open(path, 'wb') as f:
            pickle.dump(data, f)
    logging.info('%s data are saved into %s' % (str(args.phase), cache_dir))
Example #4
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.safe_load(f))
    cache_dir = utils.get_cache_dir(config)
    model_dir = utils.get_model_dir(config)
    category = utils.get_category(
        config, cache_dir if os.path.exists(cache_dir) else None)
    anchors = utils.get_anchors(config)
    anchors = torch.from_numpy(anchors).contiguous()
    path, step, epoch = utils.train.load_model(model_dir)
    state_dict = torch.load(path, map_location=lambda storage, loc: storage)
    dnn = utils.parse_attr(config.get('model', 'dnn'))(model.ConfigChannels(
        config, state_dict), anchors, len(category))
    dnn.load_state_dict(state_dict)
    height, width = tuple(map(int, config.get('image', 'size').split()))
    resize = transform.parse_transform(config,
                                       config.get('transform', 'resize_test'))
    transform_image = transform.get_transform(
        config,
        config.get('transform', 'image_test').split())
    transform_tensor = transform.get_transform(
        config,
        config.get('transform', 'tensor').split())
    # load image
    image_bgr = cv2.imread('image.jpg')
    image_resized = resize(image_bgr, height, width)
    image = transform_image(image_resized)
    tensor = transform_tensor(image).unsqueeze(0)
    # Checksum
    for key, var in dnn.state_dict().items():
        a = var.cpu().numpy()
        print('\t'.join(
            map(str, [
                key, a.shape,
                utils.abs_mean(a),
                hashlib.md5(a.tobytes()).hexdigest()
            ])))
    with torch.no_grad():
        output = dnn(tensor)
    for key, a in [
        ('image_bgr', image_bgr),
        ('image_resized', image_resized),
        ('tensor', tensor.cpu().numpy()),
        ('output', output.cpu().numpy()),
    ]:
        print('\t'.join(
            map(str, [
                key, a.shape,
                utils.abs_mean(a),
                hashlib.md5(a.tobytes()).hexdigest()
            ])))
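The two loops at the end print, for every weight tensor and every intermediate array, its shape, mean absolute value, and an MD5 digest of the raw bytes, which makes it easy to diff two runs (or two ports of the same model) for bit-identical state. The same report as a standalone helper (abs_mean is reimplemented here as np.abs(a).mean(), which is presumably what utils.abs_mean computes):

import hashlib

import numpy as np

def checksum(key, a):
    # Print name, shape, mean |value| and an MD5 of the raw bytes, mirroring
    # the per-tensor report in the example above.
    a = np.ascontiguousarray(a)
    print('\t'.join(map(str, [
        key, a.shape, np.abs(a).mean(),
        hashlib.md5(a.tobytes()).hexdigest(),
    ])))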
Example #5
def main():
    config = configparser.ConfigParser()
    config.read('config.ini')
    cache_dir = utils.get_cache_dir(config)
    os.makedirs(cache_dir, exist_ok=True)
    for phase in ['train', 'val', 'test']:
        path = os.path.join(cache_dir, phase) + '.pkl'
        logging.info('save cache file: ' + path)
        data = cache(config, path)
        #if config.getboolean('cache', 'shuffle'):
        #    random.shuffle(data)
        with open(path, 'wb') as f:
            pickle.dump(data, f)
    logging.info('data are saved into ' + cache_dir)
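Each cache file written above is just a pickled list of samples, so reading one back for a sanity check takes only a few lines; in this sketch the cache directory is a placeholder for whatever utils.get_cache_dir(config) returned:

import os
import pickle

cache_dir = 'cache'  # stand-in for utils.get_cache_dir(config)
with open(os.path.join(cache_dir, 'train.pkl'), 'rb') as f:
    data = pickle.load(f)
print(len(data), 'cached train samples')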
Example #6
    model = model_from_json(open(os.path.join(basepath, 'model.json')).read())
    model.load_weights(os.path.join(basepath, 'weights.h5'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model

def analyze(basepath, filename):
    global LOOKBACK, MAX_MIDPOINT_DELTA

    # Load the data & model
    df = load_data(basepath, filename)
    model = load_model(basepath)

    # Process data
    samples, labels = preprocess.process_data(df)

    # Test model and print results
    print "Running analysis..."
    predictions = model.predict_classes(samples, batch_size=32)
    print_results(labels, predictions)

if __name__ == '__main__':
    if len(sys.argv) != 2:
        print "usage: %s quotes.csv" % sys.argv[0]
        sys.exit(0)

    filename = os.path.basename(sys.argv[1]).replace(".csv", ".labels")
    analyze(utils.get_cache_dir(), filename)
Example #7
    samples, labels = preprocess.process_data(df)

    # How many samples are we going to leave out for the test set?
    nb_test = int(len(labels) * 0.2)
    split = len(labels) - nb_test

    # Prepare training and test sets
    X_train = np.array(samples[:split])
    y_train = labels[:split]
    X_test = np.array(samples[split + 1:])
    y_test = labels[split + 1:]
    print(len(X_train), 'train sequences')
    print(len(X_test), 'test sequences')

    # How many classes?
    num_classes = np.max(labels) + 1
    print(num_classes, 'classes')

    # Train Model
    train_and_save(X_train, X_test, y_train, y_test, num_classes, basepath)


if __name__ == '__main__':
    if len(sys.argv) != 2:
        print "usage: %s quotes.csv" % sys.argv[0]
        sys.exit(0)

    features_file = os.path.basename(sys.argv[1]).replace(".csv", ".features")
    labels_file = os.path.basename(sys.argv[1]).replace(".csv", ".labels")
    learn(utils.get_cache_dir(), features_file, labels_file)
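train_and_save itself is not shown in the excerpt; the sketch below infers its shape from the surrounding code: the signature comes from the call above, the model.json/weights.h5 layout and the categorical_crossentropy/adam settings come from the load_model counterpart, and the labels are one-hot encoded to match that loss. The single LSTM layer and the fit parameters are assumptions for illustration, not the original architecture:

import os

from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.utils import to_categorical

def train_and_save(X_train, X_test, y_train, y_test, num_classes, basepath):
    # Assumes samples are fixed-length sequences (LOOKBACK steps x features).
    model = Sequential()
    model.add(LSTM(64, input_shape=X_train.shape[1:]))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    model.fit(X_train, to_categorical(y_train, num_classes),
              validation_data=(X_test, to_categorical(y_test, num_classes)),
              batch_size=32, epochs=10)
    # Persist in the layout load_model expects.
    with open(os.path.join(basepath, 'model.json'), 'w') as f:
        f.write(model.to_json())
    model.save_weights(os.path.join(basepath, 'weights.h5'))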
Example #8
    model = model_from_json(open(os.path.join(basepath, 'model.json')).read())
    model.load_weights(os.path.join(basepath, 'weights.h5'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model

def analyze(basepath, filename):
    global LOOKBACK, MAX_MIDPOINT_DELTA

    # Load the data & model
    df = load_data(basepath, filename)
    model = load_model(basepath)

    # Process data
    samples, labels = preprocess.process_data(df)

    # Test model and print results
    print "Running analysis..."
    predictions = model.predict_classes(samples, batch_size=32)
    print_results(labels, predictions)

if __name__ == '__main__':
    if len(sys.argv) != 2:
        print "usage: %s quotes.csv" % sys.argv[0]
        sys.exit(0)

    filename = os.path.basename(sys.argv[1]).replace(".csv", ".labels")
    analyze(utils.get_cache_dir(), filename)
Example #9
    computed = pd.DataFrame.from_dict(predictions, orient='columns')
    utils.print_distribution_graph(computed['label'],
                                   'Distribution of Label Buckets')
    return computed


if __name__ == '__main__':
    if len(sys.argv) != 2:
        print "usage: %s quotes.csv" % sys.argv[0]
        sys.exit(0)
    try:
        # Load file from ./cache directory
        filename = os.path.basename(sys.argv[1]).replace(".csv", ".features")
        gc.disable()
        packed = pickle.load(
            open(os.path.join(utils.get_cache_dir(), filename), 'rb'))
        df = pd.read_msgpack(packed['data'])
        gc.enable()
        if packed['format_version'] != FILE_FORMAT_VERSION:
            print("Encountered unexpected cache file format version %s" %
                  str(packed['format_version']))
            sys.exit(1)
    except IOError:
        print('Cannot open features cache file "%s"' % filename)
        sys.exit(1)

    # Calculate labels
    labels = calculate(df)

    # Pack data and save to the cache directory
    gc.disable()
Example #10
    # Process features
    samples, labels = preprocess.process_data(df)

    # How many samples are we going to leave out for the test set?
    nb_test = int(len(labels) * 0.2)
    split = len(labels) - nb_test

    # Prepare training and test sets
    X_train = np.array(samples[:split])
    y_train = labels[:split]
    X_test = np.array(samples[split+1:])
    y_test = labels[split+1:]
    print(len(X_train), 'train sequences')
    print(len(X_test), 'test sequences')

    # How many classes?
    num_classes = np.max(labels)+1
    print(num_classes, 'classes')

    # Train Model
    train_and_save(X_train, X_test, y_train, y_test, num_classes, basepath)

if __name__ == '__main__':
    if len(sys.argv) != 2:
        print "usage: %s quotes.csv" % sys.argv[0]
        sys.exit(0)

    features_file = os.path.basename(sys.argv[1]).replace(".csv", ".features")
    labels_file = os.path.basename(sys.argv[1]).replace(".csv", ".labels")
    learn(utils.get_cache_dir(), features_file, labels_file)
    try:
        reader = open(sys.argv[1], 'r')
        quotebook = researchBook.ResearchBook()
        for line in reader:
            if line[0] == 'B':
                last_tick = quotebook.bid(line.rstrip())
            elif line[0] == 'A':
                last_tick = quotebook.ask(line.rstrip())
            else:
                last_tick = quotebook.trade(line.rstrip())
            record(quotebook, last_tick)
        reader.close()
    except IOError:
        print('Cannot open input file "%s"' % sys.argv[1])
        sys.exit(1)

    # Run computations
    df = compute()

    # Pack data and save to the cache directory
    gc.disable()
    packed = {
        'data' : df.to_msgpack(compress='blosc'),
        'format_version' : FILE_FORMAT_VERSION
        }

    filename = os.path.basename(sys.argv[1]).replace(".csv",".features")
    with open(os.path.join(utils.get_cache_dir(), filename), "wb") as outfile:
        pickle.dump(packed, outfile, 2)
    gc.enable()
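The cache written just above is a pickled dict carrying a format_version tag alongside the DataFrame serialized with to_msgpack, and the reader checks that tag before trusting the payload. to_msgpack/read_msgpack were removed in pandas 1.0, so the round-trip is sketched below with the DataFrame stored directly in the pickle instead (a stand-in for the original msgpack payload, not its format):

import pickle

FILE_FORMAT_VERSION = 1  # placeholder; the real constant lives in the script

def save_cache(df, path):
    packed = {'data': df, 'format_version': FILE_FORMAT_VERSION}
    with open(path, 'wb') as f:
        pickle.dump(packed, f, 2)

def load_cache(path):
    with open(path, 'rb') as f:
        packed = pickle.load(f)
    if packed['format_version'] != FILE_FORMAT_VERSION:
        raise ValueError('unexpected cache format version %s'
                         % packed['format_version'])
    return packed['data']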
        predictions['label'].append(bucket_price_delta(pt))

    # Print distribution of classes
    computed = pd.DataFrame.from_dict(predictions, orient='columns')
    utils.print_distribution_graph(computed['label'], 'Distribution of Label Buckets')
    return computed

if __name__ == '__main__':
    if len(sys.argv) != 2:
        print "usage: %s quotes.csv" % sys.argv[0]
        sys.exit(0)
    try:
        # Load file from ./cache directory
        filename = os.path.basename(sys.argv[1]).replace(".csv",".features")
        gc.disable()
        packed = pickle.load(open(os.path.join(utils.get_cache_dir(), filename), 'rb'))
        df = pd.read_msgpack(packed['data'])
        gc.enable()
        if packed['format_version'] != FILE_FORMAT_VERSION:
            print("Encountered unexpected cache file format version %s" %
                  str(packed['format_version']))
            sys.exit(1)
    except IOError:
        print('Cannot open features cache file "%s"' % filename)
        sys.exit(1)

    # Calculate labels
    labels = calculate(df)

    # Pack data and save to the cache directory
    gc.disable()
    packed = {