def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.safe_load(f))
    cache_dir = utils.get_cache_dir(config)
    os.makedirs(cache_dir, exist_ok=True)
    shutil.copyfile(
        os.path.expanduser(os.path.expandvars(config.get('cache', 'category'))),
        os.path.join(cache_dir, 'category'))
    category = utils.get_category(config)
    category_index = {name: i for i, name in enumerate(category)}
    datasets = config.get('cache', 'datasets').split()
    for phase in args.phase:
        path = os.path.join(cache_dir, phase) + '.pkl'
        logging.info('save cache file: ' + path)
        data = []
        for dataset in datasets:
            logging.info('load %s dataset' % dataset)
            # each dataset entry is a dotted path to a loader function,
            # imported at runtime
            module, func = dataset.rsplit('.', 1)
            module = importlib.import_module(module)
            func = getattr(module, func)
            data += func(config, path, category_index)
        if config.getboolean('cache', 'shuffle'):
            random.shuffle(data)
        with open(path, 'wb') as f:
            pickle.dump(data, f)
    logging.info('%s data are saved into %s' % (str(args.phase), cache_dir))
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.safe_load(f))
    cache_dir = utils.get_cache_dir(config)
    os.makedirs(cache_dir, exist_ok=True)
    mappers, _ = utils.get_dataset_mappers(config)
    for phase in args.phase:
        path = os.path.join(cache_dir, phase) + '.pkl'
        logging.info('save cache file: ' + path)
        data = []
        for dataset in mappers:
            logging.info('load %s dataset' % dataset)
            module, func = dataset.rsplit('.', 1)
            module = importlib.import_module(module)
            func = getattr(module, func)
            data += func(config, path, mappers[dataset])
        if config.getboolean('cache', 'shuffle'):
            random.shuffle(data)
        with open(path, 'wb') as f:
            pickle.dump(data, f)
    logging.info('%s data are saved into %s' % (str(args.phase), cache_dir))
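# The two cache variants above import their dataset loaders dynamically: each
# whitespace-separated entry in the 'datasets' option (or each mapper key) is
# a dotted path, split with rsplit('.', 1) into a module and a function name.
# A minimal sketch of a conforming loader follows. The section name 'mydata',
# the 'root' option, and the record layout are assumptions, not part of the
# original code; only the (config, path, mapper) signature and the
# list-of-records return value are implied by the caller.
import os


def load(config, path, mapper):
    # referenced from the config as e.g. "datasets = mydata.load"
    root = os.path.expanduser(os.path.expandvars(config.get('mydata', 'root')))
    data = []
    for filename in sorted(os.listdir(root)):
        if not filename.endswith('.jpg'):
            continue
        # one record per image; class names would be translated to integer
        # indices through mapper before being pickled into the cache
        data.append(dict(path=os.path.join(root, filename)))
    return data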
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.safe_load(f))
    cache_dir = utils.get_cache_dir(config)
    model_dir = utils.get_model_dir(config)
    category = utils.get_category(config, cache_dir if os.path.exists(cache_dir) else None)
    anchors = utils.get_anchors(config)
    anchors = torch.from_numpy(anchors).contiguous()
    path, step, epoch = utils.train.load_model(model_dir)
    state_dict = torch.load(path, map_location=lambda storage, loc: storage)
    dnn = utils.parse_attr(config.get('model', 'dnn'))(
        model.ConfigChannels(config, state_dict), anchors, len(category))
    dnn.load_state_dict(state_dict)
    height, width = tuple(map(int, config.get('image', 'size').split()))
    resize = transform.parse_transform(config, config.get('transform', 'resize_test'))
    transform_image = transform.get_transform(config, config.get('transform', 'image_test').split())
    transform_tensor = transform.get_transform(config, config.get('transform', 'tensor').split())
    # load image
    image_bgr = cv2.imread('image.jpg')
    image_resized = resize(image_bgr, height, width)
    image = transform_image(image_resized)
    tensor = transform_tensor(image).unsqueeze(0)
    # checksum every model parameter
    for key, var in dnn.state_dict().items():
        a = var.cpu().numpy()
        print('\t'.join(map(str, [key, a.shape, utils.abs_mean(a), hashlib.md5(a.tobytes()).hexdigest()])))
    output = dnn(torch.autograd.Variable(tensor, volatile=True)).data
    # checksum each stage of the input/output pipeline
    for key, a in [
            ('image_bgr', image_bgr),
            ('image_resized', image_resized),
            ('tensor', tensor.cpu().numpy()),
            ('output', output.cpu().numpy()),
    ]:
        print('\t'.join(map(str, [key, a.shape, utils.abs_mean(a), hashlib.md5(a.tobytes()).hexdigest()])))
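# utils.abs_mean is not shown in this file; given that it is printed next to
# an MD5 digest as a reproducibility fingerprint, it is presumably a cheap
# scalar summary. A plausible stand-in (an assumption, not the author's
# implementation):
import numpy as np


def abs_mean(a):
    # mean of absolute values: invariant to sign flips, sensitive to scale
    return float(np.abs(a).mean())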
def main():
    config = configparser.ConfigParser()
    config.read('config.ini')
    cache_dir = utils.get_cache_dir(config)
    os.makedirs(cache_dir, exist_ok=True)
    for phase in ['train', 'val', 'test']:
        path = os.path.join(cache_dir, phase) + '.pkl'
        logging.info('save cache file: ' + path)
        data = cache(config, path)
        #if config.getboolean('cache', 'shuffle'):
        #    random.shuffle(data)
        with open(path, 'wb') as f:
            pickle.dump(data, f)
    logging.info('data are saved into ' + cache_dir)
def load_model(basepath):
    model = model_from_json(open(os.path.join(basepath, 'model.json')).read())
    model.load_weights(os.path.join(basepath, 'weights.h5'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model


def analyze(basepath, filename):
    global LOOKBACK, MAX_MIDPOINT_DELTA

    # Load the data & model
    df = load_data(basepath, filename)
    model = load_model(basepath)

    # Process data
    samples, labels = preprocess.process_data(df)

    # Test model and print results
    print "Running analysis..."
    predictions = model.predict_classes(samples, batch_size=32)
    print_results(labels, predictions)


if __name__ == '__main__':
    if len(sys.argv) != 2:
        print "usage: %s quotes.csv" % sys.argv[0]
        sys.exit(0)
    filename = os.path.basename(sys.argv[1]).replace(".csv", ".labels")
    analyze(utils.get_cache_dir(), filename)
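# print_results() is referenced above but not shown. A plausible stand-in
# using scikit-learn (an assumption -- the original may have printed a custom
# table of per-bucket accuracy):
from sklearn.metrics import classification_report, confusion_matrix


def print_results(labels, predictions):
    # confusion matrix plus per-class precision/recall for the label buckets
    print confusion_matrix(labels, predictions)
    print classification_report(labels, predictions)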
    # ... inside learn(basepath, features_file, labels_file), after the
    # cached features/labels have been loaded into df:
    samples, labels = preprocess.process_data(df)

    # How many samples are we going to leave out for the test set?
    nb_test = int(len(labels) * 0.2)
    split = len(labels) - nb_test

    # Prepare training and test sets
    # (test slice starts at split, not split + 1, so no sample is dropped)
    X_train = np.array(samples[:split])
    y_train = labels[:split]
    X_test = np.array(samples[split:])
    y_test = labels[split:]
    print len(X_train), 'train sequences'
    print len(X_test), 'test sequences'

    # How many classes?
    num_classes = np.max(labels) + 1
    print num_classes, 'classes'

    # Train Model
    train_and_save(X_train, X_test, y_train, y_test, num_classes, basepath)


if __name__ == '__main__':
    if len(sys.argv) != 2:
        print "usage: %s quotes.csv" % sys.argv[0]
        sys.exit(0)
    features_file = os.path.basename(sys.argv[1]).replace(".csv", ".features")
    labels_file = os.path.basename(sys.argv[1]).replace(".csv", ".labels")
    learn(utils.get_cache_dir(), features_file, labels_file)
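# train_and_save() is called above but not shown. A minimal sketch of what it
# might look like, matching the model.json/weights.h5 pair that load_model()
# reads back. The architecture below is an assumption (the LOOKBACK global
# hints that the real model consumed sequence windows, likely with a
# recurrent layer); only the output file names are taken from the code.
import os

from keras.models import Sequential
from keras.layers import Dense
from keras.utils import np_utils


def train_and_save(X_train, X_test, y_train, y_test, num_classes, basepath):
    # one-hot encode the integer bucket labels
    Y_train = np_utils.to_categorical(y_train, num_classes)
    Y_test = np_utils.to_categorical(y_test, num_classes)

    model = Sequential()
    model.add(Dense(128, activation='relu', input_shape=X_train.shape[1:]))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    model.fit(X_train, Y_train, validation_data=(X_test, Y_test))
    print 'test loss/accuracy:', model.evaluate(X_test, Y_test, verbose=0)

    # persist architecture and weights separately, as load_model() expects
    with open(os.path.join(basepath, 'model.json'), 'w') as f:
        f.write(model.to_json())
    model.save_weights(os.path.join(basepath, 'weights.h5'))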
try:
    reader = open(sys.argv[1], 'r')
    quotebook = researchBook.ResearchBook()
    # replay the tape: bids, asks, and trades update the book in order
    for line in reader:
        if line[0] == 'B':
            last_tick = quotebook.bid(line.rstrip())
        elif line[0] == 'A':
            last_tick = quotebook.ask(line.rstrip())
        else:
            last_tick = quotebook.trade(line.rstrip())
        record(quotebook, last_tick)
    reader.close()
except IOError:
    print 'Cannot open input file "%s"' % sys.argv[1]
    sys.exit(1)

# Run computations
df = compute()

# Pack data and save to the cache directory
gc.disable()
packed = {
    'data': df.to_msgpack(compress='blosc'),
    'format_version': FILE_FORMAT_VERSION
}
filename = os.path.basename(sys.argv[1]).replace(".csv", ".features")
with open(os.path.join(utils.get_cache_dir(), filename), "wb") as outfile:
    pickle.dump(packed, outfile, 2)
gc.enable()
# (tail of the label-bucketing loop inside calculate(df))
        predictions['label'].append(bucket_price_delta(pt))

    # Print distribution of classes
    computed = pd.DataFrame.from_dict(predictions, orient='columns')
    utils.print_distribution_graph(computed['label'], 'Distribution of Label Buckets')
    return computed


if __name__ == '__main__':
    if len(sys.argv) != 2:
        print "usage: %s quotes.csv" % sys.argv[0]
        sys.exit(0)

    try:
        # Load file from ./cache directory
        filename = os.path.basename(sys.argv[1]).replace(".csv", ".features")
        gc.disable()
        packed = pickle.load(open(os.path.join(utils.get_cache_dir(), filename), 'rb'))
        df = pd.read_msgpack(packed['data'])
        gc.enable()
        if packed['format_version'] != FILE_FORMAT_VERSION:
            print "Encountered unexpected cache file format version %s" % str(packed['format_version'])
            sys.exit(1)
    except IOError:
        print 'Cannot open features cache file "%s"' % filename
        sys.exit(1)

    # Calculate labels
    labels = calculate(df)

    # Pack data and save to the cache directory
    gc.disable()
    packed = {