Example #1
 def run(self, source=None):
     """
     starts the program's main run-loop
     """
     self.data = DataHandler(delegate=self)
     if not source:
         while 1:
             try:
                 if not self.data:
                     self.data = DataHandler()
                 if not self.stats:
                     self.stats = AnagramStats()
                 if not self.stream_handler:
                     self.stream_handler = StreamHandler()
                 logging.info('entering run loop')
                 self.start_stream()
             except KeyboardInterrupt:
                 break
             except NeedsSave:
                 print('\nclosing stream for scheduled maintenance')
                 # todo: this is where we'd handle pruning etc
             finally:
                 self.stream_handler.close()
                 self.stream_handler = None
                 self.data.finish()
                 self.data = None
                 self.stats.close()
                 self.stats = None                 
     else:
         # means we're running from local data
         self.run_with_data(source)
Example #2
 def train(self):
     datah = DataHandler()
     train_data = datah.getTrainSplit()
     print(type(train_data))
     print(np.shape(train_data[0]))
     print(np.shape(train_data[1]))
     print(train_data[1])
     # test_data=datah.getTestSplit()
     # validation_data=datah.getValidationSplit()
     saved = ModelCheckpoint(
         "Weights/weights.{epoch:02d}-{val_loss:.2f}.hdf5",
         monitor='val_loss',
         verbose=0,
         save_best_only=False,
         save_weights_only=False,
         mode='auto',
         period=1)
     self.model.fit(np.array(train_data[0]),
                    train_data[1],
                    initial_epoch=self.start_epoch,
                    validation_split=0.8,
                    epochs=10000,
                    batch_size=500,
                    verbose=1,
                    callbacks=[saved])
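
A quick aside on the ModelCheckpoint filepath above: it is a format template that Keras fills in with the epoch number and the logged val_loss at the end of every epoch. A minimal illustration (the epoch and loss values here are made up):

    print("Weights/weights.{epoch:02d}-{val_loss:.2f}.hdf5".format(epoch=3, val_loss=0.4271))
    # -> Weights/weights.03-0.43.hdf5
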
Example #3
    def __init__(self, strategy, portfolio, analyser, **kwargs):
        self.strategy = strategy
        self.portfolio = portfolio
        self.analyser = [analyser] if type(analyser) != list else analyser
        self.backtest_modules = [self, self.strategy, self.portfolio]
        self.backtest_modules.extend(self.analyser)

        self.symbols = None
        self.qcodes = None
        self.date_start = None
        self.date_end = None
        self.frequency = None
        self.datas = None
        self.trade_time = None
        self.benchmark = None
        self.benchmark_qcode = None

        for module in self.backtest_modules:
            module.__dict__.update(kwargs)
        #self.__dict__.update(kwargs)

        self.validate_input()

        self.data_handler = DataHandler(self.symbols, self.qcodes, self.date_start, self.date_end, self.frequency, self.datas)
        self.benchmark_handler = DataHandler([self.benchmark], [self.benchmark_qcode], self.date_start, self.date_end, self.frequency, self.datas)
Example #4
 def __init__(self, indices, data_handler=None, data=None):
     self.indices = indices
     self.labels = None
     if data_handler:
         self.data_handler = data_handler
     else:
         self.data_handler = DataHandler(data)
Example #5
    def __init__(self, strategy, portfolio, analyser, **kwargs):
        self.strategy = strategy
        self.portfolio = portfolio
        self.analyser = [analyser] if type(analyser) != list else analyser
        self.backtest_modules = [self, self.strategy, self.portfolio]
        self.backtest_modules.extend(self.analyser)

        self.symbols = None
        self.qcodes = None
        self.date_start = None
        self.date_end = None
        self.frequency = None
        self.datas = None
        self.trade_time = None
        self.benchmark = None
        self.benchmark_qcode = None

        for module in self.backtest_modules:
            module.__dict__.update(kwargs)
        #self.__dict__.update(kwargs)

        self.validate_input()
        self.create_outdir()

        self.data_handler = DataHandler(self.symbols, self.qcodes,
                                        self.date_start, self.date_end,
                                        self.frequency, self.datas)
        self.benchmark_handler = DataHandler([self.benchmark],
                                             [self.benchmark_qcode],
                                             self.date_start, self.date_end,
                                             self.frequency, self.datas)
Example #6
def word_parser(final_callback=None):
    global model
    global datahandler
    model = Model()
    datahandler = DataHandler(noActualLoad=True)
    for i, word in enumerate(datahandler.getClasses()):
        current_word_prob[word] = 0
    capture_audio(callback_word, final_callback)
Example #7
    def __init__(self, debug_mode=False, timeout=10.0):
        self.data_handler = DataHandler()
        self.connector = SocketHandler(timeout)
        self.debug = debug_mode
        self.bot = Bot()

        self.DEFAULT_TICKS = 2*(1000//50)
        self.ticks = self.DEFAULT_TICKS
Example #8
def load_data(fname):
    data_handler = DataHandler(fname)
    try:
        data = data_handler.load_data()
        return data
    except (TypeError, IOError) as detail:
        print "Error: ", detail
        sys.exit(1)
Example #9
 def __init__(self, addr, port):
     self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
     self.addr = addr
     self.port = port
     self.bind = False
     self.connection_list = []
     self.threadList = []
     self.dataList = DataHandler()
Example #10
def main():
    
    data_handler = DataHandler(symbols, qcodes, date_start, date_end, frequency, datas)
    datas_symbols = data_handler.generate_data()
    
    #prices = datas_symbols['Close'].iloc[:,0]
    prices = datas_symbols[trade_time]

    plot_time_series(prices)
    plot_scatter(prices)
    plot_residuals(prices)
    print cadf(prices)
Example #11
    def setUp(self) -> None:
        # Start with a clean slate
        self.total_deaths_df = DataHandler(
        ).get_total_deaths_per_country_and_day(self.csv_df)

        with self.db.create_connection() as con:
            cursor = con.cursor()
            cursor.execute('DROP TABLE IF EXISTS ' + self.total_deaths_table +
                           ';')
            cursor.execute('DROP TABLE IF EXISTS ' +
                           self.death_change_python_table + ';')
            con.commit()
Example #12
def main():

    data_handler = DataHandler(symbols, qcodes, date_start, date_end,
                               frequency, datas)
    datas_symbols = data_handler.generate_data()

    #prices = datas_symbols['Close'].iloc[:,0]
    prices = datas_symbols[trade_time]

    plot_time_series(prices)
    plot_scatter(prices)
    plot_residuals(prices)
    print cadf(prices)
Example #13
 def __init__(self, indices, data_handler=None, data=None):
     self.indices = indices
     self.labels = None
     if data_handler:
         self.data_handler = data_handler
     else:
         self.data_handler = DataHandler(data)
Example #14
def main(exp, tag, seed):
    if exp == 'mnist':
        opts = configs.config_mnist
    elif exp == 'fashion':
        opts = configs.config_fashion
    elif exp == 'svhn':
        opts = configs.config_SVHN
    elif exp == 'cifar10':
        opts = configs.config_cifar10
    else:
        assert False, 'Unknown experiment configuration'

    opts['imbalance'] = FLAGS.imbalance
    opts['work_dir'] = data_dir
    opts['aug_rate'] = FLAGS.aug_rate
    if opts['verbose']:
        logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(message)s')
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
    utils.create_dir(opts['work_dir'])
    utils.create_dir(os.path.join(opts['work_dir'], 'checkpoints'))

    # Dumping all the configs to the text file
    with utils.o_gfile((opts['work_dir'], 'params.txt'), 'w') as text:
        text.write('Parameters:\n')
        for key in opts:
            text.write('%s : %s\n' % (key, opts[key]))

    # Loading the dataset
    data = DataHandler(opts, seed)
    assert data.num_points >= opts['batch_size'], 'Training set too small'

    model = CaLeG(opts, tag)
    model.train(data)
    del model
Example #15
def main():
    parser = argparse.ArgumentParser(description="Face detection demo")
    parser.add_argument("-v",
                        "--verbose",
                        action="count",
                        default=0,
                        help="Increase output verbosity (2 levels)")

    # set logging level
    set_logging_from_args(sys.argv, parser)
    args = parser.parse_args()

    # start servers
    StaticServer(settings.WEBSERVER_PORT).start()
    CommandSocketServer(settings.SOCKET_PORT).start()

    # instantiate data handler from db
    DataHandler()

    # instantiate face detection helper
    f = FaceDetection()

    try:
        while (True):
            sleep(settings.TIME_BETWEEN_SHOTS)
            f.detect_faces()
    except KeyboardInterrupt:
        os._exit(0)
    except:
        traceback.print_exc()
        # close all threads
        os._exit(1)
Example #16
File: run.py Project: knok/wae
def main():

    if FLAGS.exp == 'celebA':
        opts = configs.config_celebA
    elif FLAGS.exp == 'celebA_small':
        opts = configs.config_celebA_small
    elif FLAGS.exp == 'mnist':
        opts = configs.config_mnist
    elif FLAGS.exp == 'mnist_small':
        opts = configs.config_mnist_small
    elif FLAGS.exp == 'dsprites':
        opts = configs.config_dsprites
    elif FLAGS.exp == 'grassli':
        opts = configs.config_grassli
    elif FLAGS.exp == 'grassli_small':
        opts = configs.config_grassli_small
    elif FLAGS.exp == 'dir64':
        opts = configs.config_dir64
    else:
        assert False, 'Unknown experiment configuration'

    if FLAGS.zdim is not None:
        opts['zdim'] = FLAGS.zdim
    if FLAGS.lr is not None:
        opts['lr'] = FLAGS.lr
    if FLAGS.z_test is not None:
        opts['z_test'] = FLAGS.z_test
    if FLAGS.lambda_schedule is not None:
        opts['lambda_schedule'] = FLAGS.lambda_schedule
    if FLAGS.work_dir is not None:
        opts['work_dir'] = FLAGS.work_dir
    if FLAGS.wae_lambda is not None:
        opts['lambda'] = FLAGS.wae_lambda
    if FLAGS.enc_noise is not None:
        opts['e_noise'] = FLAGS.enc_noise
    if FLAGS.epoch_num is not None:
        opts['epoch_num'] = FLAGS.epoch_num

    if opts['verbose']:
        logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(message)s')
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
    utils.create_dir(opts['work_dir'])
    utils.create_dir(os.path.join(opts['work_dir'],
                     'checkpoints'))
    # Dumping all the configs to the text file
    with utils.o_gfile((opts['work_dir'], 'params.txt'), 'w') as text:
        text.write('Parameters:\n')
        for key in opts:
            text.write('%s : %s\n' % (key, opts[key]))

    # Loading the dataset

    data = DataHandler(opts)
    assert data.num_points >= opts['batch_size'], 'Training set too small'

    # Training WAE

    wae = WAE(opts)
    wae.train(data)
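
Several of the training scripts above share the same "dump all the configs to params.txt" step. A plain-Python sketch of that pattern, using open() instead of the project's utils.o_gfile wrapper and a made-up opts dict:

    import os

    opts = {'zdim': 8, 'lr': 1e-3, 'batch_size': 64, 'work_dir': './results/demo'}

    os.makedirs(os.path.join(opts['work_dir'], 'checkpoints'), exist_ok=True)
    with open(os.path.join(opts['work_dir'], 'params.txt'), 'w') as text:
        text.write('Parameters:\n')
        for key in opts:
            text.write('%s : %s\n' % (key, opts[key]))
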
Example #17
def createimgs(opts):
    net = fideval.restore_net(opts)

    n = 50
    k = 10
    m = 5

    NUM_POINTS = 10000
    BATCH_SIZE = 100

    data = DataHandler(opts)
    images = data.data[:n]

    enc_pics = net.sess.run(
        net.encoded,
        feed_dict={
            #net.sample_noise: np.random.normal(size=(5, opts['zdim'])),
            net.sample_points:
            images,
            net.is_training:
            False
        })

    #pos = net.sample_pz(n)
    #pos = pos[:,:2]

    #pca = PCA(n_components=2)
    #pos = pca.fit_transform(pos)

    pos = enc_pics

    tsne = TSNE(n_components=2)
    pos = tsne.fit_transform(pos)

    print(pos)

    zs = net.sample_pz(2 * k)
    zs = np.reshape(zs, (k, 2, -1))
    lows = zs[:, 0, :]
    highs = zs[:, 1, :]
    grid = interpolate(lows, highs)

    for img_index in range(NUM_POINTS // BATCH_SIZE):
        gen_pics = net.sess.run(
            net.decoded,
            feed_dict={
                #net.sample_noise: np.random.normal(size=(5, opts['zdim'])),
                net.sample_noise:
                grid,
                net.is_training:
                False
            })

    plotImages(gen_pics, m, k, 'gridpics')

    scatterpics(images, pos)
Example #18
def main():

    # Select dataset to use
    if FLAGS.dataset == 'dsprites':
        opts = configs.config_dsprites
    elif FLAGS.dataset == 'noisydsprites':
        opts = configs.config_noisydsprites
    elif FLAGS.dataset == 'screamdsprites':
        opts = configs.config_screamdsprites
    elif FLAGS.dataset == 'smallNORB':
        opts = configs.config_smallNORB
    elif FLAGS.dataset == '3dshapes':
        opts = configs.config_3dshapes
    elif FLAGS.dataset == '3Dchairs':
        opts = configs.config_3Dchairs
    elif FLAGS.dataset == 'celebA':
        opts = configs.config_celebA
    elif FLAGS.dataset == 'mnist':
        opts = configs.config_mnist
    else:
        assert False, 'Unknown dataset'

    # Set method param
    opts['data_dir'] = FLAGS.data_dir
    opts['fid'] = True
    opts['network'] = net_configs[FLAGS.net_archi]

    # Model set up
    opts['model'] = FLAGS.model
    if FLAGS.dataset == 'celebA':
        opts['zdim'] = 32
    elif FLAGS.dataset == '3Dchairs':
        opts['zdim'] = 16
    else:
        opts['zdim'] = 10

    # Create directories
    opts['out_dir'] = FLAGS.out_dir
    out_subdir = os.path.join(opts['out_dir'], opts['model'])
    opts['exp_dir'] = os.path.join(out_subdir, FLAGS.res_dir)
    if not tf.io.gfile.isdir(opts['exp_dir']):
        raise Exception("Experiment doesn't exist!")

    #Reset tf graph
    tf.reset_default_graph()

    # Loading the dataset
    data = DataHandler(opts)
    assert data.train_size >= opts['batch_size'], 'Training set too small'

    # init method
    run = Run(opts, data)

    # get fid
    run.fid_score(opts['exp_dir'], FLAGS.weights_file, FLAGS.compute_stats, FLAGS.fid_inputs)
Example #19
    def test_no_db_update_on_subsequent_daily_changes_calculations(self):

        # Calculate daily change with current data and insert them to deaths_change_python table
        self.db.create_deaths_change_python_table()
        daily_change = DataHandler().get_daily_change_of_deaths(
            self.total_deaths_df)
        changed_rows = self.db.insert_to_deaths_change_python_table(
            daily_change)

        new_daily_change = DataHandler().get_daily_change_of_deaths(
            self.total_deaths_df)

        self.assertTrue(daily_change.equals(new_daily_change))
        new_changed_rows = self.db.insert_to_deaths_change_python_table(
            new_daily_change)

        self.assertIs(
            new_changed_rows, 0,
            "There should not be any changes to deaths_change_python table in the second run"
        )
Example #20
def main():
    n_topics = 40
    offset = 0
    top_n = 20  #words per topic
    prefix = "t40_12gram_"
    path = "files/wordclouds/"
    ngram_range = (1, 2)

    dh = DataHandler(use_cache=True, ngram_range=ngram_range)
    tfidf, tfidf_vocab = dh.get_tfidf()
    tf, tf_vocab = dh.get_tf()

    # for n_topics in range(20, 27, 1):
    #     for offset in range (n_topics-2,n_topics+3,1):
    ch = ClusterHandler(
        n_topics=n_topics,
        top_n=top_n,  # words per topic
        soft_offset=offset,
        prefix=prefix,
        path=path,
    )

    #ch.calc_svd(matrix=tfidf, vocab=tfidf_vocab)
    cluster_assignments, topics = ch.calc_nmf(matrix=tfidf,
                                              vocab=tfidf_vocab,
                                              providers=dh.get_providers(),
                                              hardclustering=False)

    out.storeClustersToDB(cluster_assignments=cluster_assignments,
                          topics=topics,
                          source_uris=dh.get_uris(),
                          soft_clustering=True)
Example #21
File: db.py Project: wniroshan/covid19
    def _insert_deaths_data(self, new_data_df, table_name):
        """
        Inserts deaths data to COVID19 deaths data table, denoted by table_name. If the new data contain
        retrospectively modified rows, such rows are updated in the table

        :param new_data_df: Data frame with new data
        :param table_name: Name of the table to insert data
        :return: The number of updated rows
        """

        changed_rows = -1

        row_count = self.execute_query('SELECT COUNT(*) FROM ' + table_name + ';')[0][0]

        if row_count == 0:
            '''
            The table is empty, insert all the data in the data frame
            '''
            with self.create_connection() as con:
                new_data_df.to_sql(con=con, name=table_name, if_exists='append', index=False)

            changed_rows = len(new_data_df)

        else:
            '''
            If the table is not empty append only the new data rows because re-writing all the 
            data is too expensive
            '''
            # Read the current data from table as a data frame
            with self.create_connection() as con:
                curr_data = pd.read_sql_query('SELECT * FROM ' + table_name + ';', con=con)

            dh = DataHandler()
            # Filter changed or newly added country and date combinations
            modified_rows = dh.get_changed_rows(new_data_df, curr_data)
            # Update the database
            changed_rows = self.upsert_to_table(new_data_df.iloc[modified_rows, ], table_name)

        return changed_rows
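
The docstring above describes an insert-or-update strategy: bulk-insert when the table is empty, otherwise diff against the stored rows and write only what changed. A self-contained sketch of that idea with sqlite3 and pandas follows; the table schema, column names, and the row-diff via a left merge are illustrative assumptions (the real code delegates the diff to DataHandler.get_changed_rows and upserts by key):

    import sqlite3
    import pandas as pd

    def insert_or_update(con, new_df, table):
        row_count = con.execute('SELECT COUNT(*) FROM ' + table + ';').fetchone()[0]
        if row_count == 0:
            # Empty table: write everything.
            new_df.to_sql(table, con, if_exists='append', index=False)
            return len(new_df)
        # Non-empty table: keep only rows not already stored, then append them.
        current = pd.read_sql_query('SELECT * FROM ' + table + ';', con)
        merged = new_df.merge(current, how='left', indicator=True)
        changed = new_df[merged['_merge'] == 'left_only']
        changed.to_sql(table, con, if_exists='append', index=False)
        return len(changed)

    con = sqlite3.connect(':memory:')
    con.execute('CREATE TABLE deaths (country TEXT, date TEXT, deaths INTEGER);')
    df = pd.DataFrame({'country': ['X', 'Y'], 'date': ['2020-03-01', '2020-03-01'], 'deaths': [1, 2]})
    print(insert_or_update(con, df, 'deaths'))  # 2 rows written
    df.loc[1, 'deaths'] = 3
    print(insert_or_update(con, df, 'deaths'))  # 1 row written (only the changed one)
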
Example #22
def main():
    config = Config()
    parser = argparse.ArgumentParser()
    parser.add_argument('-td',
                        '--test-dataset',
                        help='Walk through dataset \
        and test while preprocessing',
                        action='store_true')
    parser.add_argument('-e', '--execute', help='Execute', action='store_true')
    parser.add_argument('-t',
                        '--train',
                        help='Train Model',
                        action='store_true')
    parser.add_argument('-wp',
                        '--word-parser',
                        help='Listen to the microphone and parse the word',
                        action='store_true')
    parser.add_argument('-p', '--predict', help='Predict Audiofile', nargs='+')

    args = parser.parse_args()
    if args.test_dataset:
        datahandler = DataHandler()
        print("Test Passed")
        return
    if args.execute:
        from event_handler import EventHandler
        eh = EventHandler()
        word_parser(eh)
    if args.train:
        model = Model()
        model.train()
    if args.predict:
        model = Model()
        datahandler = DataHandler(noActualLoad=True)
        result_prob = model.predict(args.predict, datahandler.getClasses())
        for fname, rp in zip(args.predict, result_prob):
            print("%s\t%s\twith Probabity %f" % (fname, rp[0], rp[1]))
    if args.word_parser:
        word_parser()
Example #23
def main():

    data_handler = DataHandler(symbols, qcodes, date_start, date_end,
                               frequency, datas)
    datas_symbols = data_handler.generate_data()

    if len(symbols) == 1:
        prices = datas_symbols[trade_time].iloc[:, 0]
    else:
        prices = datas_symbols[trade_time]

    #plot_time_series(prices)

    #___ Single asset ___#
    #print adf(prices)
    #print hurst(prices, True)

    #___ Multiple assets ___#
    #plot_scatter(prices)
    plot_residuals(prices)
    #print cadf(prices)
    print halflife(residuals(prices))
Example #24
def main():
    
    data_handler = DataHandler(symbols, qcodes, date_start, date_end, 
                               frequency, datas)
    datas_symbols = data_handler.generate_data()
    
    if len(symbols) == 1:
        prices = datas_symbols[trade_time].iloc[:,0]
    else:
        prices = datas_symbols[trade_time]

    #plot_time_series(prices)
    
    #___ Single asset ___#
    #print adf(prices)
    #print hurst(prices, True)

    #___ Multiple assets ___#
    #plot_scatter(prices)
    plot_residuals(prices)
    #print cadf(prices)
    print halflife(residuals(prices))
Example #25
    def __init__(self, parent = None, *args, **kwargs):
        # Initialize TkInter frame and define parent
        tk.Frame.__init__(self, parent )
        self.parent = parent

        #initialize a starting deque size and poll rate
        self.pollRate = POLL_RATE_ms
        self.dequeSize = 100

        # Create arduino device and datadeque as object parameters
        self.device = arduino()
        self.dataHandler = DataHandler(dequeLength=DATA_POINTS_PER_PLOT)

        # Create serial port frame, data handling frame, and plot frame
        self.sH = sH.SerialHandlerUI( parent = parent, device = self.device )
        self.dH = dH.DataHandlerUI( parent = parent, dataHandler = self.dataHandler )
        self.pH = pH.PlotHandlerUI( parent = parent, dataHandler = self.dataHandler )

        # place into UI
        self.sH.grid(row=0, column=0, columnspan=6)
        self.dH.grid(row=1, column=0, columnspan=6)
        self.pH.grid(row=2, column=0, columnspan=6)

        # create poll rate menu and deque size selector
        self.create_poll_rate_menu()

        #create deque size selector
        # currently disabled because larger deque size == longer loop evaluation, undesirable
        #self.create_deque_size_selector()

        # generate quit button
        tk.Button(master=self.parent, text='Quit', command=self._quit).grid(row=5, column=2, columnspan=2)

        # start updating that data
        self.update_frequency = POLL_RATE_ms
        self.update_data()
Example #26
    def test_db_insert_daily_change_of_deaths(self):
        self.db.create_deaths_change_python_table()
        daily_change = DataHandler().get_daily_change_of_deaths(
            self.total_deaths_df)

        changed_rows = self.db.insert_to_deaths_change_python_table(
            daily_change)

        rows_in_table = self.db.execute_query(
            "SELECT COUNT(*) FROM " + self.death_change_python_table)[0][0]

        self.assertEqual(
            rows_in_table, changed_rows,
            "Data rows in deaths_change_python table and the number of changed_rows must match"
        )
Example #27
    def __init__(self):
        plt.style.use('ggplot')
        size = 5000
        self.data = DataHandler(size, usePickle=False)

        #Training parameters
        self.epochs = 100
        self.batchSize = 32
        self.validationSplit = 0.1

        #Model parameters
        self.features = self.data.inputs.shape[1]
        self.styles = len(self.data.styles[0])
        self.drives = len(self.data.drivetrains[0])
        self.transmissions = len(self.data.speeds[0])

        self.activ = 'linear'

        #Compile parameters
        self.optimizer = 'nadam'
        self.loss = 'huber_loss'
        self.metrics = ['mean_absolute_error']

        self.makeModel()

        self.model.compile(optimizer=self.optimizer,
                           loss=self.loss,
                           metrics=self.metrics)

        self.results = self.model.fit(
            {
                'specs': self.data.inputs,
                'style': self.data.styles,
                'drive': self.data.drivetrains,
                'trans': self.data.speeds
            },
            self.data.targets,
            epochs=self.epochs,
            batch_size=self.batchSize,
            validation_split=self.validationSplit)

        self.graphTrainingResults()

        self.predicted = self.predictions()

        self.graphPredictions()

        self.model.save('model')
Example #28
    def detect_faces(self):
        logger.debug("Detect faces")
        # Read the image
        video_capture = cv2.VideoCapture(0)
        ret, image = video_capture.read()
        video_capture.release()

        # write the raw image to screenshot_path
        temp_file = "{}.new.png".format(self.screenshot_path)
        cv2.imwrite(temp_file, image)
        os.rename(temp_file, self.screenshot_path)

        # Detect faces in the image
        faces = self.faceCascade.detectMultiScale(
            image,
            scaleFactor=1.1,
            minNeighbors=5,
            minSize=(30, 30),
            flags=cv2.CASCADE_SCALE_IMAGE
        )

        # Draw a rectangle around the faces
        num_faces = len(faces)

        if num_faces > 0:
            logger.debug("{} faces detected".format(num_faces))
            # Draw a rectangle around the faces
            for (x, y, w, h) in faces:
                cv2.rectangle(image, (x, y), (x + w, y + h), (255, 36, 36), 5)

            temp_file = "{}.new.png".format(self.faces_path)
            cv2.imwrite(temp_file, image)
            os.rename(temp_file, self.faces_path)
            timestamp = time()

        # Introduce a bug on purpose in newer version

        # On older ubuntu core version, SNAP_VERSION is the sideloaded one, so we don't rely on that for now
        #if os.getenv("SNAP_VERSION", "0.1") != "0.1":
        #    num_faces = -10
        file_path = os.path.join(os.getenv("SNAP_APP_PATH"), "meta", "package.yaml")
        with suppress(IOError):
            with open(file_path, 'rt') as f:
                if yaml.load(f.read())["version"] != 0.1:
                    num_faces = -10

        DataHandler().add_one_facedetect_entry(int(time()), num_faces)
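
detect_faces above writes each image under a temporary name and then renames it over the destination, so anything reading screenshot_path never observes a half-written file. A minimal sketch of that write-then-rename idiom (the path and payload are placeholders):

    import os

    def atomic_write(path, payload):
        temp_file = "{}.new".format(path)
        with open(temp_file, 'wb') as f:
            f.write(payload)          # write the full contents under the temp name
        os.rename(temp_file, path)    # then swap it in; atomic on POSIX within one filesystem

    atomic_write("screenshot.png", b"\x89PNG fake payload")
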
Example #29
def main(tag, seed, dataset):
    opts = getattr(configs, 'config_%s' % dataset)
    opts['work_dir'] = './results/%s/' % tag

    if opts['verbose']:
        logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(message)s')
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
    utils.create_dir(opts['work_dir'])
    utils.create_dir(os.path.join(opts['work_dir'],
                                  'checkpoints'))

    with utils.o_gfile((opts['work_dir'], 'params.txt'), 'w') as text:
        text.write('Parameters:\n')
        for key in opts:
            text.write('%s : %s\n' % (key, opts[key]))

    data = DataHandler(opts, seed)
    model = DGC(opts, tag)
    model.train(data)
Example #30
    def __init__(self,
                 generator,
                 test_dir='./data/test/',
                 result_dir='./result_1/',
                 weight_dir='./weight/generator_epoch_999.pkl',
                 batch_size=32,
                 patch_size=64,
                 num_workers=8,
                 self_dir=None,
                 self_test=False,
                 cuda=True,
                 extensions=('.png', '.jpeg', '.jpg')):
        self.patch_size = patch_size
        self.result_dir = result_dir
        if not os.path.exists(self.result_dir):
            os.mkdir(self.result_dir)

        self.generator = generator
        self.weight_dir = weight_dir
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() and cuda else "cpu")
        self.generator.load_state_dict(
            torch.load(self.weight_dir, map_location=self.device))
        self.generator.eval()
        self.self_test = self_test
        if self.self_test:
            self.self_dir = self_dir
            self.extensions = extensions
            self.test_file = [
                x.path for x in os.scandir(self.self_dir)
                if x.name.endswith(self.extensions)
            ]
        else:
            self.num_workers = num_workers
            self.batch_size = batch_size
            self.test_dh = DataHandler(test_dir,
                                       patch_size=self.patch_size,
                                       augment=False)
            self.test_loader = Data.DataLoader(self.test_dh,
                                               batch_size=self.batch_size,
                                               num_workers=self.num_workers,
                                               shuffle=False)
Example #31
File: analogy.py Project: knok/wae
def main():
    if FLAGS.exp == 'dir64':
        opts = configs.config_dir64
    else:
        assert False, 'Unknown experiment configuration'

    if FLAGS.zdim is not None:
        opts['zdim'] = FLAGS.zdim

    if opts['verbose']:
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s - %(message)s')
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

    data = DataHandler(opts)
    wae = WAE(opts)
    wae.restore_checkpoint(FLAGS.checkpoint)

    batch_img = data.data[0:2]
    enc_vec = wae.sess.run(wae.encoded,
                           feed_dict={
                               wae.sample_points: batch_img,
                               wae.is_training: False
                           })
    vdiff = enc_vec[1] - enc_vec[0]
    vdiff = vdiff / 10
    gen_vec = np.zeros((10, vdiff.shape[0]), dtype=np.float32)
    for i in range(10):
        gen_vec[i, :] = enc_vec[0] + vdiff * i

    sample_gen = wae.sess.run(wae.decoded,
                              feed_dict={
                                  wae.sample_noise: gen_vec,
                                  wae.is_training: False
                              })
    img = np.hstack(sample_gen)
    img = (img + 1.0) / 2
    plt.imshow(img)
    plt.savefig('analogy.png')
Example #32
def main():

    opts = configs.config_mnist

    opts['mode'] = 'train'

    if opts['verbose']:
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s - %(message)s')
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
    utils.create_dir(opts['work_dir'])
    utils.create_dir(os.path.join(opts['work_dir'], 'checkpoints'))

    if opts['e_noise'] == 'gaussian' and opts['pz'] != 'normal':
        assert False, 'Gaussian encoders compatible only with Gaussian prior'
        return

    # Dumping all the configs to the text file
    with utils.o_gfile((opts['work_dir'], 'params.txt'), 'w') as text:
        text.write('Parameters:\n')
        for key in opts:
            text.write('%s : %s\n' % (key, opts[key]))

    # Loading the dataset
    data = DataHandler(opts)
    assert data.num_points >= opts['batch_size'], 'Training set too small'

    if opts['mode'] == 'train':

        # Creating WAE model
        wae = WAE(opts, data.num_points)

        # Training WAE
        wae.train(data)

    elif opts['mode'] == 'test':

        # Do something else
        improved_wae.improved_sampling(opts)
Example #33
def input_fn(mode, params, ID=None, path=None):
    if mode == 'train' or mode == 'val':
        gen = DataHandler(mode).generate_batches
    elif mode == 'test_sequence':
        gen = DataHandler('test').generate_sequence
    elif mode == 'train_id':
        gen = DataHandler('train', ID).generate_sequence_ID
    elif mode == 'val_id':
        gen = DataHandler('val', ID).generate_sequence_ID
    elif mode == 'test_id':
        gen = DataHandler('test', ID).generate_sequence_ID
    elif mode == 'matched_id':
        gen = DataHandler('matched', ID).generate_sequence_ID
    elif mode == 'test_batch':
        gen = DataHandler('test').generate_batches
    ds = tf.data.Dataset.from_generator(gen,
                                        output_types=(tf.float32, tf.int32))
    ds = ds.prefetch(buffer_size=params.buffer_size)
    # todo implement reading and indexing going on in generator as map and use map_and_batch
    return ds.make_one_shot_iterator().get_next()
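
input_fn above wraps the DataHandler generators into a tf.data pipeline. A toy sketch of the same generator-to-Dataset pattern; the generator below is a stand-in for DataHandler(mode).generate_batches, and its (float32, int32) shapes are made up:

    import numpy as np
    import tensorflow as tf

    def toy_generator():
        for _ in range(3):
            features = np.random.rand(4, 8).astype(np.float32)
            labels = np.random.randint(0, 2, size=4).astype(np.int32)
            yield features, labels

    ds = tf.data.Dataset.from_generator(toy_generator,
                                        output_types=(tf.float32, tf.int32))
    ds = ds.prefetch(buffer_size=2)

    # In TF 1.x (as in the snippet above) the pipeline is consumed via
    # ds.make_one_shot_iterator().get_next(); in TF 2.x it can be iterated directly:
    for features, labels in ds:
        print(features.shape, labels.shape)
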
Example #34
    def test_daily_change_calculation(self):
        dummy_data = testutils.get_dummy_data()
        d = DataHandler()
        deaths = d.get_total_deaths_per_country_and_day(dummy_data)
        expected_df = testutils.get_dummy_change_data()

        actual = d.get_daily_change_of_deaths(deaths)
        actual.reset_index(drop=True, inplace=True)

        self.assertTrue(expected_df.equals(actual),
                        "Function generated daily change in deaths must be indentical to the expected")

        deaths.loc[0, 'deaths'] = 1
        actual = d.get_daily_change_of_deaths(deaths)
        self.assertEqual(actual.loc[0, 'deaths_change'], 1, "The first day's change in deaths should be 1")

        deaths.loc[0, 'deaths'] = 4
        actual = d.get_daily_change_of_deaths(deaths)
        self.assertEqual(actual.loc[0, 'deaths_change'], 4, "The first day's change in deaths should be 4")
Example #35
class Anagramer(object):
    """
    Anagramer hunts for anagrams on twitter.
    """

    def __init__(self):
        self.twitter_handler = TwitterHandler()
        self.stream_handler = StreamHandler()
        self.stats = AnagramStats()
        self.data = None  # wait until we get run call to load data
        # self.time_to_save = self.set_save_time()

    def run(self, source=None):
        """
        starts the program's main run-loop
        """
        self.data = DataHandler(delegate=self)
        if not source:
            while 1:
                try:
                    if not self.data:
                        self.data = DataHandler()
                    if not self.stats:
                        self.stats = AnagramStats()
                    if not self.stream_handler:
                        self.stream_handler = StreamHandler()
                    logging.info('entering run loop')
                    self.start_stream()
                except KeyboardInterrupt:
                    break
                except NeedsSave:
                    print('\nclosing stream for scheduled maintenance')
                    # todo: this is where we'd handle pruning etc
                finally:
                    self.stream_handler.close()
                    self.stream_handler = None
                    self.data.finish()
                    self.data = None
                    self.stats.close()
                    self.stats = None                 
        else:
            # means we're running from local data
            self.run_with_data(source)

    def start_stream(self):
        """
        main run loop
        """
        self.stats.start_time = time.time()
        self.stream_handler.start()
        for tweet in self.stream_handler:
            self.update_console()
            self.process_input(tweet)

    def run_with_data(self, data):
        """
        uses a supplied data source instead of a twitter connection (debug)
        """
        self.stats.start_time = time.time()
        self.stream_handler.start(source=data)
        # for tweet in data:
        #     self.process_input(tweet)
        #     # time.sleep(0.0001)
        #     self.stats.tweets_seen += 1
        #     self.stats.passed_filter += 1
        #     self.update_console()

        logging.debug('hits %g matches %g' % (self.stats.possible_hits, self.stats.hits))
        self.data.finish()

    def process_input(self, hashed_tweet):
        self.stats.new_hash(hashed_tweet['hash'])
        self.data.process_tweet(hashed_tweet)

    def process_hit(self, tweet_one, tweet_two):
        """
        called by datahandler when it has found a match in need of review.
        """
        self.stats.possible_hits += 1
        self.stats.new_hit(tweet_one['hash'])
        if self.compare(tweet_one['text'], tweet_two['text']):
            hit = {
                "id": int(time.time()*1000),
                "status": HIT_STATUS_REVIEW,
                "tweet_one": tweet_one,
                "tweet_two": tweet_two,
            }
            self.data.remove(tweet_one['hash'])
            self.data.add_hit(hit)
            self.stats.hits += 1
        else:
            pass

    def compare(self, tweet_one, tweet_two):
        """
        most basic test, finds if tweets are just identical
        """
        if not self.compare_chars(tweet_one, tweet_two):
            return False
        if not self.compare_words(tweet_one, tweet_two):
            return False
        return True

    def compare_chars(self, tweet_one, tweet_two, cutoff=0.5):
        """
        basic test, looks for similarity on a char by char basis
        """
        stripped_one = utils.stripped_string(tweet_one)
        stripped_two = utils.stripped_string(tweet_two)

        total_chars = len(stripped_two)
        same_chars = 0
        for i in range(total_chars):
            if stripped_one[i] == stripped_two[i]:
                same_chars += 1

        if (float(same_chars) / total_chars) < cutoff:
            return True
        return False

    def compare_words(self, tweet_one, tweet_two, cutoff=0.5):
        """
        looks for tweets containing the same words in different orders
        """
        words_one = utils.stripped_string(tweet_one, spaces=True).split()
        words_two = utils.stripped_string(tweet_two, spaces=True).split()

        word_count = len(words_one)
        if len(words_two) < len(words_one):
            word_count = len(words_two)

        same_words = 0
        # compare words to each other:
        for word in words_one:
            if word in words_two:
                same_words += 1
        # if more than $CUTOFF words are the same, fail test
        if (float(same_words) / word_count) < cutoff:
            return True
        else:
            return False

    def check_save(self):
        """check if it's time to save and save if necessary"""
        if (time.time() > self.time_to_save):
            self.time_to_save = self.set_save_time()
            raise NeedsSave

# displaying data while we run:
    def update_console(self):
        """
        prints various bits of status information to the console.
        """
        # what all do we want to have, here? let's blueprint:
        # tweets seen: $IN_HAS_TEXT passed filter: $PASSED_F% Hits: $HITS
        seen_percent = int(100*(float(
            self.stream_handler.passed_filter)/self.stream_handler.tweets_seen))
        runtime = time.time()-self.stats.start_time

        status = (
            'tweets seen: ' + str(self.stream_handler.tweets_seen) +
            " passed filter: " + str(self.stream_handler.passed_filter) +
            " ({0}%)".format(seen_percent) +
            " hits " + str(self.stats.possible_hits) +
            " agrams: " + str(self.stats.hits) +
            " buffer: " + str(self.stream_handler.bufferlength()) +
            " runtime: " + utils.format_seconds(runtime)
        )
        sys.stdout.write(status + '\r')
        sys.stdout.flush()

    def print_hits(self):
        hits = self.data.get_all_hits()
        for hit in hits:
            print(hit['tweet_one']['text'], hit['tweet_one']['id'])
            print(hit['tweet_two']['text'], hit['tweet_two']['id'])
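
The compare_words test above implements a simple word-overlap cutoff: if at least half of the words in the shorter tweet also appear in the other, the pair is rejected as "the same words in a different order" rather than a real anagram. A standalone sketch of that check (plain lower()/split() stands in for utils.stripped_string):

    def share_too_many_words(text_one, text_two, cutoff=0.5):
        words_one = text_one.lower().split()
        words_two = text_two.lower().split()
        word_count = min(len(words_one), len(words_two))
        same_words = sum(1 for word in words_one if word in words_two)
        return (same_words / word_count) >= cutoff   # True means "reject this pair"

    print(share_too_many_words("the cat sat", "sat the cat"))        # True: same words reordered
    print(share_too_many_words("listen to me", "silent tome box"))   # False: genuinely different words
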
Example #36
def main():
    opts = {}
    opts['random_seed'] = 821
    opts['dataset'] = 'gmm'  # gmm, circle_gmm,  mnist, mnist3, cifar ...
    opts['unrolled'] = FLAGS.unrolled  # Use Unrolled GAN? (only for images)
    opts['unrolling_steps'] = 5  # Used only if unrolled = True
    opts['data_dir'] = 'mnist'
    opts['trained_model_path'] = 'models'
    opts['mnist_trained_model_file'] = 'mnist_trainSteps_19999_yhat'  # 'mnist_trainSteps_20000'
    opts['gmm_max_val'] = 15.
    opts['toy_dataset_size'] = 64 * 1000
    opts['toy_dataset_dim'] = 2
    opts['mnist3_dataset_size'] = 2 * 64  # 64 * 2500
    opts['mnist3_to_channels'] = False  # Hide 3 digits of MNIST to channels
    opts['input_normalize_sym'] = False  # Normalize data to [-1, 1], applicable only for image datasets
    opts['adagan_steps_total'] = 10
    opts['samples_per_component'] = 5000  # 50000
    opts['work_dir'] = FLAGS.workdir
    opts['is_bagging'] = FLAGS.is_bagging
    opts['beta_heur'] = 'uniform'  # uniform, constant
    opts['weights_heur'] = 'theory_star'  # theory_star, theory_dagger, topk
    opts['beta_constant'] = 0.5
    opts['topk_constant'] = 0.5
    opts["init_std"] = FLAGS.init_std
    opts["init_bias"] = 0.0
    opts['latent_space_distr'] = 'normal'  # uniform, normal
    opts['optimizer'] = 'sgd'  # sgd, adam
    opts["batch_size"] = 64
    opts["d_steps"] = 1
    opts["g_steps"] = 1
    opts["verbose"] = True
    opts['tf_run_batch_size'] = 100
    opts['objective'] = 'JS'

    opts['gmm_modes_num'] = 3
    opts['latent_space_dim'] = FLAGS.zdim
    opts["gan_epoch_num"] = 15
    opts["mixture_c_epoch_num"] = 5
    opts['opt_learning_rate'] = FLAGS.learning_rate
    opts['opt_d_learning_rate'] = FLAGS.d_learning_rate
    opts['opt_g_learning_rate'] = FLAGS.g_learning_rate
    opts["opt_beta1"] = FLAGS.adam_beta1
    opts['batch_norm_eps'] = 1e-05
    opts['batch_norm_decay'] = 0.9
    opts['d_num_filters'] = 16
    opts['g_num_filters'] = 16
    opts['conv_filters_dim'] = 4
    opts["early_stop"] = -1  # set -1 to run normally
    opts["plot_every"] = 500  # set -1 to run normally
    opts["eval_points_num"] = 1000  # 25600
    opts['digit_classification_threshold'] = 0.999
    opts['inverse_metric'] = False  # Use metric from the Unrolled GAN paper?
    opts['inverse_num'] = 1  # Number of real points to inverse.

    saver = utils.ArraySaver('disk', workdir=opts['work_dir'])

    if opts['verbose']:
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s - %(message)s')
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')

    opts["number_of_runs"] = 15
    likelihood = np.empty((opts["adagan_steps_total"], opts["number_of_runs"]))
    coverage = np.empty((opts["adagan_steps_total"], opts["number_of_runs"]))

    for run in range(opts["number_of_runs"]):
        logging.info('Beginning run {} of {}'.format(run + 1,
                                                     opts["number_of_runs"]))
        opts['random_seed'] += 1

        utils.create_dir(opts['work_dir'])
        with utils.o_gfile((opts['work_dir'], 'params.txt'), 'w') as text:
            text.write('Parameters:\n')
            for key in opts:
                text.write('%s : %s\n' % (key, opts[key]))

        data = DataHandler(opts)
        # saver.save('real_data_{0:02d}.npy'.format(run), data.data)
        saver.save(
            'real_data_params_mean_{0:02d}_var_{1:1.2f}.npy'.format(
                run, data.var), data.mean)
        # assert data.num_points >= opts['batch_size'], 'Training set too small'
        adagan = AdaGan(opts, data)
        metrics = Metrics()

        for step in range(opts["adagan_steps_total"]):
            logging.info('Running step {} of AdaGAN'.format(step + 1))
            adagan.make_step(opts, data)
            num_fake = opts['eval_points_num']
            logging.debug('Sampling fake points')

            fake_points = adagan.sample_mixture(num_fake)
            saver.save('fake_points_{:02d}.npy'.format(step), fake_points)

            logging.debug('Sampling more fake points')
            more_fake_points = adagan.sample_mixture(500)
            logging.debug('Plotting results')
            metrics.make_plots(opts, step, data.data[:500], fake_points[0:100],
                               adagan._data_weights[:500])
            logging.debug('Evaluating results')
            (lh, C) = metrics.evaluate(opts,
                                       step,
                                       data.data,
                                       fake_points,
                                       more_fake_points,
                                       prefix='')
            likelihood[step, run] = lh
            coverage[step, run] = C
            saver.save('likelihood.npy', likelihood)
            saver.save('coverage.npy', coverage)
        logging.debug("AdaGan finished working!")
Example #37
class Subset(SubsetBase):

    K_FEATURES = 10

    def __init__(self, indices, data_handler=None, data=None):
        self.indices = indices
        self.labels = None
        if data_handler:
            self.data_handler = data_handler
        else:
            self.data_handler = DataHandler(data)

    def get_size(self):
        """
		Returns the size of the subset i.e the number
		of rows in the subset
		"""
        return len(self.indices)

    def purity(self):
        """
		Determines the "purity" of the subset by calculating
		the gini index of the data
		"""
        return self.data_handler.gini_index(self.indices)

    def majority_label(self):
        """
		Returns the mode of the all the labels in the subset
		"""
        labels = self.data_handler.get_freq(self.indices)
        # Loop frequency hash and find the mode
        majority, count = None, -1
        for label, value in labels.iteritems():
            if value > count:
                majority, count = label, value

        return majority

    def split(self):
        """
		Returns a tuple of arrays of (feature, values, subsets) 
		given the feature to split on.
		"""
        n, f = self.data_handler.get_shape()
        # Selects k features without replacement
        features = random.sample(range(1, f), self.K_FEATURES)
        # Calculate the gini index of k different splits
        splits = {}
        for feature in features:
            (gini, threshold) = self.data_handler.test_split(self.indices, feature)
            splits[feature] = {"threshold": threshold, "gini": gini}
            # Finds the optimal split from all the splits above
        best_feature, threshold, min_gini = None, None, 100
        for feature, results in splits.iteritems():
            if results["gini"] < min_gini:
                best_feature, threshold, min_gini = feature, results["threshold"], results["gini"]
                # Split the subset
        subset_left, subset_right = self.get_subsets(best_feature, threshold)
        return best_feature, threshold, subset_left, subset_right

    def get_subsets(self, feature, threshold):
        """
		Splits the current subset into two based on the 
		input feature and threshold
		"""
        left_indices, right_indices = self.data_handler.split(self.indices, feature, threshold)
        left_subset = Subset(left_indices, data_handler=self.data_handler)
        right_subset = Subset(right_indices, data_handler=self.data_handler)

        return left_subset, right_subset
Example #38
            indexStart = x.index(itemStart)
        if itemEnd > x[-1]:
            indexEnd = len(x) - 1
        else:
            indexEnd = x.index(itemEnd) 
            
        # test
        # indexStart = 10
        # indexEnd = 60
           
        x = x[indexStart:indexEnd + 1]
        y[0] = y[0][indexStart:indexEnd + 1]
        y[1] = y[1][indexStart:indexEnd + 1]
        fig = pl.figure()
        # http://stackoverflow.com/questions/11617719/how-to-plot-a-very-simple-bar-chart-python-matplotlib-using-input-txt-file
        width = .5
        ind = np.arange(len(x))
        pl.bar(ind, y[0], width=width, color='green', alpha=0.5)
        pl.bar(ind, y[1], width=width, color='yellow', alpha=0.5)
        # pl.bar([0, 20, 50], [40, 60, 120], width=width, color = 'red')
        pl.xticks(ind + width / 2, x)

        fig.autofmt_xdate()
        pyplot.show()

if __name__ == '__main__':
    [X, y] = DataHandler.getTrainingData()
    x = X[0]
    dates = [datetime.datetime.strptime(item, "%Y-%m-%d %H:%M:%S") for item in X[0]]
    DataVisualization.barPlotTemporalData(dates, y, '2011-01-01 16:00:00', '2011-01-03 15:00:00')
Example #39
class Backtest(object):

    def __init__(self, strategy, portfolio, analyser, **kwargs):
        self.strategy = strategy
        self.portfolio = portfolio
        self.analyser = [analyser] if type(analyser) != list else analyser
        self.backtest_modules = [self, self.strategy, self.portfolio]
        self.backtest_modules.extend(self.analyser)

        self.symbols = None
        self.qcodes = None
        self.date_start = None
        self.date_end = None
        self.frequency = None
        self.datas = None
        self.trade_time = None
        self.benchmark = None
        self.benchmark_qcode = None

        for module in self.backtest_modules:
            module.__dict__.update(kwargs)
        #self.__dict__.update(kwargs)

        self.validate_input()

        self.data_handler = DataHandler(self.symbols, self.qcodes, self.date_start, self.date_end, self.frequency, self.datas)
        self.benchmark_handler = DataHandler([self.benchmark], [self.benchmark_qcode], self.date_start, self.date_end, self.frequency, self.datas)

    def validate_input(self):
        if self.symbols is None:
            raise ValueError, "Need to choose symbols to trade"

        if self.benchmark is None:
            print "No benchmark specified. Default is SPY"
            self.benchmark = 'SPY'
            self.benchmark_qcode = 'GOOG/NYSE_SPY'

        if not os.path.exists(self.options.outdir):
            os.mkdir(self.options.outdir)

    def run(self):
        print "\n\nHandling data"
        datas_symbols = self.data_handler.generate_data()
        datas_benchmark = self.benchmark_handler.generate_data()
        for module in self.backtest_modules:
            module.datas_symbols = datas_symbols
            module.datas_benchmark = datas_benchmark
            module.prices = datas_symbols[self.trade_time]
            module.prices_bm = datas_benchmark[self.trade_time]
        
        print "\n\nGenerating signals"
        self.strategy.begin()
        self.strategy.generate_signals()
        for module in self.backtest_modules:
            module.__dict__.update(self.strategy.__dict__)
        
        print "\n\nBacktesting portfolio"
        self.portfolio.begin()
        self.portfolio.generate_returns()
        for module in self.backtest_modules:
            module.__dict__.update(self.portfolio.__dict__)

        print "\n\nAnalysing results"
        for analyser in self.analyser:
            analyser.begin()
            analyser.generate_analysis()
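
Backtest.__init__ above pushes one shared configuration into every participating module by updating each module's __dict__ with the same kwargs. A minimal sketch of that idiom with placeholder classes (Strategy/Portfolio/Analyser here are stubs, not the project's real modules):

    class Strategy: pass
    class Portfolio: pass
    class Analyser: pass

    modules = [Strategy(), Portfolio(), Analyser()]
    config = dict(symbols=['AAPL', 'MSFT'], date_start='2015-01-01',
                  date_end='2016-01-01', frequency='daily', trade_time='Close')

    for module in modules:
        module.__dict__.update(config)   # every module now carries the same attributes

    print(modules[0].symbols, modules[2].trade_time)
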
Example #40
class MotherRussia:
    '''Mother russia functions as the program object'''

    def __init__(self, debug_mode=False, timeout=10.0):
        self.data_handler = DataHandler()
        self.connector = SocketHandler(timeout)
        self.debug = debug_mode
        self.bot = Bot()

        self.DEFAULT_TICKS = 2*(1000//50)
        self.ticks = self.DEFAULT_TICKS

    def __enter__(self):
        return self

    def __exit__(self, exec_type, value, traceback):
        if isinstance(value, KeyboardInterrupt):
            print('\r\rReceived keyboard interrupt')
        elif isinstance(value, SystemExit):
            print('Received system exit signal')
        elif isinstance(value, Exception):
            print('Exception: ', value)

        print('Attempting to clean up...')
        clean_error = self.clean()
        if isinstance(clean_error, Exception):
            print('Could not clean up: ', clean_error)
        else:
            print('Done')

        if not self.debug:
            return True

    def init(self):
        socket_error = self.connector.connect()
        if isinstance(socket_error, Exception):
            raise socket_error
        self.connector.send_data('NAME Putin')

    def run(self):
        while True:
            raw_data = self.connector.poll_data()
            if len(raw_data) == 0:
                break
            json_error = self.data_handler.parse_data(raw_data)
            if isinstance(json_error, ValueError):
                # The exception will contain the string 'Extra data' if the
                # raw data it received was incomplete. Therefore, try to
                # receive new raw data
                if 'Extra data' in str(json_error):
                    continue
                else:
                    # In most cases, this error will be 'Expecting value',
                    # because the block of raw data it received was empty
                    raise json_error

            if self.data_handler.is_dead or self.data_handler.is_end_of_round:
                self.ticks = self.DEFAULT_TICKS

            start = time.perf_counter()
            self.bot.update_state(self.data_handler)
            self.bot.make_decisions(self.ticks)
            elapsed_time = (time.perf_counter() - start)*1000
            if elapsed_time > 45 and self.ticks > 0.5*(1000//50):
                self.ticks -= 1
            elif elapsed_time < 30 and self.ticks < 4*(1000//50):
                self.ticks += 1
            # print(elapsed_time, self.ticks)
            while len(self.bot.commands) > 0:
                command = self.bot.get_command()
                self.connector.send_data(command)

        self.clean()

    def clean(self):
        try:
            if self.connector.sock is not None:
                self.connector.close()
        except Exception as e:
            return e
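
MotherRussia.run above distinguishes two ValueError cases from the JSON parser: "Extra data" (an incomplete or concatenated frame, worth polling again) versus anything else such as "Expecting value" (an empty buffer, treated as fatal). A small sketch of that message-based dispatch with json.loads and fabricated payloads:

    import json

    def parse(raw):
        try:
            json.loads(raw)
            return None
        except ValueError as e:
            return e

    for raw in ('{"tick": 1}', '{"tick": 1}{"tick', ''):
        err = parse(raw)
        if err is None:
            print('ok')
        elif 'Extra data' in str(err):
            print('incomplete frame, poll again')   # retryable
        else:
            print('fatal:', err)                    # e.g. "Expecting value" on empty input
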
Example #41
from sklearn.metrics import mean_squared_error
import numpy as np
import evaluation
from sklearn.cross_validation import KFold

from sklearn.feature_selection import RFE
from sklearn.feature_selection import SelectKBest

from datahandler import DataHandler

def f_regression(X, Y):
   import sklearn
   return sklearn.feature_selection.f_regression(X, Y, center=False)  # center=True (the default) would not work ("ValueError: center=True only allowed for dense data") but should presumably work in general

if __name__ == '__main__':
    [X, y] = DataHandler.getTrainingData()
    X = DataHandler.getFeatures(X)
    
    yCasual = y[0]
    yRegistered = y[1]
    
    kf = KFold(len(X), n_folds=10)
    scoresCasualExtraTreesRegression = []
    scoresRegisteredExtraTreesRegression = []
    scoresTotalExtraTreesRegression = []
    
    scoresCasualABR = []
    scoresRegisteredABR = []
    scoresTotalABR = []
    
    mdlExtraTreesRegressorCasual = None
Example #42
class LabDAQ(tk.Frame): #, sH.SerialHandler, dH.DataHandler, pH.PlotHandler):
    def __init__(self, parent = None, *args, **kwargs):
        # Initialize TkInter frame and define parent
        tk.Frame.__init__(self, parent )
        self.parent = parent

        #initialize a starting deque size and poll rate
        self.pollRate = POLL_RATE_ms
        self.dequeSize = 100

        # Create arduino device and datadeque as object parameters
        self.device = arduino()
        self.dataHandler = DataHandler(dequeLength=DATA_POINTS_PER_PLOT)

        # Create serial port frame, data handling frame, and plot frame
        self.sH = sH.SerialHandlerUI( parent = parent, device = self.device )
        self.dH = dH.DataHandlerUI( parent = parent, dataHandler = self.dataHandler )
        self.pH = pH.PlotHandlerUI( parent = parent, dataHandler = self.dataHandler )

        # place into UI
        self.sH.grid(row=0, column=0, columnspan=6)
        self.dH.grid(row=1, column=0, columnspan=6)
        self.pH.grid(row=2, column=0, columnspan=6)

        # create poll rate menu and deque size selector
        self.create_poll_rate_menu()

        #create deque size selector
        # currently disabled because larger deque size == longer loop evaluation, undesirable
        #self.create_deque_size_selector()

        # generate quit button
        tk.Button(master=self.parent, text='Quit', command=self._quit).grid(row=5, column=2, columnspan=2)

        # start updating that data
        self.update_frequency = POLL_RATE_ms
        self.update_data()

    def update_data(self):
        # call this function again after {self.update_frequency time} (in ms)
        self.parent.after(self.update_frequency, self.update_data)
        #print "testing %s" % self.update_frequency

        #if device is connected,
        if self.device.is_connected():
            dataRow, dataFlag = self.device.poll() #read in data
            if dataFlag:
                # send the data to be sorted and added to channels
                # and simultaneously collect whether or not an alarm was triggered
                alarmStatus = self.dataHandler.append_data( dataRow )
                # update the plot
                self.pH.update_plots()
                # sound the alarm! but only if an alarm was triggered
                if alarmStatus:
                    self.device.trigger_alarm()
        else:
            pass


    def create_poll_rate_menu(self):
        # change polling rate option
        tk.Label(self.parent, text="Choose Polling Rate:").grid(row=3, column=0)
        self.pollRateTk=tk.DoubleVar()
        pollRateOptions = [ 0.25, 0.5, 1.0, 5.0, 10.0, 30.0, 60.0]
        #create option menu and place into UI
        menu=tk.OptionMenu(self.parent, self.pollRateTk, *pollRateOptions)
        self.pollRateTk.set( str(POLL_RATE_ms/1000.) )
        menu.grid(row=3, column=1,columnspan=2)
        # label for units
        tk.Label(self.parent, text="sampling interval in seconds").grid(row=3, column=3)
        # create button to update poll rate
        tk.Button(self.parent, text='Update Polling Rate', state=tk.NORMAL,
                command=self.update_poll_rate).grid(row=3,column=4)


    def create_deque_size_selector(self):
        # change number of plotted data points option
        tk.Label(self.parent, text="Choose # Data Points Plotted:").grid(row=4, column=0)
        self.dequeSizeTk=tk.IntVar()
        dequeSizeOptions = [ 100, 250, 500, 750, 1000, 5000, 10000]
        #create option menu and place into UI
        menu=tk.OptionMenu(self.parent, self.dequeSizeTk, *dequeSizeOptions)
        menu.grid(row=4, column=1,columnspan=2)
        # update deque size Tk variable to current deque size
        self.dequeSizeTk.set( self.dequeSize )
        # label for units
        tk.Label(self.parent, text="data points").grid(row=4, column=3)
        # create button to update poll rate
        tk.Button(self.parent, text='Update #Points/Plot', state=tk.NORMAL,
                command=self.update_deque_size).grid(row=4,column=4)


    def update_poll_rate(self):
        new_rate = float(self.pollRateTk.get()) * 1000
        self.update_frequency = int(new_rate)

    def update_deque_size(self):
        # note: the snippet never defines self.dataDeque; the shared DataHandler
        # instance created in __init__ is the likely target
        self.dataHandler.set_deque_length( int(self.dequeSizeTk.get()) )
        self.pH.update_data_deque(self.dataHandler)

    def _quit(self):
        self.device.disconnect()
        self.parent.quit()
        self.parent.destroy()
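A minimal sketch of how a frame like LabDAQ might be launched; the tkinter import, the constant values, and the window title are assumptions, since the snippet omits its module-level setup:

# Hypothetical launch code; POLL_RATE_ms and DATA_POINTS_PER_PLOT values are guesses.
import tkinter as tk

POLL_RATE_ms = 1000           # assumed default polling interval in milliseconds
DATA_POINTS_PER_PLOT = 100    # assumed deque length passed to DataHandler

if __name__ == '__main__':
    root = tk.Tk()
    root.title('LabDAQ')
    app = LabDAQ(parent=root)  # builds the serial, data-handling, and plot frames
    root.mainloop()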
Example #43
0
File: model.py Project: yysherlock/msae
    def train(self):
        outputPrefix=self.readField(self.config,self.name,"output_directory")
        outputDir=os.path.join(outputPrefix,self.name)
        if not os.path.exists(outputDir):
            os.makedirs(outputDir)

        showFreq = int(self.readField(self.config, self.name, "show_freq"))
        if showFreq > 0:
            visDir = os.path.join(outputDir,'vis')
            if not os.path.exists(visDir):
                os.mkdir(visDir)
        # normalize the images here if they were not normalized beforehand
        normalize=self.str2bool(self.readField(self.config, self.name, "normalize"))
        trainDataSize=int(self.readField(self.config, self.name, "train_size"))
        numBatch = trainDataSize // self.batchsize  # integer division so range(numBatch) also works on Python 3
        trainDataPath = self.readField(self.config, self.name, "train_data")
        if self.readField(self.config,self.name,"extract_reps")=="True":
            trainRepsPath=self.readField(self.config, self.name, "train_reps")
        else:
            trainRepsPath=None
        trainDataLoader=DataHandler(trainDataPath, trainRepsPath, self.vDim, self.hDim, self.batchsize,numBatch, normalize)

        evalFreq=int(self.readField(self.config,self.name,'eval_freq'))
        if evalFreq!=0:
            qsize=int(self.readField(self.config, self.name, "query_size"))
            evalPath=self.readField(self.config,self.name,"validation_data")
            labelPath=self.readField(self.config,self.name,"label")
            queryPath=self.readField(self.config, self.name, "query")
            label=np.load(labelPath)
            evaluator = Evaluator(queryPath, label, os.path.join(outputDir, 'perf'), self.name, query_size=qsize, verbose=self.verbose)  # avoid shadowing the builtin eval
            validation_data=gp.garray(np.load(evalPath))
            if normalize:
                validation_data=trainDataLoader.doNormalization(validation_data)

        maxEpoch = int(self.readField(self.config, self.name, "max_epoch"))

        nCommon, nMetric, title=self.getDisplayFields()
        if self.verbose:
            print(title)
        for epoch in range(maxEpoch):
            perf=np.zeros( nMetric)
            trainDataLoader.reset()
            for i in range(numBatch):
                batch = trainDataLoader.getOneBatch()
                curr = self.trainOneBatch(batch, epoch, computeStat=True)
                perf=self.aggregatePerf(perf, curr)

            if showFreq != 0 and (1+epoch) % showFreq == 0:
                validation_code=self.getReps(validation_data)
                np.save(os.path.join(visDir, '%dvis' % (1+epoch)), validation_code)
            if evalFreq !=0 and (1+epoch) % evalFreq ==0:
                validation_code=self.getReps(validation_data)
                evaluator.evalSingleModal(validation_code, epoch, self.name+'V')
                validation_code=None
            if self.verbose:
                self.printEpochInfo(epoch,perf,nCommon)

        if self.readField(self.config,self.name,"checkpoint")=="True":
            self.doCheckpoint(outputDir)

        if self.readField(self.config,self.name,"extract_reps")=="True":
            if evalFreq!=0:
                validation_reps_path=self.readField(self.config, self.name, "validation_reps")
                self.extractValidationReps(validation_data, validation_reps_path)
            self.extractTrainReps(trainDataLoader, numBatch)

        self.saveConfig(outputDir)
Example #44
0
File: msae.py Project: yysherlock/msae
    def train(self):
        outputPrefix=self.readField(self.config,self.name,"output_directory")
        outputDir=os.path.join(outputPrefix,self.name)
        if not os.path.exists(outputDir):
            os.mkdir(outputDir)
        
        imageinput = self.readField(self.isae.ae[1].config, self.isae.ae[1].name, "train_data")
        textinput = self.readField(self.tsae.ae[1].config, self.tsae.ae[1].name, "train_data")

        if self.readField(self.config, self.name,"extract_reps")=="True":
            imageoutput=self.readField(self.isae.ae[-1].config, self.isae.ae[-1].name, "train_reps")
            textoutput=self.readField(self.tsae.ae[-1].config, self.tsae.ae[-1].name, "train_reps")
        else:
            imageoutput=None
            textoutput=None

        maxEpoch = int(self.readField(self.config, self.name, "max_epoch"))
        trainSize=int(self.readField(self.config, self.name, "train_size"))
        numBatch = int(trainSize / self.batchsize)
 
        normalizeImg=self.str2bool(self.readField(self.config, self.name, "normalize"))
        imgTrainDH=DataHandler(imageinput, imageoutput, self.isae.ae[1].vDim, self.isae.ae[-1].hDim, self.batchsize, numBatch,normalizeImg)
        txtTrainDH=DataHandler(textinput, textoutput, self.tsae.ae[1].vDim, self.tsae.ae[-1].hDim, self.batchsize, numBatch)

        showFreq = int(self.readField(self.config, self.name, "show_freq"))
        if showFreq > 0:
            visDir = os.path.join(outputDir, "vis")
            if not os.path.exists(visDir):
                os.makedirs(visDir)

        evalFreq = int(self.readField(self.config, self.name, "eval_freq"))
        if evalFreq!=0:
            qsize=int(self.readField(self.config, self.name, "query_size"))
            labelPath=self.readField(self.config,self.name,"label")
            label=np.load(labelPath)
            queryPath=self.readField(self.config, self.name, "query")
            validation=evaluate.Evaluator(queryPath,label,os.path.join(outputDir,'perf'), self.name, query_size=qsize,verbose=self.verbose)
            validateImagepath = self.readField(self.isae.ae[1].config, self.isae.ae[1].name, "validation_data")
            validateTextpath = self.readField(self.tsae.ae[1].config, self.tsae.ae[1].name, "validation_data")
            validateImgData = gp.garray(np.load(validateImagepath))
            if normalizeImg:
                validateImgData=imgTrainDH.doNormalization(validateImgData)
            validateTxtData = gp.garray(np.load(validateTextpath))
        else:
            print "Warning: no evluation setting!"

        nCommon, nMetric, title=self.getDisplayFields()
        if self.verbose:
            print(title)
 
        for epoch in range(maxEpoch):
            perf=np.zeros( nMetric)
            epoch1, imgcost, txtcost, diffcost=self.checkPath(epoch)
            imgTrainDH.reset()
            txtTrainDH.reset()
            for i in range(numBatch):
                img = imgTrainDH.getOneBatch() 
                txt = txtTrainDH.getOneBatch()
                curr= self.trainOneBatch(img, txt, epoch1, imgcost, txtcost, diffcost)
                perf=self.aggregatePerf(perf, curr)

            if evalFreq!=0 and (1+epoch) % evalFreq == 0:
                imgcode,txtcode=self.getReps(validateImgData, validateTxtData)
                validation.evalCrossModal(imgcode,txtcode,epoch,'V')

            if showFreq != 0 and (1+epoch) % showFreq == 0:
                imgcode,txtcode=self.getReps(validateImgData, validateTxtData)
                np.save(os.path.join(visDir, '%dimg' % ((epoch+1) // showFreq)), imgcode)
                np.save(os.path.join(visDir, '%dtxt' % ((epoch+1) // showFreq)), txtcode)

            if self.verbose:
                self.printEpochInfo(epoch, perf, nCommon)

        if self.readField(self.config, self.name, "checkpoint")=="True":
            self.doCheckpoint(outputDir)

        if self.readField(self.config, self.name,"extract_reps")=="True":
            if evalFreq!=0:
                self.extractValidationReps(validateImgData, validateTxtData, "validation_data","validation_reps")
            self.extractTrainReps(imgTrainDH, txtTrainDH, numBatch)

        self.saveConfig(outputDir)
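Both msae examples drive DataHandler the same way: construct it with the data path, an optional output path, the visible and hidden dimensions, the batch size, and the batch count, then call reset() once per epoch and getOneBatch() for each batch. A stripped-down sketch of that pattern; every argument value below is a placeholder, not a value from the project's config:

# Hypothetical illustration of the epoch/batch loop; all values are placeholders.
loader = DataHandler("train_data.npy", None, 1000, 128, 100, 50, True)
for epoch in range(10):
    loader.reset()                    # rewind to the first batch of the epoch
    for _ in range(50):
        batch = loader.getOneBatch()  # one batch of shape (batchsize, vDim)
        # ... train on the batch ...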