    def __init__(self, first_day, starting_cash):
        self.market_positions = [
            MarketPosition(DataUtils.get_date_from_row(first_day),
                           DataUtils.get_low_from_row(first_day),
                           starting_cash)
        ]
        self.cash = 0
Example #2
    def create_xy_train(self,
                        tag_file,
                        embedding_file,
                        data_size=1,
                        look_back=5,
                        threshold=0,
                        suffix=None,
                        mode="create",
                        load=None):
        DataUtils.message("Prepearing Training Data...", new=True)

        if mode == "create" or mode == "save":
            x_train, y_train = self.__create_xy_train(tag_file, embedding_file,
                                                      data_size, look_back,
                                                      threshold, suffix)

        if mode == "save":
            DataUtils.save_array(
                DataUtils.get_filename("ULSTM_X",
                                       "TRAIN" + "_" + str(look_back)),
                x_train)
            DataUtils.save_array(
                DataUtils.get_filename("ULSTM_Y",
                                       "TRAIN" + "_" + str(look_back)),
                y_train)

        if mode == "load" and load is not None:
            x_train = DataUtils.load_array(load[0])
            y_train = DataUtils.load_array(load[1])

        self.x_train = x_train
        self.y_train = y_train

        self.INPUT_SHAPE = x_train.shape
        self.OUTPUT_SHAPE = y_train.shape
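A hedged usage sketch of the three modes; the tagger instance and the file names below are hypothetical placeholders, not part of the example:

tagger.create_xy_train("tags.txt", "embeddings.vec", look_back=5, mode="save")
# On later runs, skip preprocessing and load the saved arrays instead
# (paths stand in for whatever DataUtils.get_filename produced):
tagger.create_xy_train(None, None, mode="load",
                       load=["ULSTM_X_TRAIN_5.npy", "ULSTM_Y_TRAIN_5.npy"])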
Example #3
def main():
    data_utils = DataUtils()
    clf_utils = ClassifierUtils()
    decision_documents, decision_labels = data_utils.load_decision_data()
    disagreement_documents, disagreement_labels = data_utils.load_disagreement_data()
    clf_metadata = {
        'type': 'RF',
        'n_estimators': 500,
        'max_depth': 128,
        'n_jobs': 8
    }
    features_metadata = {
        'type': 'count',
        'use_sw': True,
        'use_length': False,
        'binary': False,
        'normalize': False,
        'append_binary': False,
        'sampling': None
    }

    metrics = clf_utils.cross_validate(disagreement_documents,
                                       disagreement_labels,
                                       clf_metadata,
                                       features_metadata,
                                       num_splits=5)

    embed()
Example #4
    def run(self, data_row, cash_infusion, sell_out=False):
        high = DataUtils.get_high_from_row(data_row)
        low = DataUtils.get_low_from_row(data_row)
        date = DataUtils.get_date_from_row(data_row)

        if cash_infusion > 0:
            self.cash += cash_infusion
            self.amount_to_invest_per_day = self.cash / 30

        if self.cash > 0:
            if self.cash > self.amount_to_invest_per_day:
                new_market_position = MarketPosition(
                    date, low, self.amount_to_invest_per_day)
                self.cash -= self.amount_to_invest_per_day
            else:
                new_market_position = MarketPosition(date, low, self.cash)
                self.cash = 0
            self.market_positions.append(new_market_position)

        balance = 0

        if sell_out:
            for mp in self.market_positions:
                balance += mp.sell(date, high)
        else:
            for mp in self.market_positions:
                balance += mp.current_value(high)

        return balance
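A minimal sketch of driving this simulator day by day; the Portfolio class name and the rows list are assumptions standing in for the class above and its price data:

portfolio = Portfolio(rows[0], starting_cash=10000)
for i, row in enumerate(rows[1:], start=1):
    infusion = 3000 if i % 30 == 0 else 0  # hypothetical monthly cash infusion
    balance = portfolio.run(row, infusion, sell_out=(i == len(rows) - 1))
print("final balance:", balance)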
Example #5
    def create_xy_test(self,
                       tag_file,
                       embedding_file,
                       data_size=1,
                       look_back=5,
                       suffix=None,
                       mode="create",
                       load=None):
        DataUtils.message("Prepearing Test Data...", new=True)

        if mode == "create" or mode == "save":
            x_test, y_test = self.__create_xy_test(tag_file, embedding_file,
                                                   data_size, look_back,
                                                   suffix)

        if mode == "save":
            DataUtils.save_array(
                DataUtils.get_filename("ULSTM_X",
                                       "TEST" + "_" + str(look_back)), x_test)
            DataUtils.save_array(
                DataUtils.get_filename("ULSTM_Y",
                                       "TEST" + "_" + str(look_back)), y_test)

        if mode == "load" and load is not None:
            x_test = DataUtils.load_array(load[0])
            y_test = DataUtils.load_array(load[1])

        self.x_test = np.array(x_test)
        self.y_test = np.array(y_test)
Example #6
    def create(self):
        DataUtils.message("Creating The Model...", new=True)

        word_input = Input(shape=(self.look_back, 300))

        tag_input = Input(shape=(self.look_back, ))
        tag_emb = Embedding(self.distinct_tags + 1,
                            30,
                            input_length=self.look_back,
                            mask_zero=True,
                            trainable=False)(tag_input)

        concat_emb = Concatenate()([word_input, tag_emb])

        bilstm = Bidirectional(
            LSTM(300,
                 dropout=0.35,
                 recurrent_dropout=0.1,
                 return_sequences=True))(concat_emb)
        hidden = TimeDistributed(Dense(800, activation="tanh"))(bilstm)
        output = TimeDistributed(
            Dense(self.distinct_words, activation="softmax"))(hidden)

        model = Model(inputs=[word_input, tag_input], outputs=output)
        model.compile(loss='categorical_crossentropy',
                      optimizer="adam",
                      metrics=['accuracy'])

        self.model = model
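For reference, a hedged sketch of the shapes this model expects when fitting; the array names are placeholders, not from the source:

# x_words: (batch, look_back, 300) pretrained word vectors
# x_tags:  (batch, look_back) integer tag ids in [0, distinct_tags]
# y:       (batch, look_back, distinct_words) one-hot targets
model.fit([x_words, x_tags], y, epochs=10, batch_size=32)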
Example #7
    def create_xy_test(self,
                       tag_file,
                       embedding_file,
                       data_size=1,
                       window_size=5,
                       available_tags=None,
                       suffix=None,
                       mode="create",
                       load=None):
        DataUtils.message("Preparing Test Data...", new=True)
        # Avoid the mutable-default-argument pitfall
        if available_tags is None:
            available_tags = []

        if mode == "create" or mode == "save":
            x_test, y_test = self.__create_xy(tag_file, embedding_file,
                                              data_size, window_size,
                                              available_tags, suffix)

        if mode == "save":
            DataUtils.save_array(
                DataUtils.get_filename("SFF",
                                       "X_TEST" + "_" + str(window_size)),
                x_test)
            DataUtils.save_array(
                DataUtils.get_filename("SFF",
                                       "Y_TEST" + "_" + str(window_size)),
                y_test)

        if mode == "load" and load is not None:
            x_test = DataUtils.load_array(load[0])
            y_test = DataUtils.load_array(load[1])

        self.x_test = np.array(x_test)
        self.y_test = np.array(y_test)
Example #8
    def create_xy_train(self,
                        tag_file,
                        embedding_file,
                        data_size=1,
                        window_size=5,
                        available_tags=None,
                        suffix=None,
                        mode="create",
                        load=None):
        DataUtils.message("Preparing Training Data...", new=True)
        # Avoid the mutable-default-argument pitfall
        if available_tags is None:
            available_tags = []

        if mode in ("create", "save"):
            x_train, y_train = self.__create_xy(tag_file, embedding_file,
                                                data_size, window_size,
                                                available_tags, suffix)

        if mode == "save":
            DataUtils.save_array(
                DataUtils.get_filename("SFF",
                                       "X_TRAIN" + "_" + str(window_size)),
                x_train)
            DataUtils.save_array(
                DataUtils.get_filename("SFF",
                                       "Y_TRAIN" + "_" + str(window_size)),
                y_train)

        if mode == "load" and load is not None:
            x_train = DataUtils.load_array(load[0])
            y_train = DataUtils.load_array(load[1])

        self.x_train = np.array(x_train)
        self.y_train = np.array(y_train)

        self.INPUT_SHAPE = self.x_train.shape
        self.OUTPUT_SHAPE = self.y_train.shape
Example #9
    def create_xy_train(self, parse_tree_file, data_size=1, seq_len=10):
        DataUtils.message("Prepearing Training Data...", new=True)

        x_train, y_train = self.__create_xy(parse_tree_file, data_size,
                                            seq_len)

        self.x_train = x_train
        self.y_train = y_train
Example #10
    def create_xy_train(self,
                        dependency_tree,
                        embedding_file,
                        data_size=1,
                        look_back=0,
                        mode="create",
                        load=None):
        DataUtils.message("Preparing Training Data...", new=True)

        if mode in ("create", "save"):
            word_train, tag_train, probability_train = self.__create_xy(
                dependency_tree, embedding_file, data_size, look_back,
                test=False)

        self.word_train = word_train
        self.tag_train = tag_train
        self.probability_train = probability_train
Example #11
def run():
    print(device_lib.list_local_devices())
    configuration = Configuration('configuration/configuration.cfg')

    DataUtils.check_and_create_folders(configuration)
    DataUtils.create_cache_if_not_exists(configuration)

    recognition = Recognition(configuration)
    recognition.train()
Example #12
    def save(self, note=""):
        DataUtils.message("Saving Model...", new=True)
        directory = "weights/"

        DataUtils.create_dir(directory)

        file = DataUtils.get_filename("UFF", note)+".h5"

        self.model.save(directory+file)
Example #13
    def create(self):
        DataUtils.message("Creating The Model...", new=True)

        input_forward = Input(shape=(self.seq_len, ))
        input_backward = Input(shape=(self.seq_len, ))

        head_forward = Input(shape=(self.seq_len, ))
        head_backward = Input(shape=(self.seq_len, ))

        word_embedding = Embedding(self.distinct_words,
                                   128,
                                   input_length=self.seq_len,
                                   trainable=True)
        input_forward_embedding = word_embedding(input_forward)
        input_backward_embedding = word_embedding(input_backward)

        head_forward_embedding = word_embedding(head_forward)
        head_backward_embedding = word_embedding(head_backward)

        lstm_forward = LSTM(128)
        lstm_backward = LSTM(128)

        input_forward_lstm = lstm_forward(input_forward_embedding)
        input_backward_lstm = lstm_backward(input_backward_embedding)
        input_lstm = Concatenate()([input_forward_lstm, input_backward_lstm])

        head_forward_lstm = lstm_forward(head_forward_embedding)
        head_backward_lstm = lstm_backward(head_backward_embedding)
        head_lstm = Concatenate()([head_forward_lstm, head_backward_lstm])

        tag_output = Dense(18, activation="softmax")(input_lstm)

        input_hidden = Dense(100, activation=None)
        input_forward_hidden = input_hidden(input_lstm)

        head_hidden = Dense(100, activation=None)
        head_forward_hidden = head_hidden(head_lstm)

        sum_hidden = Add()([input_forward_hidden, head_forward_hidden])
        tanh_hidden = Activation("tanh")(sum_hidden)

        arc_output = Dense(1, activation=None)(tanh_hidden)

        model = Model(inputs=[
            input_forward, input_backward, head_forward, head_backward
        ],
                      outputs=[tag_output, arc_output])

        def nll1(y_true, y_pred):
            # keras.losses.binary_crossentropy gives the mean over the
            # last axis; we require the sum
            return K.sum(K.binary_crossentropy(y_true, y_pred), axis=-1)

        model.compile(loss=['categorical_crossentropy', nll1],
                      optimizer="adam",
                      metrics=['accuracy'])
        self.model = model
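A quick standalone check (NumPy only) of the distinction nll1 draws: summing per-element binary cross-entropy makes the arc loss scale with sequence length, where the Keras default mean would dilute it:

import numpy as np

y_true = np.array([1.0, 0.0, 1.0])
y_pred = np.array([0.9, 0.2, 0.8])
bce = -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
print(bce.mean())  # ~0.184, what keras.losses.binary_crossentropy returns
print(bce.sum())   # ~0.551, what nll1 returns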
Example #14
    def plot(self, note=""):
        DataUtils.message("Ploting Model...", new=True)
        directory = "plot/"

        DataUtils.create_dir(directory)

        file = DataUtils.get_filename("UFF", note)+".png"

        plot_model(self.model, to_file=directory+file, show_shapes=True, show_layer_names=False)
Example #15
	def __init__(self):
		self.num_classes = 2
		self.resnet50_weights = os.path.realpath('models/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5')
		self.xception_weights = os.path.realpath('models/xception_weights_tf_dim_ordering_tf_kernels_notop.h5')
		self.model_output_path = os.path.realpath('data/model_output.h5')
		self.model_path = {'resnet50': os.path.realpath('data/model_resnet50.h5'),
						   'xception': os.path.realpath('data/model_xception.h5')}
		self.transfer_classifiers = {'resnet50': (ResNet50, self.resnet50_weights),
		                             'xception': (Xception, self.xception_weights)}
		self.du = DataUtils()
Example #16
    def create_xy_test(self, parse_tree_file, data_size=1, seq_len=10):
        DataUtils.message("Prepearing Validation Data...", new=True)

        x_test, y_test = self.__create_xy(parse_tree_file,
                                          data_size,
                                          seq_len,
                                          test=True)

        self.x_test = x_test
        self.y_test = y_test
Example #17
    def train(self, epochs, batch_size=32):
        DataUtils.message("Training...", new=True)
        self.model.fit([
            self.word_train[0][0], self.word_train[0][1], self.tag_train[0][0],
            self.tag_train[0][1], self.word_train[1][0], self.word_train[1][1],
            self.tag_train[1][0], self.tag_train[1][1]
        ],
                       self.head_train,
                       epochs=epochs,
                       batch_size=batch_size)
Example #18
	def test(self, classifier, model=None):
		du = DataUtils()
		X_test, y_test = du.data_preprocess('test')
		pred = self.predict(X_test, classifier, model)
		y_pred = np.zeros(len(pred), dtype=int)
		y_pred[pred[:, 1] > pred[:, 0]] = 1
		score = metrics.accuracy_score(y_test[:, 1], y_pred)
		logger_tc.info('test accuracy: %.3f' % score)
		with h5py.File(self.model_output_path, 'a') as model_output:
			if '%s_test_pred' % classifier not in model_output:
				model_output.create_dataset('%s_test_pred' % classifier, data=pred)
Example #19
    def __init__(self, reports_directory, src_server, src_index, src_type):
        self.data_loader_utils_dest = DataLoaderUtils(src_server, src_index, src_type)
        self.reports_directory = reports_directory

        self.src_server = src_server
        self.src_index = src_index
        self.src_type = src_type

        self.delete_tags = True
        self.delete_annotations = True

        self.data_utils = DataUtils()
Example #20
def normal_experiment(args):
    test_size = 0.3
    du = DataUtils(args.name)
    pos, neg, bk, clauses, lang = du.load_data()
    pos_train, pos_test = train_test_split(pos,
                                           test_size=test_size,
                                           random_state=7014)
    neg_train, neg_test = train_test_split(neg,
                                           test_size=test_size,
                                           random_state=7014)

    pos_train_, neg_train_ = get_dataset_with_noise(pos_train,
                                                    neg_train,
                                                    noise_rate=args.noise_rate)

    if args.name == 'member':
        beam_step = 3
        N_beam = 3
    elif args.name == 'subtree':
        beam_step = 3
        N_beam = 15
    else:
        beam_step = 5
        N_beam = 10
    N_max = 50
    N = 1

    ilp_train = ILPProblem(pos_train_, neg_train_, bk, lang, name=args.name)
    ilp_train.print()
    CG = ClauseGenerator(ilp_train,
                         infer_step=args.T,
                         max_depth=1,
                         max_body_len=1)
    solver = ILPSolver(ilp_train,
                       C_0=clauses,
                       CG=CG,
                       m=args.m,
                       infer_step=args.T)
    clauses_, Ws_list, loss_list_list = solver.train_N(N=N,
                                                       gen_mode='beam',
                                                       N_max=N_max,
                                                       T_beam=beam_step,
                                                       N_beam=N_beam,
                                                       epoch=args.epoch,
                                                       lr=args.lr,
                                                       wd=0.0)
    v_list, facts = solver.predict_N(pos_test, neg_test, clauses_, Ws_list)
    mse = compute_mse(pos_test, neg_test, v_list[0], facts)
    auc = compute_auc(pos_test, neg_test, v_list[0], facts)

    print('====== TEST SCORE =======')
    print('Mean-squared test error: ', mse)
    print('AUC: ', auc)
Example #21
    def __create_xy_test(self,
                         tag_file,
                         embedding_file,
                         data_size=1,
                         look_back=5,
                         suffix=None):
        x_test = []
        y_test = []

        corpus = DataUtils.load_corpus(tag_file)
        tag_emb = DataUtils.create_onehot_vectors(
            DataUtils.extract_tag_list(corpus))
        word_emb = DataUtils.load_embeddings(embedding_file)
        if suffix is not None:
            word_emb = DataUtils.add_suffix_embeddings(word_emb, suffix[0],
                                                       suffix[1])

        words, tags = DataUtils.extract_data(corpus)
        word_keys = DataUtils.normalize_cases(word_emb.keys(), words)

        data_size = int(len(words) * min(data_size, 1))
        data_size -= data_size % look_back

        for idx in np.arange(0, data_size, look_back):
            x_timestep = []
            y_timestep = []

            for jdx in range(look_back):
                word_input = word_emb.get(word_keys[idx + jdx],
                                          word_emb["UNK"])
                tag_input = tag_emb[tags[idx + jdx]]

                if jdx == 0:
                    x_timestep = [word_input]
                    y_timestep = [tag_input]
                else:
                    x_timestep = np.append(x_timestep, [word_input], axis=0)
                    y_timestep = np.append(y_timestep, [tag_input], axis=0)

                x_timestep = np.array(x_timestep)
                y_timestep = np.array(y_timestep)

            if idx == 0:
                x_test = [x_timestep]
                y_test = [y_timestep]
            else:
                x_test = np.append(x_test, [x_timestep], axis=0)
                y_test = np.append(y_test, [y_timestep], axis=0)

            # Guard against ZeroDivisionError for small data_size values
            progress_step = max(1, data_size // (10 * look_back))
            if idx % progress_step == 0:
                DataUtils.update_message(str(int(idx / data_size * 100)))

        x_test = np.array(x_test)
        y_test = np.array(y_test)

        return x_test, y_test
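np.append copies the whole array on every call, so the loop above is quadratic in data_size. A sketch of the standard fix, accumulating plain lists and converting once at the end (same output, assuming the same corpus helpers as above):

x_test, y_test = [], []
for idx in np.arange(0, data_size, look_back):
    x_test.append([word_emb.get(word_keys[idx + jdx], word_emb["UNK"])
                   for jdx in range(look_back)])
    y_test.append([tag_emb[tags[idx + jdx]] for jdx in range(look_back)])
x_test, y_test = np.array(x_test), np.array(y_test)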
Example #22
    def get_all_data(batch_size, sentence_len, word2idx, label2idx, fold_num):
        utils = DataUtils(batch_size=batch_size,
                          sentence_len=sentence_len,
                          word2idx=word2idx,
                          label2idx=label2idx)

        # development set
        develop_sentences, develop_labels = utils.get_train_data(
            "./data/", mode='develop_')
        develop_idx_x_batches, develop_y_batches, develop_word_len_batches = utils.encoder_data2idx_batch(
            develop_sentences, develop_labels)

        # test set
        test_sentences, test_labels = utils.get_train_data("./data/",
                                                           mode='test_')
        test_idx_x_batches, test_y_batches, test_word_len_batches = utils.encoder_data2idx_batch(
            test_sentences, test_labels)
        # training set
        train_sentences, train_labels = utils.get_train_data("./data/",
                                                             mode='train_')
        # k-fold split of the training set
        k_fold_x_train, k_fold_y_train, k_fold_x_test, k_fold_y_test = DataUtils.k_fold(
            train_sentences, train_labels, fold_num)
        # the k_ prefix denotes data split out of the training set
        k_train_idx_x_batches_list, k_train_y_batches_list, k_train_word_len_batches_list = [], [], []
        k_develop_idx_x_batches_list, k_develop_y_batches_list, k_develop_word_len_batches_list = [], [], []

        if fold_num != 1:
            for fold_idx in range(fold_num):
                k_train_idx_x_batches, k_train_y_batches, k_train_word_len_batches = utils.encoder_data2idx_batch(
                    k_fold_x_train[fold_idx], k_fold_y_train[fold_idx])
                k_train_idx_x_batches_list.append(k_train_idx_x_batches)
                k_train_y_batches_list.append(k_train_y_batches)
                k_train_word_len_batches_list.append(k_train_word_len_batches)

                k_develop_idx_x_batches, k_develop_y_batches, k_develop_word_len_batches = utils.encoder_data2idx_batch(
                    k_fold_x_test[fold_idx], k_fold_y_test[fold_idx])
                k_develop_idx_x_batches_list.append(k_develop_idx_x_batches)
                k_develop_y_batches_list.append(k_develop_y_batches)
                k_develop_word_len_batches_list.append(
                    k_develop_word_len_batches)
        else:
            k_train_idx_x_batches, k_train_y_batches, k_train_word_len_batches = utils.encoder_data2idx_batch(
                k_fold_x_train[0], k_fold_y_train[0])
            k_train_idx_x_batches_list.append(k_train_idx_x_batches)
            k_train_y_batches_list.append(k_train_y_batches)
            k_train_word_len_batches_list.append(k_train_word_len_batches)
        return k_train_idx_x_batches_list, k_train_y_batches_list, k_train_word_len_batches_list, \
               k_develop_idx_x_batches_list, k_develop_y_batches_list, k_develop_word_len_batches_list, \
               develop_idx_x_batches, develop_y_batches, develop_word_len_batches, \
               test_idx_x_batches, test_y_batches, test_word_len_batches,
Example #23
def export_doc_ids(server, src_index, src_type, query=None):
    print(__name__, 'Fetching doc ids for', server, src_index, src_type)
    if query is None:
        query = {
            "match_all": {}
        }

    data_utils = DataUtils()
    ids = data_utils.batch_fetch_ids_for_query(base_url=server, index=src_index, type=src_type, query=query)

    documents_ids = dict.fromkeys(ids, None)
    print(__name__, 'Done, fetched', len(documents_ids), 'doc ids')

    return documents_ids
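A hypothetical call; the server URL and the index/type names are placeholders:

doc_ids = export_doc_ids('http://localhost:9200', 'reports', 'report')
print(len(doc_ids), 'unique ids')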
Example #24
    def __init__(self, config=None):
        """
        Read model parameters from the configuration.
        """
        # Avoid evaluating config_reader() at definition time
        if config is None:
            config = config_reader()
        self.rnn_mode = config['rnn_mode']
        self.batch_size = config['batch_size']
        self.embedding_dim = config['embedding_dim']
        self.num_layers = config['num_layers']
        self.num_units = config['num_utils']
        self.FCNN_num_units = config['FCNN_num_units']
        self.learning_rate = config['learning_rate']
        self.max_epoch = config['max_epoch']
        self.keep_prob = config['keep_prob']
        self.model_path = config['model_path']
        self.logs_file = config['logs_file']
        self.end_loss = config['end_loss']
        self.save_model_name = config['save_model_name']
        self.print_step = config['print_step']
        self.save_epoch = config['save_epoch']

        self.data_utils = DataUtils()
        self.vocab = self.data_utils.vocab
        self.chunk_size = self.data_utils.chunk_size
        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        self.increment_global_step_op = tf.assign(self.global_step,
                                                  self.global_step + 1)
Example #25
    def parse_date(self, test_string):
        test_string = DataUtils.remove_excess_spaces(test_string)
        # First, try to parse the date according to the specified format
        parsed_date = self.parse_date_string(test_string)
        if parsed_date is not None:
            return parsed_date, parsed_date
        try:
            # If that fails, try to parse the date as a date range string
            return daterangeparser.parse(test_string)

        except pyparsing.ParseException:
            # If that fails, it may be a date range in a format that
            # daterangeparser doesn't recognize. Grow substrings from the
            # beginning and end of the string until both parse as dates.
            test_start = len(test_string) - 1
            test_end = 0
            start = None
            end = None
            while test_end < len(test_string):
                if start is None:
                    start = self.parse_date_string(test_string[0:test_end])
                if end is None:
                    end = self.parse_date_string(
                        test_string[test_start:len(test_string)])

                if start is not None and end is not None:
                    break

                test_start -= 1
                test_end += 1

            if start is None or end is None:
                raise ValueError('Could not parse date string: ' + test_string)

            return start, end
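A sketch of the three paths through parse_date; the DateParser owner class and the date formats below are assumptions for illustration only:

parser = DateParser()  # hypothetical class providing parse_date / parse_date_string
parser.parse_date('12 March 2020')                   # format match: (date, date)
parser.parse_date('12 March 2020 to 15 March 2020')  # handled by daterangeparser
parser.parse_date('2020-03-12 / 2020-03-15')         # fallback substring scan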
Example #26
    def get_experiment_feedback(session_id, robot_id):
        global data_thread
        experiment_ongoing = True
        feedback_received = False
        black_box_id = BBUtils.get_bb_id(robot_id)
        robot_smart_wheel_count = config.get_robot_smart_wheel_count(robot_id)
        diagnostic_vars = DataUtils.expand_var_names(
            experiment_diagnostic_vars, robot_smart_wheel_count)

        zyre_communicator.reset_experiment_feedback(robot_id)
        while experiment_ongoing:
            feedback_msg = zyre_communicator.get_experiment_feedback(robot_id)
            if feedback_msg and feedback_msg['robot_id'] == robot_id:
                feedback_received = True
            experiment_ongoing = send_experiment_feedback(
                robot_id, feedback_msg, feedback_received)

            if experiment_ongoing:
                with data_thread_lock:
                    if not data_thread:
                        data_thread = threading.Thread(
                            target=send_diagnostic_data,
                            kwargs={
                                'session_id': session_id,
                                'black_box_id': black_box_id,
                                'diagnostic_vars': diagnostic_vars
                            })
                        data_thread.start()

        global feedback_thread
        feedback_thread = None
Example #27
    def get_download_query():
        '''Responds to a data download query by sending a query to the appropriate
        black box and then saving the data to a temporary file for download.
        '''
        robot_id = request.args.get('robot_id', '', type=str)
        black_box_id = BBUtils.get_bb_id(robot_id)
        variable_list = request.args.get('variables').split(',')
        start_query_time = request.args.get('start_timestamp')
        end_query_time = request.args.get('end_timestamp')

        query_msg = DataUtils.get_bb_query_msg(session['uid'].hex,
                                               black_box_id, variable_list,
                                               start_query_time,
                                               end_query_time)
        query_result = zyre_communicator.get_query_data(query_msg)

        message = ''
        try:
            with open(query_result_file_path, 'w') as download_file:
                json.dump(query_result, download_file)
            return jsonify(success=True)
        except Exception as exc:
            print('[get_download_query_robot_data] %s' % str(exc))
            message = 'Data could not be retrieved'
            return jsonify(message=message)
Example #28
    def load_data(self):
        self.du = DataUtils(self.config.training_file,
                            self.config.testing_file, self.config.batch_size)
        self.X_train = self.du.train_images
        self.y_train = self.du.train_labels
        self.X_val = self.du.val_images
        self.y_val = self.du.val_labels
        self.X_test = self.du.test_images
        self.y_test = self.du.test_labels
Example #29
def main(path, graphics):

    t = DataUtils(path)
    train, test = t.train, t.test
    for _t in train + test:
        inp, out = _t['input'], _t['output']
        inp, out = np.asarray(inp), np.asarray(out)
        output_array = solve(inp, out, graphics)
        print(output_array)
Example #30
    def create(self):
        DataUtils.message("Creating The Model...", new=True)
        word_input_forward = Input(shape=(self.look_back, 300))
        word_input_backward = Input(shape=(self.look_back, 300))

        tag_input_forward = Input(shape=(self.look_back,))
        tag_input_backward = Input(shape=(self.look_back,))

        tag_emb = Embedding(self.distinct_tags,
                            30,
                            input_length=self.look_back,
                            trainable=True)
        tag_input_forward_output = tag_emb(tag_input_forward)
        tag_input_backward_output = tag_emb(tag_input_backward)

        input_forward = Concatenate()([word_input_forward,
                                       tag_input_forward_output])
        input_backward = Concatenate()([word_input_backward,
                                        tag_input_backward_output])

        word_head_forward = Input(shape=(self.look_back, 300))
        word_head_backward = Input(shape=(self.look_back, 300))

        tag_head_forward = Input(shape=(self.look_back,))
        tag_head_backward = Input(shape=(self.look_back,))

        tag_head_forward_output = tag_emb(tag_head_forward)
        tag_head_backward_output = tag_emb(tag_head_backward)

        head_forward = Concatenate()([word_head_forward,
                                      tag_head_forward_output])
        head_backward = Concatenate()([word_head_backward,
                                       tag_head_backward_output])

        bilstm = BiLSTM(300)

        bilstm_input = bilstm([input_forward, input_backward])
        dense_input = Dense(600, activation="linear")(bilstm_input)

        bilstm_head = bilstm([head_forward, head_backward])
        dense_head = Dense(600, activation="linear")(bilstm_head)

        sum_dense = Add()([dense_input, dense_head])

        dense_tanh = Dense(600, activation="tanh")(sum_dense)
        # A softmax over a single unit is constant 1.0; with binary_crossentropy
        # the output layer should be a sigmoid.
        output = Dense(1, activation="sigmoid")(dense_tanh)

        model = Model(inputs=[
            word_input_forward, word_input_backward, tag_input_forward,
            tag_input_backward, word_head_forward, word_head_backward,
            tag_head_forward, tag_head_backward
        ],
                      outputs=output)
        model.compile(loss='binary_crossentropy',
                      optimizer="adam",
                      metrics=['accuracy'])
        self.model = model
__author__ = 'guoliangwang'

from data_utils import DataUtils
from sklearn.preprocessing import StandardScaler
import numpy as np

parkinson_features = "MDVP_Fo.Hz.,MDVP_Fhi.Hz.,MDVP_Flo.Hz.,MDVP_Jitter...,MDVP_Jitter.Abs.,MDVP_RAP,MDVP_PPQ,Jitter_DDP,MDVP_Shimmer,MDVP_Shimmer.dB.,Shimmer_APQ3,Shimmer_APQ5,MDVP_APQ,Shimmer_DDA,NHR,HNR,RPDE,DFA,spread1,spread2,D2,PPE"
parkinson_data_util = DataUtils(parkinson_features, "data_sets", "parkinson_clean.csv", "parkinson_testing.csv")
parkinson_clean_inputs = parkinson_data_util.training_inputs()
print('clean_inputs: ', parkinson_clean_inputs)

stdsc = StandardScaler()
clean_standard_inputs = stdsc.fit_transform(parkinson_clean_inputs)

# write the standardized data to csv
np.savetxt("parkinson_clean_standard_data_python.csv", clean_standard_inputs, delimiter=",")

# print("clean stadard inputs: ", clean_standard_inputs.values)
import numpy as np
import timeit
import seaborn as sb
from plot_utils import PlotUtils
from data_utils import DataUtils
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

from sklearn.tree import DecisionTreeClassifier
import sklearn.tree as tree

wisconsin_features = "Clump.Thickness,Uniformity.of.Cell.Size,Uniformity.of.Cell.Shape,Marginal.Adhesion,Single.Epithelial.Cell.Size,Bare.Nuceoli,Bland.Chromatin,Normal.Nucleoli,Mitoses"
wisconsin_data_util = DataUtils(wisconsin_features, "data_sets", "wisconsin_training.csv", "wisconsin_testing.csv")
wisconsin_training_inputs = wisconsin_data_util.training_inputs()
wisconsin_training_classes = wisconsin_data_util.training_classes()
wisconsin_testing_inputs = wisconsin_data_util.testing_inputs()
wisconsin_testing_classes = wisconsin_data_util.testing_classes()

cross_validation = StratifiedKFold(n_splits=5)
plot_utils = PlotUtils()


## Decision Tree below
decision_tree_classifier = DecisionTreeClassifier(random_state=0)

plot_tree = DecisionTreeClassifier(random_state=0, max_depth=6, max_features=1)
cv_scores = cross_val_score(plot_tree, wisconsin_training_inputs, wisconsin_training_classes, cv=5)
# sb.distplot(cv_scores)
import numpy as np
import seaborn as sb
from plot_utils import PlotUtils
from data_utils import DataUtils
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

from sklearn.tree import DecisionTreeClassifier
import sklearn.tree as tree

decision_tree_classifier = DecisionTreeClassifier(random_state=0)

parkinson_features = "MDVP_Fo.Hz.,MDVP_Fhi.Hz.,MDVP_Flo.Hz.,MDVP_Jitter...,MDVP_Jitter.Abs.,MDVP_RAP,MDVP_PPQ,Jitter_DDP,MDVP_Shimmer,MDVP_Shimmer.dB.,Shimmer_APQ3,Shimmer_APQ5,MDVP_APQ,Shimmer_DDA,NHR,HNR,RPDE,DFA,spread1,spread2,D2,PPE"
parkinson_data_util = DataUtils(parkinson_features, "data_sets", "parkinson_clean_normal_training.csv", "parkinson_clean_normal_testing.csv")
parkinson_training_inputs = parkinson_data_util.training_inputs()
parkinson_training_classes = parkinson_data_util.training_classes()
parkinson_testing_inputs = parkinson_data_util.testing_inputs()
parkinson_testing_classes = parkinson_data_util.testing_classes()

## plot for data distribution
print("mean: ", np.mean(parkinson_training_inputs))

# decision_tree_classifier.fit(parkinson_training_inputs, parkinson_training_classes)
# score1 = decision_tree_classifier.score(parkinson_testing_inputs, parkinson_testing_classes)
# print("score1: ", score1)


cross_validation = StratifiedKFold(n_splits=5)
plot_utils = PlotUtils()