Example No. 1
 parser.add_argument("--json_data",
                     default='../data/EventDump_10Ktracks.json',
                     type=str,
                     help="Json data path")
 parser.add_argument("--npy_data",
                     default='../data/ET_muons_10K_0000.npy',
                     type=str,
                     help="NPY data")
 args = parser.parse_args()
 np.random.seed(1234)
 data = np.load(args.npy_data)
 BD = BatchNpyData2(data)
config = TestConfig()
m = testrnn(config)

# Sample a large batch for fitting plus a test batch; the test batch is
# pre-processed with the max_data reference computed from the fitting batch.
data, filtered_data, rand_int = BD.sample_batch(m.config.MaxNumSteps, 1000)
data, max_data = pre_process(data)
filtered_data, _ = pre_process(filtered_data, max_data)

test, filtered_test_data, rand_int = BD.sample_batch(
    m.config.MaxNumSteps, m.config.batch_size)
test, _ = pre_process(test, max_data)
filtered_test_data, _ = pre_process(filtered_test_data, max_data)
cost_lst = []
tf_config = tf.ConfigProto()
tf_config.gpu_options.allow_growth = True  # grow GPU memory as needed
with tf.Session(config=tf_config) as sess:
    tf.set_random_seed(1234)
    # tf.summary.FileWriter and tf.global_variables_initializer are the TF 1.x
    # names for the removed SummaryWriter / initialize_all_variables.
    summary_writer = tf.summary.FileWriter('Logs/')
    sess.run(tf.global_variables_initializer())
    for ii in range(args.niter):
        m.assign_lr(sess, config.learning_rate)
        #ind,data = m.generate_data(m.config.batch_size,2)
Example No. 2
    rank_improv_1_MAP = MAP()
    rank_improv_2_MAP = MAP()

    for file in files:

        if file == '.DS_Store':  # skip the macOS Finder metadata file
            continue

        source_file = open(TeMario_originals + file,
                           'r',
                           encoding='iso-8859-1')
        target_file = open(TeMario_summaries + 'Ext-' + file,
                           'r',
                           encoding='iso-8859-1')

        source = pre_process(source_file.read())
        target = pre_process(target_file.read())

        source_file.close()
        target_file.close()

        sents = filter_list(sent_tokenize(source))

        vectorizer = CustomVectorizer()

        vectorizer.fit(sents)

        vecs = vectorizer.transform_tfidf(sents)
        source_score = vectorizer.transform_tfidf([source])[0]

        graph = defaultdict(list)
Example No. 3
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Arguments for fitting")
    parser.add_argument("--niter",default=10,type=int,help="Number of iterations")
    parser.add_argument("--json_data",default='../data/EventDump_10Ktracks.json',type=str,help="Json data path")
    parser.add_argument("--npy_data",default='../data/ET_muons_10K_0000.npy',type=str,help="NPY data")
    args = parser.parse_args()
    rnd = np.random.RandomState(0)
    data = np.load(args.npy_data)
    BD = BatchNpyData(data)
    n_states = 5
    n_dim_obs = 3
    MaxNumSteps = 10
    batch_size = 200
    test, rand_int = BD.sample_batch(MaxNumSteps, batch_size)
    test, _ = pre_process(test)

    # create a Kalman Filter by hinting at the size of the state and observation
    # space.  If you already have good guesses for the initial parameters, put them
    # in here.  The Kalman Filter will try to learn the values of all variables.
    #kf = KalmanFilter(transition_matrices=np.array([[1, 1], [0, 1]]),
    #                  transition_covariance=0.01 * np.eye(2),
    #                  em_vars=['transition_matrices','transition_covariance',
    #                  'observation_matrices', 'observation_covariance',
    #                  'observation_offsets','transition_offsets'])
    trans_mat_init = np.array([[1, 0, init(), init(), init()],
                               [0, 1, init(), init(), init()],
                               [init(), init(), 1, 0, 0],
                               [init(), init(), 0, 1, 0],
                               [init(), init(), 0, 0, 1]])
    observation_mat = np.array([[1,0,0,0,0],
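
    # A minimal sketch, assuming pykalman's KalmanFilter (the library implied by
    # the commented call above): matrices like trans_mat_init can seed an EM fit
    # as the comment block describes.  Shapes here are an assumption: test[0] is
    # taken to be one (MaxNumSteps, 3) observation sequence, and the
    # observation_mat being built above would be passed as observation_matrices.
    from pykalman import KalmanFilter  # assumed import, matching the commented call
    kf = KalmanFilter(transition_matrices=trans_mat_init,
                      n_dim_obs=n_dim_obs,
                      em_vars=['transition_matrices', 'transition_covariance',
                               'observation_matrices', 'observation_covariance'])
    kf = kf.em(test[0], n_iter=5)            # learn the listed parameters by EM
    smoothed_means, _ = kf.smooth(test[0])   # smoothed 5-dim state estimates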
Example No. 4
            break

    rank = {k : round(rank[k], 6) for k in rank.keys()}  # the rounding precision influences convergence testing

    return rank, i
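
# The similarity() call used in the main block below is assumed to be cosine
# similarity between tf-idf sentence vectors; the helper here is an illustrative
# sketch under that assumption, not necessarily the project's own definition.
def cosine_similarity_sketch(u, v):
    dot = sum(a * b for a, b in zip(u, v))
    norm_u = sum(a * a for a in u) ** 0.5
    norm_v = sum(b * b for b in v) ** 0.5
    return dot / (norm_u * norm_v) if norm_u and norm_v else 0.0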


if __name__ == '__main__':

    print('\nTesting adapted PageRank algorithm for sentence ranking and consequent text summarization.\n' +
          'A graph is built linking sentences with similarity bigger than a certain threshold.\n' +
          'This method is tested and evaluated on the "catalunha.txt" file, with a 0.1 threshold.\n')

    file = open('catalunha.txt', encoding='utf-8')

    source = pre_process(file.read())
    sents  = filter_list(sent_tokenize(source))

    file.close()

    vectorizer = CustomVectorizer(stopwords=stopwords.words())

    vectorizer.fit(sents)  # -> fit on sentences or on whole text?
    vecs = vectorizer.transform_tfidf(sents)

    graph = {i: [] for i in range(len(vecs))}

    threshold = 0.1
    for i in range(len(vecs)):
        for j in range(i+1, len(vecs)):
            if similarity(vecs[i], vecs[j]) > threshold: