示例#1
0
def prepare_data(load_test_data=False):
	"""Vectorize the training questions with TF-IDF and return model inputs.

	The vectorizer is fitted on the ``question_text`` column of the data
	loaded from ``TRAIN_PATH``; the labels come from its ``target`` column.

	Args:
		load_test_data: When true, also load ``TEST_PATH`` and transform its
			``question_text`` column with the vectorizer fitted on the
			training text (the test text is never used for fitting).

	Returns:
		If ``load_test_data`` is true, the 4-tuple
		``(train_features, test_features, train_targets, test_data)``.
		Otherwise the result of ``train_test_split`` applied to the training
		features and targets with ``test_size=0.2`` and ``shuffle=False``.
	"""
	pd_data = load_data(TRAIN_PATH)

	# "english" must be given as ``stop_words``: passed positionally it binds
	# to the ``input`` parameter (or is rejected outright on scikit-learn
	# versions whose constructor arguments are keyword-only), so the
	# stop-word list would never be applied.
	vector = TfidfVectorizer(stop_words="english")
	# Alternative that was tried here: CountVectorizer()

	# astype('U') coerces every entry to a unicode string before vectorizing.
	feature_matrics = vector.fit_transform(pd_data['question_text'].values.astype('U'))
	print('prepped train')

	if load_test_data:
		test_data = load_data(TEST_PATH)
		test_feature_matrics = vector.transform(test_data['question_text'].values.astype('U'))
		return feature_matrics, test_feature_matrics, pd_data['target'], test_data

	# shuffle=False means pick the last 20% as dev data set.
	return train_test_split(feature_matrics, pd_data['target'], test_size=0.2, shuffle=False)
 def read(self):
     """Load each configured circuit and wrap it in a ``Graph``.

     The directory is ``self.path``, falling back to ``"../data/output"``
     when that attribute is falsy.  The circuit names are ``self.circs``,
     falling back to an empty list when that attribute is falsy.

     For every name, ``load_data(name, path, normalize="")`` is unpacked
     into three values ``A, X, labels``; a ``Graph`` is built from
     ``x=X.toarray()``, ``a=A`` and ``y=labels``, and a line with the name,
     ``sum(labels)`` and ``len(labels)`` is printed.

     Returns:
         The list of ``Graph`` objects in the order of the circuit names;
         empty when no circuits are configured.
     """
     # Circuit names that used to be hard-coded here, kept for reference:
     #   c6288, c5315, c432, c499, c880, c1355, c1908, c3540,
     #   adder.bench, arbiter.bench, cavlc.bench, dec.bench,
     #   voter.bench, sin.bench, priority.bench
     path = self.path
     if not path:
         path = "../data/output"

     names = self.circs
     if not names:
         names = []

     circuits = []
     for name in names:
         a_mat, x_mat, y_labels = load_data(name, path, normalize="")
         graph = Graph(x=x_mat.toarray(), a=a_mat, y=y_labels)
         circuits.append(graph)
         print(f"{name}: {sum(y_labels)}, {len(y_labels)}")
     return circuits
示例#3
0
def get_data():
    """Return the result of ``load_data`` called on ``TRAIN_PATH``."""
    train_data = load_data(TRAIN_PATH)
    return train_data