def to_bow(train_path,test_path):
    """Turn a train/test pair of sequence files into feature/label tuples.

    Both files are loaded with ``seq.create_dataset``. A single dictionary is
    built by ``create_dict`` from the concatenation of the train and test
    instances, and that same dictionary is passed to ``create_vectors`` for
    each split.

    Args:
        train_path: Path handed to ``seq.create_dataset`` for the train split.
        test_path: Path handed to ``seq.create_dataset`` for the test split.

    Returns:
        ``(train_data, test_data)`` where each element is a tuple
        ``(features, labels)``; ``features`` comes from ``create_vectors``
        and ``labels`` from the dataset's ``get_labels()``.
    """
    train_set = seq.create_dataset(train_path)
    test_set = seq.create_dataset(test_path)
    # One dictionary shared by both splits, built from all instances.
    shared_dict = create_dict(train_set.instances + test_set.instances)
    # Features are computed for train first, then test, before labels are read.
    train_feats, test_feats = [
        create_vectors(split, shared_dict) for split in (train_set, test_set)
    ]
    train_data = (train_feats, train_set.get_labels())
    test_data = (test_feats, test_set.get_labels())
    return train_data, test_data
def unify(a_path,b_path,out_path):
    """Combine the bag-of-words vectors of two datasets and save the result.

    Each dataset is loaded with ``seq.create_dataset`` and converted with
    ``bow.compute_bow``. The two vector sequences are walked in lockstep and
    every pair is joined with ``+``. Labels and persons are taken from the
    first dataset only.

    Args:
        a_path: Path of the first sequence file.
        b_path: Path of the second sequence file.
        out_path: Destination passed to ``utils.to_labeled_file``.

    Returns:
        ``(united_vec, persons, labels)``.
    """
    first, second = (seq.create_dataset(path) for path in (a_path, b_path))
    first_vecs = bow.compute_bow(first)
    second_vecs = bow.compute_bow(second)
    united_vec = []
    # zip stops at the shorter of the two sequences.
    for left, right in zip(first_vecs, second_vecs):
        united_vec.append(left + right)
    labels = first.get_labels()
    persons = first.get_persons()
    utils.to_labeled_file(out_path, united_vec, labels)
    return united_vec, persons, labels
def basic_bow(in_path,_bow=None):
    """Compute bag-of-words vectors for a sequence file.

    The dataset is loaded with ``seq.create_dataset``. When no ``_bow`` is
    supplied, ``bow.compute_bow(dataset, True)`` is called and its second
    return value is handed back to the caller; otherwise the supplied
    ``_bow`` is applied with ``bow.apply_bow``.

    Args:
        in_path: Path handed to ``seq.create_dataset``.
        _bow: Optional object accepted by ``bow.apply_bow``. ``None`` means
            "compute a new one from this dataset".

    Returns:
        If ``_bow`` is ``None``: ``(_bow, (vectors, labels))``.
        Otherwise: ``(vectors, labels)``.
        In both cases ``vectors`` is a NumPy array and ``labels`` is the
        result of the dataset's ``get_labels()``. Note that the two cases
        return differently shaped tuples; this is kept for existing callers.
    """
    dataset = seq.create_dataset(in_path)
    labels = dataset.get_labels()
    # Identity check rather than `== None`: equality would dispatch to the
    # argument's __eq__, which for array-like objects is element-wise and
    # makes the `if` raise on an ambiguous truth value.
    if _bow is None:
        vectors, _bow = bow.compute_bow(dataset, True)
        return _bow, (np.array(vectors), labels)
    vectors = bow.apply_bow(dataset, _bow)
    return np.array(vectors), labels
def get_unlabeled_vectors(in_path,compute_features,suffix=".csv"):
    """Compute feature vectors for a sequence file and write them as CSV.

    The output path is ``in_path`` with every occurrence of ``".seq"``
    replaced by ``suffix``.

    Args:
        in_path: Path handed to ``seq.create_dataset``.
        compute_features: Callable taking the loaded dataset and returning
            the vectors to write.
        suffix: Replacement for ``".seq"`` in the output path.

    Returns:
        None. The result is written through ``utils.to_csv_file``.
    """
    out_path = in_path.replace(".seq", suffix)
    dataset = seq.create_dataset(in_path)
    # `labels` was previously referenced without ever being bound in this
    # function, so the call below raised NameError. Bind it from the dataset
    # so the three-argument call works as written.
    # NOTE(review): if utils.to_csv_file only takes (path, vectors), drop the
    # third argument instead - confirm against utils.
    labels = dataset.get_labels()
    vectors = compute_features(dataset)
    utils.to_csv_file(out_path, vectors, labels)
def get_labeled_vectors(in_path,compute_features,suffix=".lb"):
    """Compute feature vectors for a sequence file and save them with labels.

    The destination is ``in_path`` with every occurrence of ``".seq"``
    replaced by ``suffix``.

    Args:
        in_path: Path handed to ``seq.create_dataset``.
        compute_features: Callable taking the loaded dataset and returning
            the vectors to write.
        suffix: Replacement for ``".seq"`` in the output path.

    Returns:
        None. The result is written through ``utils.to_labeled_file``.
    """
    target = in_path.replace(".seq", suffix)
    data = seq.create_dataset(in_path)
    # Labels are read before the features are computed.
    data_labels = data.get_labels()
    utils.to_labeled_file(target, compute_features(data), data_labels)