def test_net_keras_predict(modelname):
    """Fit the configured Keras model on a synthetic dataset and check that
    each export method predicts equivalently to the in-memory model.

    Fixes vs. original: the loop variable was named ``random`` (shadowing the
    stdlib module name), ``X_test`` was sliced to 3 rows and then redundantly
    sliced again, and a pointless ``class_names = None`` local was inlined.
    """
    model, params = KERAS_MODELS[modelname]
    # Repeat so a single lucky initialization cannot hide a discrepancy.
    for trial in range(3):
        rng = numpy.random.RandomState(0)
        X, y = make_classification(
            n_features=params['features'],
            n_classes=params['classes'],
            n_redundant=0,
            n_informative=params['features'],
            random_state=rng,
            n_clusters_per_class=1,
            n_samples=50,
        )
        X += 2 * rng.uniform(size=X.shape)
        X = StandardScaler().fit_transform(X)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)
        if params['classes'] != 2:
            # Multiclass output layers need one-hot targets.
            y_train = MultiLabelBinarizer(classes=None).fit_transform(
                y_train.reshape(-1, 1))
        model.fit(X_train, y_train, epochs=1, batch_size=10)
        # Check each method here instead of via test parameters to save
        # time; the fit above is the slow part.
        X_test = X_test[:3]
        assert_equivalent(model, X_test, params['classes'], method='pymodule')
        assert_equivalent(model, X_test, params['classes'], method='loadable')
def load_cora():
    """Load the pickled Cora dataset.

    Returns a ``(graph, features, labels)`` triple, where ``labels`` is the
    stored integer label vector one-hot encoded into an indicator matrix.
    """
    with open(dataloc + 'cora.data', 'rb') as fh:
        payload = p.load(fh)
    graph = payload['NXGraph']
    features = payload['CSRFeatures']
    raw_labels = payload['Labels']
    # Number format: turn the integer label vector into a one-hot matrix.
    labels = MultiLabelBinarizer().fit_transform(
        raw_labels.reshape(raw_labels.shape[0], 1))
    return graph, features, labels
def run(name, source, destination, split):
    """Build a pickled training set from a directory of DICOM images.

    ``source`` holds one subdirectory per class; every image is read, resized
    to 227x227, labeled by its class index, shuffled, and dumped to
    ``destination/<name>.pat1`` as a dict with 'data', 'labels' (one-hot) and
    'imgname' arrays.

    NOTE(review): ``split`` was only used to compute an unused per-class
    ratio — every image currently lands in the training set; confirm whether
    a train/test split was intended.
    NOTE(review): if ``destination`` does not exist the function silently
    does nothing; consider creating it or raising instead.

    Fixes vs. original: removed unused locals (``n``, ``l``, ``x_test``,
    ``y_test``, ``test_imgname``, ``ratio``).
    """
    x_train = []
    y_train = []
    train_imgname = []
    class_names = os.listdir(source)
    class_names.sort()
    print(len(class_names))
    name_to_label = {class_names[i]: i for i in range(len(class_names))}
    print(name_to_label)
    if os.path.exists(destination):
        for cls in class_names:
            images = os.listdir(source + "/" + cls)
            count = 0
            for fname in images:
                src = source + "/" + cls + "/" + fname
                img = dicom.read_file(src).pixel_array
                img = imresize(img, (227, 227))
                x_train.append(img)
                y_train.append(name_to_label[cls])
                train_imgname.append(fname)
                count += 1
            print(count)
            print(cls + " included in training.")
        # Shuffle images, labels and filenames together so they stay aligned.
        train = list(zip(x_train, y_train, train_imgname))
        random.shuffle(train)
        x_train, y_train, train_imgname = zip(*train)
        x_train = np.array(x_train)
        # One-hot encode the integer class labels.
        y_tr = np.array(y_train)
        y_tr = MultiLabelBinarizer().fit_transform(y_tr.reshape(-1, 1))
        print(y_tr)
        train_imgname = np.array(train_imgname)
        d_train = {
            'data': x_train,
            'labels': y_tr,
            'imgname': train_imgname,
        }
        with open(destination + '/' + name + '.pat1', 'wb') as f:
            pickle.dump(d_train, f)
def get_labels(self):
    """Return this graph's label array from the pickled data file.

    Returns ``None`` (after printing a notice) when the dataset declares no
    ``labels`` key. When ``self.convert_labels`` is the string 'True', the
    labels are one-hot encoded before being returned.
    """
    try:
        key = self.labels
    except AttributeError:
        print("{} doesn't have labels.".format(self.graph_name))
        return None
    with open(self.dataloc + self.graph_file, 'rb') as fh:
        payload = pickle.load(fh)
    result = payload[key]
    if (self.convert_labels == 'True'):
        # Number format: expand integer labels into an indicator matrix.
        result = MultiLabelBinarizer().fit_transform(
            result.reshape(result.shape[0], 1))
    return result
def gen_multi_result(X, y, X_test, n_labels=10):
    """Generate a multilabel result using a one-vs-rest classifier.

    Args:
        X: (n_samples, n_features) training features.
        y: (n_samples,) integer labels, or (n_samples, n_labels) indicator
            matrix (used as-is).
        X_test: (n_samples, n_features) features to predict on.
        n_labels: size of the label alphabet used when binarizing a 1-D
            ``y``; was previously hard-coded to 10.

    Returns:
        y_pred: (n_samples, n_labels) predicted indicator matrix.
        y_probas: (n_samples, n_labels) per-label probabilities.
    """
    clf = OneVsRestClassifier(LogisticRegression(solver='liblinear'))
    if len(y.shape) == 1:
        # Binarize against the full alphabet so the column layout is stable
        # even when some labels never occur in y.
        y = MultiLabelBinarizer().fit([range(n_labels)]).transform(
            y.reshape(-1, 1))
    clf.fit(X, y)
    y_pred = clf.predict(X_test)
    y_probas = clf.predict_proba(X_test)
    return y_pred, y_probas
def main():
    """First-level tumor/non-tumor classification.

    Loads a trained model described by the JSON config named in
    ``sys.argv[1]`` (keys: batch_size, nb_classes, weights, data_path,
    dataset_name), evaluates it on the pickled ``.test`` split, saves raw
    predictions, and prints a confusion matrix.

    Fixes vs. original: removed the dead ``test = {}`` and ``model = None``
    initializations, and removed an unterminated ``'''`` that opened a
    commented-out second-level-classification section (a syntax error as
    written).
    """
    path = sys.argv[1]
    with open(path) as f:
        config = json.load(f)
    batch_size = int(config['batch_size'])
    nb_classes = int(config['nb_classes'])
    weight_path = config['weights']

    # ---- First level of classification ----
    model = new.load_model(nb_classes, weight_path)
    # Loss/optimizer: small LR with heavy momentum, tuned by hand.
    sgd = SGD(lr=0.00005, decay=1e-5, momentum=0.99, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd,
                  metrics=['accuracy'])

    # Load the pickled test split.
    with open(config['data_path'] + '/' + config['dataset_name'] + '.test',
              'rb') as f:
        test = pickle.load(f)
    x_test, y_test, imgname = test['data'], test['labels'], test['imgname']
    x_ts = x_test.reshape((-1, 227, 227, 1))
    print(x_ts.shape, 'test samples')
    print(y_test.shape, 'test sample labels')

    # Evaluation, prediction and confusion-matrix formation.
    scores = model.evaluate(x_ts, y_test, batch_size=batch_size, verbose=0)
    print("model %s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))
    prediction = model.predict_classes(x_ts, verbose=1)
    np.save('prediction.npy', prediction)
    pre = np.array(prediction)
    pre = MultiLabelBinarizer().fit_transform(pre.reshape(-1, 1))
    orig = y_test
    print('')
    print('')
    print('score for first level classification: ', scores)
    aa = np.array([0, 1])
    print('')
    print('')
    print(MultiLabelBinarizer().fit_transform(aa.reshape(-1, 1)))
    print("0-Nontumor 1-Tumor")
    a = np.array([0, 1])
    b = np.array([1, 0])
    # Map one-hot rows back to scalar class ids for the confusion matrix.
    y_true = []
    y_pred = []
    print(range(len(prediction)))
    for i in range(len(prediction)):
        if np.array_equal(orig[i], a):
            y_true.append(1)
        elif np.array_equal(orig[i], b):
            y_true.append(0)
    for i in range(len(prediction)):
        if np.array_equal(pre[i], a):
            y_pred.append(1)
        elif np.array_equal(pre[i], b):
            y_pred.append(0)
    cm = ConfusionMatrix(y_true, y_pred)
    print('')
    print('')
    print('*****************************Confusion Matrix for first level Classification****************************')
    print(cm)
    print('')
    print('')
    # NOTE(review): a commented-out "second level classification" section
    # followed here in the original behind an unterminated triple-quote;
    # it was dropped because it could never execute.
import os

import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.multiclass import OneVsRestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MultiLabelBinarizer, StandardScaler
from sklearn.svm import LinearSVC

# %%
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(ROOT_DIR, 'data/clean/final_segmented')


def _load_split(csv_name):
    """Load one CSV split from DATA_DIR.

    Returns ``(data, labels)``: features as float32, and the 'lbl' column
    one-hot encoded with its second indicator column dropped (the labels are
    binary, so one column fully determines the class).

    Fixes vs. original: the train path converted features with
    ``.astype(float).astype(np.float32)`` (a redundant double conversion,
    inconsistent with the test path); both now convert once, and the
    duplicated load/binarize logic is shared.
    """
    frame = pd.read_csv(os.path.join(DATA_DIR, csv_name))
    labels = frame.loc[:, 'lbl'].to_numpy().astype(np.int32)
    labels = MultiLabelBinarizer().fit_transform(labels.reshape(-1, 1))
    labels = np.delete(labels, 1, 1)
    data = frame.iloc[:, :-1].to_numpy().astype(np.float32)
    return data, labels


# %%
train_data, train_labels = _load_split('train.csv')
test_data, test_labels = _load_split('test.csv')
(train_labels.shape, train_data.shape, test_data.shape, test_labels.shape)

# %%
svm = make_pipeline(StandardScaler(), OneVsRestClassifier(LinearSVC()))
svm.fit(train_data, train_labels)
# %%
# In[8]:
# Inspect the feature array prepared in earlier cells.
print(type(X))
print(X[28708])
print(X.shape)

# In[9]:
x = data['pixels']
y = data['emotion']
from sklearn.preprocessing import MultiLabelBinarizer
# One-hot encode the emotion labels.
y = MultiLabelBinarizer().fit_transform(y.reshape(-1, 1))
print(len(y))

# In[10]:

# In[11]:
# Sanity-check one encoded label row and the feature shape.
print(y[4])
print(x.shape)

# In[12]: