def generate_data():
    TRAIN_SET_DIR = '/Users/knight/Desktop/GodClassDetection/trainset/train1'  # change to your own path
    train_x = preprocess.get_metrics(TRAIN_SET_DIR)
    print('train_x[0]:', train_x[0])
    train_y = preprocess.get_labels(TRAIN_SET_DIR)
    print('train_y[0]:', train_y[0])

    TEST_SET_DIR = '/Users/knight/Desktop/GodClassDetection/trainset/train2'  # change to your own path
    test_x = preprocess.get_metrics(TEST_SET_DIR)
    test_y = preprocess.get_labels(TEST_SET_DIR)

    print('-------------train datasize:', len(train_x))
    print('train_x:\n', train_x)
    print('train_y:\n', train_y)
    print()
    print('-------------test datasize:', len(test_x))
    print('test_x:\n', test_x)
    print('test_y:\n', test_y)
    return train_x, train_y, test_x, test_y
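# Illustrative usage sketch only (not part of the original script): one way the output of
# generate_data() could feed a baseline classifier. The scikit-learn model, the numpy
# conversion, and the assumption that train_y holds flat class labels are all guesses.
if __name__ == '__main__':
    import numpy as np
    from sklearn.linear_model import LogisticRegression

    train_x, train_y, test_x, test_y = generate_data()
    clf = LogisticRegression(max_iter=1000)
    clf.fit(np.asarray(train_x), np.asarray(train_y))
    print('test accuracy:', clf.score(np.asarray(test_x), np.asarray(test_y)))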
def plot_data_distribution(fraction=0.1):
    """
    Plot the distribution of sleep stages in the data.

    :param fraction: fraction of data to sample for the plot
    """
    # Sample a fraction of the full dataset
    assert 0 < fraction <= 1
    all_xml = [
        f for f in os.listdir(C.RAW_XML_DIR)
        if os.path.isfile(C.RAW_XML_DIR + f)
    ]
    all_xml = np.array(all_xml)
    np.random.shuffle(all_xml)
    cutoff = int(fraction * len(all_xml))
    samples = all_xml[:cutoff]

    # Get labels for all epochs from each patient
    n = C.FINAL_SAMPLING_FREQ * 30  # Rows per epoch
    all_labels = []
    for path in samples:
        labels = get_labels(C.RAW_XML_DIR + path)
        num_rows = len(labels)
        labels = np.reshape(labels, (num_rows // n, n, 1))  # Into shape (epoch, sequence, features)
        labels = labels[:, 0, 0]
        labels = labels.astype(np.int64)
        all_labels.extend(labels)

    # Compute per-stage counts and percentages
    scores, counts = np.unique(np.array(all_labels), return_counts=True)
    percentage = []
    print("Number of patients: {}".format(len(samples)))
    print("Number of epochs: {}".format(len(all_labels)))
    print("Score, Counts, Percentage")
    for i in range(len(scores)):
        print("{}, {}, {}".format(scores[i], counts[i],
                                  counts[i] / len(all_labels) * 100))
        percentage.append(counts[i] / len(all_labels) * 100)

    # Plotting
    x_pos = np.arange(len(scores))
    x_labels = ['Wake', 'N1', 'N2', 'N3', 'REM']
    plt.figure()
    plt.bar(x_pos, percentage, align='center', alpha=0.5)
    plt.xticks(x_pos, x_labels)
    plt.title('Distribution of Sleep Stages')
    plt.ylabel('Percentage')
    plt.savefig(C.GRAPHS_DIR + "stage_distribution.png")
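# Illustrative usage sketch only: sampling 20% of the annotation XML files for the plot.
# Assumes C.RAW_XML_DIR, C.GRAPHS_DIR, and C.FINAL_SAMPLING_FREQ are configured as in the
# module above.
if __name__ == '__main__':
    plot_data_distribution(fraction=0.2)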
def compress_audio_files(path):
    labels = get_labels(path)
    for label in labels:
        print("Compressing... ", label)
        audiofiles = []
        for audiofile in os.listdir(path + '/' + label):
            # current_path = path + '/' + label + '/' + audiofile
            audiofiles.append(audiofile)
        i = 1
        for audiofile in audiofiles:
            # file_size = get_size_in_mega_bytes(path + "/" + label + "/" + audiofile)
            # if file_size > 0.45:
            convert_to_mp3(path=path, label=label, file_name=audiofile, index=i)
            i += 1
def predict(name):
    data = {"path": name}
    params = flask.request.json
    if params is None:
        params = flask.request.args

    # If parameters are found, return a prediction
    if params is not None:
        with graph.as_default():
            sample = preprocess.wav2mfcc('C://Users//Stage//Downloads//' + name + '.wav')
            print(name)
            sample_reshaped = sample.reshape(1, 40, 47, 1)
            data["prediction"] = preprocess.get_labels()[0][np.argmax(
                model.predict(sample_reshaped))]
            data["success"] = True

    # Return a response in JSON format
    return flask.jsonify(data)
def predictTest(name):
    data = {"path": name}
    params = flask.request.json
    if params is None:
        params = flask.request.args

    # If parameters are found, return a prediction for a random file in the test folder
    if params is not None:
        with graph.as_default():
            sample_dir = "C://Users//Stage//final project//test//" + name
            filename = random.choice(os.listdir(sample_dir))
            print(filename)
            sample = preprocess.wav2mfcc(sample_dir + "//" + filename)
            print(name)
            sample_reshaped = sample.reshape(1, 40, 47, 1)
            data["prediction"] = preprocess.get_labels()[0][np.argmax(
                model.predict(sample_reshaped))]
            data["success"] = True

    # Return a response in JSON format
    return flask.jsonify(data)
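# Illustrative client call only: querying the endpoints above with the requests library.
# The host, port, and route path ("/predict/<name>") are assumptions; the route decorators
# are not shown in these snippets, so adjust the URL to match how the Flask app registers
# predict() and predictTest().
if __name__ == '__main__':
    import requests

    resp = requests.get('http://localhost:5000/predict/hello')
    print(resp.json())  # e.g. {"path": "hello", "prediction": "...", "success": true}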
def prepare_model():
    print("Preparing model...")
    raw_train, raw_test = get_raw_data()
    tokenized_inputs = get_tokenized_inputs(raw_train, cols=in_features)
    labels = get_labels(raw_train)

    # 90/10 split of the tokenized inputs and labels
    split_index_inputs = int(0.9 * tokenized_inputs.shape[0])
    split_index_labels = int(0.9 * labels.shape[0])
    train_inputs = tokenized_inputs[:split_index_inputs]
    train_labels = labels[:split_index_labels]
    test_inputs = tokenized_inputs[split_index_inputs:]
    test_labels = labels[split_index_labels:]

    train_loader = DataLoader(list(zip(train_inputs, train_labels)),
                              batch_size=hyperparams["batch_size"])
    test_loader = DataLoader(list(zip(test_inputs, test_labels)),
                             batch_size=hyperparams["batch_size"])

    model = AttnModel(hyperparams).to(device)
    return model, train_loader, test_loader
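# Illustrative training loop only, not the project's actual one: a minimal sketch of how
# the model and loaders returned by prepare_model() could be consumed. The optimizer, loss,
# learning rate, and epoch count are assumptions; device is taken from the module above,
# and targets are assumed to be integer class labels suitable for CrossEntropyLoss.
if __name__ == '__main__':
    import torch
    import torch.nn as nn

    model, train_loader, test_loader = prepare_model()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(3):
        model.train()
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            loss = criterion(model(inputs), targets)
            loss.backward()
            optimizer.step()
        print(f'epoch {epoch}: last batch loss {loss.item():.4f}')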
import tensorflow as tf
from transformers import BertTokenizer, TFBertModel
import numpy as np
import pandas as pd
import models, preprocess, mybert
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import EarlyStopping

# Load the datasets => preprocess with BERT => build the labels
test_data = pd.read_csv("gap-test.tsv", sep='\t')
test_emb = mybert.run_bert(test_data[:])
test_labels = preprocess.get_labels(test_data[:])

validation_data = pd.read_csv("gap-validation.tsv", sep='\t')
val_emb = mybert.run_bert(validation_data[:])
val_labels = preprocess.get_labels(validation_data[:])

development_data = pd.read_csv("gap-development.tsv", sep='\t')
dev_emb = mybert.run_bert(development_data)
dev_labels = preprocess.get_labels(development_data[:])

submission_data = pd.read_csv("test_stage_2.tsv", sep='\t')
sub_emb = mybert.run_bert(submission_data)

x_test = np.array(test_emb)
y_test = np.array(test_labels)
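# Illustrative only: a minimal sketch of how the embeddings above could feed a Keras
# classifier with the imported callbacks. models.build_model is a hypothetical factory
# (the real model code lives in models.py and is not shown here), and the epoch count and
# monitored metric are assumptions.
x_dev, y_dev = np.array(dev_emb), np.array(dev_labels)
x_val, y_val = np.array(val_emb), np.array(val_labels)

clf = models.build_model(input_dim=x_dev.shape[-1])  # hypothetical helper and signature
callbacks = [
    EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
    ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True),
]
clf.fit(x_dev, y_dev, validation_data=(x_val, y_val), epochs=20, callbacks=callbacks)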
import os
import time

from resnet import build_resnet
from keras.models import Model, load_model
from keras.callbacks import ModelCheckpoint, TensorBoard, LambdaCallback
from keras_contrib.utils.save_load_utils import save_all_weights, load_all_weights
from keras.utils import plot_model
from keras.optimizers import Nadam, Adamax, Adam, SGD, RMSprop
from sklearn import metrics
from sklearn.model_selection import train_test_split
import dpn
import preprocess as pre  # assumed name of the module providing load_data/get_images/get_labels

os.environ['CUDA_VISIBLE_DEVICES'] = '2,3,4,5'

train_df = pre.load_data('train.json')
images = pre.get_images(train_df)
labels = pre.get_labels(train_df)
del train_df

X_train, X_val, y_train, y_val = train_test_split(images, labels,
                                                  test_size=0.2,
                                                  random_state=None)
img_gen = pre.images_generator()

print('Images generator inits completely')
print('training images:', X_train.shape)
print('validation images:', X_val.shape)
print('training labels:', y_train.shape)
print('validation labels:', y_val.shape)
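# Illustrative only: one way the split above could feed a Keras training run. The
# build_resnet signature is a guess, and the sketch assumes pre.images_generator()
# returns a keras ImageDataGenerator; the batch size, epochs, and loss are assumptions.
model = build_resnet(input_shape=X_train.shape[1:], num_classes=1)  # hypothetical signature
model.compile(optimizer=Adam(lr=1e-3), loss='binary_crossentropy', metrics=['accuracy'])
model.fit_generator(img_gen.flow(X_train, y_train, batch_size=32),
                    steps_per_epoch=len(X_train) // 32,
                    validation_data=(X_val, y_val),
                    epochs=10,
                    callbacks=[ModelCheckpoint('best.h5', save_best_only=True)])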
def predict(filepath, model=None):
    # Predict an English word with the CNN-based model
    sample = wav2mfcc(filepath)
    feature_dim_1, feature_dim_2, channel = 20, 11, 1
    sample_reshaped = sample.reshape(1, feature_dim_1, feature_dim_2, channel)
    return get_labels()[0][np.argmax(model.predict(sample_reshaped))]
def predict(filepath, model):
    sample = wav2mfcc(filepath)
    sample_reshaped = sample.reshape(1, feature_dim_1, feature_dim_2, channel)
    return get_labels()[0][np.argmax(model.predict(sample_reshaped))]
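# Illustrative usage sketch only: loading a trained model and predicting one WAV file.
# The Keras load_model call, the model path, and the WAV path are placeholders/assumptions;
# feature_dim_1, feature_dim_2, and channel are assumed to be defined at module level as in
# the snippet above.
if __name__ == '__main__':
    from keras.models import load_model

    model = load_model('model.h5')           # placeholder model path
    print(predict('./data/sample.wav', model))  # placeholder audio file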
parser.add_argument("-l", "--load", action="store_true", help="load model.pt")
args = parser.parse_args()

pickle_in = open(args.data_file[0], "rb")
mols = pickle.load(pickle_in)
pickle_in.close()

# Load numpy files with the precomputed indices
wiener_idx = np.load(args.data_file[1])
hyper_wiener_idx = np.load(args.data_file[2])
zagreb_idx = np.load(args.data_file[3])

labels = get_labels(wiener_idx, hyper_wiener_idx, zagreb_idx)
data = RegressionData(mols, labels)
dataset = get_data(data)

dgl_graphs = [build_graph(data) for data in dataset]
dataset = list(zip(dgl_graphs, labels))
train_dataset, valid_dataset, test_dataset = split_dataset(dataset,
                                                           [0.8, 0.1, 0.1],
                                                           shuffle=True)


def collate(samples):
    # Batch a list of (graph, label) pairs into one DGL graph and a label tensor
    graphs, labels = map(list, zip(*samples))
    batched_graph = dgl.batch(graphs)
    return batched_graph, torch.tensor(labels)
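# Illustrative only: wiring the collate function above into PyTorch DataLoaders for the
# three splits produced by split_dataset. The batch size is an assumption.
from torch.utils.data import DataLoader

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate)
valid_loader = DataLoader(valid_dataset, batch_size=32, collate_fn=collate)
test_loader = DataLoader(test_dataset, batch_size=32, collate_fn=collate)

batched_graph, batch_labels = next(iter(train_loader))
print(batched_graph.batch_size, batch_labels.shape)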
def create_extra_data(path):
    # compress_audio_files(path)
    labels = get_labels(path)
    for label in labels:
        print("Creating extra data for:", label)
        audiofiles = []
        for audiofile in os.listdir(path + '/' + label):
            # current_path = path + '/' + label + '/' + audiofile
            audiofiles.append(audiofile)
        i = 1
        for audiofile in audiofiles:
            add_noise(path=path, label=label, file_name=audiofile, index=i)
            skip_n_seconds(path=path, label=label, file_name=audiofile, index=i, n=2)
            speed_up_audio(path=path, label=label, file_name=audiofile, index=i, speed=1.2)
            i += 1


# save_our_data_as_numpy_array(max_len=80, max_len2=40, origin_path=my_data_set_path,
#                              destination_path=big_numpy_files_path)
# labels = get_labels(big_numpy_files_path)
# for label in labels:
#     x = np.load(big_numpy_files_path + '/' + label)
#     if x.shape[0] < 150:
#         print(label, "Shape:", x.shape[0])

# path = our_data_set_path
# labels = get_labels(path)
# for label in labels:
#     print("Creating extra data for:", label)
#     audiofiles = []
#     for audiofile in os.listdir(path + '/' + label):
#         # current_path = path + '/' + label + '/' + audiofile
#         audiofiles.append(audiofile)
#     index = np.random.randint(low=2, high=3)
#     audiofiles = audiofiles[::index]
#     i = 1
#     for audiofile in audiofiles:
#         f = np.random.randint(low=0, high=2)
#         fade_audio(path=path, label=label, file_name=audiofile, index=i, fade=f)
#         i += 1

# path = our_data_set_path
# labels = get_labels(path)
# for label in labels:
#     print("Creating extra data for:", label)
#     audiofiles = []
#     for audiofile in os.listdir(path + '/' + label):
#         # current_path = path + '/' + label + '/' + audiofile
#         audiofiles.append(audiofile)
#     i = 20
#     for audiofile in audiofiles:
#         if audiofile[-10:-5] == 'noise':
#             skip_n_seconds(path=path, label=label, file_name=audiofile, index=i, n=2)
#         elif audiofile[:6] == 'sliced':
#             speed_up_audio(path=path, label=label, file_name=audiofile, index=i, speed=1.25)
#         elif audiofile[:6] == 'speedx':
#             add_noise(path=path, label=label, file_name=audiofile, index=i)
#         i += 1

# print_user_files(our_data_set_path)