def constOptimize(net, base_img, guide_img, objective, iter_n, max_thres, end,
                  factr=factr, pgtol=pgtol, verbose=True):
    proc_base = utils.preprocess(net, base_img)
    proc_guide = utils.preprocess(net, guide_img)

    src = net.blobs['data']
    ch, h, w = proc_base.shape
    src.reshape(1, ch, h, w)

    # allocate image for network-produced details
    src, dst = net.blobs['data'], net.blobs[end]
    src.data[0] = proc_guide
    net.forward(end='prob')
    guide_features = dst.data[0].copy()

    up_bnd = proc_base + max_thres
    lw_bnd = proc_base - max_thres
    mean_arr = net.transformer.mean['data']
    if mean_arr.ndim == 1:
        mean_arr = mean_arr.reshape((3, 1, 1))
    up_bnd = np.minimum(up_bnd, 255 - mean_arr)
    lw_bnd = np.maximum(lw_bnd, 0 - mean_arr)
    bound = zip(lw_bnd.flatten(), up_bnd.flatten())

    src.data[0] = proc_base
    x, f, d = cnstOpt(calc_gstep, proc_base.flatten().astype(float),
                      args=(net, guide_features, end, objective, verbose),
                      bounds=bound, maxiter=iter_n, iprint=0,
                      factr=factr, pgtol=pgtol)
    return x.reshape(proc_base.shape), f, d
def generate(sentence):
    with open('train.txt') as fin:
        train = fin.read()
    train += ' ' + preprocess(sentence)
    morse_codes = translateToMorseCode(train)
    huffman_tree = HuffmanTree()
    huffman_tree.train(morse_codes)
    print(huffman_tree.translate(preprocess(sentence)))
def predict(clf, file_path, scaler=None):
    data = preprocess(file_path)
    X = extract_all_features(data, 44100)
    X = np.asmatrix(X)
    if scaler:
        X = scaler.transform(X)
    res = clf.predict(X[:, imporved_features()])[0]
    return res
def submission(): """ Generate submission file for the trained models. """ logging.info('Loading and compiling models...') model_systole = get_model() model_diastole = get_model() logging.info('Loading models weights...') model_systole.load_weights('../models/weights/weights_systole_best.hdf5') model_diastole.load_weights('../models/weights/weights_diastole_best.hdf5') # load val losses to use as sigmas for CDF with open('./logs/val_loss.txt', mode='r') as f: val_loss_systole = float(f.readline()) val_loss_diastole = float(f.readline()) logging.info('Loading validation data...') X, ids = load_validation_data() logging.info('Pre-processing images...') X = preprocess(X) batch_size = 32 logging.info('Predicting on validation data...') pred_systole = model_systole.predict(X, batch_size=batch_size, verbose=1) pred_diastole = model_diastole.predict(X, batch_size=batch_size, verbose=1) # real predictions to CDF cdf_pred_systole = real_to_cdf(pred_systole, val_loss_systole) cdf_pred_diastole = real_to_cdf(pred_diastole, val_loss_diastole) logging.info('Accumulating results...') sub_systole = accumulate_study_results(ids, cdf_pred_systole) sub_diastole = accumulate_study_results(ids, cdf_pred_diastole) # write to submission file logging.info('Writing submission to file...') fi = csv.reader(open('../input/sample_submission_validate.csv')) f = open('../submissions/submission_17.csv', 'w') fo = csv.writer(f, lineterminator='\n') fo.writerow(next(fi)) for line in fi: idx = line[0] key, target = idx.split('_') key = int(key) out = [idx] if key in sub_systole: if target == 'Diastole': out.extend(list(sub_diastole[key][0])) else: out.extend(list(sub_systole[key][0])) else: logging.info('Miss {0}'.format(idx)) fo.writerow(out) f.close() logging.info('Done.')
def tokenize(text):
    replacements = [("---", " "), ("--", " "), ("-", "")]  # trying to capture multi-word keywords
    for (src, tgt) in replacements:
        text = text.replace(src, tgt)
    words = utils.preprocess(text)
    return filter(lambda w: w not in stops, words)
def trainAPI():
    global vw, sequenceLabeler
    model = request.args.get("model", "tagger1.bin")
    N = request.args.get("iter", 10)
    try:
        data = request.args.get("data")
        data = data.strip()
        print("Training:", data)
        sequenceLabeler.learn(preprocess([data]))
        return "model trained"
    except Exception:
        return "'data' field not present OR training error!!!"
def submission(): """ Generate submission file for the trained models. """ print('Loading and compiling models...') model_systole = get_vgg_model() model_diastole = get_vgg_model() print('Loading models weights...') model_systole.load_weights('weights_systole_best.hdf5') model_diastole.load_weights('weights_diastole_best.hdf5') print('Loading validation data...') X, ids = load_validation_data() print('Pre-processing images...') X = preprocess(X) batch_size = 32 print('Predicting on validation data...') pred_systole = model_systole.predict(X, batch_size=batch_size, verbose=1) pred_diastole = model_diastole.predict(X, batch_size=batch_size, verbose=1) # real predictions to CDF cdf_pred_systole = pred_systole.cumsum(axis=-1) cdf_pred_diastole = pred_diastole.cumsum(axis=-1) print('Accumulating results...') sub_systole = accumulate_study_results(ids, cdf_pred_systole) sub_diastole = accumulate_study_results(ids, cdf_pred_diastole) # write to submission file print('Writing submission to file...') fi = csv.reader(open('/data/heart/sample_submission_test.csv')) f = open('submission.csv', 'w') fo = csv.writer(f, lineterminator='\n') fo.writerow(fi.next()) for line in fi: idx = line[0] key, target = idx.split('_') key = int(key) out = [idx] if key in sub_systole: if target == 'Diastole': out.extend(list(sub_diastole[key][0])) else: out.extend(list(sub_systole[key][0])) else: print('Miss {0}'.format(idx)) fo.writerow(out) f.close() print('Done.')
def main(): vw = [] sl = [] while True: inp = raw_input("> ") inp = inp.strip() words = inp.split() cmd = words[0] if cmd == "/save": for temp in vw: temp.finish() sys.exit(1) if cmd == "/train": data = " ".join(words[1:]).strip() for i in range(10): for temp in sl: temp.learn(preprocess([data])) elif cmd == "/query": data = " ".join(words[1:]).strip() output = set() for s in sl: output.add(postprocess(query(s, data))) for out in output: print "\t", out elif cmd == "/start": data = " ".join(words[1:]).strip() if os.path.isfile(data + ".1") and os.path.isfile(data + ".2") and os.path.isfile( data + ".3") and os.path.isfile(data + ".4"): vw = [ pyvw.vw("--quiet -i " + data + ".1 -f "+data + ".1"), pyvw.vw("--quiet -i " + data + ".2 -f "+data + ".2"), pyvw.vw("--quiet -i " + data + ".3 -f "+data + ".3"), pyvw.vw("--quiet -i " + data + ".4 -f "+data + ".4") ] else: vw = [ pyvw.vw("--search 3 --quiet --search_task hook --ring_size 2048 -f " + data + ".1"), pyvw.vw("--search 3 --quiet --search_task hook --ring_size 2048 -f " + data + ".2"), pyvw.vw("--search 3 --quiet --search_task hook --ring_size 2048 -f " + data + ".3"), pyvw.vw("--search 3 --quiet --search_task hook --ring_size 2048 -f " + data + ".4") ] sl = [ vw[0].init_search_task(SequenceLabeler), vw[1].init_search_task(SequenceLabeler2), vw[2].init_search_task(SequenceLabeler3), vw[3].init_search_task(SequenceLabeler4) ]
def create_bb_pip(tfr, nepoch, sbatch, mean, shuffle=True):
    tf_mean = tf.constant(mean, dtype=tf.float32)
    tf_mean = tf.reshape(tf_mean, [1, 1, 1, 3])

    fqueue = tf.train.string_input_producer([tfr], num_epochs=nepoch * 10)
    image, idx, bbx = read_single_image(fqueue, 64)
    data = tf.train.batch([image, idx, bbx], batch_size=sbatch,
                          num_threads=1, capacity=sbatch * 3)

    # preprocess input images
    data[0] = preprocess(data[0], tf_mean)
    return data
def predict(model, img_path, device):
    from utils import preprocess, transform

    model.eval()
    with torch.no_grad():
        in_shape = np.asarray(cv2.imread(img_path)).shape
        img = preprocess(img_path)
        fin_shape = np.asarray(img).shape
        img = transform(img)
        img = Variable(img).to(device)
        img = img.view(1, 1, fin_shape[0], fin_shape[1])
        output = model(img)
        img = (255 * output.cpu().clone().detach().numpy()).squeeze().squeeze()
        img = cv2.resize(img, (in_shape[1], in_shape[0]), interpolation=cv2.INTER_AREA)
    return img
def extract_feature(self, images, batch_size, preprocess=False, config=None, is_training=False):
    num_images = images.shape[0] if type(images) == np.ndarray else len(images)
    num_features = self.outputs.shape[1]
    result = np.ndarray((num_images, num_features), dtype=np.float32)
    for start_idx in range(0, num_images, batch_size):
        end_idx = min(num_images, start_idx + batch_size)
        inputs = images[start_idx:end_idx]
        if preprocess:
            assert config is not None
            inputs = utils.preprocess(inputs, config, is_training)
        feed_dict = {self.inputs: inputs,
                     self.phase_train_placeholder: False,
                     self.keep_prob_placeholder: 1.0}
        result[start_idx:end_idx] = self.sess.run(self.outputs, feed_dict=feed_dict)
    return result
def embed(self, X0, X1, X2, X3, X4):
    X0 = preprocess(X0)
    X1 = preprocess(X1)
    X2 = preprocess(X2)
    X3 = preprocess(X3)
    X4 = preprocess(X4)
    X0_latent, X1_latent, X2_latent, X3_latent, X4_latent = self.sess.run(
        [self.c0_test, self.c1_test, self.c2_test, self.c3_test, self.c4_test],
        feed_dict={self.x0: X0, self.x1: X1, self.x2: X2, self.x3: X3, self.x4: X4})
    return (X0_latent[:, 0, :], X1_latent[:, 0, :], X2_latent[:, 0, :],
            X3_latent[:, 0, :], X4_latent[:, 0, :])
def decode(self, words, lower=False):
    """Return the words with tags of the given words.

    args:
        - words (list): Input words.
        - lower (bool, optional): If lower is True, all uppercase characters in a list
          of the words are converted into lowercase characters.

    return:
        - object : The object of the words with tags.
    """
    if not type(words) == list:
        raise AssertionError("Please input a list of words.")
    words = [utils.preprocess(w) for w in words]
    postags = self._postagging(words, lower)
    return postags
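# A minimal, hypothetical usage sketch for decode() above: the Tagger class name and its
# construction are assumptions; only the decode() contract (a list of words in, tagged
# output back) comes from the snippet itself.
tagger = Tagger()  # assumed constructor
postags = tagger.decode(["This", "is", "a", "Test"], lower=True)
print(postags)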
def telemetry(sid, data): if data: # The current steering angle of the car steering_angle = float(data["steering_angle"]) # The current throttle of the car, how hard to push peddle throttle = float(data["throttle"]) # The current speed of the car speed = float(data["speed"]) # The current image from the center camera of the car image = Image.open(BytesIO(base64.b64decode(data["image"]))) try: image = np.asarray(image) # from PIL image to numpy array image = utils.preprocess(image) # apply the preprocessing image = np.array([image]) # the model expects 4D array # predict the steering angle for the image steering_angle = float(model.predict(image, batch_size=1)) # lower the throttle as the speed increases # if the speed is above the current speed limit, we are on a downhill. # make sure we slow down first and then go back to the original max speed. global speed_limit if speed > speed_limit: speed_limit = MIN_SPEED # slow down else: speed_limit = MAX_SPEED throttle = 1.0 - steering_angle**2 - (speed/speed_limit)**2 #steering_angle_list.append(steering_angle) #i_list.append(i+1) #df = DataFrame({'time_values': i_list, 'Steering angle': steering_angle_list}) #df.to_excel('steering_angle_data.xlsx', sheet_name='sheet1', index=False) file_object.write(str(steering_angle)) file_object.write('\n') print('{} {} {}'.format(steering_angle, throttle, speed)) send_control(steering_angle, throttle) except Exception as e: file_object.close() print(e) # save frame if args.image_folder != '': #df = DataFrame({'time_values': i_list, 'Steering angle': steering_angle_list}) #df.to_excel('steering_angle_data.xlsx', sheet_name='sheet1', index=False) timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3] image_filename = os.path.join(args.image_folder, timestamp) image.save('{}.jpg'.format(image_filename)) else: sio.emit('manual', data={}, skip_sid=True)
def telemetry(sid, data): if data: # The current steering angle of the car steering_angle = float(data["steering_angle"]) # The current throttle of the car, how hard to push peddle throttle = float(data["throttle"]) # The current speed of the car speed = float(data["speed"]) # The current image from the center camera of the car original_image = Image.open(BytesIO(base64.b64decode(data["image"]))) try: image = np.asarray(original_image) # from PIL image to numpy array image = utils.preprocess(image) # apply the preprocessing image = transformations(image) image = torch.Tensor(image) #image = np.array([image]) # the model expects 4D array image = image.view(1, 3, 75, 320) image = Variable(image) # predict the steering angle for the image steering_angle = model(image).view(-1).data.numpy()[0] # lower the throttle as the speed increases # if the speed is above the current speed limit, we are on a downhill. # make sure we slow down first and then go back to the original max speed. global speed_limit if speed > speed_limit: speed_limit = MIN_SPEED # slow down else: speed_limit = MAX_SPEED throttle = 1.0 - steering_angle**2 - (speed / speed_limit)**2 #throttle = controller.update(float(speed)) - 0.1 print('{} {} {}'.format(steering_angle, throttle, speed)) send_control(steering_angle, throttle) except Exception as e: print("Exception") print(e) # save frame if args.image_folder != '': timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3] image_filename = os.path.join(args.image_folder, timestamp) original_image.save('{}.jpg'.format(image_filename)) else: sio.emit('manual', data={}, skip_sid=True)
def telemetry(sid, data): if data: # The current steering angle of the car steering_angle = float(data["steering_angle"]) # The current throttle of the car throttle = float(data["throttle"]) # The current speed of the car speed = float(data["speed"]) # The current image from the center camera of the car image = Image.open(BytesIO(base64.b64decode(data["image"]))) # save frame if args.image_folder != '': timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3] image_filename = os.path.join(args.image_folder, timestamp) image.save('{}.jpg'.format(image_filename)) try: image = np.asarray(image) # from PIL image to numpy array image_copy = np.copy(image) image_copy = autoenconder_model.normalize_and_reshape(image_copy) loss = anomaly_detection.test_on_batch(image_copy, image_copy) image = utils.preprocess(image) # apply the preprocessing image = np.array([image]) # the model expects 4D array # predict the steering angle for the image steering_angle = float(model.predict(image, batch_size=1)) # lower the throttle as the speed increases # if the speed is above the current speed limit, we are on a downhill. # make sure we slow down first and then go back to the original max speed. global speed_limit if speed > speed_limit: speed_limit = MIN_SPEED # slow down else: speed_limit = MAX_SPEED throttle = 1.0 - steering_angle**2 - (speed/speed_limit)**2 if loss > 0.035: print('{} {} {} {} WARNING'.format(steering_angle, throttle, speed, loss)) else: print('{} {} {} {} OK'.format(steering_angle, throttle, speed, loss)) send_control(steering_angle, throttle) except Exception as e: print(e) else: # NOTE: DON'T EDIT THIS. sio.emit('manual', data={}, skip_sid=True)
def classify(v): ####### training part ############### samples = np.loadtxt('generalsamples.data', np.float32) responses = np.loadtxt('generalresponses.data', np.float32) responses = responses.reshape((responses.size, 1)) model = cv2.ml.KNearest_create() model.train(samples, cv2.ml.ROW_SAMPLE, responses) ############################# testing part ######################### cap = cv2.VideoCapture(1) labels = ['S', 'U', 'H'] while (True): ret, im = cap.read() #print(str('train' + str(i)+'.jpg')) #im = cv2.imread(str('train/' + str(i)+'.jpg')) img = utils.preprocess(im) im2 = img.copy() image, contours, hierarchy = cv2.findContours(im2, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) ############################# classification ######################### for cnt in contours: area = cv2.contourArea(cnt) if utils.check_rectangle(area): [x, y, w, h] = cv2.boundingRect(cnt) if utils.check_ratio(x, y, w, h): cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 0), 2) retval, results, neigh_resp, dists = model.findNearest( utils.roismall(img, x, y, w, h), k=3) string = labels[int(results[0][0])] cv2.putText(im, string, (x + 3, y + h + 3), 0, 2, (255, 0, 0), thickness=3) cv2.imshow('im', im) #cv2.imshow('processed',img) if cv2.waitKey(1) & 0xFF == ord('q'): break cap.release() cv2.destroyAllWindows()
def telemetry(sid, data): if data: # The current steering angle of the car steering_angle = float(data["steering_angle"]) # The current throttle of the car, how hard to push peddle throttle = float(data["throttle"]) # The current speed of the car speed = float(data["speed"]) # The current image from the center camera of the car image = Image.open(BytesIO(base64.b64decode(data["image"]))) img = np.asarray(image) img = utils.preprocess(img) try: # from PIL image to numpy array #image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # predict the steering angle for the image img = Variable(torch.cuda.FloatTensor([img], device=device)).permute(0,3,1,2) steering_angle_throttle = model(img) #steering_angle = steering_angle_throttle[0].item() #throttle = steering_angle_throttle[1].item() steering_angle = steering_angle_throttle.item() #print(f'steering angle {steering_angle}') # lower the throttle as the speed increases # if the speed is above the current speed limit, we are on a downhill. # make sure we slow down first and then go back to the original max speed. global speed_limit if speed > speed_limit: speed_limit = MIN_SPEED # slow down else: speed_limit = MAX_SPEED throttle = 1.0 - steering_angle**2 - (speed/speed_limit)**2 print('sterring_angle: {} throttle: {} spped: {}'.format(steering_angle, throttle, speed)) send_control(steering_angle, throttle) except Exception as e: print(e) # save frame if args.image_folder != '': timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3] image_filename = os.path.join(args.image_folder, timestamp) image.save('{}.jpg'.format(image_filename)) else: sio.emit('manual', data={}, skip_sid=True)
def main(args): # I/O config_file = args.config_file config = utils.import_file(config_file, 'config') #trainset = utils.Dataset(config.train_dataset_path) testset = utils.Dataset(config.test_dataset_path) network = BaseNetwork() network.initialize(config, 0 ) #trainset.num_classes # Initalization for running log_dir = utils.create_log_dir(config, config_file) summary_writer = tf.summary.FileWriter(log_dir, network.graph) if config.restore_model is not None: network.restore_model(config.restore_model, config.restore_scopes) # Set up LFW test protocol and load images print('Loading images...') lfwtest = LFWTest(testset.images) lfwtest.init_standard_proto(config.lfw_pairs_file) lfwtest.images = utils.preprocess(lfwtest.image_paths, config, is_training=False) #trainset.start_batch_queue(config, True) # # Main Loop # print('\nStart Training\nname: %s\n# epochs: %d\nepoch_size: %d\nbatch_size: %d\n'\ % (config.name, config.num_epochs, config.epoch_size, config.batch_size)) global_step = 0 # Testing on LFW print('Testing on Neetis LFW protocol...') embeddings = network.extract_feature(lfwtest.images, config.batch_size) print(type(embeddings)) accuracy_embeddings, threshold_embeddings = lfwtest.test_standard_proto(embeddings) print('Embeddings Accuracy: %2.4f Threshold %2.3f' % (accuracy_embeddings, threshold_embeddings)) with open(os.path.join(log_dir,'lfw_result.txt'),'at') as f: f.write('%d\t%.5f\n' % (global_step,accuracy_embeddings)) summary = tf.Summary() summary.value.add(tag='lfw/accuracy', simple_value=accuracy_embeddings) summary_writer.add_summary(summary, global_step)
def read_data_from_file(data_path): maybe_download() with open(data_path) as f: text = f.read() ########################################################### # ------------------- Preprocessing ----------------------- # 1. Tokenize punctuations e.g. period -> <PERIOD> # 2. Remove words that show up five times or fewer words = utils.preprocess(text) # Hmm, let's take a look at the processed data print('First 30 words:', words[:30]) print('Total words:', len(words)) print('Total unique words:', len(set(words))) # Create two dictionaries to convert words to integers vocab_to_int, int_to_vocab = utils.create_lookup_tables(words) n_vocab = len(int_to_vocab) # Convert words into integers int_words = [vocab_to_int[w] for w in words] ########################################################### # ------------------- Subsampling ------------------------- # Some words like "the", "a", "of" etc don't provide much # information. So we might want to remove some of them. # This results in faster and better result. # The probability that a word is discarded is # P(w) = 1 - sqrt(1 / frequency(w)) each_word_count = Counter(int_words) total_count = len(int_words) threshold = 1e-5 # FLAGS.drop_word_threshold freqs = {word: count/total_count for word, count in each_word_count.items()} probs = {word: 1 - np.sqrt(threshold/freqs[word]) for word in each_word_count} train_words = [word for word in int_words if random.random() < (1 - probs[word])] print('After subsampling, first 30 words:', train_words[:30]) print('After subsampling, total words:', len(train_words)) # Subsampling makes it worse for eliminating contextual info # return train_words, int_to_vocab, vocab_to_int, n_vocab return int_words, int_to_vocab, vocab_to_int, n_vocab
def get_tweets():
    global batch_start_time
    processed_tweet = []
    try:
        for line in api.GetStreamSample():
            if 'text' in line and line['lang'] == u'en':
                text = line['text'].encode('utf-8').replace('\n', ' ')
                p_t = preprocess(text)  # process tweets
                if p_t:
                    processed_tweet += p_t,
                if time.time() - batch_start_time >= tw * 60:  # time is over for this batch
                    return processed_tweet
        return processed_tweet
    # server-side interruption
    except:
        pass
def preprocess_clustering(text: str):
    text = preprocess(text)
    tokens = text.split(' ')
    doc = []
    for token in tokens:
        if token in string.punctuation:
            continue
        if token.isnumeric():
            continue
        if len(token) < 2:
            continue
        # lemmatize the words
        token = LEMMATIZER.lemmatize(token)
        doc.append(token)
    return doc
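# Hedged usage sketch for preprocess_clustering() above; the sample sentences and the
# vocabulary step are illustrative assumptions, not part of the original code.
docs = [preprocess_clustering(s) for s in ["Cats are chasing 2 mice!", "A mouse ran away."]]
vocabulary = sorted({token for doc in docs for token in doc})
print(docs, vocabulary)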
def __getitem__(self, index):
    _img = Image.open(self.images[index]).convert('RGB')
    _target = Image.open(self.masks[index])

    _img, _target = preprocess(_img, _target,
                               flip=True if self.train else False,
                               scale=(0.5, 2.0) if self.train else None,
                               crop=(self.crop_size, self.crop_size) if self.train else (1025, 2049))

    if self.transform is not None:
        _img = self.transform(_img)
    if self.target_transform is not None:
        _target = self.target_transform(_target)

    # print(_img.shape)
    return _img, _target
def __init__(self, base_dir, batch_size, rst, max_size=500,
             normalize=True, preprocessing=True):
    BATCH_FILES = 4
    self.base_dir = base_dir
    self.batch_size = batch_size
    self.rst = rst
    self.normalize = normalize
    self.max_size = max_size
    self.preprocessing = preprocessing
    self.x = self.get_content_images()

    if self.preprocessing:
        self.x = utils.preprocess(self.x)
    if normalize:
        self.x = utils.norm(self.x)
def read_phone_data(f_name):
    with open(f_name, 'r', encoding='utf-8') as f_phone:
        phone_data = []
        phone = f_phone.readlines()
        phone.insert(0, '\n')
        for idx, line in tqdm(enumerate(phone)):
            if idx + 1 < len(phone) and phone[idx + 1] == '\n':
                continue
            if line == '\n':
                i, text = 0, ''
            i += 1
            if i > 3:
                text += line
            if idx + 2 < len(phone) and phone[idx + 2] == '\n':
                label = int(phone[idx + 1].replace('\n', ''))
                phone_data.append({label: preprocess(text)})
    return phone_data
def preprocess(self, stop_filter=True, pos_filter=True):
    """Preprocess document text

    This method can filter out basic stopwords listed in the utils file
    and parts of speech.

    :param stop_filter: stopword filter status
    :type stop_filter: bool
    :param pos_filter: parts of speech filter status
    :type pos_filter: bool
    """
    self.text = ". ".join(
        preprocess(self.text, stop_filter=stop_filter, pos_filter=pos_filter))
def train_baseline():
    train_df = pd.read_csv(train_df_path, index_col=0)
    sentences_train, dictionary, y_train_encoded = preprocess(
        train_df, processed_train_df_path, encoder)
    y_train = train_df['relation_type'].values

    vectorizer.fit(sentences_train)
    X_train = vectorizer.transform(sentences_train)

    print('training...')
    classifier = RandomForestClassifier(n_estimators=700, max_depth=60,
                                        n_jobs=-1, class_weight='balanced')
    classifier.fit(X_train, y_train)
    print('trained')
    return classifier, dictionary
def train_baseline():
    train_df = pd.read_csv(train_df_path, index_col=0)
    sentences_train, dictionary, y_train_encoded = preprocess(
        train_df, processed_train_df_path, encoder)
    y_train = train_df['relation_type'].values

    vectorizer.fit(sentences_train)
    X_train = vectorizer.transform(sentences_train)

    print('training...')
    classifier = MLPClassifier(activation='tanh', alpha=0.1,
                               hidden_layer_sizes=(30, 5), learning_rate='constant')
    classifier.fit(X_train, y_train)
    print('trained')
    return classifier, dictionary
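# Hedged inference sketch for the baselines trained above; test_df_path and
# processed_test_df_path are assumed counterparts of the training paths, while the
# preprocess()/vectorizer/encoder contract is taken from train_baseline() itself.
classifier, dictionary = train_baseline()
test_df = pd.read_csv(test_df_path, index_col=0)               # assumed test split
sentences_test, _, _ = preprocess(test_df, processed_test_df_path, encoder)
X_test = vectorizer.transform(sentences_test)
predictions = classifier.predict(X_test)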
def raw_utterance_with_keyword(self, vocab, train=True):
    cache = cacher('dts_ConvAI2.raw_utterance_with_keyword', vocab, train)
    if cache.cached:
        return cache.data

    examples, corpus, check_dict = self.raw_utterance(train)
    field = Field(vocab)
    kwext = KeywordExtractor(field)
    for example in prolog(examples, name=' -extract keywords'):
        kws = kwext.extract(example['uttr'].lst)
        example['kwpos'] = kws['kwpos']
        example['keywords'] = kws['keywords']
    examples = preprocess(examples, field, log=' -process to_pack cls')
    return cache.cache((examples, field, corpus, check_dict))
def synthesize_x1(self, X1_latent, parents=None):
    if isinstance(X1_latent, int):
        N = X1_latent
        X1_latent = np.random.uniform(size=(N, self.latent_dim[1]))
        X1_noise = np.random.normal(scale=0.5, size=(N, self.noise_dim[1]))
    else:
        N = X1_latent.shape[0]
        X1_noise = np.zeros((N, self.noise_dim[1]))
    if parents is None:
        X0 = self.synthesize_x0(1)[0]
    else:
        X0 = parents[0]
        X0 = preprocess(X0)
    X0 = np.tile(X0, (N, 1, 1, 1))
    X1 = self.sess.run(self.x1_fake,
                       feed_dict={self.c1: X1_latent, self.z1: X1_noise, self.x0_fake: X0})
    return [postprocess(X1), postprocess(X0)]
def cell_transform(xs, indexes=None):
    Fs = []
    xs = [preprocess(x) for x in xs]
    for xmb in tqdm(iter_data(xs, size=hps.nbatch), ncols=80, leave=False,
                    total=len(xs) // hps.nbatch):
        smb = np.zeros((2, hps.nbatch, hps.nhidden))
        n = len(xmb)
        xmb, mmb = batch_pad(xmb, hps.nbatch, hps.nsteps)
        smb = sess.run(cells, {X: xmb, S: smb, M: mmb})
        smb = smb[:, :n, :]
        if indexes is not None:
            smb = smb[:, :, indexes]
        Fs.append(smb)
    Fs = np.concatenate(Fs, axis=1).transpose(1, 0, 2)
    return Fs
def live(state_widget, model, camera, prediction_widget):
    global dataset
    while state_widget.value == 'live':
        image = camera.value
        preprocessed = preprocess(image)
        output = model(preprocessed).detach().cpu().numpy().flatten()
        category_index = dataset.categories.index(category_widget.value)
        x = output[2 * category_index]
        y = output[2 * category_index + 1]
        x = int(camera.width * (x / 2.0 + 0.5))
        y = int(camera.height * (y / 2.0 + 0.5))
        prediction = image.copy()
        prediction = cv2.circle(prediction, (x, y), 8, (255, 0, 0), 3)
        prediction_widget.value = bgr8_to_jpeg(prediction)
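# Hedged usage sketch: a loop like live() above is typically launched on a background
# thread so the widgets stay responsive; the widget and model objects are assumed to
# already exist in the calling notebook or script.
import threading
live_thread = threading.Thread(target=live,
                               args=(state_widget, model, camera, prediction_widget))
live_thread.start()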
def read_data_from_file(data_path: str) -> tuple:
    """
    Build the list of training words, together with the vocabulary size.
    :param data_path:
    :return:
    """
    maybe_download()

    with open(data_path) as f:
        text = f.read()

    # Replace special punctuation in the text with designated tokens.
    words = utils.preprocess(text)
    print('First 30 words:', words[:30])
    print('Total words:', len(words))
    print('Total unique words:', len(set(words)))

    # Sort words by frequency (high to low), drop low-frequency words (count < 5),
    # and build the id2word and word2id dictionaries.
    vocab_to_int, int_to_vocab = utils.create_lookup_tables(words)
    n_vocab = len(int_to_vocab)

    # Turn the word sequence into a sequence of integer ids.
    int_words = [vocab_to_int[w] for w in words]

    ###########################################################
    # ------------------- Subsampling -------------------------
    # Some words like "the", "a", "of" etc don't provide much
    # information. So we might want to remove some of them.
    # This results in faster and better result.
    # The probability that a word is discarded is
    # P(w) = 1 - sqrt(1 / frequency(w))
    each_word_count = Counter(int_words)
    total_count = len(int_words)
    threshold = FLAGS.drop_word_threshold

    # Count word frequencies
    freq_s = {
        word: count / total_count
        for word, count in each_word_count.items()
    }
    prob_s = {
        word: 1 - np.sqrt(threshold / freq_s[word])
        for word in each_word_count
    }

    train_words = [
        word for word in int_words if random.random() < (1 - prob_s[word])
    ]

    print('After subsampling, first 30 words:', train_words[:30])
    print('After subsampling, total words:', len(train_words))

    return train_words, int_to_vocab, vocab_to_int, n_vocab
def detect(sess, model, names, image, path): preprocess = eval(args.preprocess) _, height, width, _ = image.get_shape().as_list() _image = read_image(path) image_original = np.array(np.uint8(_image)) image_height, image_width, _ = image_original.shape image_std = preprocess( np.array(np.uint8(_image.resize((width, height)))).astype(np.float32)) feed_dict = {image: np.expand_dims(image_std, 0)} tensors = [model.conf, model.xy_min, model.xy_max] conf, xy_min, xy_max = sess.run( [tf.check_numerics(t, t.op.name) for t in tensors], feed_dict=feed_dict) boxes = utils.postprocess.non_max_suppress(conf[0], xy_min[0], xy_max[0], args.threshold, args.threshold_iou) scale = [image_width / model.cell_width, image_height / model.cell_height] fig = plt.figure() ax = fig.gca() ax.imshow(image_original) colors = [ prop['color'] for _, prop in zip( names, itertools.cycle(plt.rcParams['axes.prop_cycle'])) ] cnt = 0 for _conf, _xy_min, _xy_max in boxes: index = np.argmax(_conf) if _conf[index] > args.threshold: wh = _xy_max - _xy_min _xy_min = _xy_min * scale _wh = wh * scale linewidth = min(_conf[index] * 10, 3) ax.add_patch( patches.Rectangle(_xy_min, _wh[0], _wh[1], linewidth=linewidth, edgecolor=colors[index], facecolor='none')) ax.annotate(names[index] + ' (%.1f%%)' % (_conf[index] * 100), _xy_min, color=colors[index]) cnt += 1 fig.canvas.set_window_title('%d objects detected' % cnt) ax.set_xticks([]) ax.set_yticks([]) return fig
def create_bb_pip(tfr_pool, nepoch, sbatch, mean, shuffle=True): if len(tfr_pool) == 3: ebs = [int(sbatch * 0.5), int(sbatch * 0.3), sbatch - int(sbatch * 0.5) - int(sbatch * 0.3)] elif len(tfr_pool) == 1: ebs = [sbatch] else: print("Input Format is not recognized") return data_pool = [] for ix, tfr in enumerate(tfr_pool): cur_ebs = ebs[ix] tokens = tfr.split('/')[-1].split('_') dim = int(tokens[-1].split('.')[0][1:]) tf_mean = tf.constant(mean, dtype=tf.float32) tf_mean = tf.reshape(tf_mean, [1, 1, 1, 3]) fqueue = tf.train.string_input_producer([tfr], num_epochs=nepoch) image, gt_key, gt_3d, gt_2d, occ = read_one_datum(fqueue, dim) if shuffle: data = tf.train.shuffle_batch([image, gt_key, gt_3d, gt_2d, occ], batch_size=cur_ebs, num_threads=12, capacity=sbatch * 6, min_after_dequeue=cur_ebs * 3) else: data = tf.train.batch([image, gt_key, gt_3d, gt_2d, occ], batch_size=cur_ebs, num_threads=12, capacity=cur_ebs * 5) # preprocess input images # print("data0]", data[0]) data[0] = preprocess(data[0], tf_mean) # # data[0] = preprocess_norm(data[0]) # if ix == 0: for j in range(len(data)): data_pool.append([data[j]]) else: for j in range(len(data)): data_pool[j].append(data[j]) combined_data = [] for dd in data_pool: combined_data.append(tf.concat(dd, axis=0)) # print("sanity check : combined_data", combined_data) return combined_data
def telemetry(sid, data): if data: # The current steering angle of the car steering_angle = float(data["steering_angle"]) # The current throttle of the car, how hard to push peddle throttle = float(data["throttle"]) # The current speed of the car speed = float(data["speed"]) ensembling_weight = 0.66 # The current image from the center camera of the car image = Image.open(BytesIO(base64.b64decode(data["image"]))) try: image = np.asarray(image) # from PIL image to numpy array image = utils.preprocess(image) # apply the preprocessing image = np.array([image]) # the model expects 4D array global smoothed_angle # predict the steering angle for the image steering_angle1 = float(model.predict(image, batch_size=1)) # smoothed_angle += 0.2 * pow(abs((steering_angle - smoothed_angle)), 2.0 / 3.0) * (steering_angle - smoothed_angle) / abs(steering_angle - smoothed_angle) # lower the throttle as the speed increases steering_angle2 = float(model2.predict(image, batch_size=1)) steering_angle = float( (ensembling_weight * steering_angle1 + steering_angle2 * (1.0 - ensembling_weight))) # if the speed is above the current speed limit, we are on a downhill. # make sure we slow down first and then go back to the original max speed. global speed_limit if speed > speed_limit: speed_limit = MIN_SPEED # slow down else: speed_limit = MAX_SPEED throttle = 1.0 - steering_angle**2 - (speed / speed_limit)**2 print('{} {} {}'.format(steering_angle, throttle, speed)) send_control(steering_angle, throttle) except Exception as e: print(e) # save frame if args.image_folder != '': timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3] image_filename = os.path.join(args.image_folder, timestamp) image.save('{}.jpg'.format(image_filename)) else: sio.emit('manual', data={}, skip_sid=True)
def __getitem__(self, idx): files = self.sample_files[idx] pre_img = cv2.imread(files['pre_img']) post_img = cv2.imread(files['post_img']) if self.rgb: pre_img = cv2.cvtColor(pre_img, cv2.COLOR_BGR2RGB) post_img = cv2.cvtColor(post_img, cv2.COLOR_BGR2RGB) if self.mode in [ 'train', 'oodtrain', 'guptatrain', "ood2train", "ood3train", "singletrain" ]: sample = self.get_sample_with_mask(files, pre_img, post_img) sample['image_id'] = files['img_id'] if self.preprocessing is not None: transformed = preprocess(sample['pre_img'], sample['post_img'], sample['mask_img'], flip=self.preprocessing['flip'], scale=self.preprocessing['scale'], crop=self.preprocessing['crop']) sample['pre_img'] = transformed[0] sample['post_img'] = transformed[1] sample['mask_img'] = transformed[2] elif self.mode in [ 'oodtest', 'oodhold', 'guptatest', 'guptahold', "ood2test", "ood2hold", "ood3test", "ood3hold", "singletest", "singlehold" ]: pre_img = self.data_transforms(pre_img) post_img = self.data_transforms(post_img) sample = { 'pre_img': pre_img, 'post_img': post_img, 'image_id': files['img_id'] } post_json = json.loads(open(files['post_json']).read()) buildings = self._get_building_from_json(post_json) sample['mask_img'] = self.make_mask_img(**buildings) else: pre_img = self.data_transforms(pre_img) post_img = self.data_transforms(post_img) sample = { 'pre_img': pre_img, 'post_img': post_img, 'image_id': files['img_id'] } return sample
def telemetry(sid, data): if data: # The current steering angle of the car steering_angle = float(data["steering_angle"]) # The current throttle of the car, how hard to push peddle throttle = float(data["throttle"]) # The current speed of the car speed = float(data["speed"]) # The current image from the center camera of the car image = Image.open(BytesIO(base64.b64decode(data["image"]))) try: image = np.asarray(image) image = utils.preprocess(image) # apply the preprocessing image = np.array([image]) # the model expects 4D array # predict the steering angle for the image steering_angle = float(model.predict(image, batch_size=1)) # lower the throttle as the speed increases # if the speed is above the current speed limit, we are on a downhill. global speed_limit if speed > speed_limit: speed_limit = MIN_SPEED # slow down else: speed_limit = MAX_SPEED #Calculate throttle from spped and steering angle throttle = 1.0 - steering_angle**2 - (speed/speed_limit)**2 print('{} {} {}'.format(steering_angle, throttle, speed)) send_control(steering_angle, throttle) except Exception as e: print(e) # save frame if args.image_folder != '': timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3] image_filename = os.path.join(args.image_folder, timestamp) image.save('{}.jpg'.format(image_filename)) else: sio.emit('manual', data={}, skip_sid=True)
def telemetry(sid, data):
    if data:
        # The current steering angle of the car
        steering_angle = float(data["steering_angle"])
        # The current throttle of the car, how hard to push the pedal
        throttle = float(data["throttle"])
        # The current speed of the car
        speed = float(data["speed"])
        # The current image from the center camera of the car
        image = Image.open(BytesIO(base64.b64decode(data["image"])))

        # save frame
        if args.image_folder != '':
            timestamp = datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')[:-3]
            image_filename = os.path.join(args.image_folder, timestamp)
            image.save('{}.jpg'.format(image_filename))

        try:
            image = np.asarray(image)        # from PIL image to numpy array
            image = utils.preprocess(image)  # apply the preprocessing
            image = np.array([image])        # the model expects 4D array

            # predict the steering angle for the image
            steering_angle = float(model.predict(image, batch_size=1))

            # lower the throttle as the speed increases
            # if the speed is above the current speed limit, we are on a downhill.
            # make sure we slow down first and then go back to the original max speed.
            global speed_limit
            if speed > speed_limit:
                speed_limit = MIN_SPEED  # slow down
            else:
                speed_limit = MAX_SPEED
            throttle = 1.0 - steering_angle**2 - (speed / speed_limit)**2

            print('{} {} {}'.format(steering_angle, throttle, speed))
            send_control(steering_angle, throttle)
        except Exception as e:
            print(e)
    else:
        sio.emit('manual', data={}, skip_sid=True)
def get_data():
    """
    Load the corpus.
    """
    texts = []
    subfolds = os.listdir("../Sample")
    for subfold in subfolds:
        subdir = "../Sample/{}".format(subfold)
        if os.path.isdir(subdir):
            files = os.listdir(subdir)
            for file in files:
                text = open("{}/{}".format(subdir, file)).read()
                text = utils.preprocess(text)
                text = utils.getWordlist(text)
                texts.append(text)

    # Remove words that occur only once
    frequency = defaultdict(int)
    for text in texts:
        for token in text:
            frequency[token] += 1
    texts = [[token for token in text if frequency[token] > 1] for text in texts]
    return texts
def train():
    """
    Training systole and diastole models.
    """
    print('Loading and compiling models...')
    model_systole = get_model(img_size)
    model_diastole = get_model(img_size)

    print('Loading training data...')
    X, y = load_train_data()

    print('Pre-processing images...')
    X = preprocess(X)

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    # define image generator for random rotations
    datagen = ImageDataGenerator(featurewise_center=False,
                                 featurewise_std_normalization=False,
                                 rotation_range=15)

    nb_iter = 300
    epochs_per_iter = 1
    batch_size = 64
    calc_crps = 1  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

    # remember min val. losses (best iterations), used as sigmas for submission
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    if not os.path.exists(STATS):
        os.makedirs(STATS)
    with open(STATS + 'RMSE_CRPS.txt', 'w') as f:
        names = ['train_RMSE_d', 'train_RMSE_s', 'test_RMSE_d', 'test_RMSE_s', 'train_crps', 'test_crps']
        f.write('\t'.join([str(name) for name in names]) + '\n')

    print('-' * 50)
    print('Training...')
    print('-' * 50)

    for i in range(nb_iter):
        print('-' * 50)
        print('Iteration {0}/{1}'.format(i + 1, nb_iter))
        print('-' * 50)

        print('Augmenting images - rotations')
        X_train_aug = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        X_train_aug = shift_augmentation(X_train_aug, 0.1, 0.1)

        print('Fitting systole model...')
        hist_systole = model_systole.fit(X_train_aug, y_train[:, 0], shuffle=True,
                                         nb_epoch=epochs_per_iter, batch_size=batch_size,
                                         validation_data=(X_test, y_test[:, 0]))

        print('Fitting diastole model...')
        hist_diastole = model_diastole.fit(X_train_aug, y_train[:, 1], shuffle=True,
                                           nb_epoch=epochs_per_iter, batch_size=batch_size,
                                           validation_data=(X_test, y_test[:, 1]))

        # sigmas for predicted data, actually loss function values (RMSE)
        loss_systole = hist_systole.history['loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        if calc_crps > 0 and i % calc_crps == 0:
            print('Evaluating CRPS...')
            pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
            pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1)
            val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)
            val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1)

            # CDF for train and test data (actually a step function)
            cdf_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1])))
            cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1])))

            # CDF for predicted data
            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

            # evaluate CRPS on training data
            crps_train = crps(cdf_train, np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
            print('CRPS(train) = {0}'.format(crps_train))

            # evaluate CRPS on test data
            crps_test = crps(cdf_test, np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
            print('CRPS(test) = {0}'.format(crps_test))

        print('Saving weights...')
        # save weights so they can be loaded later
        model_systole.save_weights(MODELS + 'weights_systole.hdf5', overwrite=True)
        model_diastole.save_weights(MODELS + 'weights_diastole.hdf5', overwrite=True)

        # for best (lowest) val losses, save weights
        if val_loss_systole < min_val_loss_systole:
            min_val_loss_systole = val_loss_systole
            model_systole.save_weights(MODELS + 'weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < min_val_loss_diastole:
            min_val_loss_diastole = val_loss_diastole
            model_diastole.save_weights(MODELS + 'weights_diastole_best.hdf5', overwrite=True)

        # save best (lowest) val losses in file (to be later used for generating submission)
        with open(MODELS + 'val_loss.txt', mode='w+') as f:
            f.write(str(min_val_loss_systole))
            f.write('\n')
            f.write(str(min_val_loss_diastole))

        with open(STATS + 'RMSE_CRPS.txt', 'a') as f:
            # train_RMSE_d train_RMSE_s test_RMSE_d test_RMSE_s train_crps test_crps
            rmse_values = [loss_diastole, loss_systole, val_loss_diastole, val_loss_systole]
            crps_values = [crps_train, crps_test]
            f.write('\t'.join([str(val) for val in rmse_values + crps_values]) + '\n')

        print('Saving stats images...')
        write_images(STATS)

        if (i != 0) & ((i + 1) % 100 == 0):
            print('Submitting learned model....')
            SUBMISSION_FOLDER = SUBMISSION + preproc_type + "/" + model_name + "/" + get_name() + "_ITERS" + str(i + 1) + "/"
            if not os.path.exists(SUBMISSION_FOLDER):
                os.makedirs(SUBMISSION_FOLDER)
            copyfile(MODELS + 'weights_systole_best.hdf5', SUBMISSION_FOLDER + 'weights_systole_best.hdf5')
            copyfile(MODELS + 'weights_diastole_best.hdf5', SUBMISSION_FOLDER + 'weights_diastole_best.hdf5')
            copyfile(MODELS + 'val_loss.txt', SUBMISSION_FOLDER + 'val_loss.txt')
            os.system('python submission.py %s %s %s' % (preproc_type, model_name, SUBMISSION_FOLDER))
def main(num_epochs=500): # Load the dataset print 'Loading dataset ...' eng_para = pd.read_csv('data/2g_gongcan.csv') #eng_para = eng_para.loc[:, ['LAC', 'CI', 'Angle', 'Longitude', 'Latitude', 'Power', 'GSM Neighbor Count', 'TD Neighbor Count']] tr_feature, tr_label, tr_ids = load_dataset('data/forward_recovered.csv', eng_para, True) te_feature, te_label, te_ids = load_dataset('data/backward_recovered.csv', eng_para, False) ## !!! maybe here need to ensure train data are the same shape as test data train_size, n_con = tr_feature.shape test_size, n_con = te_feature.shape n_dis = len(tr_ids) # Create neural network model print 'Preprocessing data ...' # Standardize continous input tr_feature, te_feature = preprocess(tr_feature, te_feature) tr_input = {'con_input' : tr_feature} te_input = {'con_input' : te_feature} # Prepare embedding input dis_dims, vocab_sizes = [], [] for ii, tr_ids_, te_ids_ in zip(range(n_dis), tr_ids, te_ids): # make sure tr_ids contain several different discrete features vocab_size, vocab_dict = make_vocab(tr_ids_, te_ids_) tr_id_idx_, te_id_idx_ = [], [] dis_dim = len(tr_ids_) for i in range(dis_dim): tr_id_idx_ += map(lambda x: vocab_dict[x], tr_ids_[i]) te_id_idx_ += map(lambda x: vocab_dict[x], te_ids_[i]) tr_ids = np.array(tr_id_idx_, dtype=np.int32).reshape(dis_dim, train_size).transpose() te_ids = np.array(te_id_idx_, dtype=np.int32).reshape(dis_dim, test_size).transpose() ## Add discrete feature to dict tr_input['emb_input%d' % ii] = tr_ids te_input['emb_input%d' % ii] = te_ids dis_dims.append(dis_dim) vocab_sizes.append(vocab_size) print 'Building model and compiling functions ...' # Define network structure l_output = build_mlp(n_con, n_dis, dis_dims, vocab_sizes) # Set batch size bi = BatchIterator(batch_size=10) # Build network network = NeuralNet(l_output, regression=True, update_learning_rate=1e-5, update=nesterov_momentum, update_momentum=0.9, train_split=TrainSplit(eval_size=0.05), verbose=1, batch_iterator_train=bi, objective_loss_function=lasagne.objectives.squared_error, max_epochs=5000) pickle_name = 'MLP-0.10.pickle' mul_val = 10000. lon_offset = np.mean(tr_label[:, 0]) lon_std = np.mean(tr_label[:, 0]) lat_offset = np.mean(tr_label[:, 1]) lat_std = np.mean(tr_label[:, 1]) ######## Change Target tr_label[:, 0] = (tr_label[:, 0] - lon_offset) * mul_val tr_label[:, 1] = (tr_label[:, 1] - lat_offset) * mul_val tr_label = tr_label.astype(np.float32) print tr_label is_train = True if is_train: network.fit(tr_input, tr_label) # Dump Network with open('model/'+pickle_name, 'wb') as f: pickle.dump(network, f, -1) else: # Load Network f = open('model/'+pickle_name) network = pickle.load(f) # Make prediction te_pred = network.predict(te_input) te_pred[:, 0] = te_pred[:, 0] / mul_val + lon_offset te_pred[:, 1] = te_pred[:, 1] / mul_val + lat_offset f_out = open('pred.csv', 'w') for pred_pt, true_pt in zip(te_pred, te_label): f_out.write('%f,%f,%f,%f\n' % (pred_pt[0], pred_pt[1], true_pt[0], true_pt[1])) # Generate report gen_report(te_label, te_pred, pickle_name)
def mlp(tr_data, te_data, eng_para, col_name, grid_size, \ optimizer, batch_size, hidden_size, mlp_feature, \ nb_epoch, prediction, model_name, is_train): # Load the dataset print 'Loading dataset ...' tr_feature, tr_label, tr_ids = mlp_feature(tr_data, eng_para, True, col_name) te_feature, te_label, te_ids = mlp_feature(te_data, eng_para, True, col_name) rg = RoadGrid(np.vstack((tr_label, te_label)), grid_size) tr_label = rg.transform(tr_label) # te_label = rg.transform(te_label) ## !!! maybe here need to ensure train data are the same shape as test data train_size, n_con = tr_feature.shape test_size, n_con = te_feature.shape n_dis = len(tr_ids) # Create neural network model print 'Preprocessing data ...' # Standardize continous input # tr_feature, te_feature = preprocess(tr_feature, te_feature) tr_feature, te_feature = preprocess(tr_feature, te_feature) # te_feature = preprocess(te_feature) tr_input = {'con_input' : tr_feature, 'output' : tr_label} te_input = {'con_input' : te_feature} # Prepare embedding input dis_dims, vocab_sizes = [], [] for ii, tr_ids_, te_ids_ in zip(range(n_dis), tr_ids, te_ids): # make sure tr_ids contain several different discrete features vocab_size, vocab_dict = make_vocab(tr_ids_, te_ids_) tr_id_idx_, te_id_idx_ = [], [] dis_dim = len(tr_ids_) for i in range(dis_dim): tr_id_idx_ += map(lambda x: vocab_dict[x], tr_ids_[i]) te_id_idx_ += map(lambda x: vocab_dict[x], te_ids_[i]) tr_ids = np.array(tr_id_idx_, dtype=np.int32).reshape(dis_dim, train_size).transpose() te_ids = np.array(te_id_idx_, dtype=np.int32).reshape(dis_dim, test_size).transpose() ## Add discrete feature to dict tr_input['emb_input%d' % ii] = tr_ids te_input['emb_input%d' % ii] = te_ids dis_dims.append(dis_dim) vocab_sizes.append(vocab_size) print 'Building model and compiling functions ...' # Define network structure grid_info = rg.grid_center network = build_mlp(n_con, n_dis, dis_dims, vocab_sizes, len(grid_info), hidden_size) #network.compile(loss={'output': 'categorical_crossentropy'}, optimizer=SGD(lr=1e-2, momentum=0.9, nesterov=True)) network.compile(loss={'output': 'categorical_crossentropy'}, optimizer=optimizer) # Build network # pickle_name = 'MLP-softmax-0.4.pickle' pickle_name = model_name if is_train: history = network.fit(tr_input, nb_epoch=nb_epoch, batch_size=batch_size, verbose=1) # Dump Network with open('model/'+pickle_name, 'wb') as f: pickle.dump(network, f, -1) else: # Load Network f = open('model/'+pickle_name) network = pickle.load(f) # Make prediction ## 1. weighted if prediction == 'weighted': te_pred = np.asarray(network.predict(te_input)['output']) te_pred = te_pred.dot(grid_info) # Generate report # gen_report(te_label, te_pred, pickle_name, [type(optimizer), batch_size, hidden_size, 'Weighted']) elif prediction == 'argmax': ## 2. argmax te_pred = np.asarray(network.predict(te_input)['output']) te_pred = np.argmax(te_pred, axis=1) te_pred = [grid_info[idx] for idx in te_pred] # Generate report # gen_report(te_label, te_pred, pickle_name, [type(optimizer), batch_size, hidden_size, 'Argmax']) else: te_pred = None return te_pred
def train():
    """
    Training systole and diastole models.
    """
    print('Loading and compiling models...')
    model_systole = get_model()
    model_diastole = get_model()

    print('Loading training data...')
    X, y = load_train_data()

    print('Pre-processing images...')
    X = preprocess(X)

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    nb_iter = 200
    epochs_per_iter = 1
    batch_size = 32
    calc_crps = 1  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

    # remember min val. losses (best iterations), used as sigmas for submission
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    print('-' * 50)
    print('Training...')
    print('-' * 50)

    for i in range(nb_iter):
        print('-' * 50)
        print('Iteration {0}/{1}'.format(i + 1, nb_iter))
        print('-' * 50)

        print('Augmenting images - rotations')
        X_train_aug = rotation_augmentation(X_train, 15)
        print('Augmenting images - shifts')
        X_train_aug = shift_augmentation(X_train_aug, 0.1, 0.1)

        print('Fitting systole model...')
        hist_systole = model_systole.fit(X_train_aug, y_train[:, 0], shuffle=True,
                                         nb_epoch=epochs_per_iter, batch_size=batch_size,
                                         validation_data=(X_test, y_test[:, 0]))

        print('Fitting diastole model...')
        hist_diastole = model_diastole.fit(X_train_aug, y_train[:, 1], shuffle=True,
                                           nb_epoch=epochs_per_iter, batch_size=batch_size,
                                           validation_data=(X_test, y_test[:, 1]))

        # sigmas for predicted data, actually loss function values (RMSE)
        loss_systole = hist_systole.history['loss'][-1]
        loss_diastole = hist_diastole.history['loss'][-1]
        val_loss_systole = hist_systole.history['val_loss'][-1]
        val_loss_diastole = hist_diastole.history['val_loss'][-1]

        if calc_crps > 0 and i % calc_crps == 0:
            print('Evaluating CRPS...')
            pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
            pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1)
            val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)
            val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1)

            # CDF for train and test data (actually a step function)
            cdf_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1])))
            cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1])))

            # CDF for predicted data
            cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
            cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
            cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
            cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

            # evaluate CRPS on training data
            crps_train = crps(cdf_train, np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
            print('CRPS(train) = {0}'.format(crps_train))

            # evaluate CRPS on test data
            crps_test = crps(cdf_test, np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
            print('CRPS(test) = {0}'.format(crps_test))

        print('Saving weights...')
        # save weights so they can be loaded later
        model_systole.save_weights('weights_systole.hdf5', overwrite=True)
        model_diastole.save_weights('weights_diastole.hdf5', overwrite=True)

        # for best (lowest) val losses, save weights
        if val_loss_systole < min_val_loss_systole:
            min_val_loss_systole = val_loss_systole
            model_systole.save_weights('weights_systole_best.hdf5', overwrite=True)

        if val_loss_diastole < min_val_loss_diastole:
            min_val_loss_diastole = val_loss_diastole
            model_diastole.save_weights('weights_diastole_best.hdf5', overwrite=True)

        # save best (lowest) val losses in file (to be later used for generating submission)
        with open('val_loss.txt', mode='w+') as f:
            f.write(str(min_val_loss_systole))
            f.write('\n')
            f.write(str(min_val_loss_diastole))
def run(self, load_pipeline=False, train_more=False):
    # Load/pretrain the net
    if load_pipeline:
        # Load the processing pipeline
        nn_input_shape = self.get_nn_input_shape()
        nnet, numer, denom, scaler = utils.load_processing_pipeline(
            self._filename_pipeline_base, self._is_scaling_needed,
            nn_type=self._nn_type, nn_input_shape=nn_input_shape,
            nn_output_shape=self._num_event_types,
            num_max_training_epochs=self._num_max_training_epochs)
        if train_more:
            # Load the training data
            X_train_raw, labels_train = utils.load_data(
                data_filename=self._filename_train,
                signal_col_ids=self._signal_col_ids,
                label_col_ids=self._label_col_ids,
                decimation_factor=self._decimation_factor)

            # Preprocess the data
            numer, denom, scaler = utils.init_preprocessors(
                X_raw=X_train_raw, freq_sampling=self._freq_sampling,
                freq_cut_lo=self._freq_cut_lo, freq_cut_hi=self._freq_cut_hi,
                M_fir=self._M_fir, artifact_threshold=self._artifact_threshold)
            if not self._is_scaling_needed:
                scaler = None
            X_train_preproc, labels_train = utils.preprocess(
                X_train_raw, labels_train,
                tdfilt_numer=numer, tdfilt_denom=denom,
                # reref_channel_id=params.REREF_CHANNEL_ID,
                artifact_threshold=self._artifact_threshold,
                # power=True,
                # mov_avg_window_size=params.MOVING_AVG_WINDOW_SIZE_SECS,
                scaler=scaler,
                window_size=self._window_size_decimated_in_samples,
                nn_type=self._nn_type)

            # Train the NN
            nnet = nnutils.train_nn_from_timeseries(
                nnet, self._nn_type, X_train_preproc, labels_train,
                self._window_size_decimated_in_samples,
                self._num_event_types, self._num_train_data_instances,
                plot_history=False)

            # Save the pipeline
            utils.save_processing_pipeline(nnet, self._nn_type, numer, denom, scaler)

    else:
        # If a new net is to be created
        # Load the training data
        logging.debug('%s Loading the training data...', TAG)
        X_train_raw, labels_train = utils.load_data(
            data_filename=self._filename_train,
            signal_col_ids=self._signal_col_ids,
            label_col_ids=self._label_col_ids,
            decimation_factor=self._decimation_factor)
        logging.debug('%s X_train_raw.shape: %s', TAG, str(X_train_raw.shape))
        logging.debug('%s labels_train.shape: %s', TAG, str(labels_train.shape))
        logging.debug('%s np.sum(labels_train, axis=0): %s',
                      TAG, str(np.sum(labels_train, axis=0).tolist()))
        logging.debug('%s Training data loaded.', TAG)

        # Preprocess the data
        numer, denom, scaler = utils.init_preprocessors(
            X_train_raw, self._freq_sampling, self._freq_cut_lo,
            self._freq_cut_hi, self._M_fir,
            artifact_threshold=self._artifact_threshold, plot=False)
        if not self._is_scaling_needed:
            scaler = None
        X_train_preproc, labels_train = utils.preprocess(
            X_train_raw, labels_train,
            tdfilt_numer=numer, tdfilt_denom=denom,
            # reref_channel_id=params.REREF_CHANNEL_ID,
            artifact_threshold=self._artifact_threshold,
            # power=True,
            # mov_avg_window_size=params.MOVING_AVG_WINDOW_SIZE_SECS,
            scaler=scaler,
            window_size=self._window_size_decimated_in_samples,
            nn_type=self._nn_type)
        # labels_train = labels_train

        # Plot the training data
        if self._is_plot_mode_on:
            logging.debug('%s Plotting the preprocessed training data... %s',
                          TAG, self._nn_type)
            time_axis = np.arange(X_train_preproc.shape[0])
            if 'gtec' in self._nn_type:
                t_from = 0
                t_to = t_from + 120 * self._freq_sampling
                plot_cols = range(16)
                #plot_cols = (1, 3, 9)
                #plt.plot(time_axis[t_from:t_to], X_train_preproc[t_from:t_to, plot_rows, plot_cols], label='tdfilt')
                #plt.plot(time_axis[t_from:t_to], X_train_raw[t_from:t_to, plot_cols], label='raw')
                plt.plot(time_axis[t_from:t_to], X_train_preproc[t_from:t_to, plot_cols], label='tdfilt')
                plt.plot(time_axis[t_from:t_to], 10.0*labels_train[t_from:t_to], linewidth=3, label='event')
            elif 'biosemi' in self._nn_type:
                t_from = 20000
                t_to = t_from + 1000 * self._freq_sampling
                #plot_rows = (6)
                #plot_cols = (0, 1, 2, 3, 4, 5, 6, 7)
                #plt.plot(time_axis[t_from:t_to], X_train_preproc[t_from:t_to, plot_rows, plot_cols], label='tdfilt')
                #plt.plot(time_axis[t_from:t_to], X_train_raw[t_from:t_to, plot_cols], label='raw')
                plt.plot(time_axis[t_from:t_to], X_train_preproc[t_from:t_to, plot_cols], label='tdfilt')
                plt.plot(time_axis[t_from:t_to], -300000.0*labels_train[t_from:t_to], linewidth=3, label='event')
            elif 'gal' in self._nn_type:
                t_from = 0
                t_to = t_from + 1000 * self._freq_sampling
                #plot_rows = (6)
                plot_cols = (0, 1, 2, 3, 4, 5, 6, 7)
                #plt.plot(time_axis[t_from:t_to], X_train_preproc[t_from:t_to, plot_rows, plot_cols], label='tdfilt')
                #plt.plot(time_axis[t_from:t_to], X_train_raw[t_from:t_to, plot_cols], label='raw')
                plt.plot(time_axis[t_from:t_to], X_train_preproc[t_from:t_to, plot_cols], label='tdfilt')
                plt.plot(time_axis[t_from:t_to], 10.0*labels_train[t_from:t_to], linewidth=3, label='event')
            else:
                logging.critical('%s Unknown source make.', TAG)
            plt.legend(loc='lower right')
            plt.show()

        # Init the NN
        nn_input_shape = self.get_nn_input_shape()
        nnet, _ = nnfactory.create_nn(
            nn_type=self._nn_type, nn_input_shape=nn_input_shape,
            nn_output_shape=self._num_event_types,
            num_max_training_epochs=self._num_max_training_epochs)

        # Train the NN
        logging.debug('%s Training the NN...', TAG)
        nnet = nnutils.train_nn_from_timeseries(
            nnet, self._nn_type, X_train_preproc, labels_train,
            self._window_size_decimated_in_samples,
            self._num_event_types, self._num_train_data_instances,
            plot_history=False)
        logging.debug('%s Training the NN finished.', TAG)

        # Save the pipeline
        utils.save_processing_pipeline(nnet, self._nn_type, numer, denom, scaler)

    # Load the test data
    logging.debug('%s Loading the test data...', TAG)
    X_test_raw, labels_test = utils.load_data(
        data_filename=self._filename_test,
        signal_col_ids=self._signal_col_ids,
        label_col_ids=self._label_col_ids,
        decimation_factor=self._decimation_factor)
    if self._is_runtest_mode_on:
        X_test_raw = X_test_raw[0:X_test_raw.shape[0]/4]
        labels_test = labels_test[0:labels_test.shape[0]/4]

    # Pre-process the test data
    if not self._is_scaling_needed:
        scaler = None
    X_test_preproc, labels_test = utils.preprocess(
        X_test_raw, labels_test,
        tdfilt_numer=numer, tdfilt_denom=denom,
        #reref_channel_id=params.REREF_CHANNEL_ID,
        artifact_threshold=self._artifact_threshold,
        #power=True,
        #mov_avg_window_size=params.MOVING_AVG_WINDOW_SIZE_SECS,
        scaler=scaler,
        window_size=self._window_size_decimated_in_samples,
        nn_type=self._nn_type)
    #X_test_preproc = X_test_preproc[0:40000, :]
    #labels_test = labels_test[0:40000, :]
    # Dummy set for testing
    #X_test_preproc = X_train = np.tile(np.reshape(labels_test[:, 0], (labels_test.shape[0], 1)), [1, params.NUM_CHANNELS])

    # Test the net
    batch_iter_test_valid = TimeSeriesBatchIterator(
        data=X_test_preproc, labels=None, nn_type=self._nn_type,
        window_size_samples=self._window_size_decimated_in_samples,
        nn_output_shape=self._num_event_types,
        batch_size=params.BATCH_SIZE)
    nnet.batch_iterator_train = None
    nnet.batch_iterator_test = batch_iter_test_valid
    indices_test = np.arange(X_test_preproc.shape[0])
    logging.debug('%s Testing the net...', TAG)
    utils.log_timestamp()
    predictions = nnet.predict_proba(indices_test)
    utils.log_timestamp()
    logging.debug('%s Predictions size: %d, %d',
                  TAG, predictions.shape[0], predictions.shape[1])
    logging.debug('%s np.sum(predictions): %f', TAG, np.sum(predictions))

    # Find the thresholds
    #tpr_targets = (0.5, 0.5, 0.5, 0.5, 0.5, 0.5)
    tpr_targets = (0.5, 0.5)
    p_thresholds = utils.calculate_auroc(
        labels_test, predictions, self._event_name_list, tpr_targets,
        self._nn_type, plot=self._is_plot_mode_on)
    #p_thresholds = (0.1, 0.1, 0.1)
    logging.debug('%s p_thresholds: %s', TAG, str(p_thresholds))

    # Simulate control signal
    if self._is_control_simulation_on:
        TimeSeriesProcessor.create_control_signal(
            labels_test, predictions, p_thresholds, self._nn_type)
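# A minimal sketch (not part of the pipeline above) of how the per-class probability
# thresholds returned by utils.calculate_auroc could be applied to the predict_proba
# output to obtain binary event decisions; variable names mirror the code above, and
# the helper itself is an illustrative assumption, not the project's own function.
import numpy as np

def apply_thresholds(predictions, p_thresholds):
    """Return a 0/1 event matrix with one column per event type."""
    p_thresholds = np.asarray(p_thresholds).reshape(1, -1)
    return (predictions >= p_thresholds).astype(np.int8)

# e.g. events = apply_thresholds(predictions, p_thresholds)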
''' setting parameters according to the README ''' prob = svm_problem(train_labels, train_images) param = svm_parameter('-q') param_best = svm_parameter('-c 32 -g 0.0078125 -q') param_linear = svm_parameter('-t 0 -q') param_poly = svm_parameter('-t 1 -g 1 -q') param_rbf = svm_parameter('-g 0.0078125 -q') model = svm_train(prob, param) """ ''' precompute-kernel in generate by precompute-kernel.py ''' pre_train_labels, pre_train_images = svm_read_problem('../../../lab5/data/precompute-kernel-train') pre_test_labels, pre_test_images = svm_read_problem('../../../lab5/data/precompute-kernel-test') print('File loaded') prob_pre = svm_problem(pre_train_labels, pre_train_images, isKernel=True) param_pre = svm_parameter('-t 4') model = svm_train(prob_pre, param_pre) """ ''' get support vectors ''' n = model.get_sv_indices() n = [i-1 for i in n] ''' draw support vectors and dots in 2D space with PCA ''' images, labels = preprocess(path='../../../lab5/data/') pca(images, labels, special=n)
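# The precompute-kernel files read above follow LIBSVM's precomputed-kernel format
# ('-t 4'): each row starts with the label, feature 0 holds the 1-based sample index,
# and features 1..L hold the kernel values against all training samples. A rough
# sketch of how such a file could be generated (precompute-kernel.py itself is not
# shown here; the RBF gamma is only an example value):
import numpy as np

def write_precomputed_kernel(fname, labels, X, X_train, gamma=0.0078125):
    # K[i, j] = exp(-gamma * ||x_i - x_train_j||^2)
    sq_dists = ((X[:, None, :] - X_train[None, :, :]) ** 2).sum(axis=2)
    K = np.exp(-gamma * sq_dists)
    with open(fname, 'w') as f:
        for i, (y, row) in enumerate(zip(labels, K), start=1):
            feats = ' '.join('%d:%.12g' % (j, v) for j, v in enumerate(row, start=1))
            f.write('%g 0:%d %s\n' % (y, i, feats))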
# preprocess the training text
from utils import preprocess

with open("raw.txt") as fin:
    text = fin.read()

print preprocess(text)
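# utils.preprocess is not shown in this snippet; a minimal stand-in that lowercases,
# strips punctuation and collapses whitespace would look roughly like this (the exact
# behaviour of the real helper may differ):
import re
import string

def preprocess(text):
    text = text.lower()
    text = re.sub('[%s]' % re.escape(string.punctuation), ' ', text)
    return ' '.join(text.split())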
def cursor_func(freq_sampling, num_signal_channels, num_event_types, window_size_in_samples):
    logging.debug('%s cursor_func(.) entered.', TAG)

    len_padding = 5 * freq_sampling
    cursor_radius = 26
    w = 2 * math.pi / 10

    # Initialize the time-domain filter
    #numer, denom = get_time_domain_filters(8.0, 12.0, 0.5)

    # Init the NN
    if is_control_mode:
        filename_base = '../models/MIBBCI_NN_medium_bestsofar'
        filename_nn = filename_base + '.npz'
        nnet = nnutils.load_nn(nnutils.create_nn_medium, filename_nn)

    # Init the preproc stuff
    if is_control_mode:
        filename_p = filename_base + '.p'
        scaler = cPickle.load(open(filename_p, 'rb'))
        print 'Loaded scaler.mean_, scaler.var_:', scaler.mean_, scaler.var_

    # Init graphics
    win = graphics.GraphWin('Cursor', params.IMAGE_W, params.IMAGE_H)
    cursor = graphics.Circle(graphics.Point(params.IMAGE_W/2, params.IMAGE_H/2), cursor_radius)
    cursor.setFill(graphics.color_rgb(params.CURSOR_COLOR_REST[0], params.CURSOR_COLOR_REST[1], params.CURSOR_COLOR_REST[2]))
    cursor.setOutline(graphics.color_rgb(params.CURSOR_COLOR_REST[0], params.CURSOR_COLOR_REST[1], params.CURSOR_COLOR_REST[2]))
    cursor.draw(win)
    cursor_pos_prev = np.array([params.IMAGE_W/2, params.IMAGE_H/2])
    cursor_pos = cursor_pos_prev

    # Init event labels
    event_arr_right = np.zeros((params.LEN_DATA_CHUNK_READ, num_event_types))
    event_arr_right[:, params.EVENT_ID_RH] = np.ones(params.LEN_DATA_CHUNK_READ)
    event_arr_left = np.zeros((params.LEN_DATA_CHUNK_READ, num_event_types))
    event_arr_left[:, params.EVENT_ID_LH] = np.ones(params.LEN_DATA_CHUNK_READ)
    event_arr_idle = np.zeros((params.LEN_DATA_CHUNK_READ, num_event_types))
    event_arr_idle[:, params.EVENT_ID_IDLE] = np.ones(params.LEN_DATA_CHUNK_READ)
    #event_arr_calib = np.zeros((params.LEN_DATA_CHUNK_READ, num_event_types))
    #event_arr_calib[:, 3] = np.ones(params.LEN_DATA_CHUNK_READ)
    cursor_event_list = []
    cursor_color_arr_raw = np.zeros((int(params.LEN_PERIOD_SEC * freq_sampling / params.LEN_DATA_CHUNK_READ), 3))
    color_counter = 0
    for i in range(int(params.LEN_IDLE_SEC * freq_sampling / params.LEN_DATA_CHUNK_READ)):
        cursor_color_arr_raw[color_counter, :] = params.CURSOR_COLOR_IDLE
        cursor_event_list.append(event_arr_idle)  # r, l, idle, calib
        color_counter += 1
    for i in range(int(params.LEN_RIGHT_SEC * freq_sampling / params.LEN_DATA_CHUNK_READ)):
        cursor_color_arr_raw[color_counter, :] = params.CURSOR_COLOR_RIGHT
        cursor_event_list.append(event_arr_right)
        color_counter += 1
    for i in range(int(params.LEN_IDLE_SEC * freq_sampling / params.LEN_DATA_CHUNK_READ)):
        cursor_color_arr_raw[color_counter, :] = params.CURSOR_COLOR_IDLE
        cursor_event_list.append(event_arr_idle)
        color_counter += 1
    for i in range(int(params.LEN_LEFT_SEC * freq_sampling / params.LEN_DATA_CHUNK_READ)):
        cursor_color_arr_raw[color_counter, :] = params.CURSOR_COLOR_LEFT
        cursor_event_list.append(event_arr_left)
        color_counter += 1
    conv_window = np.ones((params.LEN_COLOR_CONV_SEC * freq_sampling / params.LEN_DATA_CHUNK_READ, 1))\
        / (1 * int(params.LEN_COLOR_CONV_SEC * freq_sampling / params.LEN_DATA_CHUNK_READ))
    cursor_color_arr_ud = np.flipud(cursor_color_arr_raw)
    cursor_color_arr_ud_convd = signal.convolve(cursor_color_arr_ud.T, conv_window.T).T
    cursor_color_arr_final = np.flipud(cursor_color_arr_ud_convd[0:cursor_color_arr_raw.shape[0], :])
    if False:
        plt.figure()
        plt.plot(cursor_color_arr_raw)
        #plt.plot(cursor_color_arr_ud[:, 0])
        #plt.plot(cursor_color_arr_ud_convd[:, 0])
        plt.plot(cursor_color_arr_final)
        #plt.legend(['raw', 'ud', 'ud_convd', 'final'])
        plt.show()

    # Initialize the amplifier
    if not is_simulation_mode:
        print 'Initializing the amp...'
        recorder = Recorder('lslamp', freq_sampling, params.LEN_REC_BUF_SEC, num_signal_channels)
        thread_rec = threading.Thread(target=recorder.record)
        thread_rec.start()

    # Cursor control loop
    X_raw_buf_live = np.zeros((int(freq_sampling*params.LEN_REC_BUF_SEC), num_signal_channels))
    label_buf_live = np.zeros((int(freq_sampling*params.LEN_REC_BUF_SEC), num_event_types))
    counter = 0
    #while True:
    while counter < (params.LEN_REC_SEC * freq_sampling / params.LEN_DATA_CHUNK_READ):
        print 'counter: ', counter

        # Clear the canvas
        win.delete('all')

        if not is_simulation_mode:
            # Wait for new data and get it
            data_last_chunk = recorder.get_new_data(params.LEN_DATA_CHUNK_READ, params.AMP_WAIT_SEC)
            recorder.acknowledge_new_data()
            print 'recorder.new_data_counter:', recorder.new_data_counter
        else:
            time.sleep(1.0 / (freq_sampling/params.LEN_DATA_CHUNK_READ))
            data_last_chunk = 1000.0 * np.random.rand(int(params.LEN_DATA_CHUNK_READ), num_signal_channels)
            #print 'Random data_last_chunk size:', data_last_chunk

        # Insert the new sample into our time series
        i_row_lb = int((counter+len_padding)*params.LEN_DATA_CHUNK_READ)
        i_row_ub = int((counter+len_padding+1)*params.LEN_DATA_CHUNK_READ)
        X_raw_buf_live[i_row_lb:i_row_ub, :] = data_last_chunk
        #print 'data_last_chunk:', data_last_chunk
        label_buf_live[i_row_lb:i_row_ub, :]\
            = cursor_event_list[counter % int(params.LEN_PERIOD_SEC * freq_sampling / params.LEN_DATA_CHUNK_READ)]

        # Calculating cursor step
        i_row_ub = int((counter+len_padding+1)*params.LEN_DATA_CHUNK_READ)
        i_row_lb = i_row_ub - int(window_size_in_samples)
        if i_row_lb >= 0:
            #print 'i_row_lb, i_row_ub:', i_row_lb, i_row_ub
            #print 'X_raw_buf_live[i_row_lb:i_row_ub, :].shape:', X_raw_buf_live[i_row_lb:i_row_ub, :].shape
            if is_control_mode:
                X_window = utils.preprocess(X_raw_buf_live[i_row_lb:i_row_ub, :], scaler)
                X_in = TimeSeriesBatchIterator.create_X_instance(X_window, conv_dim=1)
                X_in = X_in.reshape(1, X_in.shape[0], X_in.shape[1])
                #print 'X_window.shape:', X_window.shape
                #print 'X_in.shape:', X_in.shape
                cursor_step = calc_cursor_step(nnet, X_in.astype(np.float32))
            else:
                #X_window = X_raw_buf_live[i_row_lb:i_row_ub, :]
                cursor_step = 0
            cursor_pos = cursor_pos_prev + np.array([cursor_step, 0])
            #print 'cursor_pos: ', cursor_pos
        else:
            cursor_pos = cursor_pos_prev
        cursor_pos_point = graphics.Point(cursor_pos[0], cursor_pos[1])
        cursor_pos_prev = cursor_pos
        cursor = graphics.Circle(cursor_pos_point, cursor_radius)
        color_temp = cursor_color_arr_final[counter % int(params.LEN_PERIOD_SEC * freq_sampling / params.LEN_DATA_CHUNK_READ)]
        cursor.setFill(graphics.color_rgb(color_temp[0], color_temp[1], color_temp[2]))
        cursor.setOutline(graphics.color_rgb(color_temp[0], color_temp[1], color_temp[2]))
        cursor.draw(win)

        counter += 1
        # End of if
    # End of while

    # Stop recording
    recorder.stop_recording()

    # Close the window
    win.close()

    # Cut the padding from the data
    i_row_lb = int(len_padding * params.LEN_DATA_CHUNK_READ)
    i_row_ub = int((counter+len_padding)*params.LEN_DATA_CHUNK_READ)
    X_raw_buf_cut = X_raw_buf_live[i_row_lb:i_row_ub, :]
    label_buf_cut = label_buf_live[i_row_lb:i_row_ub, :]

    # Save data to file
    time_axis = np.arange(X_raw_buf_cut.shape[0]).reshape((X_raw_buf_cut.shape[0], 1))
    print 'time_axis.shape:', time_axis.shape
    data_merged = np.concatenate((time_axis, X_raw_buf_cut, label_buf_cut), axis=1)
    print 'data_merged.shape: ', data_merged.shape
    time_save = datetime.now()
    np.savetxt('../data/MIBBCI_REC_{0}Hz_{1}{2:02}{3:02}_{4:02}h{5:02}m{6:02}s_RAW.csv'.format(
        int(freq_sampling), time_save.year, time_save.month, time_save.day,
        time_save.hour, time_save.minute, time_save.second),
        X=data_merged, fmt='%.8f', delimiter=",",
        header='time, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, red, blue, idle',
        comments='')

    print 'cursor_func(.) terminates.'
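# calc_cursor_step is called above but not shown. A plausible sketch: take the net's
# class probabilities for the current window and map the right-hand vs. left-hand
# difference to a signed horizontal step in pixels. STEP_GAIN_PX is an assumed
# constant and the whole helper is illustrative, not the project's own definition.
STEP_GAIN_PX = 10.0

def calc_cursor_step(nnet, X_in):
    probs = nnet.predict_proba(X_in)[0]
    p_right = probs[params.EVENT_ID_RH]
    p_left = probs[params.EVENT_ID_LH]
    return STEP_GAIN_PX * (p_right - p_left)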
y = "year" aff = "affiliation" affn = "aff_nullable" numa = "number_of_authors" coa = "coauthors" cit = "cit_count" s = json.load(open(config_file)) conn_str = ('driver=%s; server=%s; uid=%s; pwd=%s; db=%s' % (s['driver'], s['server'], s['uid'], s['pwd'], s['db'])) conn = pyodbc.connect(conn_str) """ make the pairs """ df = pd.io.sql.read_sql(select1, conn) df[ab] = df[ab].map(lambda s : " ".join([utils.preprocess(x) for x in json.loads(s).itervalues()])) df[ti] = df[ti].map(lambda x: utils.preprocess(x)) # see # http://stackoverflow.com/questions/13446480/python-pandas-remove-entries-based-on-the-number-of-occurrences#comment18556837_13447176 # for a better way? counts = df.groupby(un).size() counts = counts[counts != 1] df = df[df[un].isin(counts.index.values)] cursor = conn.cursor() df[coa] = df.apply(lambda x: utils.query_coauths(cursor, int(x[pmid]), int(x[id])), axis=1)['pmid'] cursor.close() ungroup = df.groupby(un) bases_idx = [] matches_idx = []
def main(): # parse arguments args=parse_args() if args.cpu: os.environ["CUDA_VISIBLE_DEVICES"] = "" sess = tf.Session() if args.is_train: # read tfrecord files glob_pattern=os.path.join(args.dataset_dir, '*.tfrecord') tfrecords_list = glob.glob(glob_pattern) # check dirs if not os.path.exists(args.checkpoint_dir): os.makedirs(args.checkpoint_dir) if not os.path.exists(args.logs_dir): os.makedirs(args.logs_dir) model=MobileNetV2(sess=sess, tf_files=tfrecords_list, num_sampes=args.num_samples, epoch=args.epoch, batch_size=args.batch_size, image_height=args.image_height, image_width=args.image_width, n_classes=args.n_classes, is_train=args.is_train, learning_rate=args.learning_rate, lr_decay=args.lr_decay,beta1=args.beta1, chkpt_dir=args.checkpoint_dir, logs_dir=args.logs_dir, model_name=args.model_name, rand_crop=args.rand_crop) model._build_train_graph() model._train() else: model=MobileNetV2(sess=sess, tf_files='', num_sampes=args.num_samples, epoch=args.epoch, batch_size=args.batch_size, image_height=args.image_height, image_width=args.image_width, n_classes=args.n_classes, is_train=args.is_train, learning_rate=args.learning_rate, lr_decay=args.lr_decay,beta1=args.beta1, chkpt_dir=args.checkpoint_dir, logs_dir=args.logs_dir, model_name=args.model_name, rand_crop=args.rand_crop) model._build_test_graph() saver=tf.train.Saver() ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: ckpt_name = os.path.basename(ckpt.model_checkpoint_path) saver.restore(sess, os.path.join(args.checkpoint_dir, ckpt_name)) print("[*] Success to read {}".format(ckpt_name)) else: print("[*] Failed to find a checkpoint") return # get input and output tensors from graph graph = tf.get_default_graph() input_x = graph.get_tensor_by_name("input:0") input_y = graph.get_tensor_by_name("label:0") prob = graph.get_tensor_by_name("mobilenetv2/prob:0") # prepare eval/test data and label img=imread('data/test/t_1_0.jpeg') img = imresize(img, (args.image_height, args.image_width)) img=preprocess(img) print(img.dtype) label=1 feed_dict={input_x:[img],input_y:[label]} # use [], because we need 4-D tensor start=time.time() res=sess.run(prob, feed_dict=feed_dict)[0] # index 0 for batch_size print('prob: {}, class: {}'.format(res, np.argmax(res))) print('time: {}'.format(time.time()-start))
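# The preprocess() applied to the test image above is not shown here. For a
# MobileNet-style network a common choice is to cast to float32 and scale pixels to
# [-1, 1]; the actual repo may normalize differently, so treat this only as an
# illustrative stand-in:
import numpy as np

def preprocess(img):
    img = img.astype(np.float32)
    return img / 127.5 - 1.0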
def train(train_prefix_dir="/data/heart"):
    """
    Training systole and diastole models.
    """
    print('Loading and compiling models...')
    model_systole = get_vgg_model()
    model_diastole = get_vgg_model()

    print('Loading training data...')
    X, y = load_train_data(train_prefix_dir)

    print('Pre-processing images...')
    X = preprocess(X)

    # split to training and test
    X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2)

    nb_iter = 200
    epochs_per_iter = 1
    batch_size = 32
    calc_crps = 1  # calculate CRPS every n-th iteration (set to 0 if CRPS estimation is not needed)

    # remember min val. losses (best iterations), used as sigmas for submission
    min_val_loss_systole = sys.float_info.max
    min_val_loss_diastole = sys.float_info.max

    print('-'*50)
    print('Training...')
    print('-'*50)

    # Create Image Augmentation
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=True)  # randomly flip images

    # compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied)
    datagen.fit(X_train)

    # Create model checkpointers for systole and diastole
    systole_checkpointer_best = ModelCheckpoint(filepath="weights_systole_best.hdf5", verbose=1, save_best_only=True)
    diastole_checkpointer_best = ModelCheckpoint(filepath="weights_diastole_best.hdf5", verbose=1, save_best_only=True)
    systole_checkpointer = ModelCheckpoint(filepath="weights_systole.hdf5", verbose=1, save_best_only=False)
    diastole_checkpointer = ModelCheckpoint(filepath="weights_diastole.hdf5", verbose=1, save_best_only=False)

    # Create 600-dimensional y cdfs from observations
    y_syst_train = np.array([(i < np.arange(600)) for i in y_train[:, 0]], dtype=np.uint8)
    y_syst_test = np.array([(i < np.arange(600)) for i in y_test[:, 0]], dtype=np.uint8)
    y_diast_train = np.array([(i < np.arange(600)) for i in y_train[:, 1]], dtype=np.uint8)
    y_diast_test = np.array([(i < np.arange(600)) for i in y_test[:, 1]], dtype=np.uint8)

    print('Fitting Systole Shapes')
    hist_systole = model_systole.fit_generator(
        datagen.flow(X_train, y_syst_train, batch_size=batch_size),
        samples_per_epoch=X_train.shape[0],
        nb_epoch=nb_iter, show_accuracy=False,
        validation_data=(X_test, y_syst_test),
        callbacks=[systole_checkpointer, systole_checkpointer_best],
        nb_worker=1)

    print('Fitting Diastole Shapes')
    hist_diastole = model_diastole.fit_generator(
        datagen.flow(X_train, y_diast_train, batch_size=batch_size),
        samples_per_epoch=X_train.shape[0],
        nb_epoch=nb_iter, show_accuracy=False,
        validation_data=(X_test, y_diast_test),
        callbacks=[diastole_checkpointer, diastole_checkpointer_best],
        nb_worker=1)

    loss_systole = hist_systole.history['loss'][-1]
    loss_diastole = hist_diastole.history['loss'][-1]
    val_loss_systole = hist_systole.history['val_loss'][-1]
    val_loss_diastole = hist_diastole.history['val_loss'][-1]

    if calc_crps > 0:
        print('Evaluating CRPS...')
        pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1)
        pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1)
        val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1)
        val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1)

        # CDF for train and test data (actually a step function)
        cdf_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1])))
        cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1])))

        # CDF for predicted data
        cdf_pred_systole = real_to_cdf(pred_systole, loss_systole)
        cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole)
        cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole)
        cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole)

        # evaluate CRPS on training data
        crps_train = crps(cdf_train, np.concatenate((cdf_pred_systole, cdf_pred_diastole)))
        print('CRPS(train) = {0}'.format(crps_train))

        # evaluate CRPS on test data
        crps_test = crps(cdf_test, np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole)))
        print('CRPS(test) = {0}'.format(crps_test))

    # save best (lowest) val losses in file (to be later used for generating submission)
    with open('val_loss.txt', mode='w+') as f:
        f.write(str(min(hist_systole.history['val_loss'])))
        f.write('\n')
        f.write(str(min(hist_diastole.history['val_loss'])))
"""
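# real_to_cdf and crps come from the project's utils and are not shown here. The sketch
# below follows the usual construction for this metric, consistent with how they are
# called above: each real volume is turned into a 600-bin CDF (a near-step function
# when no sigma is given, a Gaussian CDF smoothed by sigma otherwise), and CRPS is the
# mean squared difference between predicted and true CDFs. Treat it as an assumption
# about the helpers, not their actual source.
import numpy as np
from scipy.stats import norm

def real_to_cdf(y, sigma=1e-10):
    y = np.asarray(y).ravel()
    bins = np.arange(600)
    return np.array([norm.cdf(bins, loc=v, scale=sigma) for v in y])

def crps(cdf_true, cdf_pred):
    return np.mean(np.square(cdf_true - cdf_pred))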
def train(): "training ONE model for systole and diastole " print('Loading and compiling models...') model_systole = get_model1() print('Loading models weights...') model_systole.load_weights('weights_systole_best.hdf5') print('Loading training data...') X, y = load_train_data() print('Pre-processing images...') # denoising filter X = preprocess(X) # split to training and test X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2) # save test subset with open('y_test.txt', mode='w+') as f: f.write(str(y_test)) f.write('\n') nb_iter = 20 epochs_per_iter = 1 batch_size = 32 # if too small, will converge to unreliable models # if too big, it wont fit into memory calc = 4 # Every n-th iteration (0 if not needed) # remember min val. losses (best iterations) min_val_loss_systole = sys.float_info.max print('-'*50) print('Training...') print('-'*50) for i in range(nb_iter): print('-'*50) print('Iteration {0}/{1}'.format(i + 1, nb_iter)) print('-'*50) print('Fitting diastole/systole model...') hist_systole = model_systole.fit(X_train, y_train[:, :], shuffle=True, nb_epoch=epochs_per_iter, batch_size=batch_size, validation_data=(X_test, y_test[:, :])) # loss function values (RMSE) loss_last = hist_systole.history['loss'][-1] # one number for the iter val_loss_last = hist_systole.history['val_loss'][-1] # since hist_systole.history['loss'] returns an array # pick the last value with [-1] loss = hist_systole.history['loss'][:] # all iter val_loss = hist_systole.history['val_loss'][:] with open('loss_last.txt', mode='a') as f: f.write(str(loss_last)) f.write('\n') with open('val_loss_last.txt', mode='a') as f: f.write(str(val_loss_last)) f.write('\n') with open('loss.txt', mode='a') as f: f.write(str(loss)) f.write('\n') with open('val_loss.txt', mode='a') as f: f.write(str(val_loss)) f.write('\n') # usually accuracy = correct predictions / total predictions # using RMSE as a loss function, means if value of loss function is 20 # - that is an indicator the model usually misses the true value by ~20ml if calc > 0 and i % calc == 0: print('Getting predictions...') pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1) # npy 1283 x 2 val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1) # npy 320 x 2 # save predictions with open('pred_systole.txt', mode='a') as f: f.write(str(pred_systole)) f.write('\n') with open('val_pred_systole.txt', mode='a') as f: f.write(str(val_pred_systole)) f.write('\n') # save weights so they can be loaded later print('Saving weights...') model_systole.save_weights('weights_systole.hdf5', overwrite=True) # for best (lowest) val losses, save weights if val_loss_last < min_val_loss_systole: min_val_loss_systole = val_loss_last model_systole.save_weights('weights_systole_best.hdf5', overwrite=True) # save best (lowest) val losses in file (to be later used for submission) with open('min_val_loss.txt', mode='w+') as f: f.write(str(min_val_loss_systole)) f.write('\n')
def train(sequenceLabeler, data): sequenceLabeler.learn(preprocess([data]))
def predict_box(box_path, model_path, out_path, *, box_dset="/box", epoch=None,
                verbose=True, overwrite=False, save_confmaps=False):
    """
    Predict and save peak coordinates for a box.

    :param box_path: path to HDF5 file with box dataset
    :param model_path: path to Keras weights file or run folder with weights subfolder
    :param out_path: path to HDF5 file to save results to
    :param box_dset: name of HDF5 dataset containing box images
    :param epoch: epoch to use if run folder provided instead of Keras weights file
    :param verbose: if True, prints some info and statistics during processing
    :param overwrite: if True and out_path exists, file will be overwritten
    :param save_confmaps: if True, saves the full confidence maps as additional
        datasets in the output file (very slow)
    """
    if verbose:
        print("model_path:", model_path)

    # Find model weights
    model_name = None
    weights_path = model_path
    if os.path.isdir(model_path):
        model_name = os.path.basename(model_path)
        weights_paths, epochs, val_losses = find_weights(model_path)
        if epoch is None:
            weights_path = weights_paths[np.argmin(val_losses)]
        elif epoch == "final":
            weights_path = os.path.join(model_path, "final_model.h5")
        else:
            weights_path = weights_paths[epoch]

    # Input data
    box = h5py.File(box_path, "r")[box_dset]
    num_samples = box.shape[0]
    if verbose:
        print("Input:", box_path)
        print("box.shape:", box.shape)

    # Create output path
    if out_path[-3:] != ".h5":
        if model_name is None:
            out_path = os.path.join(out_path, os.path.basename(box_path))
        else:
            out_path = os.path.join(out_path, model_name, os.path.basename(box_path))
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
    model_name = os.path.basename(model_path)
    if verbose:
        print("Output:", out_path)

    t0_all = time()
    if os.path.exists(out_path):
        if overwrite:
            os.remove(out_path)
            print("Deleted existing output.")
        else:
            print("Error: Output path already exists.")
            return

    # Load and prepare model
    model = keras.models.load_model(weights_path)
    model_peaks = convert_to_peak_outputs(model, include_confmaps=save_confmaps)
    if verbose:
        print("weights_path:", weights_path)
        print("Loaded model: %d layers, %d params" % (len(model.layers), model.count_params()))

    # Load data and preprocess (normalize)
    t0 = time()
    X = preprocess(box[:])
    if verbose:
        print("Loaded [%.1fs]" % (time() - t0))

    # Evaluate
    t0 = time()
    if save_confmaps:
        Ypk, confmaps = model_peaks.predict(X)

        # Quantize
        confmaps_min = confmaps.min()
        confmaps_max = confmaps.max()
        confmaps = (confmaps - confmaps_min) / (confmaps_max - confmaps_min)
        confmaps = (confmaps * 255).astype('uint8')

        # Reshape
        confmaps = np.transpose(confmaps, (0, 3, 2, 1))
    else:
        Ypk = model_peaks.predict(X)
    prediction_runtime = time() - t0
    if verbose:
        print("Predicted [%.1fs]" % prediction_runtime)

    # Save
    t0 = time()
    with h5py.File(out_path, "w") as f:
        f.attrs["num_samples"] = num_samples
        f.attrs["img_size"] = X.shape[1:]
        f.attrs["box_path"] = box_path
        f.attrs["box_dset"] = box_dset
        f.attrs["model_path"] = model_path
        f.attrs["weights_path"] = weights_path
        f.attrs["model_name"] = model_name

        ds_pos = f.create_dataset("positions_pred", data=Ypk[:, :2, :].astype("int32"),
                                  compression="gzip", compression_opts=1)
        ds_pos.attrs["description"] = "coordinate of peak at each sample"
        ds_pos.attrs["dims"] = "(sample, [x, y], joint) === (sample, [column, row], joint)"

        ds_conf = f.create_dataset("conf_pred", data=Ypk[:, 2, :].squeeze(),
                                   compression="gzip", compression_opts=1)
        ds_conf.attrs["description"] = "confidence map value in [0, 1.0] at peak"
        ds_conf.attrs["dims"] = "(sample, joint)"

        if save_confmaps:
            ds_confmaps = f.create_dataset("confmaps", data=confmaps,
                                           compression="gzip", compression_opts=1)
            ds_confmaps.attrs["description"] = "confidence maps"
            ds_confmaps.attrs["dims"] = "(sample, channel, width, height)"
            ds_confmaps.attrs["range_min"] = confmaps_min
            ds_confmaps.attrs["range_max"] = confmaps_max

        total_runtime = time() - t0_all
        f.attrs["total_runtime_secs"] = total_runtime
        f.attrs["prediction_runtime_secs"] = prediction_runtime

    if verbose:
        print("Saved [%.1fs]" % (time() - t0))
        print("Total runtime: %.1f mins" % (total_runtime / 60))
        print("Performance: %.3f FPS" % (num_samples / total_runtime))
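# Example invocation of predict_box (all paths below are placeholders): point it at a
# box HDF5 file and either a single Keras weights file or a run folder, in which case
# the weights with the lowest validation loss are picked unless an epoch is given.
predict_box("data/box/fly_0001.h5",
            "models/run_001",   # run folder; pass epoch="final" to use final_model.h5
            "predictions/",     # extended to predictions/run_001/fly_0001.h5
            box_dset="/box",
            overwrite=True,
            save_confmaps=False)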
    exit(1)


if __name__ == '__main__':
    if len(sys.argv) < 3:
        _usage(sys.argv)

    action = sys.argv[1]
    k = 0
    if action not in _ACTIONS:
        _usage(sys.argv)
    try:
        k = int(sys.argv[2])
    except ValueError:
        _usage(sys.argv)

    if action == 'preprocess':
        preprocess(_DATA_FILE, encoding=_ENCODING, k=k)

    if action == 'graph':
        print("Loading graphs...")
        with open('data/graphs_%d.dat' % k, 'rb') as f:
            graphs = pickle.loads(f.read())
        while True:
            try:
                index = int(input('Enter note number (ctrl+d to end program): '))
                g = graphs[index]
                print("Writing image to out.png...")
                g.draw('out.png')
                print("Done")
                print()
            except (ValueError, KeyError):
                continue
            except (KeyboardInterrupt, EOFError):
# Load the pipeline filename_base = '../models/MIBBCI_NN_20160406_14h17m59s' nnet, numer, denom, scaler = utils.load_pipeline(filename_base) if is_net_to_train_more: # Load the training data X_train_raw, labels_train = utils.load_data(data_filename_train_list) # Preprocess the data numer, denom, scaler = utils.init_preprocessors(X_train_raw) X_train_preproc, labels_train = utils.preprocess( X_train_raw, labels_train, decimation_factor=params.DECIMATION_FACTOR_PREPROC, tdfilt_numer=numer, tdfilt_denom=denom, # reref_channel_id=params.REREF_CHANNEL_ID, # power=True, # mov_avg_window_size=params.MOVING_AVG_WINDOW_SIZE_SECS, scaler=scaler) # labels_train = labels_train # Epoch the data X_epoch_list_train_rh = utils.create_epochs( X_train_preproc, labels_train[:, 0], params.EPOCH_OFFSET_SAMPLES) X_epoch_list_train_lh = utils.create_epochs( X_train_preproc, labels_train[:, 1], params.EPOCH_OFFSET_SAMPLES) X_epoch_list_train_all = [] X_epoch_list_train_all.extend(X_epoch_list_train_rh) X_epoch_list_train_all.extend(X_epoch_list_train_lh) label_list_train_all = [] label_list_train_all.extend([1.0, 0.0] * len(X_epoch_list_train_rh))
def train(): print('Loading and compiling models...') model_systole = get_model() model_diastole = get_model() print('Loading training data...') X, y = load_train_data() print('Pre-processing images...') X = preprocess(X) X_train, y_train, X_test, y_test = split_data(X, y, split_ratio=0.2) nb_iter = 200 epochs_per_iter = 1 batch_size = 32 calc_crps = 1 min_val_loss_systole = sys.float_info.max min_val_loss_diastole = sys.float_info.max print('-'*50) print('Training...') print('-'*50) for i in range(nb_iter): print('-'*50) print('Iteration {0}/{1}'.format(i + 1, nb_iter)) print('-'*50) print('Augmenting images - rotations') X_train_aug = rotation_augmentation(X_train, 15) print('Augmenting images - shifts') X_train_aug = shift_augmentation(X_train_aug, 0.1, 0.1) print('Fitting systole model...') hist_systole = model_systole.fit(X_train_aug, y_train[:, 0], shuffle=True, nb_epoch=epochs_per_iter, batch_size=batch_size, validation_data=(X_test, y_test[:, 0])) print('Fitting diastole model...') hist_diastole = model_diastole.fit(X_train_aug, y_train[:, 1], shuffle=True, nb_epoch=epochs_per_iter, batch_size=batch_size, validation_data=(X_test, y_test[:, 1])) loss_systole = hist_systole.history['loss'][-1] loss_diastole = hist_diastole.history['loss'][-1] val_loss_systole = hist_systole.history['val_loss'][-1] val_loss_diastole = hist_diastole.history['val_loss'][-1] if calc_crps > 0 and i % calc_crps == 0: print('Evaluating CRPS...') pred_systole = model_systole.predict(X_train, batch_size=batch_size, verbose=1) pred_diastole = model_diastole.predict(X_train, batch_size=batch_size, verbose=1) val_pred_systole = model_systole.predict(X_test, batch_size=batch_size, verbose=1) val_pred_diastole = model_diastole.predict(X_test, batch_size=batch_size, verbose=1) cdf_train = real_to_cdf(np.concatenate((y_train[:, 0], y_train[:, 1]))) cdf_test = real_to_cdf(np.concatenate((y_test[:, 0], y_test[:, 1]))) cdf_pred_systole = real_to_cdf(pred_systole, loss_systole) cdf_pred_diastole = real_to_cdf(pred_diastole, loss_diastole) cdf_val_pred_systole = real_to_cdf(val_pred_systole, val_loss_systole) cdf_val_pred_diastole = real_to_cdf(val_pred_diastole, val_loss_diastole) crps_train = crps(cdf_train, np.concatenate((cdf_pred_systole, cdf_pred_diastole))) print('CRPS(train) = {0}'.format(crps_train)) crps_test = crps(cdf_test, np.concatenate((cdf_val_pred_systole, cdf_val_pred_diastole))) print('CRPS(test) = {0}'.format(crps_test)) print('Saving weights...') model_systole.save_weights('weights_systole.hdf5', overwrite=True) model_diastole.save_weights('weights_diastole.hdf5', overwrite=True) if val_loss_systole < min_val_loss_systole: min_val_loss_systole = val_loss_systole model_systole.save_weights('weights_systole_best.hdf5', overwrite=True) if val_loss_diastole < min_val_loss_diastole: min_val_loss_diastole = val_loss_diastole model_diastole.save_weights('weights_diastole_best.hdf5', overwrite=True) with open('val_loss.txt', mode='w+') as f: f.write(str(min_val_loss_systole)) f.write('\n') f.write(str(min_val_loss_diastole))
labels.append(label) cluster[label] += d.real count[label] += 1 print('Each cluster:', count) ''' Update centers and check whether it has converged ''' cluster = [cluster[i] / float(count[i]) for i in range(K)] if np.array_equal(center, cluster): break center = cluster return labels print('Fetching data ...') t = time() X_train, T_train = preprocess() table = dot_table(X_train, X_train, 'dot_table.npy') print('Time:', time() - t) if LOAD and os.path.isfile('w.npy') and os.path.isfile('v.npy'): print('Loading eigenvector ...') t = time() w = np.load(open('w.npy', 'rb')) v = np.load(open('v.npy', 'rb')) print('Time:', time() - t) else: kernel = {'linear':linear, 'rbf':rbf, 'linearbf':linearbf}[KERNEL] print('Generating L ...') t = time() N = len(table)
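# The kernel functions selected above (linear, rbf, linearbf) are not shown. Given the
# cached pairwise dot-product table, an RBF kernel can be derived from it, since
# ||xi - xj||^2 = <xi, xi> + <xj, xj> - 2<xi, xj>. GAMMA and the (table) signature are
# assumptions for illustration; the project's own definitions may differ.
import numpy as np

GAMMA = 1e-2

def rbf(table):
    diag = np.diag(table)
    sq_dists = diag[:, None] + diag[None, :] - 2.0 * table
    return np.exp(-GAMMA * sq_dists)

def linear(table):
    return np.array(table)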
def getSVC(df, random_split=None):
    X, Y = to_array(df.drop("validation", axis=1))
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    tr_ind = df[df["validation"]==0].index.values.astype(int)
    val_ind = df[df["validation"]==1].index.values.astype(int)
    custom_CV_iterator = [(tr_ind, val_ind)]

    print("Create an SVM classifier (SVC)")
    print("__Parameter searching...")
    # TODOs: cross-validation for best hyper parameter
    clf = GridSearchCV(SVC(probability=False),
                       param_grid=TUNED_PARAMS,
                       scoring='roc_auc',
                       n_jobs=10,
                       verbose=5,
                       cv=custom_CV_iterator)
    clf.fit(X, Y)
    print("Best score: {}".format(clf.best_score_))
    print("Best parameters: {}".format(clf.best_params_))

    return clf, scaler


if __name__ == "__main__":
    output_fname = sys.argv[1]
    df, test_df = preprocess()
    model, scaler = getSVC(df)
    write_ans(model, test_df, ofname=output_fname, scaler=scaler)
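# Passing custom_CV_iterator = [(tr_ind, val_ind)] to GridSearchCV means every
# parameter setting is scored on the single, fixed train/validation split defined by
# the "validation" column rather than on k random folds. The same split can also be
# expressed with scikit-learn's PredefinedSplit, e.g.:
import numpy as np
from sklearn.model_selection import PredefinedSplit

test_fold = np.where(df["validation"] == 1, 0, -1)  # -1 = always train, 0 = validation fold
cv = PredefinedSplit(test_fold)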