def getFeature(self, ori_q, rel_q):
    ori_q[0] = preprocess(ori_q[0], no_stopwords=True, bigram=self.bigram, trigram=self.trigram)
    ori_q[1] = preprocess(ori_q[1], no_stopwords=True, bigram=self.bigram, trigram=self.trigram)
    rel_q[0] = preprocess(rel_q[0], no_stopwords=True, bigram=self.bigram, trigram=self.trigram)
    # the original preprocessed rel_q[0] twice; the body field rel_q[1] is what is used below
    rel_q[1] = preprocess(rel_q[1], no_stopwords=True, bigram=self.bigram, trigram=self.trigram)
    word2vec_q_subject = self.getWordVectorFeatures(ori_q[0])
    word2vec_q_body = self.getWordVectorFeatures(ori_q[1])
    word2vec_rel_q_subject = self.getWordVectorFeatures(rel_q[0])
    word2vec_rel_q_body = self.getWordVectorFeatures(rel_q[1])
    subject = np.concatenate((word2vec_q_subject * word2vec_rel_q_subject,
                              np.abs(word2vec_q_subject - word2vec_rel_q_subject)), axis=0)
    body = np.concatenate((word2vec_q_body * word2vec_rel_q_body,
                           np.abs(word2vec_q_body - word2vec_rel_q_body)), axis=0)
    '''
    subject_dist=self.dist(ori_q[0], rel_q[0])
    body_dist=self.dist(ori_q[1], rel_q[1])
    return np.array([subject_dist,body_dist]).T
    '''
    return np.concatenate((subject, body), axis=0).T
def gen_train_sample(im):
    # train.classifier.load('svm_class.xml')
    img = pp.preprocess(im.copy())
    # img,rot = pp.skew_correction(img)
    hight, width = im.shape
    # M = cv2.getRotationMatrix2D((hight/2,width/2),rot-90,1)
    # im = cv2.warpAffine(im,M,(width,hight))
    # cv2.imwrite('skew correct.png',im)
    contours2, hierarchy = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    contours = []
    for cnt in contours2:
        print(cv2.contourArea(cnt))
        if cv2.contourArea(cnt) > 20:
            contours.append(cnt)
    X = [cv2.contourArea(C) for C in contours]
    # print len(contours),len(X)
    t = [i for i in range(0, len(contours))]
    X, t = zip(*sorted(zip(X, t)))
    i = 0
    for j in t:
        x, y, w, h = cv2.boundingRect(contours[j])
        box = im[y - 1:y + h + 1, x - 1:x + w + 1]
        char = pp.preprocess(box.copy())
        try:
            f = train.find_feature(char)
            fu = train.np.array(f, train.np.float32)
            # print len(fu)
            # use a separate name for the prediction so the index list `t` is not overwritten
            label = train.classifier.predict(fu)
            print(label)
        except IndexError:
            label = 0
        cv2.imwrite('samp/zsamp47_8_' + str(int(label)) + '_' + str(i) + '.png', box)
        # cv2.imwrite('./samp/'+str(i)+'.png',box)
        i += 1
def export_sentences_for_giza(self, lang1, lang2, stream1, stream2, use_metaphone=True):
    for row in self.rows:
        if lang1 in row and lang2 in row:
            print >> stream1, \
                preprocess(row[lang1], use_metaphone).encode('utf-8')
            print >> stream2, \
                preprocess(row[lang2], use_metaphone).encode('utf-8')
def preprocess_data(self):
    print "Preprocessing Data"
    self.train_dataframe = preprocess.preprocess(self.train_dataframe)
    self.test_dataframe = preprocess.preprocess(self.test_dataframe)
    # Add dummy column to test dataframe to match dimensions
    # Quick hack: should take away
    self.test_dataframe["IsBadBuy"] = 0
def main(self):
    pathnow = os.getcwd()
    try:
        os.mkdir("D:/WebZ")
    except Exception as e:
        pass
    case = getcase.case_format().case_in_xlsx("D:/case.xlsx")
    preprocess.preprocess().process(case, pathnow)
def main(argv): # x = sys.argv[2] # hmm_train_file_path = '../../data/hmm_train_data' # hmm_train_file_path = '../../data/hmm_test_data' # hmm_train_file_path = '../../data/hmm_train_data_jpn' hmm_train_file_path = '../../data/hmm_test_data_jpn' vtb_train_file_path = '../../data/viterbi_train_data' # F-B stopping criteria threshold = 1e-5 param_log_filepath = 'log1' # preprocess input_str = preprocess(hmm_train_file_path) # do analysis analyze(input_str) # forward & backward algorithms index_dic, A, B = gen_matrix() pcll_old = -1000.0 pcll_new = -1000.0 pcll_list = [] itr = 1 while True: alpha_table, pcll_alpha = forward(input_str, A, B, index_dic) beta_table, pcll_beta = backward(input_str, A, B, index_dic) A, B = forward_backward(alpha_table, beta_table, A, B, index_dic, input_str) # update pcll pcll_new = pcll_alpha pcll_list.append(pcll_new) if abs(pcll_new - pcll_old) < threshold: break print 'ITERATION#' + str(itr) + ': ' + str(pcll_new) # update pcll_old = pcll_new itr += 1 # plot plot_pcll(pcll_list) print '\n=========FINAL A & B===========' print 'A:' print A print '\nB:' print B print '==============================\n' # viterbi decoder vtb_input_str = preprocess(vtb_train_file_path) vtb_hidden_state_list = vtb_decode(vtb_input_str, A, B, index_dic) ll_hidden_state_list = ll_decode(vtb_input_str, B, index_dic) print ll_hidden_state_list print 'END!'
def count(self, ori_q, rel_q):
    ori_q[0] = preprocess(ori_q[0], bigram=self.bigram, trigram=self.trigram)
    rel_q[0] = preprocess(rel_q[0], bigram=self.bigram, trigram=self.trigram)
    ori_q[1] = preprocess(ori_q[1], bigram=self.bigram, trigram=self.trigram)
    rel_q[1] = preprocess(rel_q[1], bigram=self.bigram, trigram=self.trigram)
    self.c_title.append(len(ori_q[0].split()))
    self.c_title.append(len(rel_q[0].split()))
    self.c_body.append(len(ori_q[1].split()))
    self.c_body.append(len(rel_q[1].split()))
def getFeature(self, ori_q, rel_q):
    # ori_q[0]=preprocess(ori_q[0])
    # rel_q[0]=preprocess(rel_q[0])
    ori_q[0] = preprocess(ori_q[0], bigram=self.bigram, trigram=self.trigram, no_stopwords=True)
    rel_q[0] = preprocess(rel_q[0], bigram=self.bigram, trigram=self.trigram, no_stopwords=True)
    ori_q[1] = preprocess(ori_q[1], bigram=self.bigram, trigram=self.trigram, no_stopwords=True)
    rel_q[1] = preprocess(rel_q[1], bigram=self.bigram, trigram=self.trigram, no_stopwords=True)
    word2vec_q_subject, q_len = self.getWordVectorFeatures(ori_q[0], title=True)
    word2vec_rel_q_subject, rel_q_len = self.getWordVectorFeatures(rel_q[0], title=True)
    word2vec_q_body, q_len = self.getWordVectorFeatures(ori_q[1])
    word2vec_rel_q_body, rel_q_len = self.getWordVectorFeatures(rel_q[1])
    return [word2vec_q_subject, word2vec_rel_q_subject, word2vec_q_body, word2vec_rel_q_body]
def run(args):
    if args.input_file:
        with open(args.input_file, "r") as input_file:
            source = input_file.read()
    else:
        import sys
        source = sys.stdin.read()
    if args.assemble:
        with open(args.output, "w") as binout:
            write_binary(parse_instructions(source), binout)
    else:
        tree = parse(preprocess(source))
        if args.print_ast:
            print_tree(tree)
        if not tree:
            return
        instructions = link(*translate(tree))
        if args.print_assembly:
            print_instructions(instructions)
        with open(args.output, "w") as binout:
            write_binary(instructions, binout)
def load_imgs(): global imgs global wheels for p in purposes: for epoch_id in epochs[p]: print 'processing and loading "{}" datasets {} into memory, current num of imgs is {}...'.format(p, epoch_id, len(imgs[p])) vid_path = data_dir +"/dataset{0}/out-mencoder.avi".format(epoch_id) assert os.path.isfile(vid_path) frame_count = cm.frame_count(vid_path) cap = cv2.VideoCapture(vid_path) csv_path = data_dir + "/dataset%i/data.csv" % epoch_id assert os.path.isfile(csv_path) rows = cm.fetch_csv_data(csv_path) assert frame_count == len(rows) yy = [[float(row['angle'])] for row in rows] while True: ret, img = cap.read() if not ret: break if img.any(): img = preprocess.preprocess(img) imgs[p].append(img) wheels[p].extend(yy) assert len(imgs[p]) == len(wheels[p]) cap.release()
def put(self, sentence, uid, fbid, timestamp):
    wordList = re.split(r"\s", sentence)
    wordList = [preprocess(word) for word in wordList]
    modelList = [(uid, fbid, word, timestamp) for word in wordList if word != ""]
    self.modelList = self.modelList.union(modelList)
    if len(self.modelList) >= 1000:
        self.flush()
def classify(c, t, word_data, save=False):
    targets, clf = t[1], c[1]
    target_names = class_labels[np.unique(targets)]
    if len(target_names) == 1:
        # nothing to classify: no classifier, perfect "score"
        return None, 1
    features_train, features_test, labels_train, labels_test = preprocess(word_data, targets)
    print("\n### CLASSIFICATION using {} ###".format(c[0]))
    t0 = time()
    clf.fit(features_train.toarray(), labels_train)
    print("Training time:", round((time() - t0) / 60, 2), "m")
    t1 = time()
    prediction = clf.predict(features_test.toarray())
    print("Prediction time:", round((time() - t1) / 60, 2), "m")
    print(classification_report(labels_test, prediction, target_names=target_names))
    acc = accuracy_score(prediction, labels_test)
    print("Accuracy:", acc)
    cm = np.nan_to_num(confusion_matrix(labels_test, prediction))
    norm_cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    score = np.diag(norm_cm)
    if save:
        try:
            joblib.dump((clf, score), "./data/{} Classifier.pkl".format(c[0]))
        except Exception:
            clf.save("./data/{} Classifier".format(c[0]), score)
    plot_confusion_matrix(norm_cm, target_names, title, show=False)
    return clf, score
def onPreview(self):
    f = open(self.compare_url.get(), 'r')
    g = open(self.output_url.get(), 'w')
    img = cv2.imread(self.source_url.get(), 0)
    if img is None:
        print(self.source_url.get() + ' doesn\'t exist')
        exit()
    img = pp.preprocess(img)
    im, rot = pp.skew_correction(img)
    line = pp.find_lines(im.copy())
    # print len(line)
    label_list = it.train.label_unicode()
    q = f.readlines()
    i = 0
    num = []
    for l in line:
        for w in l.word_list:
            for c in w.char_list:
                tup = label_list[int(c.label)]
                if q[i][:-1] != tup:
                    tup = q[i][:-1]
                g.write(tup)
                i += 1
            g.write(' ')
        g.write('\n')
    f.close()
    g.close()
    self.draw_frame3()
def run_models(): """ Run all classification models. """ # TODO: Vary max_features. # feature_range = range(1, 31) # scores = {} # for i in feature_range: # print "Beginning preprocessing..." # train_test_sets = pp.preprocess(max_features=i, force_load=True) # print "...finished preprocessing" # print "Depth-limited Decision Tree Classifier..." # scores[i] = test_decision_tree_classifier( # train_test_sets, depth_limited=True) # max_features = max(feature_range, key=lambda x: scores[x]) # print "Best max_features: ", max_features train_test_sets = pp.preprocess(max_features=10) # print "Beginning training..." # TODO: Change all metrics to F1-score once we have imbalanced data set. # Also change number of folds if necessary. # print "Baseline Classifier..." # # test_subreddit_baseline_classifier() # print "Decision Tree Classifier..." # # test_decision_tree_classifier(train_test_sets) print "Depth-limited Decision Tree Classifier..." test_decision_tree_classifier(train_test_sets, depth_limited=True)
def compile(filename):
    logger = yacc.NullLogger()
    yacc.yacc()
    data = preprocess(get_input(filename))
    print data
    ast = yacc.parse(data, lexer=lex.lex(), debug=1)
    return ast
def make_compare_file():
    f = open('./corrected_docs/Samp_' + str(tno) + '/compare_list_new.txt', 'w')
    g = open('./corrected_docs/Samp_' + str(tno) + '/output_file_new.txt', 'w')
    # img=cv2.imread('./Example/dc_books_page.png',0)
    path = './corrected_docs/Samp_' + str(tno) + '/*.png'
    url = glob.glob(path)
    img = cv2.imread(url[0], 0)
    # img=cv2.imread('./Samp_3/samp3.png',0)
    if img is None:
        print('image doesn\'t exist')
        exit()
    img = pp.preprocess(img)
    # im=img
    # im,rot = pp.skew_correction(img)
    line = pp.find_lines(img.copy())
    # print len(line)
    label_list = train.label_unicode()
    i = 0
    num = []
    for l in line:
        for w in l.word_list:
            for c in w.char_list:
                # num.append((str(i),label_list[int(c.label)]))
                tup = label_list[int(c.label)]
                f.write(tup + '\n')
                g.write(tup)
                # cv2.imwrite('./Samp_22/samp/'+str(i)+'.png',c.data)
                i += 1
            g.write(' ')
        g.write('\n')
    f.close()
    g.close()
def get_feature_matrix(tfidf_matrix, phrase_list, true_keys, first_occurrence, phrase_entropy):
    #X = np.empty((0, len(features)))
    #y = np.empty(0)
    X = []
    y = []
    doc_tfidf_vecs = tfidf_matrix.toarray().tolist()  # tfidf matrix
    # lower true keywords
    true_keys = [[preprocess(key) for key in key_list] for key_list in true_keys]
    for doc_id, tfidf_vec in enumerate(doc_tfidf_vecs):
        # traverse the doc vector
        print "--extracting features from doc {}".format(doc_id)
        for i, tfidf in enumerate(tfidf_vec):
            if tfidf != 0:  # Why is this case here?
                feature_vec = get_feature_vector(phrase_list[i], tfidf,
                                                 first_occurrence[doc_id][phrase_list[i]],
                                                 phrase_entropy[doc_id][phrase_list[i]])
                #X = np.append(X, feature_vec, axis=0)
                X.append(feature_vec)
                #if feature_vec[2] == 0:
                #    print "phrase {} entropy 0 in doc {}".format(phrase_list[i], doc_id)
                # no lambda needed: append the binary label directly
                y.append(1 if phrase_list[i] in true_keys[doc_id] else 0)
                #y = np.append(y, label)
    return np.array(X), y
def computerTrainError(self): posPath = '/Users/sunxinzi/Documents/Machine_Learning/Project/Sentiment Prediction/train/pos/' negPath = '/Users/sunxinzi/Documents/Machine_Learning/Project/Sentiment Prediction/train/neg/' testPath = '/Users/sunxinzi/Documents/Machine_Learning/Project/Sentiment Prediction/train/pos/' process = preprocess() posTrain = [] negTrain = [] testData = [] posTrain = process.getCleanTxt(posPath, 'pos') negTrain = process.getCleanTxt(negPath, 'neg') testData = process.getCleanTestData(testPath) classifier = naiveBayes() result = classifier.test(posTrain, negTrain, testData) errorCount = 0 for i in result: if i[1] == 'neg': errorCount = errorCount + 1 testPath = '/Users/sunxinzi/Documents/Machine_Learning/Project/Sentiment Prediction/train/neg/' testData = [] testData = process.getCleanTestData(testPath) result = classifier.test(posTrain, negTrain, testData) for i in result: if i[1] == 'pos': errorCount = errorCount + 1 return errorCount
def telemetry(sid, data):
    if data:
        # The current steering angle of the car
        steering_angle = data["steering_angle"]
        # The current throttle of the car
        throttle = data["throttle"]
        # The current speed of the car
        speed = data["speed"]
        # The current image from the center camera of the car
        imgString = data["image"]
        image = Image.open(BytesIO(base64.b64decode(imgString)))
        # Change the image to be usable by the cv2 module
        image = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
        # Process the image and use it to predict the steering angle
        image = preprocess.preprocess(image)
        image_array = np.asarray(image)
        # Predict the angle
        steering_angle = model.y.eval(feed_dict={model.x: image_array[None, :, :, :],
                                                 model.keep_prob: 1.0})[0][0]
        throttle = controller.update(float(speed))
        print(steering_angle, throttle)
        send_control(steering_angle, throttle)
    else:
        # NOTE: DON'T EDIT THIS.
        sio.emit('manual', data={}, skip_sid=True)
def parse_jstruct(filename, include_paths=[]):
    parser = c_parser.CParser()
    with open(filename, 'r') as infile:
        text = infile.read()
    # insert some header includes and a 'do not modify'
    text = re.sub(
        GUARD_HEADERS_EXPR,
        r'\g<0>' + GENERATED + PREPEND_HEADERS,
        text, count=1
    )
    pptext, err = preprocess(text, include_paths=include_paths,
                             defines=['__attribute__(x)='])
    if err:
        import os
        rel_filename = os.path.relpath(filename)
        err = err.replace('<stdin>', rel_filename)
        raise Exception('C Preprocessor: ' + err)
    ast = parser.parse(pptext, filename=filename)
    return (ast, text)
def make_modified_file():
    f = open('./compare_list.txt', 'r')
    g = open('./output_file.txt', 'w')
    img = cv2.imread('./Example/dc_books_page.png', 0)
    if img is None:
        print('./Example/dc_books_page.png doesn\'t exist')
        exit()
    img = pp.preprocess(img)
    im, rot = pp.skew_correction(img)
    line = pp.find_lines(im.copy())
    # print len(line)
    label_list = train.label_unicode()
    q = f.readlines()
    i = 0
    num = []
    for l in line:
        for w in l.word_list:
            for c in w.char_list:
                # num.append((str(i),label_list[int(c.label)]))
                tup = label_list[int(c.label)]
                if q[i][:-1] != tup:
                    print(tup)
                # f.write(tup+'\n')
                g.write(tup)
                # cv2.imwrite('samp/'+str(i)+'.png',c.data)
                i += 1
            g.write(' ')
        g.write('\n')
    f.close()
    g.close()
def __init__(self, schemafile): p = etree.XMLParser(remove_comments=True) self.tree = etree.parse(cStringIO.StringIO(preprocess.preprocess(schemafile)), p) self.callbacks = {'element': self.cb_element, 'documentation': self.cb_documentation, 'value': self.cb_value, 'attribute': self.cb_attribute, 'data': self.cb_data, 'optional': self.cb_optional, 'zeroOrMore': self.cb_zeroormore, 'oneOrMore': self.cb_oneormore, 'choice': self.cb_choice, 'empty': self.cb_empty, 'list': self.cb_list, 'group': self.cb_group, 'interleave': self.cb_group, 'name': self.cb_name, 'text': self.cb_text, 'anyName' : self.cb_anyname, 'nsName' : self.cb_nsname, 'except' : self.cb_except, 'ignore' : self.cb_ignore, 'notAllowed' : self.cb_notallowed} self.lost_eles = [] self.added_eles = [] self.lost_attrs = [] self.added_attrs = [] return
def main(argv):
    if len(argv) < 2 or len(argv) > 3:
        print("Wrong number of arguments")
        sys.exit(2)
    # Get command line arguments
    input_file_name = argv[0]
    output_file_name = argv[len(argv) - 1]  # Last argument
    input_file = open(input_file_name, "r")
    data = input_file.readlines()
    input_file.close()
    if len(argv) == 3:
        # Optional argument
        group_number = int(argv[1])
        # TODO: Make 5500 and 800,000 constants?
        first_half = data[group_number * 5500: (group_number + 1) * 5500]
        last_half = data[800000 + group_number * 5500: 800000 + (group_number + 1) * 5500]
        data = first_half + last_half
    # Process the tweet as per the steps in the assignment
    processed_tweets = preprocess.preprocess(data)
    # write results to the file
    output_file = open(output_file_name, "w")
    output_file.write(processed_tweets)
    output_file.close()
def getFeature(self, ori_q, rel_q):
    ori_q[0] = preprocess(ori_q[0], no_stopwords=True, bigram=self.bigram, trigram=self.trigram)
    ori_q[1] = preprocess(ori_q[1], no_stopwords=True, bigram=self.bigram, trigram=self.trigram)
    rel_q[0] = preprocess(rel_q[0], no_stopwords=True, bigram=self.bigram, trigram=self.trigram)
    rel_q[1] = preprocess(rel_q[1], no_stopwords=True, bigram=self.bigram, trigram=self.trigram)
    word2vec_q_subject = self.getWordVectorFeatures(ori_q[0])
    word2vec_q_body = self.getWordVectorFeatures(ori_q[1])
    word2vec_q = np.concatenate((word2vec_q_subject, word2vec_q_body), axis=0)
    word2vec_rel_q_subject = self.getWordVectorFeatures(rel_q[0])
    word2vec_rel_q_body = self.getWordVectorFeatures(rel_q[1])
    word2vec_rel_q = np.concatenate((word2vec_rel_q_subject, word2vec_rel_q_body), axis=0)
    return np.concatenate((word2vec_q, word2vec_rel_q), axis=0)
def test(): load() count,correct=0,0 url='../samples/train_images/' for i in range(101,150): s_list=glob.glob(url+str(i)+'/*.png') for j in s_list: imgo=cv2.imread(j,0) img=pp.preprocess(imgo.copy()) f = find_feature(img.copy()) fu= np.array(f,np.float32) # print len(fu) t = classifier.predict(fu) print label_uni[i-100],label_uni[int(t)],int(i-100==t) if(i-100==t): correct+=1 else: name = './zerr_'+str(i)+'_'+str(count)+'.png' print j print count # cv2.imwrite('./zerr_'+str(i)+'_'+str(count)+'z.png',img) shutil.copyfile(j,name) count+=1 print 'accuracy :'+str(100.0*correct/count)+'%' print ('accurate recognition :'+str(correct)) print ('total character tested :'+str(count))
def train_svm():
    # CV2 SVM
    svm_params = dict(kernel_type=cv2.SVM_RBF,
                      svm_type=cv2.SVM_C_SVC,
                      C=9.34, gamma=15.68)
    svm = cv2.SVM()
    label_list = []
    label_list.append('a')
    url = 'train_images/'
    train_set = []
    s_list = sorted(os.listdir(url))
    label = 0
    for i in s_list:
        s_list = glob.glob(url + i + '/*.png')
        # if(len(s_list)>25):
        if len(s_list) > 500:
            file = open(url + i + '/utf8', "r")
            i_uni = file.read()
            i_uni = i_uni[:-1]
            label_list.append(i_uni)
            label += 1
        else:
            continue
        print str(label), i, label_list[label], len(s_list)
        # only take a handful of images per class
        # (the original C-style `int test=10; if(!test-=1) break;` was not valid Python)
        test = 10
        for j in s_list:
            test -= 1
            if not test:
                break
            img = cv2.imread(j, 0)
            img = pp.preprocess(img)
            f = train.find_feature(img.copy())
            # print len(f)
            s = [label, f]
            train_set.append(s)
    f = open('label', 'w')
    for l in label_list:
        f.write(l + '\n')
    f.close()
    shuffle(train_set)
    f_list = []
    label = []
    for t in train_set:
        label.append(t[0])
        f_list.append(t[1])
    # np.savetxt('feature.txt',f_list)
    # np.savetxt('label.txt',label)
    # samples = np.loadtxt('feature.txt',np.float32)
    # responses = np.loadtxt('label.txt',np.float32)
    # responses = responses.reshape((responses.size,1))
    samples = np.array(f_list, np.float32)
    responses = np.array(label, np.float32)
    print 'auto training initiated'
    print 'please wait.....'
    svm.train(samples, responses, params=svm_params)
    # svm.train_auto(samples,responses,None,None,params=svm_params)
    svm.save("svm_class.xml")
def vector(self, text):
    seg = segment(preprocess(text))
    word_count = len(seg)
    x = [0] * self.n
    for w in seg:
        if w in self.dic:
            x[self.dic[w]] += 1.0 / word_count * math.log(self.doc_count / self.df[w])
    return x
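# Hedged illustration (not part of the original code): the weighting used in
# `vector` above is plain TF-IDF, tf(w) * log(N / df(w)). The helper below is a
# hypothetical standalone version with explicit arguments, shown only to make
# the formula concrete; `segment`, `self.dic`, `self.df` and `self.doc_count`
# from the original class are replaced by plain parameters.
import math

def tfidf_vector(tokens, vocab_index, doc_freq, doc_count):
    """Return a dense TF-IDF vector for an already-tokenized text."""
    x = [0.0] * len(vocab_index)
    if not tokens:
        return x
    for w in tokens:
        if w in vocab_index:
            x[vocab_index[w]] += (1.0 / len(tokens)) * math.log(doc_count / doc_freq[w])
    return x

# Example: a 3-word vocabulary over a 100-document collection.
# print(tfidf_vector(["cat", "sat", "cat"],
#                    {"cat": 0, "sat": 1, "mat": 2},
#                    {"cat": 10, "sat": 50, "mat": 5}, 100.0))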
def detectPlates(imgOriginal):
    # cv.ShowImage("image", image)
    imgGrayscale, imgThresh = preprocess.preprocess(imgOriginal)
    # cv.ShowImage("imgGrayscale", imgGrayscale)
    # cv.ShowImage("imgThresh", imgThresh)
    # cv.WaitKey()
    # ------------------------------------------------------------------
    listOfPossibleChars = findPossibleChars(imgGrayscale, imgThresh)
    # print "len of listOfPossibleChars = " + str(len(listOfPossibleChars))    # 246
    # for possibleChar in listOfPossibleChars:
    #     imageToDrawContourOn = cv.CreateImage(cv.GetSize(imgGrayscale), cv.IPL_DEPTH_8U, 1)
    #     cv.DrawContours(imageToDrawContourOn, possibleChar.contour, 255, 255, 1000, 2)
    #     cv.ShowImage("imageToDrawContourOn", imageToDrawContourOn)
    #     cv.WaitKey()
    # # end for
    # imageToDrawContourOn = cv.CreateImage(cv.GetSize(imgGrayscale), cv.IPL_DEPTH_8U, 1)
    #
    # for possibleChar in listOfPossibleChars:
    #     cv.DrawContours(imageToDrawContourOn, possibleChar.contour, 255, 255, 1000, 1)
    # # end for
    #
    # cv.ShowImage("imageToDrawContourOn", imageToDrawContourOn)
    # cv.WaitKey()
    # ------------------------------------------------------------------
    listOfListsOfMatchingChars = findListOfListsOfMatchingChars(listOfPossibleChars)
    # print "len of listOfListsOfMatchingChars = " + str(len(listOfListsOfMatchingChars))
    # ------------------------------------------------------------------
    imgListOfPlates = []
    # do not reuse the name of the outer list as the loop variable
    for listOfMatchingChars in listOfListsOfMatchingChars:
        imgPlate = extractPlate(imgOriginal, listOfMatchingChars)
        if imgPlate is not None:
            imgListOfPlates.append(imgPlate)
        # end if
    # end for
    # debugCount = 0
    # for plate in listOfPlates:
    #     cv.ShowImage("plate" + str(debugCount), plate)
    #     debugCount = debugCount + 1
    # # end for
    #
    # cv.WaitKey()
    return imgListOfPlates
def _testOneInputFile(self, fname): import preprocess infile = os.path.join('inputs', fname) # input reffile = os.path.join('outputs', fname) # expected output outfile = os.path.join('tmp', fname) # actual output errfile = os.path.join('outputs', fname+'.err') # expected error optsfile = os.path.join('inputs', fname+'.opts') # input options # Determine input options to use, if any. opts = {} if os.path.exists(optsfile): for line in open(optsfile, 'r').readlines(): if line[-1] == "\n": line = line[:-1] name, value = line.split('=', 1) try: value = eval(value) except NameError: pass opts[name] = value #print "options from '%s': %s" % (optsfile, pprint.pformat(opts)) # If there is no reference output file this means that processing # this file is expected to fail. if os.path.exists(reffile): ref = open(reffile, 'r').readlines() if not sys.platform.startswith("win"): ref = [line.replace('\\','/') for line in ref] # use Un*x paths preprocess.preprocess(infile, outfile, **opts) out = open(outfile, 'r').readlines() if ref != out: diff = list(difflib.ndiff(ref, out)) self.fail("%r != %r:\n%s"\ % (reffile, outfile, pprint.pformat(diff))) elif os.path.exists(errfile): err = open(errfile, 'r').read() if not sys.platform.startswith("win"): err = err.replace('\\','/') # use Un*x paths try: preprocess.preprocess(infile, outfile, **opts) except preprocess.PreprocessError, ex: #print "XXX ex: %s" % str(ex).strip() self.failUnlessEqual(err.strip(), str(ex).strip()) else: self.fail("No PreprocessError when expected one.")
def __init__(self, filename):
    self.filename = filename
    self.tag_array, self.tag_dict, self.words_dict = preprocess(filename)
    self.num_of_tags = len(self.tag_array)  # including START and END
    self.num_of_words = len(self.words_dict.keys())  # including START and END
    self.transition = np.zeros((self.num_of_tags, self.num_of_tags))
    self.emission_prob = np.zeros((self.num_of_tags, self.num_of_words))
    self.__model_initialization()
    self.start_prob = self.transition[self.tag_dict['START'], :]
def mymethod(): original_train_data = pd.read_csv("../datasets/train_set.csv", sep="\t") clf = MultinomialNB() X = preprocess(original_train_data) le = preprocessing.LabelEncoder() y = le.fit_transform(original_train_data['Category']) cv = CountVectorizer(stop_words=STOPWORDS) X = cv.fit_transform(X) ksplits = 10 kf = KFold(n_splits=ksplits, shuffle=False) precs = 0 recs = 0 f1s = 0 accs = 0 for train_index, test_index in kf.split(X): X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] clf.fit(X_train, y_train) predictions = clf.predict(X_test) precs += precision_score(y_test, predictions, average='micro') recs += recall_score(y_test, predictions, average='micro') f1s += f1_score(y_test, predictions, average='micro') accs += accuracy_score(y_test, predictions) avgprec = precs / ksplits avgrec = recs / ksplits avgf1 = f1s / ksplits avgacc = accs / ksplits scores = list() scores.append(avgacc) scores.append(avgprec) scores.append(avgrec) scores.append(avgf1) return scores
def load_batch(purpose): global current_batch_id xx = [] yy = [] # fetch the batch definition batch_id = current_batch_id[purpose] assert batch_id < len(batches[purpose]) batch = batches[purpose][batch_id] epoch_id, frame_start, frame_end = batch['epoch_id'], batch[ 'frame_start'], batch['frame_end'] assert epoch_id is not None and frame_start is not None and frame_end is not None # update the current batch current_batch_id[purpose] = (current_batch_id[purpose] + 1) % len( batches[purpose]) # fetch image and steering data vid_path = cm.jn(data_dir, 'epoch{:0>2}_front.mkv'.format(epoch_id)) assert os.path.isfile(vid_path) frame_count = cm.frame_count(vid_path) cap = cv2.VideoCapture(vid_path) cm.cv2_goto_frame(cap, frame_start) csv_path = cm.jn(data_dir, 'epoch{:0>2}_steering.csv'.format(epoch_id)) assert os.path.isfile(csv_path) rows = cm.fetch_csv_data(csv_path) assert frame_count == len(rows) yy = [[float(row['wheel'])] for row in rows[frame_start:frame_end + 1]] for frame_id in xrange(frame_start, frame_end + 1): ret, img = cap.read() assert ret img = preprocess.preprocess(img) #cv2.imwrite(os.path.abspath('output/sample_frame.jpg'), img) xx.append(img) assert len(xx) == len(yy) cap.release() return xx, yy
def doshit(positives, poswordcount, numpostweets, negatives, negwordcount, numnegtweets, vocabsize):
    inputfile = open(os.path.dirname(__file__) + "trumptweets.csv", "r")
    # used to compute accuracy
    pcount = 0
    ncount = 0
    count = 1
    alldata = []
    for line in inputfile:
        line = line[:-2]
        line = line.strip("\"")
        # read in custom file
        symbol, date, time, tweet, sentiment = line.split("\",\"")
        date = date.split(" ")
        date = Date(date[1], date[0], date[2])
        data = TweetData(symbol, date, time, tweet)
        tokens = preprocess.preprocess(tweet)
        # class priors (cast to float so integer division cannot zero them out)
        probpos = math.log(float(numpostweets) / (numpostweets + numnegtweets))
        probneg = math.log(float(numnegtweets) / (numpostweets + numnegtweets))
        for token in tokens:
            if token in positives:
                probpos += math.log(positives[token])
            else:
                probpos += math.log(1.0 / (poswordcount + vocabsize))
            if token in negatives:
                probneg += math.log(negatives[token])
            else:
                probneg += math.log(1.0 / (negwordcount + vocabsize))
        data.correctsentiment = sentiment
        if probpos >= probneg:
            data.sentiment = "positive"
            if sentiment != "positive":
                pcount += 1
        else:
            data.sentiment = "negative"
            if sentiment != "negative":
                ncount += 1
        count += 1
        alldata.append(data)
    print "Text Sentiment Accuracy: ", float(count - pcount - ncount) / float(count)
    return alldata
def load_imgs(): global imgs global wheels for p in purposes: for epoch_id in epochs[p]: print ('processing and loading "{}" epoch {} into memory, current num of imgs is {}...'.format(p, epoch_id, len(imgs[p]))) # vid_path = cm.jn(data_dir, 'epoch{:0>2}_front.mkv'.format(epoch_id)) vid_path = cm.jn(data_dir, 'out-video-{}.avi'.format(epoch_id)) assert os.path.isfile(vid_path) frame_count = cm.frame_count(vid_path) cap = cv2.VideoCapture(vid_path) # csv_path = cm.jn(data_dir, 'epoch{:0>2}_steering.csv'.format(epoch_id)) csv_path = cm.jn(data_dir, 'out-key-{}.csv'.format(epoch_id)) assert os.path.isfile(csv_path) rows = cm.fetch_csv_data(csv_path) print ("{}, {}".format(len(rows), frame_count)) assert frame_count == len(rows) yy = [[float(row['wheel'])] for row in rows] print ("{}, {}".format(len(imgs[p]), len(wheels[p]))) while True: ret, img = cap.read() if not ret: break img = preprocess.preprocess(img) imgs[p].append(img) wheels[p].extend(yy) while len(imgs[p]) < len(wheels[p]): wheels[p].pop() assert len(imgs[p]) == len(wheels[p]) cap.release()
def __iter__(self):
    train = codecs.open(self.filename, 'r', 'utf-8')
    papers = []
    for line in train:
        line = line.replace("###FORMULA###", "||FORMULA||")
        line = line.replace("###TABLE###", "||TABLE||")
        line = line.replace("###FIGURE###", "||FIGURE||")
        map = line.split('\t')
        paper_id = map[0]
        summary = map[3]
        print(paper_id)
        lines = tokenizer.tokenize(summary)
        for uid, line in enumerate(lines):
            yield gensim.models.doc2vec.LabeledSentence(
                words=preprocess(summary),
                tags=['ABS_' + str(paper_id) + '_' + str(uid)])
def run_som_pcn():
    '''
    Runs a perceptron using the activations from a SOM. The initial data is
    split into two sets, one for use in the SOM, and the other for use in the
    perceptron.
    '''
    x = preprocess.preprocess('Pollens')
    pollen = np.array(x.create_one_file(SIMPLE_GRASS))
    pollen = x.normalise_max(pollen)
    som_train_set, som_train_set_target, pcn_set, pcn_set_target, empty_set, empty_set_target = x.make_groups(
        pollen, LABEL_SIZE, algorithm='mlp', train_size=300, test_size=350, validation_size=0)
    net = som.som(5, 5, som_train_set)
    net.somtrain(som_train_set, 300)
    net.run_perceptron(pcn_set, pcn_set_target, train_size=200, test_size=150)
def main():
    preprocessor = ps.preprocess()  # avoid shadowing the built-in `object`
    X_train, X_test, y_train, y_test = preprocessor.cleaning()
    param_grid = {
        'objective': ['binary:logistic'],
        'nround': [1000],
        'max_depth': [8]
    }
    estimator = xgb.XGBRegressor()
    grid_search = GridSearchCV(estimator, param_grid, verbose=2, cv=2, n_jobs=-1)
    client = Client(processes=False)
    start_time = time.time()
    with joblib.parallel_backend("dask"):
        grid_search.fit(X_train, y_train)
    end_time = time.time()
    grid_search.predict(X_test)
    # report the elapsed time, not the raw end timestamp
    print("time difference in GridSearchCV first XGBRegressor is %d seconds" % (end_time - start_time))
    client.shutdown()
def index():
    try:
        content = request.get_json()
        path2test = content['path_to_test_csv']
        path2model = content['path_to_model']
        path2ohe = content["path_to_onehotencoder"]
        path2scaler = content["path_to_scaler"]
        path2poly = content["path_to_poly"]
        scaler = load_model(path2scaler)
        poly = load_model(path2poly)
        ohe = load_model(path2ohe)
        model = load_model(path2model)
        X_test, y_test, idx = preprocess(path2test, ohe, scaler, poly)
        prediction = predict(X_test, model)
    except:
        return redirect(url_for('bad_request'))
    # return jsonify(json.loads(pd.Series(prediction, index=idx).to_json()))
    return pd.Series(prediction, index=idx).to_json()
def cross_validation():
    from sklearn.model_selection import KFold
    new_dataset = 1
    if new_dataset:
        df = pd.read_csv('./dataset/af1and5.csv')
        paths = df['path']
        classes = df['class']
        X, y = preprocess(paths, classes)
        np.save('X', X)
        np.save('y', y)
    else:
        X = np.load('X.npy')
        y = np.load('y.npy')
    kf = KFold(n_splits=5)
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
def gender_predict(self, features: dict):
    # Convert the json request to a dict
    X_dict = {"Favorite Color": features["Favorite Color"],
              "Favorite Music Genre": features["Favorite Music Genre"],
              "Favorite Beverage": features["Favorite Beverage"],
              "Favorite Soft Drink": features["Favorite Soft Drink"]}
    # Convert the dict request to a dataframe
    X_df = pd.DataFrame([X_dict])
    X_df = prepro.preprocess(X_df)
    # Add the dummy columns missing from this single-row request, in training order
    X_df = X_df.reindex(labels=self.X.columns, axis=1).fillna(0)
    prediction = self.clf.predict_proba(X_df)
    dict_res = {"class": self.gender_type[np.argmax(prediction)],
                'probabilité': round(max(prediction[0]), 2)}
    # French response text: "The algorithm predicts that you are a <gender>
    # with a probability of <p>%"
    text_res = f"L'algorithme prédit que vous êtes un(e) {self.gender_type[np.argmax(prediction)]}\n" \
               f"avec une probabilité de {round(max(prediction[0]), 2) * 100}%"
    return text_res
def get_coeff(self):
    X, Y, x_mean, y_mean, x_std, y_std, X_test, Y_test = preprocess(
        "3D_spatial_network.csv")
    theta = np.random.rand(3)
    ntheta, ith, coh = self.grad_desc(X, Y, theta)
    print(ntheta)
    predictions = np.dot(X, ntheta)
    print("RMSE Train: ", sqrt(mean_squared_error(Y, predictions)))
    print("R2 Score Train: ", r2_score(Y, predictions))
    predictions = np.dot(X_test, ntheta)
    print("RMSE Test: ", sqrt(mean_squared_error(Y_test, predictions)))
    print("R2 Score Test: ", r2_score(Y_test, predictions))
    plt.plot(ith, coh)
    plt.xlabel("Iterations")
    plt.ylabel("Cost")
    plt.title("L1 Gradient Desc")
    plt.show()
    return ntheta, X_test, Y_test
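# Hedged sketch (assumption, not the original implementation): `grad_desc`
# above is used like a plain batch gradient-descent routine that returns the
# fitted parameters plus per-iteration histories for the cost plot. A minimal
# version with an L1 penalty (the plot is titled "L1 Gradient Desc") could look
# like this; the learning rate, penalty weight and iteration count are made up.
import numpy as np

def grad_desc_sketch(X, Y, theta, lr=1e-4, lam=0.1, iters=1000):
    """Batch gradient descent for linear regression with an L1 penalty."""
    iterations, costs = [], []
    for it in range(iters):
        pred = X.dot(theta)
        err = pred - Y
        cost = (err ** 2).mean() / 2 + lam * np.abs(theta).sum()
        grad = X.T.dot(err) / len(Y) + lam * np.sign(theta)
        theta = theta - lr * grad
        iterations.append(it)
        costs.append(cost)
    return theta, iterations, costs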
def test_sample_input_correctType():
    raw = mne.io.read_raw_edf('/Users/raphaelbechtold/Documents/MATLAB/Automagic/automagic/data/Subj1/S001R01.edf')
    params = {'line_noise': 50,
              'filter_type': 'high',
              'filt_freq': None,
              'filter_length': 'auto',
              'eog_index': -1,
              'lam': -1,
              'tol': 1e-7,
              'max_iter': 1000}
    eeg, fig1, fig2 = preprocess(raw, params)
    assert type(eeg) == mne.io.edf.edf.RawEDF
    assert type(fig1) == type(plt.figure())
    assert type(fig2) == type(plt.figure())
def createTextdictionary(X_train):
    i = 2
    count = {}
    for data in X_train:
        word_array = preprocess(data)
        for word in word_array:
            if word not in count:
                count[word] = 1
            else:
                count[word] = count[word] + 1
    count = sorted(count.items(), key=lambda item: item[1], reverse=True)
    for word, value in count:
        if word not in dictionary:
            dictionary[word] = i
            i = i + 1
        if i >= num_words + 2:
            # Only take top 15000 words with most occurrence.
            break
    save_dictionary(dictionary, 'dictionary.pkl')
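# Hedged companion sketch (assumption): ids start at 2 above, which usually
# means indices 0 and 1 are reserved (e.g. padding and out-of-vocabulary).
# A hypothetical encoder using the saved dictionary could look like this;
# `preprocess` is assumed to return the same token list that was used when
# the dictionary was built.
def encode_text(text, dictionary, oov_id=1):
    """Map a raw text to a list of integer word ids."""
    return [dictionary.get(word, oov_id) for word in preprocess(text)]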
def load_imgs_v2(): global imgs global wheels for epoch_id in epochs['all']: print('processing and loading epoch {} into memorys. train:{}, val:{}'. format(epoch_id, len(imgs['train']), len(imgs['val']))) # vid_path = cm.jn(data_dir, 'epoch{:0>2}_front.mkv'.format(epoch_id)) vid_path = cm.jn(data_dir, 'out-video-{}.avi'.format(epoch_id)) if not os.path.isfile(vid_path): continue frame_count = cm.frame_count(vid_path) cap = cv2.VideoCapture(vid_path) # csv_path = cm.jn(data_dir, 'epoch{:0>2}_steering.csv'.format(epoch_id)) csv_path = cm.jn(data_dir, 'out-key-{}.csv'.format(epoch_id)) assert os.path.isfile(csv_path) rows = cm.fetch_csv_data(csv_path) print("{}, {}".format(len(rows), frame_count)) assert frame_count == len(rows) for row in rows: ret, img = cap.read() if not ret: break img = preprocess.preprocess(img) angle = float(row['wheel']) if random.random() < params.train_pct: imgs['train'].append(img) wheels['train'].append([angle]) else: imgs['val'].append(img) wheels['val'].append([angle]) cap.release() print('Total data: train:{}, val:{}'.format(len(imgs['train']), len(imgs['val'])))
def trainWord2Vec(filename):
    # Read the file data for training
    file = open(filename, 'r')
    count = 0
    lines = []
    while count < 339:
        try:
            line = file.readline()
            if line != '':
                lines.append(line)
                line = ''
        except UnicodeDecodeError:
            continue
        count += 1
    trainData = []
    # print(len(lines))
    # print(lines)
    file = open('text.txt', 'w')
    file.writelines(lines)
    file.close()
    targetWord = (lines[0].split('\t'))[0]
    # Preprocess the data for the word2vec model
    for line in lines:
        sent = preprocess.preprocess(line)
        if sent is not None:
            trainData.append(sent)
    # Log statement for training progress monitoring
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
    # Train the model
    num_features = 300
    min_word_count = 1
    num_workers = 4
    context = 10
    downsampling = 1e-3
    model = word2vec.Word2Vec(trainData, workers=num_workers, size=num_features,
                              min_count=min_word_count, window=context, sample=downsampling)
    model.init_sims(replace=True)
    # Save the model
    model_name = "modelWord2Vec_" + targetWord
    model.save(model_name)
    # Print similarity of the target word
    print('Words that are most similar to', targetWord, 'in the dataset')
    print('__________________________________________________________________')
    print(model.most_similar(targetWord))
    print('__________________________________________________________________')
def process_epoch(epoch_id):
    print('---------- processing video for epoch {} ----------'.format(epoch_id))
    vid_path = cm.jn(params.data_dir, 'out-video-{}.avi'.format(epoch_id))
    frame_count = cm.frame_count(vid_path)
    vid_scaled_path = cm.jn(params.data_dir, 'out-video-{}-scaled.avi'.format(epoch_id))
    if not os.path.exists(vid_scaled_path):
        assert os.path.isfile(vid_path)
        os.system("ffmpeg -i " + vid_path + " -vf scale=1280:720 " + vid_scaled_path)
        print("ffmpeg -i " + vid_path + " -vf scale=1280:720 " + vid_scaled_path)
    vid_path = vid_scaled_path
    cap = cv2.VideoCapture(vid_path)
    machine_steering = []
    print('performing inference...')
    time_start = time.time()
    for frame_id in range(frame_count):
        ret, img = cap.read()
        assert ret
        prep_start = time.time()
        img = preprocess.preprocess(img)
        pred_start = time.time()
        rad = model.y.eval(feed_dict={model.x: [img], model.keep_prob: 1.0})[0][0]
        deg = rad2deg(rad)
        pred_end = time.time()
        prep_time = pred_start - prep_start
        pred_time = pred_end - pred_start
        # print 'pred: {} deg. took {} ms'.format(deg, pred_time * 1000)
        # print 'pred: {} deg (rad={})'.format(deg, rad)
        machine_steering.append(deg)
    cap.release()
    fps = frame_count / (time.time() - time_start)
    print('completed inference, total frames: {}, average fps: {} Hz'.format(frame_count, round(fps, 1)))
    # print "Machine Steering:", machine_steering
    return machine_steering
def strongly_test():
    net = preprocess(example_graph)
    print(net)
    initial_marking = Marking()
    final_marking = Marking()
    for place in net.places:
        if place.name != "p2":
            initial_marking[place] = 1
        else:
            final_marking[place] = 1
    parameters = {
        pn_visualizer.Variants.WO_DECORATION.value.Parameters.FORMAT: "svg"
    }
    gviz = pn_visualizer.apply(net, initial_marking, final_marking, parameters=parameters)
    pn_visualizer.save(gviz, "alpha.svg")
def build_corpus(files):
    thirty_second_chunks = {'transcript': [], 'startTime': [], 'file_Name': []}
    for file in files:
        filename = os.fsdecode('C:/../' + file)
        # Opening JSON file
        jsonfile = open(filename)
        # returns JSON object as a dictionary
        data = json.load(jsonfile)
        # Iterating through the json list
        for results in data['results']:
            for alternatives in results['alternatives']:
                if alternatives.get('transcript'):
                    transcript = ' '.join(
                        p.preprocess(alternatives.get('transcript')))
                    thirty_second_chunks['transcript'].append(transcript)
                    thirty_second_chunks['startTime'].append(
                        alternatives.get('words')[0].get('startTime'))
                    thirty_second_chunks['file_Name'].append(file)
    return thirty_second_chunks
def result():
    if request.method == 'POST':
        result = request.form
        print(result.to_dict())
        res = result.to_dict()
        res["tenure"] = int(res["tenure"])
        res["MonthlyCharges"] = float(res["MonthlyCharges"])
        df = pd.DataFrame([res])
        # apply the preprocessing (one hot encoding, etc.) to the input
        sample = preprocess(df)
        # load the model
        model = load('LR.joblib')
        pred = model.predict(sample)
        print(pred)
        if pred[0] == 1:
            data = {"churn": "Yes"}
        else:
            data = {"churn": "No"}
        return render_template("result.html", data=data, result=result)
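# Hedged sketch (assumption): for the route above to work, `preprocess` has to
# reproduce the training-time encoding exactly (same dummy columns, same order).
# A hypothetical version based on pandas one-hot encoding could look like this;
# `train_columns` stands in for whatever column list was saved at training time.
import pandas as pd

def preprocess_sketch(df, train_columns):
    """One-hot encode a single-row frame and align it to the training columns."""
    encoded = pd.get_dummies(df)
    # add any dummy columns missing from this request, drop unseen extras
    return encoded.reindex(columns=train_columns, fill_value=0)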
def main(args):
    """
    Execute the detect-lines process
    :param args: parsed command-line arguments
    """
    # read img and preprocess
    pre_img, rawImg = preprocess(args)
    # detect edges
    # edges_img = canny(pre_img, args)
    # hough transform
    lines = hough(pre_img, rawImg, args)
    # postprocess
    ## sort lines
    if not args.opt:
        sortedLines = sortLines(lines)
def run():
    """ Runs entirety of model: trains, checkpoints, tests. """
    # Get the data.
    images = preprocess.preprocess(k_data_path)
    # Create the model.
    generator, discriminator = setup_model()
    # Global canonical latent state for testing
    test_latent_state = tf.random.normal([3, generator.latent_dimension], seed=1)
    # Train the model
    k_epochs = 5000
    train(generator, discriminator, images, test_latent_state, k_epochs)
    # View an example
    view(generator, test_latent_state)
def test():
    image_url = request.form.get("image_url")
    file_dir = os.path.join(basedir, app.config['UPLOAD_FOLDER'])
    if not os.path.exists(file_dir):
        os.makedirs(file_dir)
    if image_url and allowed_file(image_url):
        fname = secure_filename(image_url)
        ext = fname.rsplit('.', 1)[1]
        new_filename = str(uuid.uuid3(uuid.NAMESPACE_URL, fname)) + '.' + ext
        image_path = os.path.join(file_dir, new_filename)
        urllib.request.urlretrieve(image_url, filename=image_path)
        # image_url.save(os.path.join(file_dir, new_filename))
        image = preprocess(image_path)
        # predict = densenet.predict(image)
        predict = 'not for now'
        # msg: "upload succeeded"
        return jsonify({"success": 0, "msg": "上传成功", "predict": predict})
    else:
        # msg: "upload failed"
        return jsonify({"error": 1001, "msg": "上传失败"})
def main(preproc=True):
    zipin = zipfile.ZipFile(sys.argv[1])
    if preproc:
        doc = preprocess(zipin.open('word/document.xml'), debug=True)
    else:
        doc = zipin.read('word/document.xml').decode('utf-8')
    processed_doc = render(doc, json.load(sys.stdin))
    print(processed_doc)
    target = 'Processed_' + sys.argv[1]
    outzip = zipfile.ZipFile(target, "w")
    for fileinfo in zipin.infolist():
        if fileinfo.filename != 'word/document.xml':
            outzip.writestr(fileinfo, zipin.read(fileinfo))
        else:
            outzip.writestr('word/document.xml', processed_doc)
def run(classifier, proto=False): ## Reading input X_train = pd.read_csv('files/X_train_aug.csv').drop('Unnamed: 0', axis=1) X_test = pd.read_csv('files/X_test_aug.csv').drop('Unnamed: 0', axis=1) y_train = pd.read_csv('files/y_train.csv').drop('id', axis=1) X_columns = X_train.columns.values y_columns = y_train.columns.values ## Splitting for validation if proto: X_train, X_test, y_train, y_test = preprocess(X_train, y_train) print("Finished splitting") # sampling adds one row more to X_train for some reason... also make generic # this part is probably not needed for rfcs as it does it itself sometimes X_train, y_train = sampling(X_train, y_train, X_columns, y_columns) print("Finished sampling") print(np.shape(X_train)) print(np.shape(X_test)) print(np.shape(y_train)) ## Trainining OvRClassifier = OneVsOneClassifier(classifier) OvRClassifier.fit(X_train, y_train) print("Finished training") ## Predicting y_predict = OvRClassifier.predict(X_test) if proto: print(f1_score(y_test, y_predict, average='weighted')) ## Writing output if not proto: output = pd.read_csv('files/sample.csv') for i in range(output.shape[0]): output.iat[i, 1] = y_predict[i] output.to_csv( f"outputs/{OvRClassifier.__class__.__name__}.{classifier.__class__.__name__}.csv", index=False) print("Finished predicting")
def trainWord2Vec(filename):
    file = open(filename, 'r')
    count = 0
    lines = []
    while count < 339:
        try:
            line = file.readline()
            if line != '':
                lines.append(line)
                line = ''
        except UnicodeDecodeError:
            continue
        count += 1
    trainData = []
    file = open('text.txt', 'w')
    file.writelines(lines)
    file.close()
    targetWord = (lines[0].split('\t'))[0]
    for line in lines:
        sent = preprocess.preprocess(line)
        if sent is not None:
            trainData.append(sent)
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
    num_features = 300
    min_word_count = 1
    num_workers = 4
    context = 10
    downsampling = 1e-3
    model = word2vec.Word2Vec(trainData, workers=num_workers, size=num_features,
                              min_count=min_word_count, window=context, sample=downsampling)
    model.init_sims(replace=True)
    model_name = "modelWord2Vec_" + targetWord
    model.save(model_name)
def main(): config = ConfigXT() load = FileXT(config.audio_path) print( colored('Preprocessing audio for ', 'blue', attrs=['bold']) + load.basename) data = preprocess.preprocess(load.filename, config.speaker, config, verbose=False) dataloader = dataprocess.load_infer(data) model = Tacotron(config) model.load_state_dict( torch.load(config.model_path, map_location='cpu')['state_dict']) model = set_device(model, config.device) model.eval() print( colored('Generating mel-spectrogram with ', 'blue', attrs=['bold']) + config.model_path) mel = [] y_prev = set_device(torch.zeros(1, config.mel_size, 1), config.device) for batch in tqdm(dataloader, leave=False, ascii=True): x, y_prev, _ = set_device(batch, config.device) y_gen, _ = model(x, y_prev) mel.append(y_gen.data) y_prev = y_gen[..., -1].unsqueeze(-1) mel = torch.cat(mel, dim=-1) if config.vocoder == 'wavernn': wave = wavernn_infer(mel, config) elif config.vocoder == 'waveglow': wave = waveglow_infer(mel, config) savename = config.model_path.replace('.pt', '_') + FileXT( config.vocoder_path).basestem + '_speaker' + str( config.speaker) + '_' + load.basename torchaudio.save(savename, wave, config.sample_rate) print(colored('Audio generated to ', 'blue', attrs=['bold']) + savename)
def analyse_datasets(dataset:str, model:str='cca', dataset_args:dict=None, loader_args:dict=None, preprocess_args:dict=None, clsfr_args:dict=None): """analyse a set of datasets (multiple subject) and generate a summary decoding plot. Args: dataset ([str]): the name of the dataset to load model (str, optional): The type of model to fit. Defaults to 'cca'. dataset_args ([dict], optional): additional arguments for get_dataset. Defaults to None. loader_args ([dict], optional): additional arguments for the dataset loader. Defaults to None. clsfr_args ([dict], optional): additional aguments for the model_fitter. Defaults to None. """ if dataset_args is None: dataset_args = dict() if loader_args is None: loader_args = dict() if clsfr_args is None: clsfr_args = dict() loader, filenames, dataroot = get_dataset(dataset,**dataset_args) scores=[] decoding_curves=[] nout=[] for i, fi in enumerate(filenames): print("{}) {}".format(i, fi)) #try: if 1: X, Y, coords = loader(fi, **loader_args) if preprocess_args is not None: X, Y, coords = preprocess(X, Y, coords, **preprocess_args) score, decoding_curve, _, _ = analyse_dataset(X, Y, coords, model, **clsfr_args) nout.append(Y.shape[-1] if Y.ndim<=3 else Y.shape[-2]) scores.append(score) decoding_curves.append(decoding_curve) del X, Y gc.collect() #except Exception as ex: # print("Error: {}\nSKIPPED".format(ex)) avescore=sum(scores)/len(scores) avenout=sum(nout)/len(nout) print("\n--------\n\n Ave-score={}\n".format(avescore)) # extract averaged decoding curve info int_len, prob_err, prob_err_est, se, st = flatten_decoding_curves(decoding_curves) print("Ave-DC\n{}\n".format(print_decoding_curve(np.mean(int_len,0),np.mean(prob_err,0),np.mean(prob_err_est,0),np.mean(se,0),np.mean(st,0)))) plot_decoding_curve(int_len,prob_err) plt.suptitle("{} ({}) AUDC={:3.2f}(n={} ncls={})\nloader={}\nclsfr={}({})".format(dataset,dataset_args,avescore,len(scores),avenout-1,loader_args,model,clsfr_args)) plt.savefig("{}_decoding_curve.png".format(dataset)) plt.show()
def loadValDataFromFile(valFile, size=224, percentage=1.0):
    '''
    load validation data from a txt file.
    the format of valFile: "spu_id img_path category_id"
    percentage: only take percentage*100% of whole images in valFile as validation set.
        percentage = 0.02 is usually used as validation set in learning curve
    size: image will be resized to size*size
    return data, label
        data: Variable(torch.FloatTensor) (num of images, 3, size, size)
        label: torch.LongTensor (num of images)
    '''
    val = open(valFile, 'r')
    count = 0
    for line in val:
        count += 1
    valsize = int(count * percentage)
    val.seek(0)
    valy = []
    valx = torch.empty(valsize, 3, size, size)
    count = 0
    for line in val:
        if count < valsize:
            spuid, img, cat = line.strip().split(" ")
            imgTmp = preprocess.readImage(img)  # fixed typo: was `impTmp`
            imgTmp2 = preprocess.preprocess(imgTmp, size)
            imgout = torch.from_numpy(imgTmp2).permute(0, 3, 1, 2).type(torch.FloatTensor)
            valy.append(int(cat))
            valx[count:(count + 1)] = imgout
            count += 1
    label = torch.LongTensor(valy)
    data = Variable(valx)
    print(count)
    print(label.shape)
    print(data.shape)
    # return
    return data, label
def main(arg_list=None): # Initialize parser = get_parser() if arg_list: # Called from another script args = parser.parse_args(arg_list) else: # Called from command line args = parser.parse_args() path_image = args.path_image path_model = args.path_model size_of_image = args.size_of_image if args.size_of_image else SIZE_OF_IMAGE path_json = args.path_json if args.path_json else PATH_JSON top_k = args.top_k if args.top_k else 1 # Load and preprocess images X, file_names = preprocess.load_images(path_image, size_of_image=size_of_image) print(X) X = preprocess.preprocess(X, size_of_image=size_of_image) # Load trained model model = models.load_model(path_model) # model.summary() # Load label_name from .json with open(path_json, 'r') as f: label_name = json.load(f) # Make predictions predictions_int = model.predict(X) print(predictions_int) prediction_name = [ ] # Stores top-k predicted pokemon names of the input images for prediction in predictions_int: # Find indices with top-k highest values (numpy array: [highest, ..., lowest]) top_k_index = np.argsort(prediction)[::-1][:top_k] prediction_name.append( [label_name[str(index.item())] for index in top_k_index]) for file, prediction in zip(file_names, prediction_name): print("Below are the top {} likely Pokemon in {}".format(top_k, file)) print(prediction)
def proceed(self, frame, noDepth=False, noPose=False, noSkeleton=False, noTime=False): frameRGB, frameD = frame if self.frameNumber == 0: try: c.frameHeight, c.frameWidth, _ = frameRGB.shape c.depthHeight, c.depthWidth = frameD.shape except: pass datum = op.Datum() datum.cvInputData = frameRGB self.opWrapper.emplaceAndPop([datum]) # array of people with keypoints is in datum.poseKeypoints # getSkeletons gives for every human skeleton image if not noSkeleton: frameRGB = datum.cvOutputData # image is frame with drawn skeleton humans = preprocess(datum.poseKeypoints, frameD, noDepth) # convert frame to skeleton image poses = [] if not noPose: poses = self.frame.proceedFrame(humans) for j, human in enumerate(humans): try: display.displayPose( frameRGB, human, str(poses[j][1]) + ": " + c.poses[np.argmax(poses[j][0])] + f" - { int( np.max( poses[ j ][ 0 ] ) * 100 ) }%") except: pass if not noTime: display.displayFrameTime(frameRGB, time() - self.time) self.time = time() self.frameNumber = self.frameNumber + 1 return frameRGB, poses, humans