Пример #1
0
    def getFeature(self, ori_q,rel_q):
        """Build a similarity feature vector for a pair of questions.

        ``ori_q`` and ``rel_q`` are indexable pairs; item 0 is handled as the
        subject text and item 1 as the body text (following the variable names
        used below).  Both arguments are modified in place: each item is
        replaced by its preprocessed form.

        Returns the axis-0 concatenation of the subject features followed by
        the body features, where each part is the element-wise product of the
        two word vectors followed by their absolute difference.
        """
        # Preprocess subject and body of both questions.  The previous version
        # preprocessed rel_q[0] twice and left rel_q[1] untouched.
        for question in (ori_q, rel_q):
            for part in (0, 1):
                question[part] = preprocess(question[part], no_stopwords=True,
                                            bigram=self.bigram, trigram=self.trigram)

        word2vec_q_subject = self.getWordVectorFeatures(ori_q[0])
        word2vec_q_body = self.getWordVectorFeatures(ori_q[1])

        word2vec_rel_q_subject = self.getWordVectorFeatures(rel_q[0])
        word2vec_rel_q_body = self.getWordVectorFeatures(rel_q[1])

        subject = np.concatenate((word2vec_q_subject * word2vec_rel_q_subject,
                                  np.abs(word2vec_q_subject - word2vec_rel_q_subject)), axis=0)

        body = np.concatenate((word2vec_q_body * word2vec_rel_q_body,
                               np.abs(word2vec_q_body - word2vec_rel_q_body)), axis=0)

        # An earlier variant returned np.array([self.dist(subjects), self.dist(bodies)]).T
        # instead of the word-vector features.
        return np.concatenate((subject, body,), axis=0).T
Пример #2
0
def gen_train_sample(im):
	"""Crop every contour with area > 20 out of `im` and save it as a sample image.

	The contours are found on the preprocessed copy of `im`, processed from the
	smallest area to the largest, and each crop is written to
	'samp/zsamp47_8_<label>_<index>.png', where <label> is the classifier's
	prediction (0 when feature extraction/prediction raises IndexError).
	"""
	img = pp.preprocess(im.copy())
	# Unpacking requires a 2-D image, exactly as before.
	hight,width=im.shape
	contours2, hierarchy = cv2.findContours(img,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE)
	contours = []
	for cnt in contours2:
		area = cv2.contourArea(cnt)
		print (area)
		if area > 20:
			contours.append(cnt)
	# Indices ordered by ascending area; the stable sort keeps detection order
	# for equal areas.  Unlike the old zip(*sorted(zip(...))) idiom this also
	# works when no contour passed the filter.
	order = sorted(range(len(contours)), key=lambda k: cv2.contourArea(contours[k]))
	for i, j in enumerate(order):
		x,y,w,h=cv2.boundingRect(contours[j])
		# One pixel of margin around the box.  The start is clamped at 0: a
		# negative start would index from the far edge and give an empty crop.
		box = im[max(y-1,0):y+h+1,max(x-1,0):x+w+1]
		char = pp.preprocess(box.copy())
		try:
			f = train.find_feature(char)
			fu= train.np.array(f,train.np.float32)
			label = train.classifier.predict(fu)
			print (label)
		except IndexError:
			label = 0
		cv2.imwrite('samp/zsamp47_8_'+str(int(label))+'_'+str(i)+'.png',box)
Пример #3
0
 def export_sentences_for_giza(self, lang1, lang2, stream1, stream2,
                               use_metaphone=True):
     """Write parallel sentences for two languages to two streams.

     Only rows containing both `lang1` and `lang2` are exported.  For each
     such row the preprocessed, UTF-8 encoded `lang1` text is printed to
     `stream1` and the `lang2` text to `stream2`, one sentence per line.
     """
     targets = ((lang1, stream1), (lang2, stream2))
     for row in self.rows:
         if lang1 not in row or lang2 not in row:
             continue
         for lang, stream in targets:
             print >> stream, \
                 preprocess(row[lang], use_metaphone).encode('utf-8')
Пример #4
0
    def preprocess_data(self):
        """Run preprocess.preprocess over the train and test dataframes in place."""
        print("Preprocessing Data")
        for attr in ("train_dataframe", "test_dataframe"):
            setattr(self, attr, preprocess.preprocess(getattr(self, attr)))

        # Quick hack (flagged for removal by the original author): give the
        # test dataframe a dummy "IsBadBuy" column so its dimensions match.
        self.test_dataframe["IsBadBuy"] = 0
Пример #5
0
 def main(self) :
     """Create the D:/WebZ directory, load the cases from D:/case.xlsx and process them.

     The current working directory is passed on to the processing step.
     """
     pathnow = os.getcwd()
     try:
         os.mkdir("D:/WebZ")
     except OSError:
         # Best effort, as before: the directory may already exist.  Only
         # OS-level failures are ignored now instead of every Exception.
         pass
     case = getcase.case_format().case_in_xlsx("D:/case.xlsx")
     preprocess.preprocess().process(case, pathnow)
Пример #6
0
def main(argv):
    """Fit A and B with the forward-backward loop, plot the trace, then decode.

    The training input is preprocessed and analysed, A and B are re-estimated
    until the value reported by the forward pass changes by less than the
    threshold between iterations, and finally the Viterbi input file is
    decoded.  `argv` is not used.
    """
    # Other training inputs that were used here before:
    #   '../../data/hmm_train_data'
    #   '../../data/hmm_test_data'
    #   '../../data/hmm_train_data_jpn'
    hmm_train_file_path = '../../data/hmm_test_data_jpn'
    vtb_train_file_path = '../../data/viterbi_train_data'
    threshold = 1e-5  # forward-backward stopping criterion

    input_str = preprocess(hmm_train_file_path)
    analyze(input_str)

    index_dic, A, B = gen_matrix()
    previous_pcll = -1000.0
    pcll_list = []
    itr = 1
    while True:
        alpha_table, pcll_alpha = forward(input_str, A, B, index_dic)
        beta_table, pcll_beta = backward(input_str, A, B, index_dic)
        A, B = forward_backward(alpha_table, beta_table, A, B, index_dic, input_str)

        # The forward-pass value is the one that is tracked and plotted.
        pcll_list.append(pcll_alpha)
        if abs(pcll_alpha - previous_pcll) < threshold:
            break

        print('ITERATION#%s: %s' % (itr, pcll_alpha))

        previous_pcll = pcll_alpha
        itr += 1

    plot_pcll(pcll_list)

    print('\n=========FINAL A & B===========')
    print('A:')
    print(A)
    print('\nB:')
    print(B)
    print('==============================\n')

    # Decode the second file; only the ll_decode result is printed.
    vtb_input_str = preprocess(vtb_train_file_path)
    vtb_decode(vtb_input_str, A, B, index_dic)
    print(ll_decode(vtb_input_str, B, index_dic))

    print('END!')
Пример #7
0
 def count(self, ori_q, rel_q):
     """Record whitespace-token counts of the preprocessed titles and bodies.

     Item 0 of each question goes to ``self.c_title`` and item 1 to
     ``self.c_body``.  Both arguments are modified in place (their items are
     replaced by the preprocessed text).
     """
     for part in (0, 1):
         for question in (ori_q, rel_q):
             question[part] = preprocess(question[part], bigram=self.bigram,
                                         trigram=self.trigram)
     for part, counts in ((0, self.c_title), (1, self.c_body)):
         for question in (ori_q, rel_q):
             counts.append(len(question[part].split()))
Пример #8
0
    def getFeature(self, ori_q,rel_q):
        """Return the word-vector features of both questions' subjects and bodies.

        Both arguments are modified in place (items 0 and 1 are replaced by
        their preprocessed text).  The result is the list
        [original subject, related subject, original body, related body];
        the second value returned by getWordVectorFeatures is discarded.
        """
        pair = (ori_q, rel_q)
        for part in (0, 1):
            for question in pair:
                question[part] = preprocess(question[part], bigram=self.bigram,
                                            trigram=self.trigram, no_stopwords=True)

        features = []
        # Subjects (item 0) are vectorised with title=True ...
        for question in pair:
            vec, _length = self.getWordVectorFeatures(question[0], title=True)
            features.append(vec)
        # ... bodies (item 1) with the method's default.
        for question in pair:
            vec, _length = self.getWordVectorFeatures(question[1])
            features.append(vec)
        return features
Пример #9
0
Файл: cvm.py Проект: haldean/cvm
def run(args):
    """Assemble or compile the input source and write the binary to args.output.

    The source is read from args.input_file, or from stdin when no input file
    is given.  With args.assemble the source is parsed as instructions and
    written directly; otherwise it is preprocessed, parsed, translated and
    linked first.  Nothing is written when parsing yields a falsy tree.
    """
    if not args.input_file:
        import sys

        source = sys.stdin.read()
    else:
        with open(args.input_file, "r") as src:
            source = src.read()

    if args.assemble:
        with open(args.output, "w") as binout:
            write_binary(parse_instructions(source), binout)
        return

    tree = parse(preprocess(source))
    if args.print_ast:
        print_tree(tree)
    if not tree:
        return

    instructions = link(*translate(tree))
    if args.print_assembly:
        print_instructions(instructions)

    with open(args.output, "w") as binout:
        write_binary(instructions, binout)
Пример #10
0
def load_imgs():
    """Load every dataset's video frames and steering angles into memory.

    For each purpose and each of its epoch ids the video
    '<data_dir>/dataset<id>/out-mencoder.avi' is read frame by frame, every
    frame that is not entirely zero is preprocessed and appended to
    ``imgs[p]``, and the 'angle' value of the matching CSV row is appended to
    ``wheels[p]``.

    Raises AssertionError when a file is missing or when the number of frames
    does not match the number of CSV rows.
    """
    global imgs
    global wheels

    for p in purposes:
        for epoch_id in epochs[p]:
            print('processing and loading "{}" datasets {} into memory, current num of imgs is {}...'.format(p, epoch_id, len(imgs[p])))
            vid_path = data_dir +"/dataset{0}/out-mencoder.avi".format(epoch_id)
            assert os.path.isfile(vid_path)

            frame_count = cm.frame_count(vid_path)
            cap = cv2.VideoCapture(vid_path)
            try:
                csv_path = data_dir + "/dataset%i/data.csv" % epoch_id
                assert os.path.isfile(csv_path)
                rows = cm.fetch_csv_data(csv_path)
                assert frame_count == len(rows)
                yy = [[float(row['angle'])] for row in rows]

                kept = []  # indices of the frames that were actually stored
                frame_id = 0
                while True:
                    ret, img = cap.read()
                    if not ret:
                        break

                    if img.any():
                        img = preprocess.preprocess(img)
                        imgs[p].append(img)
                        kept.append(frame_id)
                    frame_id += 1

                # The video must deliver exactly one frame per CSV row.
                assert frame_id == len(yy)
                # Only keep the labels of stored frames.  Previously every
                # label was appended, so a single all-zero frame left images
                # and labels out of step.
                wheels[p].extend(yy[i] for i in kept)
                assert len(imgs[p]) == len(wheels[p])
            finally:
                # Release the capture even when a check above fails.
                cap.release()
Пример #11
0
 def put(self, sentence, uid, fbid, timestamp):
     """Queue one (uid, fbid, word, timestamp) entry per non-empty word.

     The sentence is split on single whitespace characters and every piece
     is preprocessed; pieces that come out empty are dropped.  Once at
     least 1000 entries are queued, flush() is called.
     """
     cleaned = [preprocess(piece) for piece in re.split(r"\s", sentence)]
     entries = [(uid, fbid, piece, timestamp) for piece in cleaned if piece != ""]
     self.modelList = self.modelList.union(entries)
     if len(self.modelList) < 1000:
         return
     self.flush()
def classify(c, t, word_data, save=False):
    """Fit classifier ``c[1]`` and return it with its per-class diagonal score.

    Args:
        c: pair whose item 0 is the classifier's display name and item 1 the
            classifier object.
        t: pair whose item 1 holds the target labels.
        word_data: documents handed to ``preprocess`` together with the targets.
        save: when true, persist the classifier and score under ./data/ and
            plot the normalised confusion matrix.

    Returns:
        ``(clf, score)`` where ``score`` is the diagonal of the row-normalised
        confusion matrix, or ``(None, 1)`` when only one class is present and
        nothing is trained.
    """
    targets, clf = t[1], c[1]
    target_names = class_labels[np.unique(targets)]
    if len(target_names) == 1:
        # The previous code returned the undefined name `_` here (NameError).
        return None, 1
    features_train, features_test, labels_train, labels_test = preprocess(word_data, targets)

    print("\n### CLASSIFICATION using {} ###".format(c[0]))
    t0 = time()
    clf.fit(features_train.toarray(), labels_train)
    print("Training time:", round((time()-t0)/60, 2), "m")

    t1 = time()
    prediction = clf.predict(features_test.toarray())
    print("Prediction time:", round((time()-t1)/60, 2), "m")

    print(classification_report(labels_test, prediction, target_names=target_names))
    acc = accuracy_score(prediction, labels_test)
    print("Accuracy:", acc)

    cm = confusion_matrix(labels_test, prediction)
    # A class that was predicted but never occurs in labels_test has an
    # all-zero row, so the division yields NaN; clean up the normalised
    # matrix (cleaning the integer counts, as before, had no effect).
    norm_cm = np.nan_to_num(cm.astype('float') / cm.sum(axis=1)[:, np.newaxis])
    score = np.diag(norm_cm)
    if save:
        try:
            joblib.dump((clf, score), "./data/{} Classifier.pkl".format(c[0]))
        except Exception:
            # Fall back to the classifier's own save method when it cannot be
            # dumped with joblib.
            clf.save("./data/{} Classifier".format(c[0]), score)
        # NOTE(review): `title` is not defined in this function; it must come
        # from module scope -- confirm.
        plot_confusion_matrix(norm_cm, target_names, title, show=False)

    return clf, score
Пример #13
0
	def onPreview(self):
		"""Write the corrected text for the source image and refresh the preview.

		The source image is preprocessed, deskewed and split into lines, words
		and characters.  For every character the matching line of the compare
		file (without its last character) is written to the output file; words
		are separated by a space and lines by a newline.  Afterwards
		draw_frame3() is called.  If the image cannot be read a message is
		printed and the program exits.
		"""
		source_path=self.source_url.get()
		img=cv2.imread(source_path,0)
		# `img==None` compares element-wise on arrays; the old message also
		# used an undefined name `url`.
		if img is None:
			print(source_path+' does\'nt exist')
			exit()
		img = pp.preprocess(img)
		im,rot = pp.skew_correction(img)
		line = pp.find_lines(im.copy())
		label_list=it.train.label_unicode()
		with open(self.compare_url.get(),'r') as f:
			q=f.readlines()
		i=0
		# The output file is only opened (and truncated) once the image has
		# been processed, and is closed even if writing fails.
		with open(self.output_url.get(),'w') as g:
			for l in line:
				for w in l.word_list:
					for c in w.char_list:
						tup=label_list[int(c.label)]
						# The compare-file entry overrides the predicted label
						# whenever they differ, so it is what ends up written.
						if(q[i][:-1]!=tup):
							tup=q[i][:-1]
						g.write(tup)
						i+=1
					g.write(' ')
				g.write('\n')

		self.draw_frame3()
Пример #14
0
def run_models():
    """Preprocess with max_features=10 and run the depth-limited decision tree."""
    # TODO: Vary max_features.  The disabled sweep tried range(1, 31) with
    # pp.preprocess(max_features=i, force_load=True) and kept the value with
    # the best depth-limited decision tree score.
    splits = pp.preprocess(max_features=10)

    # TODO: Change all metrics to F1-score once we have imbalanced data set.
    # Also change number of folds if necessary.
    # The baseline classifier and the unrestricted decision tree runs are
    # currently disabled.
    print("Depth-limited Decision Tree Classifier...")
    test_decision_tree_classifier(splits, depth_limited=True)
Пример #15
0
def compile(filename):
    """Preprocess the input for `filename`, print it, and return the parsed AST."""
    logger = yacc.NullLogger()
    yacc.yacc()
    source = preprocess(get_input(filename))
    print(source)
    return yacc.parse(source, lexer=lex.lex(), debug=1)
Пример #16
0
def make_compare_file():
	"""Write the recognised characters of sample `tno` to two text files.

	The first PNG found in './corrected_docs/Samp_<tno>/' is preprocessed and
	split into lines, words and characters.  Every character's label text is
	written on its own line to 'compare_list_new.txt' and, grouped into words
	(space separated) and lines (newline separated), to 'output_file_new.txt'.
	If no readable image is found a message is printed and the program exits.
	"""
	sample_dir='./corrected_docs/Samp_'+str(tno)
	url=glob.glob(sample_dir+'/*.png')
	# No match is treated like an unreadable image instead of raising
	# IndexError; `img==None` would compare element-wise on an array.
	img=cv2.imread(url[0],0) if url else None
	if img is None:
		print('image does\'nt exist')
		exit()
	img = pp.preprocess(img)

	line = pp.find_lines(img.copy())
	label_list=train.label_unicode()
	# Output files are opened only after the image was processed and are
	# closed even when writing fails.
	with open(sample_dir+'/compare_list_new.txt','w') as f:
		with open(sample_dir+'/output_file_new.txt','w') as g:
			for l in line:
				for w in l.word_list:
					for c in w.char_list:
						tup=label_list[int(c.label)]
						f.write(tup+'\n')
						g.write(tup)
					g.write(' ')
				g.write('\n')
def get_feature_matrix(tfidf_matrix, phrase_list, true_keys, first_occurrence, phrase_entropy):
    """Build the feature matrix and labels for every phrase with non-zero tf-idf.

    For each document row of `tfidf_matrix` and each column with a non-zero
    value, one feature vector is created from the phrase, its tf-idf value,
    its first occurrence and its entropy in that document.  The label is 1
    when the phrase is among the document's preprocessed true keys, else 0.

    Returns (features as np.array, labels as list).
    """
    features = []
    labels = []
    dense_rows = tfidf_matrix.toarray().tolist()

    # Normalise the reference keywords the same way as the phrases.
    true_keys = [[preprocess(key) for key in key_list] for key_list in true_keys]

    for doc_id, row in enumerate(dense_rows):
        print("--extracting features from doc {}".format(doc_id))
        for col, weight in enumerate(row):
            if weight == 0:
                continue
            phrase = phrase_list[col]
            features.append(get_feature_vector(phrase, weight,
                                               first_occurrence[doc_id][phrase],
                                               phrase_entropy[doc_id][phrase]))
            labels.append(int(phrase in true_keys[doc_id]))

    return np.array(features), labels
Пример #18
0
	def computerTrainError(self):
		"""Count training documents that the naive Bayes classifier gets wrong.

		The positive and the negative training folders are each classified
		with a model built from both folders; a positive document labelled
		'neg' or a negative document labelled 'pos' counts as one error.
		"""
		trainDir = '/Users/sunxinzi/Documents/Machine_Learning/Project/Sentiment Prediction/train/'
		posPath = trainDir + 'pos/'
		negPath = trainDir + 'neg/'

		process = preprocess()
		posTrain = process.getCleanTxt(posPath, 'pos')
		negTrain = process.getCleanTxt(negPath, 'neg')

		# Positive documents: every 'neg' prediction is an error.
		testData = process.getCleanTestData(posPath)
		classifier = naiveBayes()
		result = classifier.test(posTrain, negTrain, testData)
		errorCount = 0
		for item in result:
			errorCount += 1 if item[1] == 'neg' else 0

		# Negative documents: every 'pos' prediction is an error.
		testData = process.getCleanTestData(negPath)
		result = classifier.test(posTrain, negTrain, testData)
		for item in result:
			errorCount += 1 if item[1] == 'pos' else 0

		return errorCount
Пример #19
0
def telemetry(sid, data):
    """Handle one telemetry message: predict steering and send the controls.

    Without data a 'manual' event is emitted instead.  Otherwise the centre
    camera image is decoded, converted from RGB to BGR, preprocessed and fed
    to the model to predict the steering angle; the throttle comes from the
    controller, updated with the reported speed.
    """
    if not data:
        # NOTE: DON'T EDIT THIS.
        sio.emit('manual', data={}, skip_sid=True)
        return

    # Values reported by the car; angle and throttle are replaced below.
    steering_angle = data["steering_angle"]
    throttle = data["throttle"]
    speed = data["speed"]

    # Decode the base64 camera frame and make it usable by the cv2 module.
    frame = Image.open(BytesIO(base64.b64decode(data["image"])))
    frame = cv2.cvtColor(np.asarray(frame), cv2.COLOR_RGB2BGR)

    # Preprocess the frame and predict the steering angle from it.
    image_array = np.asarray(preprocess.preprocess(frame))
    feed = {model.x: image_array[None, :, :, :], model.keep_prob: 1.0}
    steering_angle = model.y.eval(feed_dict=feed)[0][0]

    throttle = controller.update(float(speed))

    print(steering_angle, throttle)
    send_control(steering_angle, throttle)
Пример #20
0
def parse_jstruct(filename, include_paths=None):
    """Preprocess and parse a jstruct header file.

    Args:
        filename: path of the file to parse.
        include_paths: include directories passed to the C preprocessor;
            defaults to no extra paths.

    Returns:
        ``(ast, text)`` -- the parsed AST and the source text after the
        generated-file notice and extra headers were inserted (before
        preprocessing).

    Raises:
        Exception: when the C preprocessor reports an error.
    """
    # A fresh list per call instead of a shared mutable default argument.
    if include_paths is None:
        include_paths = []

    parser = c_parser.CParser()
    with open(filename, 'r') as infile:
        text = infile.read()

    # insert some header includes and a 'do not modify'
    text = re.sub(
        GUARD_HEADERS_EXPR,
        r'\g<0>' + GENERATED + PREPEND_HEADERS,
        text, count=1
    )

    pptext, err = preprocess(text,
        include_paths=include_paths,
        defines=['__attribute__(x)=']
    )
    if err:
        import os
        # The preprocessor reads from stdin; report the real file name instead.
        rel_filename = os.path.relpath(filename)
        err = err.replace('<stdin>', rel_filename)
        raise Exception('C Preprocessor: ' + err)

    ast = parser.parse(pptext, filename=filename)

    return (ast, text)
Пример #21
0
def make_modified_file():
	"""Write the recognised text of the example page and report label mismatches.

	'./Example/dc_books_page.png' is preprocessed, deskewed and split into
	lines, words and characters.  Every character's label text is written to
	'./output_file.txt' (words separated by a space, lines by a newline); a
	label that differs from the matching line of './compare_list.txt' is also
	printed.  If the image cannot be read a message is printed and the
	program exits.
	"""
	image_path='./Example/dc_books_page.png'
	img=cv2.imread(image_path,0)

	# `img==None` compares element-wise on arrays, and the old message used
	# an undefined name `url`.
	if img is None:
		print(image_path+' does\'nt exist')
		exit()
	img = pp.preprocess(img)
	im,rot = pp.skew_correction(img)

	line = pp.find_lines(im.copy())
	label_list=train.label_unicode()

	with open('./compare_list.txt','r') as f:
		q=f.readlines()
	i=0
	# The output file is opened only after the image was processed and is
	# closed even when writing fails.
	with open('./output_file.txt','w') as g:
		for l in line:
			for w in l.word_list:
				for c in w.char_list:
					tup=label_list[int(c.label)]
					# Compare against the reference entry without its last
					# character (presumably the newline).
					if(q[i][:-1]!=tup):
						print (tup)
					g.write(tup)
					i+=1
				g.write(' ')
			g.write('\n')
Пример #22
0
  def __init__(self, schemafile):
    """Parse the preprocessed schema and set up tag handlers and change lists."""
    parser = etree.XMLParser(remove_comments=True)
    schema_text = preprocess.preprocess(schemafile)
    self.tree = etree.parse(cStringIO.StringIO(schema_text), parser)

    # Tag name -> bound handler.  Every handler is named cb_<tag in lower
    # case>; 'interleave' is the one exception and shares the 'group' handler.
    tags = ('element', 'documentation', 'value', 'attribute', 'data',
            'optional', 'zeroOrMore', 'oneOrMore', 'choice', 'empty',
            'list', 'group', 'name', 'text', 'anyName', 'nsName',
            'except', 'ignore', 'notAllowed')
    self.callbacks = dict((tag, getattr(self, 'cb_' + tag.lower())) for tag in tags)
    self.callbacks['interleave'] = self.cb_group

    # Elements and attributes lost / added, filled in later.
    self.lost_eles, self.added_eles = [], []
    self.lost_attrs, self.added_attrs = [], []
def main(argv):
    """Preprocess the tweets of an input file and write the result to a file.

    Args:
        argv: ``[input_file, output_file]`` or
            ``[input_file, group_number, output_file]``.  With a group number
            only that group's slice from the first part of the data and the
            matching slice from the second part are processed.

    Exits with status 2 when the number of arguments is wrong.
    """
    # Size of one group's slice and the line offset of the second part.
    group_size = 5500
    second_part_offset = 800000

    if len(argv) < 2 or len(argv) > 3:
        print("Wrong number or arguements")
        sys.exit(2)

    # Get command line arguments
    input_file_name = argv[0]
    output_file_name = argv[-1]  # Last argument

    with open(input_file_name, "r") as input_file:
        data = input_file.readlines()

    if len(argv) == 3:  # Optional argument
        group_number = int(argv[1])

        start = group_number * group_size
        stop = (group_number + 1) * group_size
        first_half = data[start:stop]
        last_half = data[second_part_offset + start:second_part_offset + stop]
        data = first_half + last_half

    # Process the tweet as per the steps in the assignment
    processed_tweets = preprocess.preprocess(data)

    # write results to the file
    with open(output_file_name, "w") as output_file:
        output_file.write(processed_tweets)
Пример #24
0
    def getFeature(self, ori_q,rel_q):
        """Concatenate the word vectors of both questions along axis 0.

        Both arguments are modified in place (items 0 and 1 are replaced by
        their preprocessed text).  The result is ordered original subject,
        original body, related subject, related body.
        """
        per_question = []
        for question in (ori_q, rel_q):
            for part in (0, 1):
                question[part] = preprocess(question[part], no_stopwords=True,
                                            bigram=self.bigram, trigram=self.trigram)

        for question in (ori_q, rel_q):
            subject_vec = self.getWordVectorFeatures(question[0])
            body_vec = self.getWordVectorFeatures(question[1])
            per_question.append(np.concatenate((subject_vec, body_vec), axis=0))

        return np.concatenate((per_question[0], per_question[1]), axis=0)
Пример #25
0
def test():
	load()
	count,correct=0,0
	url='../samples/train_images/'
	for i in range(101,150):
		s_list=glob.glob(url+str(i)+'/*.png')
		for j in s_list:
			imgo=cv2.imread(j,0)
			img=pp.preprocess(imgo.copy())
			f = find_feature(img.copy())
			fu= np.array(f,np.float32)
			# print len(fu)
			t = classifier.predict(fu)
			print label_uni[i-100],label_uni[int(t)],int(i-100==t)
			if(i-100==t):
				correct+=1
			else:
				name = './zerr_'+str(i)+'_'+str(count)+'.png'
				print j
				print count
#				cv2.imwrite('./zerr_'+str(i)+'_'+str(count)+'z.png',img)
				shutil.copyfile(j,name)
			count+=1
	print 'accuracy :'+str(100.0*correct/count)+'%'
	print ('accurate recognition :'+str(correct))
	print ('total character tested :'+str(count))
Пример #26
0
def train_svm():
	"""Train the OpenCV SVM on the character folders and save it.

	Every sub-folder of 'train_images/' holding more than 500 PNG files
	becomes one class; its label text is read from the folder's 'utf8' file
	(last character dropped).  The label texts are written to the file
	'label' and the trained model to 'svm_class.xml'.
	"""
	# CV2 SVM
	svm_params = dict( kernel_type = cv2.SVM_RBF,
	                    svm_type = cv2.SVM_C_SVC,
	                    C=9.34, gamma=15.68 )
	svm=cv2.SVM()
	# Index 0 holds a placeholder so that class numbers start at 1.
	label_list=['a']
	url='train_images/'
	# The original limited each class with a C-style countdown
	# (`int test=10; if(!test-=1) break;`), which is not valid Python and,
	# read as C, stops before the 10th sample.
	max_samples_per_class = 9
	train_set = []
	label = 0
	for i in sorted(os.listdir(url)):
		sample_paths=glob.glob(url+i+'/*.png')
		if len(sample_paths)<=500:
			continue
		with open(url+i+'/utf8',"r") as label_file:
			i_uni=label_file.read()[:-1]
		label_list.append(i_uni)
		label+=1
		print (' '.join([str(label),i,label_list[label],str(len(sample_paths))]))
		for j in sample_paths[:max_samples_per_class]:
			img=cv2.imread(j,0)
			img=pp.preprocess(img)
			f =train.find_feature(img.copy())
			train_set.append([label,f])
	with open('label','w') as label_out:
		for l in label_list:
			label_out.write(l+'\n')

	shuffle(train_set)
	f_list = []
	label = []
	for t in train_set:
		label.append(t[0])
		f_list.append(t[1])
	samples = np.array(f_list,np.float32)
	responses = np.array(label,np.float32)
	print ('auto training initiated')
	print ('please wait.....')
	svm.train(samples,responses,params=svm_params)
	# svm.train_auto(samples,responses,None,None,params=svm_params)
	svm.save("svm_class.xml")
Пример #27
0
    def vector(self, text):
        """Return the weighted term vector of `text` as a list of length self.n.

        The text is preprocessed and segmented; every segment found in
        ``self.dic`` adds (1 / number of segments) * log(doc_count / df) to
        the position ``self.dic`` maps it to.
        """
        seg = segment(preprocess(text))
        word_count = len(seg)
        x = [0] * self.n
        for w in seg:
            if w in self.dic:
                # float() keeps the ratio exact: with two ints Python 2 would
                # truncate it before the logarithm is taken.
                ratio = float(self.doc_count) / self.df[w]
                x[self.dic[w]] += 1.0 / word_count * math.log(ratio)
        return x
def detectPlates(imgOriginal):
    """Return the plate images extracted from `imgOriginal`.

    The image is preprocessed into a grayscale and a thresholded version,
    possible characters are located and grouped into lists of matching
    characters, and one plate is extracted per group.  Groups for which
    extractPlate returns None are left out.
    """
    imgGrayscale, imgThresh = preprocess.preprocess(imgOriginal)

    listOfPossibleChars = findPossibleChars(imgGrayscale, imgThresh)
    matchingCharGroups = findListOfListsOfMatchingChars(listOfPossibleChars)

    # The cv.ShowImage / cv.DrawContours debugging views that used to sit
    # between these steps are disabled.
    candidatePlates = (extractPlate(imgOriginal, group) for group in matchingCharGroups)
    return [plate for plate in candidatePlates if plate is not None]
def _testOneInputFile(self, fname):
    """Preprocess inputs/<fname> and compare against the expected result.

    Options are read from inputs/<fname>.opts (``name=value`` per line) when
    that file exists.  If outputs/<fname> exists, the generated tmp/<fname>
    must equal it line for line; otherwise, if outputs/<fname>.err exists,
    preprocessing must raise PreprocessError with that message.
    """
    import preprocess

    infile = os.path.join('inputs', fname) # input
    reffile = os.path.join('outputs', fname) # expected output
    outfile = os.path.join('tmp', fname) # actual output
    errfile = os.path.join('outputs', fname+'.err')  # expected error
    optsfile = os.path.join('inputs', fname+'.opts') # input options

    # Determine input options to use, if any.
    opts = {}
    if os.path.exists(optsfile):
        with open(optsfile, 'r') as f:
            for line in f:
                if line[-1] == "\n": line = line[:-1]
                name, value = line.split('=', 1)
                try:
                    # NOTE(review): eval() runs whatever the .opts file
                    # contains -- only acceptable for trusted test fixtures.
                    value = eval(value)
                except NameError:
                    # Not a Python expression: keep the raw string.
                    pass
                opts[name] = value

    # If there is no reference output file this means that processing
    # this file is expected to fail.
    if os.path.exists(reffile):
        with open(reffile, 'r') as f:
            ref = f.readlines()
        if not sys.platform.startswith("win"):
            ref = [line.replace('\\','/') for line in ref] # use Un*x paths
        preprocess.preprocess(infile, outfile, **opts)
        with open(outfile, 'r') as f:
            out = f.readlines()
        if ref != out:
            diff = list(difflib.ndiff(ref, out))
            self.fail("%r != %r:\n%s"\
                      % (reffile, outfile, pprint.pformat(diff)))
    elif os.path.exists(errfile):
        with open(errfile, 'r') as f:
            err = f.read()
        if not sys.platform.startswith("win"):
            err = err.replace('\\','/') # use Un*x paths
        try:
            preprocess.preprocess(infile, outfile, **opts)
        except preprocess.PreprocessError as ex:
            self.assertEqual(err.strip(), str(ex).strip())
        else:
            self.fail("No PreprocessError when expected one.")
Пример #30
0
 def __init__(self,filename):
     """Load tags and words from `filename` and set up the model matrices.

     The transition matrix is tags x tags and the emission matrix is
     tags x words, both zero-filled before the private model initialisation
     runs; the start probabilities are the transition row of the 'START' tag.
     """
     self.filename = filename
     tags, tag_index, word_index = preprocess(filename)
     self.tag_array, self.tag_dict, self.words_dict = tags, tag_index, word_index

     # Both counts include START and END.
     n_tags = len(tags)
     n_words = len(word_index.keys())
     self.num_of_tags = n_tags
     self.num_of_words = n_words

     self.transition = np.zeros((n_tags, n_tags))
     self.emission_prob = np.zeros((n_tags, n_words))
     self.__model_initialization()
     start_row = self.tag_dict['START']
     self.start_prob = self.transition[start_row,:]
Пример #31
0
def mymethod():
    """Cross-validate a multinomial naive Bayes model on the training set.

    The training CSV is preprocessed, vectorised with word counts (stop words
    removed) and evaluated with 10 unshuffled folds.

    Returns the fold averages as [accuracy, precision, recall, f1]; precision,
    recall and f1 are micro-averaged.
    """
    original_train_data = pd.read_csv("../datasets/train_set.csv", sep="\t")

    clf = MultinomialNB()

    documents = preprocess(original_train_data)

    le = preprocessing.LabelEncoder()
    y = le.fit_transform(original_train_data['Category'])

    cv = CountVectorizer(stop_words=STOPWORDS)
    X = cv.fit_transform(documents)

    ksplits = 10
    kf = KFold(n_splits=ksplits, shuffle=False)

    # Running totals per metric, averaged after the last fold.
    totals = {'acc': 0, 'prec': 0, 'rec': 0, 'f1': 0}

    for train_index, test_index in kf.split(X):
        clf.fit(X[train_index], y[train_index])
        y_test = y[test_index]
        predictions = clf.predict(X[test_index])

        totals['prec'] += precision_score(y_test, predictions, average='micro')
        totals['rec'] += recall_score(y_test, predictions, average='micro')
        totals['f1'] += f1_score(y_test, predictions, average='micro')
        totals['acc'] += accuracy_score(y_test, predictions)

    return [totals[metric] / ksplits for metric in ('acc', 'prec', 'rec', 'f1')]
def load_batch(purpose):
    """Load the images and steering values of the next batch for `purpose`.

    The batch pointer of `purpose` is advanced (wrapping around at the end).
    Frames frame_start..frame_end (inclusive) of the batch's epoch video are
    read and preprocessed; the 'wheel' values of the same rows are taken
    from the epoch's steering CSV.

    Returns (xx, yy): the list of preprocessed frames and the list of
    one-element [wheel] lists.
    """
    global current_batch_id

    # Look up the batch definition.
    batch_id = current_batch_id[purpose]
    assert batch_id < len(batches[purpose])
    batch = batches[purpose][batch_id]
    epoch_id = batch['epoch_id']
    frame_start = batch['frame_start']
    frame_end = batch['frame_end']
    assert not (epoch_id is None or frame_start is None or frame_end is None)

    # Advance to the next batch, wrapping around.
    current_batch_id[purpose] = (batch_id + 1) % len(batches[purpose])

    # Open the video and seek to the first frame of the batch.
    vid_path = cm.jn(data_dir, 'epoch{:0>2}_front.mkv'.format(epoch_id))
    assert os.path.isfile(vid_path)
    frame_count = cm.frame_count(vid_path)
    cap = cv2.VideoCapture(vid_path)
    cm.cv2_goto_frame(cap, frame_start)

    # Steering values for the same frame range.
    csv_path = cm.jn(data_dir, 'epoch{:0>2}_steering.csv'.format(epoch_id))
    assert os.path.isfile(csv_path)
    rows = cm.fetch_csv_data(csv_path)
    assert frame_count == len(rows)
    yy = [[float(row['wheel'])] for row in rows[frame_start:frame_end + 1]]

    xx = []
    for _ in xrange(frame_start, frame_end + 1):
        ret, img = cap.read()
        assert ret
        xx.append(preprocess.preprocess(img))

    assert len(xx) == len(yy)

    cap.release()

    return xx, yy
Пример #33
0
def doshit(positives, poswordcount, numpostweets, negatives, negwordcount, numnegtweets, vocabsize):
	"""Classify every tweet in trumptweets.csv and print the overall accuracy.

	Each line is split into symbol, date, time, tweet and sentiment.  The
	tweet's tokens are scored against the positive and the negative model in
	log space: the class prior plus, per token, the log of its entry in
	`positives`/`negatives`, or of 1/(word count + vocabsize) for a token
	that is missing from the model.  The class with the higher score wins
	(positive on a tie).

	Returns the list of TweetData objects, each carrying the predicted
	`sentiment` and the file's `correctsentiment`.
	"""
	# os.path.join adds the separator that plain concatenation left out.
	path = os.path.join(os.path.dirname(__file__), "trumptweets.csv")
	# used to compute accuracy
	pcount = 0  # predicted positive, labelled otherwise
	ncount = 0  # predicted negative, labelled otherwise
	count = 0   # tweets seen (used to start at 1, inflating the accuracy)

	alldata = []

	with open(path, "r") as inputfile:
		for line in inputfile:
			# Drop the last two characters and the surrounding quotes
			# (presumably a closing quote plus the newline).
			line = line[:-2]
			line = line.strip("\"")

			# read in custom file
			symbol, date, time, tweet, sentiment = line.split("\",\"")
			date = date.split(" ")
			date = Date(date[1], date[0], date[2])
			data = TweetData(symbol, date, time, tweet)

			tokens = preprocess.preprocess(tweet)
			# float() keeps the priors from being truncated to 0 by Python 2
			# integer division, which made math.log fail.
			total = float(numpostweets + numnegtweets)
			probpos = math.log(numpostweets / total)
			probneg = math.log(numnegtweets / total)
			for token in tokens:
				if token in positives:
					probpos += math.log(positives[token])
				else:
					probpos += math.log(1.0 / (poswordcount + vocabsize))
				if token in negatives:
					probneg += math.log(negatives[token])
				else:
					probneg += math.log(1.0 / (negwordcount + vocabsize))
			data.correctsentiment = sentiment

			if probpos >= probneg:
				data.sentiment = "positive"
				if sentiment != "positive":
					pcount += 1
			else:
				data.sentiment = "negative"
				if sentiment != "negative":
					ncount += 1
			count += 1
			alldata.append(data)
	# An empty file has no meaningful accuracy; report 0.0 instead of dividing by zero.
	accuracy = float(count - pcount - ncount) / count if count else 0.0
	print("Text Sentiment Accuracy:  " + str(accuracy))
	return alldata
Пример #34
0
def load_imgs():
    """Load preprocessed frames and wheel labels for every purpose/epoch.

    For each purpose in ``purposes`` and each of its epoch ids, frames are
    read from ``out-video-<epoch>.avi``, passed through
    ``preprocess.preprocess`` and appended to ``imgs[purpose]``.  The
    ``wheel`` column of ``out-key-<epoch>.csv`` is appended to
    ``wheels[purpose]``; surplus labels are trimmed from the end so both
    lists end up the same length.
    """
    global imgs, wheels

    for purpose in purposes:
        for epoch_id in epochs[purpose]:
            print('processing and loading "{}" epoch {} into memory, current num of imgs is {}...'.format(purpose, epoch_id, len(imgs[purpose])))

            video_file = cm.jn(data_dir, 'out-video-{}.avi'.format(epoch_id))
            assert os.path.isfile(video_file)

            frame_count = cm.frame_count(video_file)
            cap = cv2.VideoCapture(video_file)

            key_file = cm.jn(data_dir, 'out-key-{}.csv'.format(epoch_id))
            assert os.path.isfile(key_file)

            rows = cm.fetch_csv_data(key_file)
            print("{}, {}".format(len(rows), frame_count))
            assert frame_count == len(rows)
            labels = [[float(row['wheel'])] for row in rows]

            print("{}, {}".format(len(imgs[purpose]), len(wheels[purpose])))

            # Read until the capture stops returning frames.
            ok, frame = cap.read()
            while ok:
                imgs[purpose].append(preprocess.preprocess(frame))
                ok, frame = cap.read()

            wheels[purpose].extend(labels)

            # Fewer frames than labels were decoded: drop the trailing labels.
            surplus = len(wheels[purpose]) - len(imgs[purpose])
            if surplus > 0:
                del wheels[purpose][-surplus:]

            assert len(imgs[purpose]) == len(wheels[purpose])

            cap.release()
Пример #35
0
    def __iter__(self):
        """Yield one ``LabeledSentence`` per sentence of every paper summary.

        ``self.filename`` is read as UTF-8, one tab-separated record per
        line; column 0 is used as the paper id and column 3 as the summary.
        The ``###FORMULA###`` / ``###TABLE###`` / ``###FIGURE###``
        placeholders are rewritten to their ``||...||`` form before the
        record is split.  Each summary is split with ``tokenizer.tokenize``
        and every resulting piece is yielded with the tag
        ``ABS_<paper_id>_<index>``.
        """
        # The context manager closes the file once the generator is exhausted
        # or discarded.
        with codecs.open(self.filename, 'r', 'utf-8') as train:
            for record in train:
                record = record.replace("###FORMULA###", "||FORMULA||")
                record = record.replace("###TABLE###", "||TABLE||")
                record = record.replace("###FIGURE###", "||FIGURE||")

                fields = record.split('\t')
                paper_id = fields[0]
                summary = fields[3]
                print(paper_id)

                sentences = tokenizer.tokenize(summary)
                for uid, sentence in enumerate(sentences):
                    # Preprocess the individual sentence; the previous code
                    # passed the whole summary for every sentence tag.
                    yield gensim.models.doc2vec.LabeledSentence(
                        words=preprocess(sentence),
                        tags=['ABS_' + str(paper_id) + '_' + str(uid)])
Пример #36
0
def run_som_pcn():
    '''
    Train a SOM and then run a perceptron on its activations.

    The pollen data is normalised and split by ``make_groups`` into a set
    used to train the SOM and a separate set handed to the perceptron; the
    third (validation) group is requested with size 0 and ignored.
    '''
    prep = preprocess.preprocess('Pollens')
    pollen = prep.normalise_max(np.array(prep.create_one_file(SIMPLE_GRASS)))

    groups = prep.make_groups(pollen,
                              LABEL_SIZE,
                              algorithm='mlp',
                              train_size=300,
                              test_size=350,
                              validation_size=0)
    # Unpack all six groups as before; only the SOM inputs and the
    # perceptron inputs/targets are used below.
    (som_train_set, som_train_set_target,
     pcn_set, pcn_set_target,
     empty_set, empty_set_target) = groups

    net = som.som(5, 5, som_train_set)
    net.somtrain(som_train_set, 300)
    net.run_perceptron(pcn_set, pcn_set_target, train_size=200, test_size=150)
Пример #37
0
def main():
    """Grid-search an ``XGBRegressor`` on the dask joblib backend.

    Data comes from ``ps.preprocess().cleaning()``.  The fit is timed and
    the elapsed wall-clock time is printed; predictions on the test split
    are computed but not used further.
    """
    preprocessor = ps.preprocess()
    X_train, X_test, y_train, y_test = preprocessor.cleaning()
    param_grid = {
        'objective': ['binary:logistic'],
        'nround': [1000],
        'max_depth': [8],
    }
    estimator = xgb.XGBRegressor()
    grid_search = GridSearchCV(estimator, param_grid, verbose=2, cv=2, n_jobs=-1)
    client = Client(processes=False)
    try:
        start_time = time.time()
        with joblib.parallel_backend("dask"):
            grid_search.fit(X_train, y_train)
        end_time = time.time()
        grid_search.predict(X_test)
        # Report the elapsed time; the previous code printed the raw end
        # timestamp.
        print("time difference in GridSearchCV first XGBRegressor is %d seconds"
              % (end_time - start_time))
    finally:
        # Shut the client down even when fitting or predicting fails.
        client.shutdown()
Пример #38
0
def index():
    """Score a test CSV with stored artefacts and return predictions as JSON.

    The JSON request body must provide ``path_to_test_csv``,
    ``path_to_model``, ``path_to_onehotencoder``, ``path_to_scaler`` and
    ``path_to_poly``.  Any failure while reading the request, loading the
    artefacts, preprocessing or predicting redirects to the ``bad_request``
    endpoint.

    Returns:
        The predictions as a JSON string indexed by ``idx`` (the third value
        returned by ``preprocess``), or a redirect response on failure.
    """
    try:
        content = request.get_json()
        path2test = content['path_to_test_csv']
        path2model = content['path_to_model']
        path2ohe = content["path_to_onehotencoder"]
        path2scaler = content["path_to_scaler"]
        path2poly = content["path_to_poly"]
        scaler = load_model(path2scaler)
        poly = load_model(path2poly)
        ohe = load_model(path2ohe)
        model = load_model(path2model)
        X_test, y_test, idx = preprocess(path2test, ohe, scaler, poly)
        prediction = predict(X_test, model)
    except Exception:
        # Deliberate catch-all for request/processing errors.  Unlike a bare
        # ``except:`` it lets SystemExit and KeyboardInterrupt propagate.
        return redirect(url_for('bad_request'))
    return pd.Series(prediction, index=idx).to_json()
Пример #39
0
def cross_validation():
    """Build (or reload) the dataset and iterate over a 5-fold split.

    With ``rebuild_dataset`` truthy, ``X`` and ``y`` are produced by
    ``preprocess`` from the ``path`` and ``class`` columns of
    ``./dataset/af1and5.csv`` and cached with ``np.save``; otherwise the
    cached ``X.npy`` / ``y.npy`` are loaded.
    """
    from sklearn.model_selection import KFold

    rebuild_dataset = 1
    if not rebuild_dataset:
        X = np.load('X.npy')
        y = np.load('y.npy')
    else:
        frame = pd.read_csv('./dataset/af1and5.csv')
        paths = frame['path']
        classes = frame['class']
        X, y = preprocess(paths, classes)
        np.save('X', X)
        np.save('y', y)

    splitter = KFold(n_splits=5)
    for train_index, test_index in splitter.split(X):
        X_train = X[train_index]
        X_test = X[test_index]
        y_train = y[train_index]
        y_test = y[test_index]
Пример #40
0
    def gender_predict(self, features: dict):
        """Predict a gender label from four "favorite ..." answers.

        Args:
            features: mapping that must contain the keys "Favorite Color",
                "Favorite Music Genre", "Favorite Beverage" and
                "Favorite Soft Drink".

        Returns:
            A French sentence naming the predicted class and the highest
            class probability as a percentage.
        """
        wanted_keys = ("Favorite Color",
                       "Favorite Music Genre",
                       "Favorite Beverage",
                       "Favorite Soft Drink")
        # Keep only the expected answers from the request payload.
        X_dict = {key: features[key] for key in wanted_keys}

        # One-row frame, preprocessed, then aligned to the columns of self.X;
        # columns missing from this single sample are filled with 0.
        X_df = pd.DataFrame([X_dict])
        X_df = prepro.preprocess(X_df)
        X_df = X_df.reindex(labels=self.X.columns, axis=1).fillna(0)

        prediction = self.clf.predict_proba(X_df)
        predicted_class = self.gender_type[np.argmax(prediction)]
        probability = round(max(prediction[0]), 2)

        # Format with one decimal so float artefacts such as
        # 56.99999999999999 are not shown to the user.
        text_res = f"L'algorithme prédit que vous êtes un(e) {predicted_class}\n" \
                   f"avec une probabilité de {probability * 100:.1f}%"
        return text_res
Пример #41
0
 def get_coeff(self):
     """Fit coefficients with ``self.grad_desc`` and report train/test error.

     Data comes from ``preprocess("3D_spatial_network.csv")``.  Starting from
     three random coefficients, the fitted vector is printed, RMSE and R2 are
     printed for both splits, and the values returned by ``grad_desc``
     alongside the coefficients are plotted as cost against iterations.

     Returns:
         ``(fitted coefficients, X_test, Y_test)``.
     """
     (X, Y, x_mean, y_mean, x_std, y_std,
      X_test, Y_test) = preprocess("3D_spatial_network.csv")

     initial_theta = np.random.rand(3)
     fitted, iterations, costs = self.grad_desc(X, Y, initial_theta)
     print(fitted)

     train_pred = np.dot(X, fitted)
     print("RMSE Train: ", sqrt(mean_squared_error(Y, train_pred)))
     print("R2 Score Train: ", r2_score(Y, train_pred))

     test_pred = np.dot(X_test, fitted)
     print("RMSE Test: ", sqrt(mean_squared_error(Y_test, test_pred)))
     print("R2 Score Test: ", r2_score(Y_test, test_pred))

     plt.plot(iterations, costs)
     plt.xlabel("Iterations")
     plt.ylabel("Cost")
     plt.title("L1 Gradient Desc")
     plt.show()
     return fitted, X_test, Y_test
Пример #42
0
def test_sample_input_correctType():
    """Check the types of the three values ``preprocess`` returns.

    Loads a sample EDF recording from a hard-coded local path and asserts
    that the first result is a ``RawEDF`` and the other two are matplotlib
    figures.
    """
    edf_path = '/Users/raphaelbechtold/Documents/MATLAB/Automagic/automagic/data/Subj1/S001R01.edf'
    raw = mne.io.read_raw_edf(edf_path)

    params = {
        'line_noise': 50,
        'filter_type': 'high',
        'filt_freq': None,
        'filter_length': 'auto',
        'eog_index': -1,
        'lam': -1,
        'tol': 1e-7,
        'max_iter': 1000,
    }

    eeg, fig1, fig2 = preprocess(raw, params)
    assert type(eeg) == mne.io.edf.edf.RawEDF
    # A fresh figure is created for each comparison, as before.
    assert type(fig1) == type(plt.figure())
    assert type(fig2) == type(plt.figure())
def createTextdictionary(X_train):
    """Assign integer ids to the most frequent words and save the mapping.

    Words from ``preprocess`` are counted over all samples and visited in
    descending frequency.  Words not yet in the module-level ``dictionary``
    receive consecutive ids starting at 2; iteration stops once the next id
    reaches ``num_words + 2``.  The mapping is then written to
    ``dictionary.pkl`` via ``save_dictionary``.

    Args:
        X_train: iterable of samples accepted by ``preprocess``.
    """
    frequencies = {}
    for sample in X_train:
        for token in preprocess(sample):
            frequencies[token] = frequencies.get(token, 0) + 1

    ranked = sorted(frequencies.items(), key=lambda pair: pair[1], reverse=True)

    next_id = 2
    for token, _ in ranked:
        if token not in dictionary:
            dictionary[token] = next_id
            next_id += 1
        # Checked after every word, whether or not it was newly added.
        if next_id >= num_words + 2:
            break
    save_dictionary(dictionary, 'dictionary.pkl')
Пример #44
0
def load_imgs_v2():
    """Load all epochs and randomly split their frames into train/val.

    For every epoch id in ``epochs['all']`` whose video file exists, frames
    are read in step with the rows of the matching key CSV.  Each frame is
    preprocessed and, with probability ``params.train_pct``, stored in the
    ``'train'`` lists of ``imgs`` / ``wheels``; otherwise in ``'val'``.
    Epochs without a video file are skipped.
    """
    global imgs, wheels

    for epoch_id in epochs['all']:
        print('processing and loading epoch {} into memorys. train:{}, val:{}'.
              format(epoch_id, len(imgs['train']), len(imgs['val'])))

        video_file = cm.jn(data_dir, 'out-video-{}.avi'.format(epoch_id))
        if not os.path.isfile(video_file):
            continue

        frame_count = cm.frame_count(video_file)
        cap = cv2.VideoCapture(video_file)

        key_file = cm.jn(data_dir, 'out-key-{}.csv'.format(epoch_id))
        assert os.path.isfile(key_file)

        rows = cm.fetch_csv_data(key_file)
        print("{}, {}".format(len(rows), frame_count))
        assert frame_count == len(rows)

        for row in rows:
            ok, frame = cap.read()
            if not ok:
                break

            frame = preprocess.preprocess(frame)
            angle = float(row['wheel'])

            # One random draw per frame decides which split receives it.
            split = 'train' if random.random() < params.train_pct else 'val'
            imgs[split].append(frame)
            wheels[split].append([angle])

        cap.release()

    print('Total data: train:{}, val:{}'.format(len(imgs['train']),
                                                len(imgs['val'])))
Пример #45
0
def trainWord2Vec(filename):
    """Train and save a word2vec model from the first lines of ``filename``.

    Up to 339 successful ``readline`` calls are made; non-empty lines are
    kept and also written to ``text.txt``.  The target word is the first
    tab-separated field of the first kept line.  Each kept line is passed
    through ``preprocess.preprocess`` and results that are not ``None``
    form the training corpus.  The model is saved as
    ``modelWord2Vec_<targetWord>`` and the words most similar to the target
    word are printed.

    Args:
        filename: path of the text file to read.
    """
    # Read the file data for training
    lines = []
    count = 0
    with open(filename, 'r') as source:
        while count < 339:
            try:
                line = source.readline()
            except UnicodeDecodeError:
                # Undecodable reads do not count towards the limit.
                continue
            # Equality/truthiness test rather than identity against a literal.
            if line:
                lines.append(line)
            count += 1

    with open('text.txt', 'w') as dump:
        dump.writelines(lines)

    targetWord = (lines[0].split('\t'))[0]

    # Preprocess the data for the word2vec model
    trainData = []
    for line in lines:
        sent = preprocess.preprocess(line)
        if sent is not None:
            trainData.append(sent)

    # Log statement for training progress monitoring
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

    # Train the model
    num_features = 300
    min_word_count = 1
    num_workers = 4
    context = 10
    downsampling = 1e-3
    model = word2vec.Word2Vec(trainData, workers=num_workers, size=num_features, min_count=min_word_count, window=context, sample=downsampling)
    model.init_sims(replace=True)

    # Save the model
    model_name = "modelWord2Vec_" + targetWord
    model.save(model_name)

    # Print similarity of the target word
    print('Words that are most similar to', targetWord, 'in the dataset')
    print('__________________________________________________________________')
    print(model.most_similar(targetWord))
    print('__________________________________________________________________')
Пример #46
0
def process_epoch(epoch_id):
    """Run the steering model over every frame of one epoch's video.

    The frame count is taken from ``out-video-<epoch>.avi``.  If the scaled
    copy ``out-video-<epoch>-scaled.avi`` does not exist yet, it is created
    with ffmpeg (scale 1280:720).  Frames are then read from the scaled
    copy, preprocessed, and fed to ``model``; each prediction is converted
    with ``rad2deg``.

    Args:
        epoch_id: identifier substituted into the video file names.

    Returns:
        List with one ``rad2deg`` result per frame.
    """
    # Format the message before printing; calling .format() on print()'s
    # return value fails under Python 3.
    print('---------- processing video for epoch {} ----------'.format(epoch_id))
    vid_path = cm.jn(params.data_dir, 'out-video-{}.avi'.format(epoch_id))
    frame_count = cm.frame_count(vid_path)

    vid_scaled_path = cm.jn(params.data_dir, 'out-video-{}-scaled.avi'.format(epoch_id))
    if not os.path.exists(vid_scaled_path):
        assert os.path.isfile(vid_path)
        # NOTE(review): the command is built by string concatenation and run
        # through a shell; paths containing spaces or shell metacharacters
        # will break it.
        command = "ffmpeg -i " + vid_path + " -vf scale=1280:720 " + vid_scaled_path
        os.system(command)
        print(command)
    vid_path = vid_scaled_path

    cap = cv2.VideoCapture(vid_path)

    machine_steering = []

    print('performing inference...')
    time_start = time.time()
    for _ in range(frame_count):
        ret, img = cap.read()
        assert ret

        img = preprocess.preprocess(img)

        rad = model.y.eval(feed_dict={model.x: [img], model.keep_prob: 1.0})[0][0]
        machine_steering.append(rad2deg(rad))

    cap.release()

    fps = frame_count / (time.time() - time_start)
    print('completed inference, total frames: {}, average fps: {} Hz'.format(frame_count, round(fps, 1)))
    return machine_steering
Пример #47
0
def strongly_test():
    """Render the net built from ``example_graph`` to ``alpha.svg``.

    Every place except the one named "p2" receives one token in the initial
    marking; "p2" receives one token in the final marking.
    """
    net = preprocess(example_graph)
    print(net)

    initial_marking = Marking()
    final_marking = Marking()
    for place in net.places:
        marking = final_marking if place.name == "p2" else initial_marking
        marking[place] = 1

    svg_format = {
        pn_visualizer.Variants.WO_DECORATION.value.Parameters.FORMAT: "svg"
    }
    gviz = pn_visualizer.apply(net,
                               initial_marking,
                               final_marking,
                               parameters=svg_format)
    pn_visualizer.save(gviz, "alpha.svg")
Пример #48
0
def build_corpus(files):
    """Collect preprocessed transcripts from a set of JSON result files.

    Each file name is prefixed with ``'C:/../'`` and parsed as JSON.  For
    every alternative under ``data['results'][*]['alternatives']`` with a
    non-empty ``transcript``, the transcript is passed through
    ``p.preprocess`` and joined with spaces, and stored together with the
    ``startTime`` of the alternative's first word and the source file name.

    Args:
        files: iterable of file names (strings).

    Returns:
        Dict with the parallel lists ``transcript``, ``startTime`` and
        ``file_Name``.
    """
    thirty_second_chunks = {'transcript': [], 'startTime': [], 'file_Name': []}
    for file in files:
        filename = os.fsdecode('C:/../' + file)
        # Context manager closes the handle; it was previously left open.
        with open(filename) as jsonfile:
            data = json.load(jsonfile)

        for results in data['results']:
            for alternatives in results['alternatives']:
                raw_transcript = alternatives.get('transcript')
                if not raw_transcript:
                    continue
                transcript = ' '.join(p.preprocess(raw_transcript))
                thirty_second_chunks['transcript'].append(transcript)
                thirty_second_chunks['startTime'].append(
                    alternatives.get('words')[0].get('startTime'))
                thirty_second_chunks['file_Name'].append(file)
    return thirty_second_chunks
Пример #49
0
def result():
    """Predict churn for a submitted form and render ``result.html``.

    Only POST requests are handled; for any other method the function
    returns ``None``.  ``tenure`` is converted to int and
    ``MonthlyCharges`` to float before the one-row frame is preprocessed
    and scored with the model stored in ``LR.joblib``.
    """
    if request.method != 'POST':
        return None

    form = request.form
    print(form.to_dict())
    fields = form.to_dict()
    fields["tenure"] = int(fields["tenure"])
    fields["MonthlyCharges"] = float(fields["MonthlyCharges"])

    # apply the preprocessing (one hot encoding, etc.) to the input
    sample = preprocess(pd.DataFrame([fields]))

    # load the model and score the single sample
    model = load('LR.joblib')
    pred = model.predict(sample)
    print(pred)

    data = {"churn": "Yes"} if pred[0] == 1 else {"churn": "No"}
    return render_template("result.html", data=data, result=form)
Пример #50
0
def main(args):
    """
    Execute the line-detection process.

    :param args: options object passed on to ``preprocess`` and ``hough``;
        its ``opt`` attribute decides whether the detected lines are sorted.
    """
    # read the image and preprocess it
    pre_img, rawImg = preprocess(args)

    # Hough transform directly on the preprocessed image
    lines = hough(pre_img, rawImg, args)

    # postprocess: sort the lines unless ``args.opt`` is set
    if args.opt:
        return
    sortedLines = sortLines(lines)
Пример #51
0
def run():
    """
    Run the whole pipeline: load data, build the models, train, view a sample.
    """
    # Data and models.
    images = preprocess.preprocess(k_data_path)
    generator, discriminator = setup_model()

    # Fixed latent batch (seeded) reused for training-time tests and the
    # final preview.
    latent_shape = [3, generator.latent_dimension]
    test_latent_state = tf.random.normal(latent_shape, seed=1)

    # Train for a fixed number of epochs.
    k_epochs = 5000
    train(generator, discriminator, images, test_latent_state, k_epochs)

    # View an example.
    view(generator, test_latent_state)
Пример #52
0
def test():
    """Download the image named by the ``image_url`` form field and preprocess it.

    The upload folder is created if missing.  When the URL is absent or
    rejected by ``allowed_file`` an error payload is returned.  Otherwise
    the image is fetched to a file named by a UUID derived from the
    sanitised URL, preprocessed, and a success payload with a placeholder
    prediction is returned.
    """
    image_url = request.form.get("image_url")
    file_dir = os.path.join(basedir, app.config['UPLOAD_FOLDER'])
    if not os.path.exists(file_dir):
        os.makedirs(file_dir)

    if not (image_url and allowed_file(image_url)):
        return jsonify({"error": 1001, "msg": "上传失败"})

    fname = secure_filename(image_url)
    ext = fname.rsplit('.', 1)[1]
    new_filename = str(uuid.uuid3(uuid.NAMESPACE_URL, fname)) + '.' + ext
    image_path = os.path.join(file_dir, new_filename)
    urllib.request.urlretrieve(image_url, filename=image_path)

    image = preprocess(image_path)
    # The model call is disabled; a placeholder is returned instead.
    predict = 'not for now'
    return jsonify({"success": 0, "msg": "上传成功", "predict": predict})
Пример #53
0
def main(preproc=True):
    """Render ``word/document.xml`` of a docx archive and write a new archive.

    The input archive is ``sys.argv[1]``; the rendering context is read as
    JSON from stdin.  The rendered document is printed and written, with
    all other archive members copied unchanged, to
    ``'Processed_' + sys.argv[1]``.

    Args:
        preproc: when true, the document member is passed through
            ``preprocess`` (with ``debug=True``) before rendering; otherwise
            its raw bytes are decoded as UTF-8.
    """
    source_name = sys.argv[1]
    # Both archives are context-managed: the output zip's central directory
    # is only written on close, which previously depended on garbage
    # collection.
    with zipfile.ZipFile(source_name) as zipin:
        if preproc:
            doc = preprocess(zipin.open('word/document.xml'), debug=True)
        else:
            doc = zipin.read('word/document.xml').decode('utf-8')

        processed_doc = render(doc, json.load(sys.stdin))

        print(processed_doc)

        target = 'Processed_' + source_name
        with zipfile.ZipFile(target, "w") as outzip:
            for fileinfo in zipin.infolist():
                if fileinfo.filename != 'word/document.xml':
                    outzip.writestr(fileinfo, zipin.read(fileinfo))
                else:
                    outzip.writestr('word/document.xml', processed_doc)
Пример #54
0
def run(classifier, proto=False):
    """Train a one-vs-one wrapper around ``classifier`` and predict.

    Args:
        classifier: base estimator wrapped in ``OneVsOneClassifier``.
        proto: when true, the training data is split by ``preprocess`` and
            the weighted F1 score on the held-out part is printed; when
            false, predictions for ``X_test_aug.csv`` are written into
            column 1 of ``files/sample.csv`` and saved under ``outputs/``.
    """
    def _read(path, dropped):
        # Load a CSV and drop one bookkeeping column.
        return pd.read_csv(path).drop(dropped, axis=1)

    ## Reading input
    X_train = _read('files/X_train_aug.csv', 'Unnamed: 0')
    X_test = _read('files/X_test_aug.csv', 'Unnamed: 0')
    y_train = _read('files/y_train.csv', 'id')

    X_columns = X_train.columns.values
    y_columns = y_train.columns.values

    ## Splitting for validation
    if proto:
        X_train, X_test, y_train, y_test = preprocess(X_train, y_train)
        print("Finished splitting")

    X_train, y_train = sampling(X_train, y_train, X_columns, y_columns)
    print("Finished sampling")

    for frame in (X_train, X_test, y_train):
        print(np.shape(frame))

    ## Training
    wrapped = OneVsOneClassifier(classifier)
    wrapped.fit(X_train, y_train)
    print("Finished training")

    ## Predicting
    y_predict = wrapped.predict(X_test)
    if proto:
        print(f1_score(y_test, y_predict, average='weighted'))
    else:
        ## Writing output
        output = pd.read_csv('files/sample.csv')
        for row in range(output.shape[0]):
            output.iat[row, 1] = y_predict[row]
        output.to_csv(
            f"outputs/{wrapped.__class__.__name__}.{classifier.__class__.__name__}.csv",
            index=False)
    print("Finished predicting")
Пример #55
0
def trainWord2Vec(filename):
    """Train and save a word2vec model from the first lines of ``filename``.

    Up to 339 successful ``readline`` calls are made; non-empty lines are
    kept and also written to ``text.txt``.  The target word is the first
    tab-separated field of the first kept line.  Each kept line is passed
    through ``preprocess.preprocess`` and results that are not ``None``
    form the training corpus.  The model is saved as
    ``modelWord2Vec_<targetWord>``.

    Args:
        filename: path of the text file to read.
    """
    lines = []
    count = 0
    with open(filename, 'r') as source:
        while count < 339:
            try:
                line = source.readline()
            except UnicodeDecodeError:
                # Undecodable reads do not count towards the limit.
                continue
            # Equality/truthiness test rather than identity against a literal.
            if line:
                lines.append(line)
            count += 1

    with open('text.txt', 'w') as dump:
        dump.writelines(lines)

    targetWord = (lines[0].split('\t'))[0]

    trainData = []
    for line in lines:
        sent = preprocess.preprocess(line)
        if sent is not None:
            trainData.append(sent)

    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                        level=logging.INFO)

    num_features = 300
    min_word_count = 1
    num_workers = 4
    context = 10
    downsampling = 1e-3
    model = word2vec.Word2Vec(trainData,
                              workers=num_workers,
                              size=num_features,
                              min_count=min_word_count,
                              window=context,
                              sample=downsampling)
    model.init_sims(replace=True)

    model_name = "modelWord2Vec_" + targetWord
    model.save(model_name)
Пример #56
0
def main():
    """Generate audio for ``config.audio_path`` with Tacotron and a vocoder.

    The audio is preprocessed, turned into an inference data loader, and
    run batch by batch through the Tacotron model loaded from
    ``config.model_path``.  The generated mel chunks are concatenated along
    the last dimension and converted to a waveform by the vocoder named in
    ``config.vocoder`` ('wavernn' or 'waveglow').  The result is saved next
    to the model file.

    Raises:
        ValueError: if ``config.vocoder`` is neither 'wavernn' nor
            'waveglow'.
    """
    config = ConfigXT()
    load = FileXT(config.audio_path)

    print(
        colored('Preprocessing audio for ', 'blue', attrs=['bold']) +
        load.basename)
    data = preprocess.preprocess(load.filename,
                                 config.speaker,
                                 config,
                                 verbose=False)
    dataloader = dataprocess.load_infer(data)

    model = Tacotron(config)
    model.load_state_dict(
        torch.load(config.model_path, map_location='cpu')['state_dict'])
    model = set_device(model, config.device)
    model.eval()

    print(
        colored('Generating mel-spectrogram with ', 'blue', attrs=['bold']) +
        config.model_path)
    mel = []
    y_prev = set_device(torch.zeros(1, config.mel_size, 1), config.device)
    for batch in tqdm(dataloader, leave=False, ascii=True):
        # NOTE(review): y_prev is re-read from the batch on every iteration,
        # so the value carried over from the previous step is overwritten --
        # confirm this is intended.
        x, y_prev, _ = set_device(batch, config.device)

        y_gen, _ = model(x, y_prev)
        mel.append(y_gen.data)
        y_prev = y_gen[..., -1].unsqueeze(-1)

    mel = torch.cat(mel, dim=-1)
    if config.vocoder == 'wavernn':
        wave = wavernn_infer(mel, config)
    elif config.vocoder == 'waveglow':
        wave = waveglow_infer(mel, config)
    else:
        # Fail with a clear message instead of a NameError on `wave` below.
        raise ValueError(
            "unsupported vocoder: {!r}".format(config.vocoder))

    savename = config.model_path.replace('.pt', '_') + FileXT(
        config.vocoder_path).basestem + '_speaker' + str(
            config.speaker) + '_' + load.basename
    torchaudio.save(savename, wave, config.sample_rate)

    print(colored('Audio generated to ', 'blue', attrs=['bold']) + savename)
def analyse_datasets(dataset:str, model:str='cca', dataset_args:dict=None, loader_args:dict=None, preprocess_args:dict=None, clsfr_args:dict=None):
    """Analyse every file of a dataset and plot a summary decoding curve.

    Each file is loaded, optionally preprocessed, and scored with
    ``analyse_dataset``.  The mean score and the averaged decoding curve are
    printed; the curve is plotted, saved as ``<dataset>_decoding_curve.png``
    and shown.

    Args:
        dataset (str): name of the dataset to load.
        model (str, optional): type of model to fit. Defaults to 'cca'.
        dataset_args (dict, optional): extra arguments for get_dataset.
        loader_args (dict, optional): extra arguments for the dataset loader.
        preprocess_args (dict, optional): extra arguments for preprocess;
            when None, preprocessing is skipped entirely.
        clsfr_args (dict, optional): extra arguments for analyse_dataset.
    """
    if dataset_args is None:
        dataset_args = dict()
    if loader_args is None:
        loader_args = dict()
    if clsfr_args is None:
        clsfr_args = dict()

    loader, filenames, dataroot = get_dataset(dataset, **dataset_args)

    scores = []
    decoding_curves = []
    nout = []
    for file_index, file_name in enumerate(filenames):
        print("{}) {}".format(file_index, file_name))
        X, Y, coords = loader(file_name, **loader_args)
        if preprocess_args is not None:
            X, Y, coords = preprocess(X, Y, coords, **preprocess_args)
        score, decoding_curve, _, _ = analyse_dataset(X, Y, coords, model, **clsfr_args)
        # Number of outputs: last axis for <=3-d Y, second-to-last otherwise.
        nout.append(Y.shape[-1] if Y.ndim <= 3 else Y.shape[-2])
        scores.append(score)
        decoding_curves.append(decoding_curve)
        # Free the per-file arrays before loading the next file.
        del X, Y
        gc.collect()

    avescore = sum(scores) / len(scores)
    avenout = sum(nout) / len(nout)
    print("\n--------\n\n Ave-score={}\n".format(avescore))

    # extract averaged decoding curve info
    int_len, prob_err, prob_err_est, se, st = flatten_decoding_curves(decoding_curves)
    averaged_curve = print_decoding_curve(np.mean(int_len, 0),
                                          np.mean(prob_err, 0),
                                          np.mean(prob_err_est, 0),
                                          np.mean(se, 0),
                                          np.mean(st, 0))
    print("Ave-DC\n{}\n".format(averaged_curve))

    plot_decoding_curve(int_len, prob_err)
    plt.suptitle("{} ({}) AUDC={:3.2f}(n={} ncls={})\nloader={}\nclsfr={}({})".format(dataset,dataset_args,avescore,len(scores),avenout-1,loader_args,model,clsfr_args))
    plt.savefig("{}_decoding_curve.png".format(dataset))
    plt.show()
Пример #58
0
def loadValDataFromFile(valFile, size=224, percentage=1.0):
    '''
    Load validation data from a txt file.

    The format of each line in valFile is "spu_id img_path category_id".

    Args:
        valFile: path of the listing file.
        size: images are resized to size*size.
        percentage: only the first percentage*100% of the lines in valFile
            are used as the validation set (0.02 is usually used for the
            validation set in a learning curve).

    Returns:
        (data, label)
        data: Variable(torch.FloatTensor) (num of images, 3, size, size)
        label: torch.LongTensor (num of images)
    '''
    # Context manager closes the listing file; it was previously left open.
    with open(valFile, 'r') as val:
        # First pass: count lines to size the output tensor.
        total_lines = sum(1 for _ in val)
        valsize = int(total_lines * percentage)

        val.seek(0)

        valy = []
        valx = torch.empty(valsize, 3, size, size)

        count = 0
        for line in val:
            if count >= valsize:
                # Remaining lines were only skipped before; stop reading.
                break
            spuid, img, cat = line.strip().split(" ")
            # Fixed name mismatch: the image was stored as `impTmp` but read
            # back as `imgTmp`, which raised NameError.
            imgTmp = preprocess.readImage(img)
            imgTmp2 = preprocess.preprocess(imgTmp, size)
            # Reorder axes 0,3,1,2 (assumes preprocess returns a batch with
            # channels last -- TODO confirm).
            imgout = torch.from_numpy(imgTmp2).permute(0, 3, 1, 2).type(
                torch.FloatTensor)
            valy.append(int(cat))
            valx[count:(count + 1)] = imgout
            count += 1

    label = torch.LongTensor(valy)
    data = Variable(valx)
    print(count)
    print(label.shape)
    print(data.shape)
    return data, label
def main(arg_list=None):
    """Predict the top-k labels for every image under ``path_image``.

    Args:
        arg_list: optional list of command-line style arguments; when empty
            or None the process's own command line is parsed instead.
    """
    # Initialize
    parser = get_parser()
    # A non-empty list means "called from another script".
    args = parser.parse_args(arg_list) if arg_list else parser.parse_args()

    path_image = args.path_image
    path_model = args.path_model
    size_of_image = args.size_of_image or SIZE_OF_IMAGE
    path_json = args.path_json or PATH_JSON
    top_k = args.top_k or 1

    # Load and preprocess images
    X, file_names = preprocess.load_images(path_image,
                                           size_of_image=size_of_image)
    print(X)
    X = preprocess.preprocess(X, size_of_image=size_of_image)

    # Load trained model
    model = models.load_model(path_model)

    # Load label names from the .json file
    with open(path_json, 'r') as f:
        label_name = json.load(f)

    # Make predictions
    predictions_int = model.predict(X)
    print(predictions_int)

    # For each image: label names of the k highest scores, best first.
    prediction_name = [
        [label_name[str(index.item())]
         for index in np.argsort(prediction)[::-1][:top_k]]
        for prediction in predictions_int
    ]

    for file, prediction in zip(file_names, prediction_name):
        print("Below are the top {} likely Pokemon in {}".format(top_k, file))
        print(prediction)
Пример #60
0
    def proceed(self,
                frame,
                noDepth=False,
                noPose=False,
                noSkeleton=False,
                noTime=False):
        """Process one (RGB, depth) frame and return the annotated image.

        Args:
            frame: pair ``(frameRGB, frameD)``.
            noDepth: forwarded to ``preprocess`` as its third argument.
            noPose: when true, ``self.frame.proceedFrame`` is skipped and
                the returned ``poses`` list is empty.
            noSkeleton: when true, the original RGB frame is kept instead of
                the wrapper's output image.
            noTime: when true, the frame-time overlay is skipped and
                ``self.time`` is left unchanged.

        Returns:
            ``(frameRGB, poses, humans)``.
        """
        frameRGB, frameD = frame
        if self.frameNumber == 0:
            # Record frame sizes once, on the first frame.
            try:
                c.frameHeight, c.frameWidth, _ = frameRGB.shape
                c.depthHeight, c.depthWidth = frameD.shape
            except Exception:
                # Best effort: a frame without the expected shape leaves the
                # configured sizes untouched.  Unlike a bare ``except:`` this
                # lets KeyboardInterrupt/SystemExit through.
                pass

        datum = op.Datum()
        datum.cvInputData = frameRGB
        self.opWrapper.emplaceAndPop([datum])

        # array of people with keypoints is in datum.poseKeypoints
        if not noSkeleton:
            frameRGB = datum.cvOutputData  # image is frame with drawn skeleton

        humans = preprocess(datum.poseKeypoints, frameD, noDepth)
        # convert frame to skeleton image
        poses = []
        if not noPose:
            poses = self.frame.proceedFrame(humans)
        for j, human in enumerate(humans):
            try:
                best_score = int(np.max(poses[j][0]) * 100)
                label = (str(poses[j][1]) + ": " +
                         c.poses[np.argmax(poses[j][0])] +
                         f" - {best_score}%")
                display.displayPose(frameRGB, human, label)
            except Exception:
                # Best effort: skip the overlay for humans without a pose
                # entry (e.g. when noPose is set) or when drawing fails.
                pass
        if not noTime:
            display.displayFrameTime(frameRGB, time() - self.time)
            self.time = time()

        self.frameNumber = self.frameNumber + 1
        return frameRGB, poses, humans