def progress_preprocess(infile, outfile, defines): if debug > 1: print " preprocess %s %s %s" % (defines, infile, outfile) else: echo(".") if not isinstance(infile, (list)): infile = [infile] preprocess_outfile = open(outfile, "w") preprocess(includes=infile, defines=defines, output=preprocess_outfile, line_endings="lf") preprocess_outfile.close()
def feature_transformer2(data, emo, lexicon_feat, embed_feat):
    """Build the combined feature matrix (lexicon + embeddings + n-grams) for *data*.

    data: a single sample dict with a 'text' key, or a list of such dicts.
    emo: emotion label used to locate the pickled n-gram vocabulary on disk.
    lexicon_feat / embed_feat: pre-built transformers joined into the union.
    Returns the fit_transform()-ed feature matrix.
    """
    stop_words = stopwords.words('english')
    # isinstance is the idiomatic type check and also accepts subclasses,
    # which the original type(...) == ... comparison silently rejected.
    if isinstance(data, dict):
        data = [preprocess(data['text']).encode('utf-8')]
    elif isinstance(data, list):
        data = [preprocess(d['text']).encode('utf-8') for d in data]
    # Load the per-emotion vocabulary with 'with' so the file handle is
    # closed deterministically (the original leaked it).
    with open("../vocab/vocab." + emo + ".pkl", "rb") as vocab_file:
        vocab = pickle.load(vocab_file)
    ngram_feat = CountVectorizer(ngram_range=(1, 3), analyzer='word',
                                 binary=False, stop_words=stop_words,
                                 min_df=0.01, vocabulary=vocab)
    all_features = FeatureUnion([('lexicon_feature', lexicon_feat),
                                 ('embeddings', embed_feat),
                                 ('ngrams', ngram_feat)])
    pipeline = Pipeline([('all_feature', all_features)])
    feat = pipeline.fit_transform(data)
    return feat
def feature_transformer2(data, emo, lexicon_feat, embed_feat):
    """Return the combined (lexicon, embedding, n-gram) feature matrix for *data*.

    *data* is either one sample dict with a 'text' field or a list of them;
    *emo* selects which pickled vocabulary file backs the n-gram vectorizer.
    """
    stop_words = stopwords.words('english')
    # Prefer isinstance over type(...) ==: idiomatic, and subclass-safe.
    if isinstance(data, dict):
        data = [preprocess(data['text']).encode('utf-8')]
    elif isinstance(data, list):
        data = [preprocess(d['text']).encode('utf-8') for d in data]
    # Close the vocabulary file deterministically instead of leaking the
    # handle from pickle.load(open(...)).
    with open("../vocab/vocab." + emo + ".pkl", "rb") as vocab_file:
        vocabulary = pickle.load(vocab_file)
    ngram_feat = CountVectorizer(ngram_range=(1, 3), analyzer='word',
                                 binary=False, stop_words=stop_words,
                                 min_df=0.01, vocabulary=vocabulary)
    all_features = FeatureUnion([('lexicon_feature', lexicon_feat),
                                 ('embeddings', embed_feat),
                                 ('ngrams', ngram_feat)])
    pipeline = Pipeline([('all_feature', all_features)])
    feat = pipeline.fit_transform(data)
    return feat
def progress_preprocess(infile, outfile, defines): if debug > 1: print ' preprocess %s %s %s' % (defines, infile, outfile) else: echo('.') if not isinstance(infile, (list)): infile = [infile] preprocess_outfile = open(outfile, 'w') preprocess( includes = infile, defines = defines, output = preprocess_outfile, line_endings = 'lf', ) preprocess_outfile.close()
def build_data(self):
    """Load every sample's image into self.imgs and its label into self.texts."""
    # Pre-allocate one (n, img_h, img_w) array; row i holds sample i's image.
    self.imgs = np.zeros((self.n, self.img_h, self.img_w))
    self.texts = []
    for idx, sample in enumerate(self.samples):
        path, label = sample
        self.imgs[idx] = preprocess(path, self.img_w, self.img_h)
        self.texts.append(label)
def getNext(self):
    # Load the next batch of images and ground-truth texts.
    start = self.currIdx
    gtTexts = []
    imgs = []
    for i in range(start, start + self.batchSize):
        sample = self.samples[i]
        gtTexts.append(sample.gtText)
        gray = cv2.imread(sample.filePath, cv2.IMREAD_GRAYSCALE)
        imgs.append(preprocess(gray, self.imgSize, self.dataAugmentation))
    self.currIdx += self.batchSize
    return Batch(gtTexts, imgs)
def predictTest(model, fnImg, charErrorRate, wordAccuracy):
    """Recognize the word in the image at *fnImg*, log and display the result.

    Prints the prediction, appends a timestamped line to ../predictions.txt,
    then hands the stats to displayPlot().
    """
    # preprocess the image to the model's expected input size
    img = preprocess(cv2.imread(fnImg, cv2.IMREAD_GRAYSCALE), Model.imgSize)
    batch = Batch(None, [img])  # a batch containing just this image
    (recognized, probability) = model.makePrediction(batch, True)
    print('Predicted word:', '"' + recognized[0] + '"')
    current_time = datetime.now().time().strftime("%H:%M:%S")
    message = current_time + " >>> " + "prediction : " + str(
        recognized[0]) + " | model accuracy : " + str(wordAccuracy) + '\n'
    # 'with' guarantees the log file is closed even if the write raises
    # (the original leaked the handle in that case).
    with open("../predictions.txt", "a") as text_file:
        print(message, file=text_file)
    displayPlot(charErrorRate, wordAccuracy, recognized, probability, img)
def predict_image(model_predict, path, is_word):
    """Preprocess the image at *path* and return the decoded model prediction.

    is_word selects the word-level input width; otherwise the line-level
    width is used. Height is fixed at 64.
    """
    width = word_cfg['img_w'] if is_word else line_cfg['img_w']
    # NOTE(review): assumes preprocess returns a 2-D (width x 64) array — confirm.
    sample = preprocess(path, width, 64).T
    # Insert the channel axis where the backend expects it, then the batch axis.
    channel_axis = 0 if K.image_data_format() == 'channels_first' else -1
    sample = np.expand_dims(sample, channel_axis)
    sample = np.expand_dims(sample, 0)
    net_out_value = model_predict.predict(sample)
    return decode_label(net_out_value)
"""Run 10-fold cross-validation over the preprocessed contents of a data file.

Usage: python <script> <data-file>
"""
import sys

import TenCrossValidation as tcv
from Preprocessor import preprocess

# Renamed from 'file': don't shadow the (Python 2) builtin of that name.
input_path = sys.argv[1]
data = preprocess(input_path)
result = tcv.validation(data)
def load_images_from_folder(folder):
    """Return every image cv2 can read from *folder* (unreadable files skipped)."""
    images = []
    for filename in os.listdir(folder):
        img = cv2.imread(os.path.join(folder, filename))
        if img is not None:  # cv2.imread returns None for non-image files
            images.append(img)
    return images


# Character classes handled by the pipeline (digits + uppercase letters).
vals = [
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D',
    'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R',
    'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
]

currdir = os.getcwd()
# os.path.join instead of hard-coded '\\': identical on Windows, and the
# script no longer breaks on other platforms.
os.chdir(os.path.join(currdir, 'images_off_jetson'))
images = load_images_from_folder(os.path.join(currdir, 'images_off_jetson'))
# Write the preprocessed copies into the sibling 'preprocessed' directory.
os.chdir(os.path.join(currdir, 'preprocessed'))
neg = np.zeros([28, 28, 3])  # NOTE(review): appears unused in this chunk — confirm before removing
# enumerate replaces the hand-maintained counter k.
for k, image in enumerate(images):
    cv2.imwrite(str(k) + '.jpg', preprocess(image))