def text_rank(data):
    """Summarise *data* with the TextRank algorithm and return the summary.

    ``tokenize(data)`` yields the sentences, ``preprocess`` and ``vectorize``
    turn them into one vector per sentence, and a graph weighted by pairwise
    cosine similarity is scored with PageRank.  The ``round(20%)`` best-scored
    sentences are handed to ``clean_summary``, whose result is returned.

    The sentence count and the number of selected sentences are printed to
    stdout, as before.
    """
    sentences = tokenize(data)
    n_sentences = len(sentences)
    clean_sentences = preprocess(sentences)
    sentence_vectors = vectorize(clean_sentences)

    # Similarity matrix; the diagonal is left at 0 (i == j is skipped).
    s_mat = np.zeros([n_sentences, n_sentences])
    for i in range(n_sentences):
        for j in range(n_sentences):
            if i != j:
                # reshape(1, -1) accepts any embedding width, not only 50.
                s_mat[i][j] = cosine_similarity(
                    sentence_vectors[i].reshape(1, -1),
                    sentence_vectors[j].reshape(1, -1))[0, 0]

    # PageRank over the similarity graph gives one score per sentence index.
    graph = nx.from_numpy_array(s_mat)
    sentence_scores = nx.pagerank(graph)
    ranked_sentences = sorted(
        ((sentence_scores[i], s) for i, s in enumerate(sentences)),
        reverse=True)

    n_selected = round(n_sentences * 0.2)
    summary = [ranked_sentences[i][1] for i in range(n_selected)]
    print(n_sentences)
    print(n_selected)
    return clean_summary(summary)
def video_stream(self):
    """Handle one camera frame: predict, update the text state, redraw, reschedule.

    Reads a frame from ``self.cap``, flips it with ``cv2.flip(frame, 1)`` and
    runs ``self.predict`` on the preprocessed frame, storing the predicted
    class in ``self.prediction``.  The per-class counters in ``self.count``
    and ``self.blank_count`` decide when a prediction is appended to
    ``self.current_word`` and when that word is appended to ``self.sentence``.
    Finally the plot, the labels and the video widget are refreshed and the
    method re-schedules itself through ``self.video_label.after``.
    """
    # NOTE(review): ret is never checked, so a failed read is passed on to
    # cv2.flip unchanged — confirm whether that can happen here.
    ret, frame = self.cap.read()
    frame = cv2.flip(frame, 1)
    hand_img = preprocess(frame)
    probs_class_map, self.prediction = self.predict(hand_img)
    if len(self.current_word) != 0 and self.prediction == 'blank':
        # A word is in progress and the current prediction is 'blank'.
        self.blank_count += 1
        if self.blank_count > 80:
            # More than 80 counted blanks: move the word into the sentence
            # (preceded by a space) and reset all counters.
            self.sentence += ' '
            self.sentence += self.current_word
            self.current_word = ''
            for i in classes:
                self.count[i] = 0
            self.blank_count = 0
    elif self.prediction != 'blank':
        self.count[self.prediction] += 1
        if self.count[self.prediction] > 50:
            # The same class was counted more than 50 times: accept it as
            # the next symbol of the current word and reset all counters.
            self.current_word += self.prediction
            for i in classes:
                self.count[i] = 0
            # NOTE(review): assumed to belong to this branch (mirrors the
            # reset in the branch above) — confirm the intended nesting.
            self.blank_count = 0
    self.plot(probs_class_map)
    self.updateDepositLabel(self.prediction, self.current_word, self.sentence)
    cv2Img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGBA)
    img = Image.fromarray(cv2Img)
    imgtk = ImageTk.PhotoImage(image=img)
    # The image is also stored on the widget (presumably the usual Tkinter
    # idiom for keeping the PhotoImage alive).
    self.video_label.imgtk = imgtk
    self.video_label.configure(image=imgtk)
    # Schedule the next call after 1 ms.
    self.video_label.after(1, self.video_stream)
def step(self, action):
    """Step every environment ``group_size`` times with the same actions.

    Returns ``(next_state, reward_sum, done, info)``: the preprocessed frames
    of all sub-steps concatenated on the last axis, the rewards summed over
    the sub-steps, and the done / info columns of the final sub-step only.
    """
    stacked_frames = []
    reward_sum = np.zeros(self.batch_size)
    for _ in range(self.group_size):
        outcomes = [env.step(act) for env, act in zip(self.env, action)]
        # Transpose the per-environment results into four columns:
        # next_state, reward, done, info.
        columns = [[], [], [], []]
        for idx in range(self.batch_size):
            for column, value in zip(columns, outcomes[idx]):
                column.append(value)
        states, rewards, done, info = (np.array(column) for column in columns)
        frames = np.array(
            [preprocess(states[k][:][:]) for k in range(self.batch_size)])
        stacked_frames.append(frames)
        reward_sum += rewards
    # Reshape to (group_size, batch_size, 88, 80, 1), then join the
    # group_size slices on the last axis: (batch_size, 88, 80, group_size).
    stacked_frames = np.reshape(
        stacked_frames, [self.group_size, self.batch_size, 88, 80, 1])
    next_state = np.array(np.concatenate(list(stacked_frames), axis=-1))
    return next_state, reward_sum, done, info
def main(neval=30, nfolds=5, ncvjobs=1, nreps=5, kbest=None, ngram_hi=3,
         jobs=1, seed=1, *event_sel):
    """Run the hyper-parameter search on the training data and print the best result.

    The effective arguments are printed first.  Extra positional arguments are
    collected in ``event_sel`` and forwarded to ``preprocess`` together with
    the n-gram range ``(2, ngram_hi)``.  Returns None.
    """
    # Must stay the first statement so only the arguments are echoed.
    print(locals())
    X, y = preprocess(read_train(), event_sel=event_sel, ngrams=(2, ngram_hi))
    search_options = dict(
        neval=neval,
        nfolds=nfolds,
        ncvjobs=ncvjobs,
        nreps=nreps,
        nbest=kbest,
        njobs=jobs,
        seed=seed,
    )
    best = evaluate_hyper(X, y, hyper_objective, **search_options)
    print('Final best: {}'.format(best))
def FinalIndexDoc2Vec(final_query, desc):
    """Return the tags of up to ten documents most similar to *final_query*.

    Empty descriptions and those with ten words or fewer are dropped, the rest
    are preprocessed and used to train a Doc2Vec model in which every document
    is tagged with its position in the filtered list.  Only hits with a
    positive similarity are returned.
    """
    desc = [
        preprocess(text) for text in desc
        if text != '' and len(text.split()) > 10
    ]
    sentences = [
        LabeledSentence(line, [item_no]) for item_no, line in enumerate(desc)
    ]
    model = gensim.models.doc2vec.Doc2Vec(
        documents=sentences,
        dm=1,
        alpha=0.5,
        seed=42,
        min_count=1,
        max_vocab_size=None,
        window=50,
        size=300,
        sample=1e-4,
        negative=5,
        iter=200)
    query_vector = model.infer_vector(final_query.split())
    hits = model.docvecs.most_similar([query_vector], topn=10)
    return [hit[0] for hit in hits if hit[1] > 0]
def on_status(self, status):
    """Classify and print a status whose lower-cased text contains a target word.

    Statuses without any entry of ``targetWords`` are ignored.  For the others
    the lower-cased text and the label of the predicted class are printed.
    """
    tweet = str(status.text).lower()
    if not any(word in tweet for word in targetWords):
        return
    cleaned = preprocess(tweet)
    predicted = predictInterest([cleaned], NBClassifier, bestCount_Vectorizer)
    print(tweet)
    print(label[predicted[0]])
def predict(image):
    """Return the class that the module-level ``model`` assigns to *image*.

    The preprocessed image is reshaped to ``(-1, 45, 45, 1)`` before
    prediction; the argmax of the prediction, converted to ``str``, is the
    key looked up in ``classes``.
    """
    batch = preprocess(image).reshape(-1, 45, 45, 1)
    scores = model.predict(batch)
    best_index = np.argmax(scores)
    return classes[str(best_index)]
def main(event_sel=None):
    """Benchmark a Perceptron on the training data.

    ``event_sel`` is forwarded to ``preprocess``; the result of ``benchmark``
    is returned.
    """
    features, labels = preprocess(read_train(), event_sel=event_sel)
    from sklearn.linear_model import Perceptron
    model = Perceptron(max_iter=50, tol=1e-3, random_state=1)
    return benchmark(model, features, labels)
def main(event_sel=None):
    """Benchmark a 10-neighbour KNeighborsClassifier on the training data.

    ``event_sel`` is forwarded to ``preprocess``; the result of ``benchmark``
    is returned.
    """
    features, labels = preprocess(read_train(), event_sel=event_sel)
    from sklearn.neighbors import KNeighborsClassifier
    model = KNeighborsClassifier(n_neighbors=10)
    return benchmark(model, features, labels)
def main(event_sel=None):
    """Benchmark a RidgeClassifier (``sag`` solver) on the training data.

    ``event_sel`` is forwarded to ``preprocess``; the result of ``benchmark``
    is returned.
    """
    features, labels = preprocess(read_train(), event_sel=event_sel)
    from sklearn.linear_model import RidgeClassifier
    model = RidgeClassifier(tol=1e-2, solver="sag", random_state=1)
    return benchmark(model, features, labels)
def main(event_sel=None):
    """Benchmark a NearestCentroid classifier on the training data.

    ``event_sel`` is forwarded to ``preprocess``; the result of ``benchmark``
    is returned.
    """
    features, labels = preprocess(read_train(), event_sel=event_sel)
    from sklearn.neighbors import NearestCentroid
    model = NearestCentroid()
    return benchmark(model, features, labels)
def main(event_sel=None):
    """Benchmark a BernoulliNB classifier (``alpha=.01``) on the training data.

    ``event_sel`` is forwarded to ``preprocess``; the result of ``benchmark``
    is returned.
    """
    features, labels = preprocess(read_train(), event_sel=event_sel)
    from sklearn.naive_bayes import BernoulliNB, MultinomialNB
    model = BernoulliNB(alpha=.01)
    return benchmark(model, features, labels)
def reset_all(self):
    """Reset every environment and return the stacked initial observation.

    Each reset frame is preprocessed and the resulting batch is repeated
    ``group_size`` times along the last axis.  According to the original
    author's notes the result has shape (batch_size, 88, 80, group_size).
    """
    raw_frames = [env.reset() for env in self.env]
    frames = np.array(
        [preprocess(raw_frames[idx]) for idx in range(self.batch_size)])
    # Repeat the same batch group_size times and join on the last axis.
    return np.concatenate([frames] * self.group_size, axis=-1)
def preprocess_search(keyword):
    """Expand *keyword* into matching column names of the module-level ``df``.

    The keyword is passed through ``preprocess`` and ``make_bigrams``.  Terms
    containing ``*`` are matched against the columns as shell-style patterns;
    all other terms are matched with ``difflib.get_close_matches``.
    """
    corpus = df.columns
    terms = make_bigrams(preprocess(keyword))
    search_words = []
    for term in terms:
        if '*' in term:
            matches = fnmatch.filter(corpus, term)
        else:
            matches = difflib.get_close_matches(term, corpus)
        search_words.extend(matches)
    return search_words
def FinalIndexJaccard(final_query, desc):
    """Return the indices of up to ten descriptions most similar to *final_query*.

    Empty descriptions and those with ten words or fewer are dropped and the
    rest are preprocessed.  Similarity is ``1 - distance.jaccard(...)``; only
    descriptions scoring above 0.5 qualify.

    The returned indices are positions in the *filtered* description list,
    best match first, the same convention ``FinalIndexDoc2Vec`` uses for its
    document tags.  Previously the function returned positions within the
    internal list of hits, so the stored document index was never used.
    """
    desc = [preprocess(i) for i in desc if i != '' and len(i.split()) > 10]
    hits = []
    for indx, text in enumerate(desc):
        similarity = 1 - distance.jaccard(final_query, text)
        if similarity > .5:
            hits.append((similarity, indx))
    # list.sort is stable, so equally similar documents keep their order.
    hits.sort(key=lambda hit: hit[0], reverse=True)
    return [indx for _, indx in hits[:10]]
def main(event_sel=None):
    """Benchmark an LGBMClassifier on the training data for events 62, 63 and 60.

    ``event_sel`` is accepted but not used: the event selection passed to
    ``preprocess`` is hard-coded.  The features are cast to float before being
    handed to ``benchmark``, whose result is returned.
    """
    features, labels = preprocess(read_train(), event_sel=[62, 63, 60])
    from lightgbm import LGBMClassifier
    model = LGBMClassifier(
        verbose=1, random_state=1, silent=0, n_estimators=400)
    return benchmark(model, features.astype(float), labels)
def predict(input_path, output_path, resources_path):
    """Label the text in *input_path* with B/E/I/S tags and write them to *output_path*.

    :param input_path: file handed to ``preprocess``.
    :param output_path: file that receives one line of tags per predicted row.
    :param resources_path: path passed to ``load_model``.

    One line is written per row of the model's prediction.  Each element is
    mapped through its argmax (0 -> B, 1 -> E, 2 -> I, anything else -> S) and
    the line is cut to the matching entry of ``senlens`` to drop the padding
    added during preprocessing.
    """
    # The vocabularies used in the training phase are loaded from fixed
    # relative paths.
    # NOTE(review): pickle.load can execute code from the file; only use
    # vocabulary files from a trusted source.
    with open('../resources/vocab.pkl', 'rb') as f:
        vocab = pickle.load(f)
    with open('../resources/vocabu.pkl', 'rb') as f:
        vocabu = pickle.load(f)

    # Preprocess the whole input and save the gold labels to a file.  If the
    # input has no whitespace the gold file is meaningless and can be ignored.
    TInput, TLabel, TFullline, TFulllabelline, lens, senlens = preprocess(
        input_path)
    X_testu, X_testb, Y_test, vocabt, vocabut = buildvector(
        TInput, TLabel, TFullline, TFulllabelline, vocab, vocabu, lens)
    savegoldtofile(TLabel)

    # Initialise the model, then load weights and configuration from file.
    # NOTE(review): the freshly created model is immediately replaced by the
    # loaded one; the call is kept in case it has side effects.
    model = create_keras_model((len(vocab) + 1), (len(vocabu) + 1), 256)
    model = load_model(resources_path)

    prediction = model.predict([X_testu, X_testb])
    tags = ("B", "E", "I")
    # The context manager closes the output file even if a row fails.
    with open(output_path, "w+") as text_file:
        for count, row in enumerate(prediction):
            line = []
            for element in row:
                val = np.argmax(element)
                line.append(tags[val] if val < len(tags) else "S")
            # Cut off the padding added during preprocessing.
            text_file.write(''.join(line)[:senlens[count]] + '\n')
def QueryProcess(query):
    """Expand *query* and index it together with the documents of ``Data(path)``.

    The expanded query joins, with single spaces: the tokens of the
    preprocessed query, WordNet lemma names (without underscores) of its
    verbs, adjectives, nouns and proper nouns, its noun chunks, the
    lower-cased named entities of the raw query, and the raw query itself.

    Returns ``(final_query, indx, data)`` where ``indx`` is the result of
    ``indexLSA`` over the expanded query followed by the cleaned documents.
    """
    data = Data(path)
    cleaned_query = preprocess(query)
    parsed_clean = nlp(cleaned_query)
    parsed_raw = nlp(query)

    noun_phrases = [chunk.text for chunk in parsed_clean.noun_chunks]
    entities = [ent.text for ent in parsed_raw.ents]
    tokens = cleaned_query.split()

    content_tags = ('VERB', 'ADJ', 'NOUN', 'PROPN')
    keywords = [tok.text for tok in parsed_clean if tok.pos_ in content_tags]

    lemma_names = list({
        lemma.name()
        for word in keywords
        for synset in wordnet.synsets(word)
        for lemma in synset.lemmas()
    })
    synonyms = [name for name in lemma_names if '_' not in name]

    lowered_entities = [entity.lower() for entity in entities]
    new_text = tokens + synonyms + noun_phrases + lowered_entities + [query]
    final_query = ' '.join(new_text)

    cleaned_data = [''.join(preprocess(item) + item) for item in data]
    indx = indexLSA([final_query] + cleaned_data)  # or indexTFIDF(doc)
    return final_query, indx, data
def main(event_sel=None):
    """Benchmark an L2-penalised SGDClassifier on the training data.

    ``event_sel`` is forwarded to ``preprocess``; the result of ``benchmark``
    is returned.
    """
    features, labels = preprocess(read_train(), event_sel=event_sel)
    from sklearn.linear_model import SGDClassifier
    model = SGDClassifier(
        alpha=.0001, max_iter=50, tol=1e-3, penalty='l2', random_state=1)
    return benchmark(model, features, labels)
def main(event_sel=None):
    """Benchmark a LinearSVC (squared hinge loss, L2 penalty) on the training data.

    ``event_sel`` is forwarded to ``preprocess``; the result of ``benchmark``
    is returned.
    """
    features, labels = preprocess(read_train(), event_sel=event_sel)
    from sklearn.svm import LinearSVC
    model = LinearSVC(
        loss='squared_hinge',
        penalty='l2',
        dual=False,
        tol=1e-3,
        verbose=0,
        random_state=1)
    return benchmark(model, features, labels)
def main(event_sel=None):
    """Benchmark a LinearSVC on the 12000 best chi-squared features.

    ``event_sel`` is accepted but not used: events 62, 63 and 60 with n-grams
    ``(2, 4)`` are hard-coded.  Progress messages are printed around the
    feature selection; the result of ``benchmark`` is returned.
    """
    features, labels = preprocess(
        read_train(), event_sel=[62, 63, 60], ngrams=(2, 4))

    print('Extracting best features by a chi-squared test')
    from sklearn.feature_selection import SelectKBest, chi2
    selector = SelectKBest(chi2, k=12000)
    features = selector.fit_transform(features, labels)
    print('Extracting done, {}'.format(features.shape))

    from sklearn.svm import LinearSVC
    model = LinearSVC(
        loss='squared_hinge',
        penalty='l2',
        dual=False,
        tol=1e-3,
        verbose=0,
        random_state=1)
    return benchmark(model, features, labels)
def summarizeDocuments(documents, stopwords, useTfIdfSimilarity,
                       useSentimentSimilarity, useWordModel, usePageRank,
                       useAggregateClustering, length, anaphoraResolution,
                       alphaValueForPagerank, alphaValueForMMR, debugMode):
    """Summarise *documents* with PageRank or clustering over an aggregate similarity matrix.

    The aggregate matrix combines the selected similarity matrices (sentiment,
    word model, tf-idf cosine).  The tf-idf cosine matrix is also used when no
    other matrix was selected.  Returns the output of the chosen summariser.
    """
    documents = preprocess(documents, femaleNamesFileName, maleNamesFileName,
                           anaphoraResolution, debugMode)
    sentenceSimilarities = getTfIdfValues(documents, stopwords)
    all_sentences = [
        sentence for document in documents for sentence in document
    ]

    matrices = []
    if useSentimentSimilarity:
        positiveWords = getPositiveWords(positiveSentimentFileName)
        negativeWords = getnegativeWords(negativeSentimentFileName)
        pos, neg = analyzeSentiment(all_sentences, positiveWords,
                                    negativeWords, debugMode)
        matrices += [pos, neg]
    if useWordModel:
        matrices.append(getWordToVectorMatrix(all_sentences, debugMode))
    if useTfIdfSimilarity or not matrices:
        matrices.append(sentenceSimilarities["tfidf_cosine"])

    # The selected matrices are combined into one aggregate matrix.
    aggregateSimilartyMatrix = calcAggregateSimMatrix(matrices)

    # Choose between the PageRank and the clustering summariser.
    if usePageRank:
        return usePageRankImplementation(documents, aggregateSimilartyMatrix,
                                         length, alphaValueForPagerank,
                                         alphaValueForMMR, debugMode)
    return useClusteringAlgorithm(documents, aggregateSimilartyMatrix, length,
                                  sentenceSimilarities,
                                  useAggregateClustering)
def main(): init_time = int(time()) parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter,usage=splash.replace(" ","",1)+__useage___,add_help=False) inArgs,genoArgs,optArgs = parser.add_argument_group('input arguments'),parser.add_argument_group('genotype arguments'),parser.add_argument_group('optional arguments') inArgs.add_argument('-i','-bam',type=str,default=None,nargs='*') inArgs.add_argument('-b','-bed',type=str,default=None,nargs='*') inArgs.add_argument('-v','-vcf',type=str,default=None,nargs='*') inArgs.add_argument('-snv',type=str,default=None,nargs='*') inArgs.add_argument('-p','-ped',type=str,default=None,nargs='*') genoArgs.add_argument('-g','-genome',required=False,default='hg19',type=str) genoArgs.add_argument('-pcrfree',required=False,default=False,action="store_true") genoArgs.add_argument('-M',default=False,required=False,action="store_true") genoArgs.add_argument('-pre',required=False,default=None) genoArgs.add_argument('-feats',required=False,default=None) optArgs.add_argument('-L','-log',default=None,required=False) optArgs.add_argument('-T','-tmp-dir',default=os.getcwd()+'/sv2_tmp_'+rand_id(),required=False) optArgs.add_argument('-s','-seed',required=False,default=42,type=int) optArgs.add_argument('-o','-out',required=False,default="sv2_training_features",type=str) optArgs.add_argument('-O','-odir',required=False,default=os.getcwd(),type=str) optArgs.add_argument('-h','-help',required=False,action="store_true",default=False) args = parser.parse_args() bams,bed,vcf,snv,ped = args.i,args.b,args.v,args.snv,args.p gen,pcrfree,legacy_m,predir,featsdir= args.g,args.pcrfree,args.M,args.pre,args.feats logfh, tmp_dir, seed, ofh, odir = args.L,args.T,args.s,args.o,args.O _help = args.h if (_help==True or len(sys.argv)==1): print splash+__useage___ sys.exit(0) if logfh!=None: lfh = open(logfh,'w') sys.stderr=lfh preprocess_files,feats_files={},{} gens = ['hg19','hg38','mm10'] olog = logfh if olog == None: olog = 'STDOUT' print 'sv2 
version:{} report bugs to <dantaki at ucsd dot edu> error messages located in {}'.format(__version__,olog) Confs=Config() if bams==None and predir==None and featsdir==None: print 'FATAL ERROR: No BAM file specified <-i, -bam FILE ...>' sys.stderr.write('FATAL ERROR: No BAM file specified <-i, -bam FILE ...>\n') sys.exit(1) if snv==None and predir==None and featsdir==None: print 'FATAL ERROR: No SNV VCF file specified <-snv FILE ...>' sys.stderr.write('FATAL ERROR: No SNV VCF file specified <-snv FILE ...>\n') sys.exit(1) if ped==None: print 'FATAL ERROR: No PED file specified <-p, -ped FILE ...>' sys.stderr.write('FATAL ERROR: No PED file specified <-p, -ped FILE ...>\n') sys.exit(1) if bed==None and vcf==None: print 'FATAL ERROR: No SVs provided <-b, -bed BED ...> <-v,-vcf VCF ...>' sys.stderr.write('FATAL ERROR: No SVs provided <-b, -bed BED ...> <-v,-vcf VCF ...>\n') sys.exit(1) if gen not in gens: print 'FATAL ERROR -g must be hg19 or hg38. NOT {}'.format(gen) sys.stderr.write('FATAL ERROR -g must be hg19 or hg38. 
NOT {}\n'.format(gen)) sys.exit(1) Peds=ped_init(ped) if bams!=None: Bams=bam_init(bams,Peds,snv_init(snv),gen) SV = sv_init(bed,vcf,gen) ofh = ofh.replace('.vcf','').replace('.out','').replace('.txt','') make_dir(tmp_dir) tmp_dir=slash_check(tmp_dir) if not odir.endswith('/'): odir = odir+'/' make_dir(odir) """ PREPROCESSING """ if predir == None: outdir = odir+'sv2_preprocessing/' make_dir(outdir) for bam in Bams: preofh = outdir+bam.id+'_sv2_preprocessing.txt' preprocess_files[bam.id]=preofh preprocess(bam,preofh,seed,gen,tmp_dir) else: predir=slash_check(predir) for fh in glob(predir+'*sv2_preprocessing.txt'): f = open(fh) if sum(1 for l in open(fh)) <= 1: continue else: preids=[] for l in f: if l.startswith('#'):continue preids.append(l.rstrip('\n').split('\t').pop(0)) f.close() for iid in set(preids): if iid in Peds.ids : preprocess_files[iid]=fh report_time(init_time,'PREPROCESSING COMPLETE') """" FEATURE EXTRACTION """ if featsdir == None: outdir = odir+'sv2_features/' make_dir(outdir) for bam in Bams: if preprocess_files.get(bam.id) == None: sys.stderr.write('WARNING: BAM sample id {} not found in preprocessing files. 
Skipping ...\n'.format(bam.id)) continue prefh = preprocess_files[bam.id] featfh = outdir+bam.id+'_sv2_features.txt' feats_files[bam.id]=featfh extract_feats(bam,SV.raw,prefh,featfh,gen,pcrfree,legacy_m,Confs,tmp_dir) else: featsdir=slash_check(featsdir) for fh in glob(featsdir+'*sv2_features.txt'): f = open(fh) if sum(1 for l in open(fh)) <= 1: continue else: featsid=[] for l in f: if l.startswith('#'):continue featsid.append(l.rstrip('\n').split('\t').pop(5)) f.close() for iid in set(featsid): if iid in Peds.ids : feats_files[iid]=fh feats=[] train_dir = odir+'sv2_training_features/' make_dir(train_dir) for iid in feats_files: with open(feats_files[iid]) as f: for l in f: feats.append(tuple(l.rstrip('\n').split('\t'))) sv2_train_output(feats,Peds,gen,train_dir+ofh) shutil.rmtree(tmp_dir) lfh.close() report_time(init_time,'FEATURE EXTRACTION COMPLETE')
def train(args):
    """Train the encoder/decoder pair configured by the *args* mapping.

    Runs ``preprocess`` to produce the vocabulary and data files, builds the
    data loader and both models on CUDA, and optimises the decoder together
    with the encoder's ``linear`` and ``bn`` parameters using Adam.
    Checkpoints are written to ``args['model_path']`` every ``save_step``
    steps and again at the end of each epoch.
    """
    # Data preprocessing: generate the vocab and data files.
    preprocess(args['cap_path'], args['vocab_path'], args['data_path'])
    model_dir = args['model_path']
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)

    # Image processing, including data augmentation.
    image_size = (args['resize'], args['resize'])
    transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    with open(args['vocab_path'], 'rb') as f:
        vocab = pickle.load(f)
    with open(args['data_path'], 'rb') as f:
        Data = pickle.load(f)

    data_loader = get_loader(args['train_img_path'],
                             Data,
                             vocab,
                             transform,
                             args['batch_size'],
                             shuffle=True,
                             num_workers=args['num_workers'])
    encoder = Encoder(args['embed_size'], args['pooling_kernel']).cuda()
    decoder = Decoder(args['embed_size'], args['hidden_size'], len(vocab),
                      args['num_layers']).cuda()
    criterion = nn.CrossEntropyLoss().cuda()

    trainable = list(decoder.parameters())
    trainable += list(encoder.linear.parameters())
    trainable += list(encoder.bn.parameters())
    optimizer = torch.optim.Adam(trainable, lr=args['learning_rate'])
    total_step = len(data_loader)

    def save_checkpoint(epoch_no, step_no):
        """Save the decoder, then the encoder, state for this epoch/step."""
        for pattern, module in (('decoder-{}-{}.ckpt', decoder),
                                ('encoder-{}-{}.ckpt', encoder)):
            torch.save(
                module.state_dict(),
                os.path.join(model_dir, pattern.format(epoch_no, step_no)))

    for epoch in range(args['num_epochs']):
        for i, (images, captions, lengths) in enumerate(data_loader):
            images = images.cuda()
            captions = captions.cuda()
            targets = pack_padded_sequence(captions,
                                           lengths,
                                           batch_first=True)[0]
            features = encoder(images)
            outputs = decoder(features, captions, lengths)
            loss = criterion(outputs, targets)

            decoder.zero_grad()
            encoder.zero_grad()
            loss.backward()
            optimizer.step()

            # Print training information.
            if i % args['log_step'] == 0:
                loss_value = loss.item()
                print(
                    'Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Perplexity: {:5.4f}'
                    .format(epoch, args['num_epochs'], i, total_step,
                            loss_value, np.exp(loss_value)))

            # Save the model.
            if (i + 1) % args['save_step'] == 0:
                save_checkpoint(epoch + 1, i + 1)

        # Also save the model once at the end of every epoch.
        save_checkpoint(epoch + 1, i + 1)
def detectPlatesInScene(imgOriginalScene, PreprocessGaussKernel, PreprocessThreshBlockSize, PreprocessThreshweight, PreprocessMorphKernel,
                        PlateWidthPaddingFactor, PlateHeightPaddingFactor, MinPixelWidth, MaxPixelWidth, MinPixelHeight, MaxPixelHeight,
                        MinAspectRatio, MaxAspectRatio, MinPixelArea, MaxPixelArea, MaxDiagSizeMultipleAway, MinNumberOfMatchingChars,
                        MaxNumberOfMatchingChars, MinAngleBetweenChars, MaxAngleBetweenChars, MinChangeInArea, MaxChangeInArea,
                        MinChangeInWidth, MaxChangeInWidth, MinChangeInHeight, MaxChangeInHeight, debugMode):
    """
    License Plate Detection in a given input image scene, using geometrical analysis techniques.

    The scene is preprocessed into a grayscale and a threshold image, possible
    characters are located in the threshold image and grouped into lists of
    matching characters, and one plate extraction is attempted per group.

    The ``Preprocess*`` arguments are forwarded to ``preprocess``, the pixel,
    aspect-ratio and area limits to ``findPossibleCharsInScene``, the matching
    limits to ``findListOfListsOfMatchingChars`` and the padding factors to
    ``extractPlate``.

    Returns the list of extracted plates whose ``imgPlate`` is not None.
    When ``debugMode`` is truthy, intermediate images are additionally written
    as JPEG files under relative file names.
    """
    # Pre-processing (CSC --> contrast --> blur --> threshold):
    imgGrayscaleScene, imgThreshScene = preprocess(imgOriginalScene, PreprocessGaussKernel, PreprocessThreshBlockSize, PreprocessThreshweight, PreprocessMorphKernel)
    # Find all possible characters in the scene (finds all contours that could be characters, w/o OCR yet):
    listOfPossibleCharsInScene = findPossibleCharsInScene(imgThreshScene, MinPixelWidth, MaxPixelWidth, MinPixelHeight, MaxPixelHeight, MinAspectRatio, MaxAspectRatio, MinPixelArea, MaxPixelArea, debugMode)
    # Given a list of all possible chars, find groups of matching characters (later on, each group will attempt to be recognized as a plate):
    listOfListsOfMatchingCharsInScene = findListOfListsOfMatchingChars(listOfPossibleCharsInScene, MinNumberOfMatchingChars, MaxNumberOfMatchingChars, MinAngleBetweenChars, MaxAngleBetweenChars, MinChangeInArea, MaxChangeInArea, MinChangeInWidth, MaxChangeInWidth, MinChangeInHeight, MaxChangeInHeight, MaxDiagSizeMultipleAway)
    # For each group of matching chars, attempt to extract plate:
    listOfPossiblePlates = []
    for listOfMatchingChars in listOfListsOfMatchingCharsInScene:
        possiblePlate = extractPlate(imgOriginalScene, listOfMatchingChars, PlateWidthPaddingFactor, PlateHeightPaddingFactor)
        # Add plate to list of possible plates (if found):
        if possiblePlate.imgPlate is not None:
            listOfPossiblePlates.append(possiblePlate)
    info("%d possible plates found" % len(listOfPossiblePlates))
    # -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- ..
    if debugMode:
        height, width, _ = imgOriginalScene.shape
        # Original image:
        imwrite("img_original.jpg", imgOriginalScene)
        # Pre-processing images:
        imwrite("img_gray.jpg", imgGrayscaleScene)
        imwrite("img_threshold.jpg", imgThreshScene)
        # Possible characters in image (all contours drawn in white on a blank canvas):
        imgContours = zeros((height, width, 3), uint8)
        contours = []
        for possibleChar in listOfPossibleCharsInScene:
            contours.append(possibleChar.contour)
        drawContours(imgContours, contours, -1, Colors.white)
        imwrite("img_contours_possible_chars.jpg", imgContours)
        # Matching characters (each group drawn in its own random colour):
        imgContours = zeros((height, width, 3), uint8)
        for listOfMatchingChars in listOfListsOfMatchingCharsInScene:
            intRandomBlue = randint(0, 255)
            intRandomGreen = randint(0, 255)
            intRandomRed = randint(0, 255)
            contours = []
            for matchingChar in listOfMatchingChars:
                contours.append(matchingChar.contour)
            drawContours(imgContours, contours, -1, (intRandomBlue, intRandomGreen, intRandomRed))
        imwrite("img_contours_matching_chars.jpg", imgContours)
        # Possible license-plates (the rectangles accumulate on the matching-chars canvas):
        for i in range(0, len(listOfPossiblePlates)):
            p2fRectPoints = boxPoints(listOfPossiblePlates[i].rrLocationOfPlateInScene)
            line(imgContours, tuple(p2fRectPoints[0]), tuple(p2fRectPoints[1]), Colors.red, 2)
            line(imgContours, tuple(p2fRectPoints[1]), tuple(p2fRectPoints[2]), Colors.red, 2)
            line(imgContours, tuple(p2fRectPoints[2]), tuple(p2fRectPoints[3]), Colors.red, 2)
            line(imgContours, tuple(p2fRectPoints[3]), tuple(p2fRectPoints[0]), Colors.red, 2)
            imwrite("img_contours_possible_plates_%d.jpg" % i, imgContours)
            imwrite("img_plate_%d.jpg" % i, listOfPossiblePlates[i].imgPlate)
        # NOTE(review): assumed to be emitted in debug mode only — confirm the intended nesting.
        debug("Plate detection complete", True)
    return listOfPossiblePlates
# Command-line options. add_help=False disables the automatic -h/--help
# option, which leaves -h available for --img_h.
ap = argparse.ArgumentParser(add_help=False)
ap.add_argument('-c', '--content', required=True)
ap.add_argument('-s', '--style', required=True)
ap.add_argument('-a', '--alpha', default=1e-3)
ap.add_argument('-b', '--beta', default=1.0)
ap.add_argument('-e', '--steps', default=300)
ap.add_argument('-h', '--img_h', default=512)
ap.add_argument('-w', '--img_w', default=512)
ap.add_argument('-o', '--output', default='./outputs/')
ap.add_argument('-d', '--display', default=False)
ap.add_argument('-n', '--name', required=True)
args = vars(ap.parse_args())
# Use CUDA when available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# NOTE(review): --img_h / --img_w declare no type=, so values given on the
# command line arrive as strings while the defaults are ints — confirm that
# preprocess handles both.
content, style = preprocess(args['content'], args['style'], args['img_h'], args['img_w'])
content = content.to(device)
style = style.to(device)
neural_style = Neural_Style(content, style)
neural_style.to(device)
# steps, alpha and beta are converted explicitly because command-line values
# are strings.
steps = int(args['steps'])
LBFGS = torch.optim.LBFGS([neural_style.target])
alpha = float(args['alpha'])
beta = float(args['beta'])
i = 0
while i <= steps:
def reset_one(self, i):
    """Reset environment *i* and return its stacked initial observation.

    The preprocessed reset frame is repeated ``group_size`` times along the
    last axis.
    """
    frame = preprocess(self.env[i].reset())
    stacked = np.concatenate([frame] * self.group_size, axis=-1)
    return np.array(stacked)
# Process every input image in its own, freshly created output directory.
for image_name in files:
    start_time = time.time()
    # Derive the output directory from the image file name without extension.
    _, image = os.path.split(image_name)
    image_name_base = os.path.splitext(image)[0]
    output_image_directory = os.path.abspath(
        os.path.join(output_directory, image_name_base))
    # Remove the directory first, then make sure it exists again.
    remove_directory(output_image_directory)
    ensure_directory(output_image_directory)
    # Preprocess image
    print("")
    print("Processing " + image)
    preprocessed_image = preprocess(image_name, output_image_directory, runmode=runmode)
    print("Finished preprocessing " + image)
    print(" **** ")
    # Segment preprocessed image
    print("Segmenting " + image)
    words_li_li = segment(preprocessed_image, output_image_directory, runmode=runmode)
    print("Finished segmenting " + image)
    print(" **** ")
    # Classify segmented image
def detectCharsInPlates(listOfPossiblePlates, PreprocessGaussKernel, PreprocessThreshBlockSize, PreprocessThreshweight, PreprocessMorphKernel,
                        MinPixelWidth, MaxPixelWidth, MinPixelHeight, MaxPixelHeight, MinAspectRatio, MaxAspectRatio, MinPixelArea, MaxPixelArea,
                        MinDiagSizeMultipleAway, MaxDiagSizeMultipleAway, MinNumberOfMatchingChars, MaxNumberOfMatchingChars,
                        MinAngleBetweenChars, MaxAngleBetweenChars, MinChangeInArea, MaxChangeInArea, MinChangeInWidth, MaxChangeInWidth,
                        MinChangeInHeight, MaxChangeInHeight, ResizedCharImageWidth, ResizedCharImageHeight, kNearest, DebugMode):
    """
    Detect characters in the pre-detected plate (OCR analysis, over KNN engine).

    For every plate in ``listOfPossiblePlates`` the plate image is
    preprocessed, enlarged by a factor of 1.6 and thresholded again; possible
    characters are located and grouped, each group is sorted left to right
    with inner overlapping characters removed, and the longest group is passed
    to ``recognizeCharsInPlate``.  The recognised text is stored in
    ``possiblePlate.strChars`` (an empty string when no group was found);
    ``imgGrayscale`` and ``imgThresh`` are set on each plate as well.

    Returns the same list that was passed in.  When ``DebugMode`` is truthy,
    intermediate images are additionally written as JPEG files under relative
    file names.
    """
    # Early break condition (empty input):
    if len(listOfPossiblePlates) == 0:
        return listOfPossiblePlates
    # For each possible plate --> preprocess, find all characters, try to group them, remove overlaps and perform OCR:
    intPlateCounter = 0
    longestListOfMatchingCharsInPlate = []
    for possiblePlate in listOfPossiblePlates:
        # Pre-processing (CSC --> contrast --> blur --> threshold):
        possiblePlate.imgGrayscale, imgThreshScene = preprocess(possiblePlate.imgPlate, PreprocessGaussKernel, PreprocessThreshBlockSize, PreprocessThreshweight, PreprocessMorphKernel)
        # Increase size of plate image for easier viewing and char detection
        possiblePlate.imgThresh = resize(imgThreshScene, (0, 0), fx=1.6, fy=1.6)
        # Threshold again to eliminate any gray areas:
        _, possiblePlate.imgThresh = threshold(possiblePlate.imgThresh, 0.0, 255.0, THRESH_BINARY | THRESH_OTSU)
        # Find all possible chars in the plate (finds all contours that could be chars):
        listOfPossibleCharsInPlate = findPossibleCharsInPlate(possiblePlate.imgThresh, MinPixelWidth, MaxPixelWidth, MinPixelHeight, MaxPixelHeight, MinAspectRatio, MaxAspectRatio, MinPixelArea, MaxPixelArea)
        # Given a list of all possible chars, find groups of matching chars within the plate:
        listOfListsOfMatchingCharsInPlate = findListOfListsOfMatchingChars(listOfPossibleCharsInPlate, MinNumberOfMatchingChars, MaxNumberOfMatchingChars, MinAngleBetweenChars, MaxAngleBetweenChars, MinChangeInArea, MaxChangeInArea, MinChangeInWidth, MaxChangeInWidth, MinChangeInHeight, MaxChangeInHeight, MaxDiagSizeMultipleAway)
        # If groups of matching chars were found in the plate:
        if len(listOfListsOfMatchingCharsInPlate) > 0:
            # Within each list of matching chars, sort chars from left to right and remove inner overlapping chars:
            for i in range(0, len(listOfListsOfMatchingCharsInPlate)):
                listOfListsOfMatchingCharsInPlate[i].sort(key=lambda tmpMatchingChar: tmpMatchingChar.intCenterX)
                listOfListsOfMatchingCharsInPlate[i] = removeInnerOverlappingChars(listOfListsOfMatchingCharsInPlate[i], MinDiagSizeMultipleAway)
            # Within each possible plate, loop through all the vectors of matching chars, get the index of the one with the most chars:
            # (the strict '>' keeps the first group among equally long ones)
            intLenOfLongestListOfChars = 0
            intIndexOfLongestListOfChars = 0
            for i in range(0, len(listOfListsOfMatchingCharsInPlate)):
                if len(listOfListsOfMatchingCharsInPlate[i]) > intLenOfLongestListOfChars:
                    intLenOfLongestListOfChars = len(listOfListsOfMatchingCharsInPlate[i])
                    intIndexOfLongestListOfChars = i
            # Suppose that the longest list of matching chars within the plate is the actual list of chars:
            longestListOfMatchingCharsInPlate = listOfListsOfMatchingCharsInPlate[intIndexOfLongestListOfChars]
            # Characters recognition (OCR):
            possiblePlate.strChars = recognizeCharsInPlate(possiblePlate.imgThresh, longestListOfMatchingCharsInPlate, ResizedCharImageWidth, ResizedCharImageHeight, kNearest, intPlateCounter, DebugMode)
        # -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- ..
        if DebugMode:
            height, width, _ = possiblePlate.imgPlate.shape
            # One contour list and one blank canvas per debug image.
            contours1 = []; imgContours1 = zeros((height, width, 3), uint8)
            contours2 = []; imgContours2 = zeros((height, width, 3), uint8)
            contours3 = []; imgContours3 = zeros((height, width, 3), uint8)
            contours4 = []; imgContours4 = zeros((height, width, 3), uint8)
            # Image 1: every possible character, in white.
            for possibleChar in listOfPossibleCharsInPlate:
                contours1.append(possibleChar.contour)
            drawContours(imgContours1, contours1, -1, Colors.white)
            if len(listOfListsOfMatchingCharsInPlate) > 0:
                # Images 2 and 3: the matching groups in random colours.
                # NOTE(review): contours2 / contours3 are not cleared between
                # groups, so each draw call also repaints the earlier groups — confirm intended.
                for listOfMatchingChars in listOfListsOfMatchingCharsInPlate:
                    intRandomBlue = randint(0, 255)
                    intRandomGreen = randint(0, 255)
                    intRandomRed = randint(0, 255)
                    for matchingChar in listOfMatchingChars:
                        contours2.append(matchingChar.contour)
                    drawContours(imgContours2, contours2, -1, (intRandomBlue, intRandomGreen, intRandomRed))
                for listOfMatchingChars in listOfListsOfMatchingCharsInPlate:
                    intRandomBlue = randint(0, 255)
                    intRandomGreen = randint(0, 255)
                    intRandomRed = randint(0, 255)
                    for matchingChar in listOfMatchingChars:
                        contours3.append(matchingChar.contour)
                    drawContours(imgContours3, contours3, -1, (intRandomBlue, intRandomGreen, intRandomRed))
                # Image 4: the longest group only, in white.
                for matchingChar in longestListOfMatchingCharsInPlate:
                    contours4.append(matchingChar.contour)
                drawContours(imgContours4, contours4, -1, Colors.white)
            imwrite("img_possible_plate_%d.jpg" % intPlateCounter, possiblePlate.imgPlate)
            imwrite("img_possible_plate_gray_%d.jpg" % intPlateCounter, possiblePlate.imgGrayscale)
            imwrite("img_possible_plate_threshold_scene_%d.jpg" % intPlateCounter, imgThreshScene)
            imwrite("img_possible_plate_threshold_%d.jpg" % intPlateCounter, possiblePlate.imgThresh)
            imwrite("img_possible_plate_contours1_%d.jpg" % intPlateCounter, imgContours1)
            if len(listOfListsOfMatchingCharsInPlate) > 0:
                imwrite("img_possible_plate_contours2_%d.jpg" % intPlateCounter, imgContours2)
                imwrite("img_possible_plate_contours3_%d.jpg" % intPlateCounter, imgContours3)
                imwrite("img_possible_plate_contours4_%d.jpg" % intPlateCounter, imgContours4)
            # NOTE(review): the plate counter is assumed to advance only inside
            # this debug block — confirm the intended nesting.
            if len(listOfListsOfMatchingCharsInPlate) > 0:
                debug("Characters found in plate number #%d = %s" % (intPlateCounter, possiblePlate.strChars), True)
                intPlateCounter = intPlateCounter + 1
            else:
                debug("Characters found in plate number #%d = (none)" % intPlateCounter, True)
                intPlateCounter = intPlateCounter + 1
        # -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- .. -- ..
        # If no groups of matching chars were found in the plate, continue for next plate candidate:
        if len(listOfListsOfMatchingCharsInPlate) == 0:
            possiblePlate.strChars = ""
            continue
    if DebugMode:
        debug("Characters detection complete", True)
    return listOfPossiblePlates
def preprocess(self):
    """Run the preprocessing phase and store its ten results on the instance.

    Delegates to the module-level ``preprocess`` function (boundary finding,
    reordering and equation calculation, per the original description) and
    marks the instance as preprocessed before the call is made.
    """
    self._preprocessRan = True
    results = preprocess(self._partition_list, self._nparts, self._n, self._E)
    (self._node_reorder2,
     self._reorder_E,
     self._L_inv,
     self._U_inv,
     self._L_k_inv,
     self._U_k_inv,
     self._boundary_start_number,
     self._index_start,
     self._T1,
     self._T2) = results