def main():
    """Report top-1/2/3 accuracy of the spell checker on a random sample.

    Samples ``opts.n`` rows from ``data/testdata.txt`` (space-separated, first
    column the correct spelling, second column the misspelling), asks the
    checker for ``opts.topk`` suggestions per misspelling and prints the
    fraction of sampled rows whose correct spelling is among the first one,
    two and three suggestions.
    """
    opts = get_parser().parse_args()
    checker = SpellChecker(Aligner(opts.sigma, opts.bayes))

    data = pd.read_csv('data/testdata.txt', sep=" ", header=None)
    data.columns = ["correct", "mis"]
    subdata = data.sample(n=opts.n)

    # hits[k] counts rows whose correct word is within the first k + 1
    # suggestions (k = 0, 1, 2).
    hits = [0, 0, 0]
    allpairs = 0
    for _, row in tqdm(subdata.iterrows()):
        allpairs += 1
        fs = checker.give_suggestions(row["mis"], opts.topk)
        if fs is None:
            continue
        # Only the three best suggestions are scored, however many were
        # requested via topk; an empty suggestion list simply scores no hit.
        ranked = [suggestion[0] for suggestion in fs][:len(hits)]
        expected = row["correct"].strip()
        if expected in ranked:
            # A hit at rank r also counts for every looser cut-off.
            for k in range(ranked.index(expected), len(hits)):
                hits[k] += 1

    if allpairs == 0:
        # Avoid dividing by zero when the sample is empty.
        print("No test pairs were sampled; nothing to evaluate.")
        return

    print("Top 1 precision: ", hits[0] / allpairs)
    print("Top 2 precision: ", hits[1] / allpairs)
    print("Top 3 precision: ", hits[2] / allpairs)
    print("-----------------------------------------")
def read_file(self, file):
    """Yield ``(source, action, *extras)`` records for ``file``, caching them.

    On the first request for ``file`` every item produced by the parent
    ``read_file`` is split into source, target and any remaining fields.
    Source and target are wrapped in ``BOS``/``EOS``, all pairs are aligned
    with ``Aligner`` and each aligned pair is converted into an action
    sequence by ``self.gen_act``. The finished records are stored in
    ``self.data[file]``; later calls yield straight from that cache.
    """
    if file not in self.data:
        wrapped_pairs = []
        extras = []
        # Keep the zero-argument super() call in the method body itself.
        for entry in super().read_file(file):
            assert len(entry) >= 2
            wrapped_pairs.append(
                ([BOS] + entry[0] + [EOS], [BOS] + entry[1] + [EOS]))
            extras.append(entry[2:])

        align = Aligner(wrapped_pairs, align_symbol=ALIGN)
        assert len(wrapped_pairs) == len(extras) == len(align.alignedpairs)

        records = []
        for position, wrapped in enumerate(wrapped_pairs):
            aligned = align.alignedpairs[position]
            action = self.gen_act(*aligned)
            step_cnt = sum(int(symbol == STEP) for symbol in action)
            # The number of STEP actions plus one must equal the length of
            # the BOS/EOS-wrapped source sequence.
            assert step_cnt + 1 == len(
                wrapped[0]), "step cnt {}\n{}\n{}\n{}".format(
                    step_cnt, wrapped, action, aligned)
            records.append((wrapped[0], action, *extras[position]))
        self.data[file] = records

    yield from self.data[file]
set(vowelswaps(word)) & real_words or # vowels "weke" => "wake" set(variants(word)) & real_words or # other "nonster" => "monster" set(both(word)) & real_words or # both "CUNsperrICY" => "conspiracy" set(double_variants(word)) & real_words or # other "nmnster" => "manster" {}) else: return ({word} & real_words or (set(reductions(word)) | set(vowelswaps(word)) | set(variants(word)) | set(both(word)) | set(double_variants(word))) & real_words or {}) def give_suggestions(self, word: str, topk=3): sug = self.suggestions(word) if sug: return self.aligner.final_suggestions(word, sug, topk=topk) else: return None if __name__ == "__main__": from align import Aligner sc = SpellChecker(Aligner()) while True: word = str(input('>')) fs = sc.give_suggestions(word, topk=10) print(fs)
default=1, type=int, metavar='n', dest='n_jobs', help='Set num threads to use (default: 1)') args = parser.parse_args() if len(sys.argv) == 1: parser.print_help() sys.exit(1) # Determine ltfs to process. if not args.scpf is None: with open(args.scpf, 'r') as f: args.ltfs = [l.strip() for l in f.readlines()] # Initialize chunker, aligner, and encoder. chunker = BILOUChunkEncoder() aligner = Aligner() encf = os.path.join(args.model_dir, 'tagger.enc') with open(encf, 'r') as f: enc = cPickle.load(f) # Perform tagging in parallel, dumping results to args.tagged_dir. n_jobs = min(len(args.ltfs), args.n_jobs) modelf = os.path.join(args.model_dir, 'tagger.crf') f = delayed(tag_file) Parallel(n_jobs=n_jobs, verbose=0)( f(ltf, aligner, enc, chunker, modelf, args.tagged_dir, args.ext) for ltf in args.ltfs)
# Collect one (source, target, translation) job per source document found in
# `directory`. Every file of a triple must exist before any alignment starts.
jobs = []
for source_document in [
        d for d in os.listdir(directory) if d.endswith('.' + source_suffix)
]:
    source_document = os.path.join(directory, source_document)
    # Slice with an explicit end index: `[:-len(suffix)]` would produce an
    # empty string if the suffix were empty.
    stem = source_document[:len(source_document) - len(source_suffix)]
    target_document = stem + target_suffix
    translation_document = stem + translation_suffix

    # Sanity checks
    for f in source_document, target_document, translation_document:
        if not os.path.isfile(f):
            sys.stderr.write(
                'ERROR: File {0} expected, but not found\n'.format(f))
            # Exit with a non-zero status so callers can detect the failure.
            sys.exit(1)

    jobs.append((source_document, target_document, translation_document))

# Run the aligner once per job, reusing the shared `options` dict.
for (source_document, target_document, translation_document) in jobs:
    options['srcfile'] = source_document
    options['targetfile'] = target_document
    options['srctotarget'] = [translation_document]
    options['output-src'] = source_document + '.aligned'
    options['output-target'] = target_document + '.aligned'
    a = Aligner(options)
    a.mainloop()
from align import Aligner

if __name__ == "__main__":
    # Align every input pair and write (aligned first, aligned second, score)
    # triples through the aligner's own utilities.
    aligner = Aligner()
    outputs = []
    for pair in aligner.utils.get_pairs():
        score, pointers = aligner.align(pair[0], pair[1])
        aligned = aligner.utils.reconstruct_ptr(pair, pointers)
        outputs.append((aligned[0], aligned[1], score))
    aligner.utils.write_output(outputs)
options['verbosity'] = 1
options['printempty'] = False
options['output'] = None

# Collect one (source, target, translation) job per source document found in
# `directory`. Every file of a triple must exist before any alignment starts.
jobs = []
for source_document in [d for d in os.listdir(directory) if d.endswith('.' + source_suffix)]:
    source_document = os.path.join(directory, source_document)
    # Slice with an explicit end index: `[:-len(suffix)]` would produce an
    # empty string if the suffix were empty.
    stem = source_document[:len(source_document) - len(source_suffix)]
    target_document = stem + target_suffix
    translation_document = stem + translation_suffix

    # Sanity checks
    for f in source_document, target_document, translation_document:
        if not os.path.isfile(f):
            sys.stderr.write('ERROR: File {0} expected, but not found\n'.format(f))
            # Exit with a non-zero status so callers can detect the failure.
            sys.exit(1)

    jobs.append((source_document, target_document, translation_document))

# Run the aligner once per job, reusing the shared `options` dict.
for (source_document, target_document, translation_document) in jobs:
    options['srcfile'] = source_document
    options['targetfile'] = target_document
    options['srctotarget'] = [translation_document]
    options['output-src'] = source_document + '.aligned'
    options['output-target'] = target_document + '.aligned'
    a = Aligner(options)
    a.mainloop()
def __init__(self):
    """Parse command-line options, build the aligner and load the Caffe net.

    Options are read from ``sys.argv`` with argparse. An ``Aligner`` is
    created that reads from and writes to the ``--train`` directory. The
    network is built from the last ``*.deploy.prototxt`` (plain sorted order)
    and the last ``*.caffemodel`` (as ordered by ``human_numeric_sort``)
    found in the ``--model`` directory, and an input transformer is
    configured for the net's ``data`` blob.

    Raises:
        IndexError: if no ``*.deploy.prototxt`` file exists in the model
            directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--caffe',
                        type=str,
                        default=os.path.expanduser("~/caffe"),
                        help='--caffe ~/caffe')
    parser.add_argument(
        '--gpu',
        type=int,
        help='--gpu 0 . If --gpu not specified, then cpu will be used')
    parser.add_argument('--model',
                        type=str,
                        default='./',
                        help='--model /root/model/')
    parser.add_argument('--train',
                        type=str,
                        default='train',
                        help='--train /root/train')
    parser.add_argument('--descriptorLayer',
                        type=str,
                        default='embed',
                        help='--descriptorLayer embed')
    # NOTE(review): default='true' is truthy, so args.verbose is always set
    # whether or not the flag is passed -- confirm if default=False was meant.
    parser.add_argument('--verbose', default='true', action='store_true')
    self.args = parser.parse_args()

    print('Train folder:', self.args.train)

    # The aligner reads from and writes to the training folder.
    alignerArgs = properties()
    alignerArgs.inputDir = self.args.train
    alignerArgs.outputDir = self.args.train
    alignerArgs.dlibFacePredictor = 'shape_predictor_68_face_landmarks.dat'
    alignerArgs.align = ''
    alignerArgs.landmarks = 'outerEyesAndNose'
    alignerArgs.size = 128
    alignerArgs.skipMulti = False
    alignerArgs.verbose = False
    alignerArgs.fallbackLfw = False
    self.aligner = Aligner(alignerArgs)

    # Compare against None: device id 0 is a valid GPU but is falsy.
    if self.args.gpu is not None:
        caffe.set_mode_gpu()
        caffe.set_device(self.args.gpu)
    else:
        caffe.set_mode_cpu()

    # Load the net, list its data and params.
    deployProtoPath = sorted(
        glob.glob(os.path.join(self.args.model, '*.deploy.prototxt')))[-1]
    if self.args.verbose:
        print(deployProtoPath)
    caffeModelPath = human_numeric_sort(
        glob.glob(os.path.join(self.args.model, '*.caffemodel')))[-1]
    if self.args.verbose:
        print(caffeModelPath)
    self.net = caffe.Net(deployProtoPath, caffeModelPath, caffe.TEST)
    if self.args.verbose:
        print("blobs {}\nparams {}".format(self.net.blobs.keys(),
                                           self.net.params.keys()))

    self.transformer = caffe.io.Transformer(
        {'data': self.net.blobs['data'].data.shape})
    # move image channels to outermost dimension
    self.transformer.set_transpose('data', (2, 0, 1))
    # subtract the dataset-mean value in each channel
    self.transformer.set_mean('data', np.array([127.5, 127.5, 127.5]))
    # rescale from [0, 1] to [0, 255]
    self.transformer.set_raw_scale('data', 255)
    # swap channels from RGB to BGR
    self.transformer.set_channel_swap('data', (2, 1, 0))
class Reidentifier:
    """Match a test image against training images using Caffe descriptors.

    Each image is aligned with ``Aligner``, passed through the net up to the
    configured descriptor layer, and compared by cosine similarity. The
    reported class is the name of the directory that contains the most
    similar training image.
    """

    def __init__(self):
        """Parse command-line options, build the aligner and load the net.

        Options are read from ``sys.argv`` with argparse. An ``Aligner`` is
        created that reads from and writes to the ``--train`` directory. The
        network is built from the last ``*.deploy.prototxt`` (plain sorted
        order) and the last ``*.caffemodel`` (as ordered by
        ``human_numeric_sort``) found in the ``--model`` directory, and an
        input transformer is configured for the net's ``data`` blob.

        Raises:
            IndexError: if no ``*.deploy.prototxt`` file exists in the model
                directory.
        """
        parser = argparse.ArgumentParser()
        parser.add_argument('--caffe',
                            type=str,
                            default=os.path.expanduser("~/caffe"),
                            help='--caffe ~/caffe')
        parser.add_argument(
            '--gpu',
            type=int,
            help='--gpu 0 . If --gpu not specified, then cpu will be used')
        parser.add_argument('--model',
                            type=str,
                            default='./',
                            help='--model /root/model/')
        parser.add_argument('--train',
                            type=str,
                            default='train',
                            help='--train /root/train')
        parser.add_argument('--descriptorLayer',
                            type=str,
                            default='embed',
                            help='--descriptorLayer embed')
        # NOTE(review): default='true' is truthy, so args.verbose is always
        # set whether or not the flag is passed -- confirm if default=False
        # was meant.
        parser.add_argument('--verbose', default='true', action='store_true')
        self.args = parser.parse_args()

        print('Train folder:', self.args.train)

        # The aligner reads from and writes to the training folder.
        alignerArgs = properties()
        alignerArgs.inputDir = self.args.train
        alignerArgs.outputDir = self.args.train
        alignerArgs.dlibFacePredictor = 'shape_predictor_68_face_landmarks.dat'
        alignerArgs.align = ''
        alignerArgs.landmarks = 'outerEyesAndNose'
        alignerArgs.size = 128
        alignerArgs.skipMulti = False
        alignerArgs.verbose = False
        alignerArgs.fallbackLfw = False
        self.aligner = Aligner(alignerArgs)

        # Compare against None: device id 0 is a valid GPU but is falsy.
        if self.args.gpu is not None:
            caffe.set_mode_gpu()
            caffe.set_device(self.args.gpu)
        else:
            caffe.set_mode_cpu()

        # Load the net, list its data and params.
        deployProtoPath = sorted(
            glob.glob(os.path.join(self.args.model, '*.deploy.prototxt')))[-1]
        if self.args.verbose:
            print(deployProtoPath)
        caffeModelPath = human_numeric_sort(
            glob.glob(os.path.join(self.args.model, '*.caffemodel')))[-1]
        if self.args.verbose:
            print(caffeModelPath)
        self.net = caffe.Net(deployProtoPath, caffeModelPath, caffe.TEST)
        if self.args.verbose:
            print("blobs {}\nparams {}".format(self.net.blobs.keys(),
                                               self.net.params.keys()))

        self.transformer = caffe.io.Transformer(
            {'data': self.net.blobs['data'].data.shape})
        # move image channels to outermost dimension
        self.transformer.set_transpose('data', (2, 0, 1))
        # subtract the dataset-mean value in each channel
        self.transformer.set_mean('data', np.array([127.5, 127.5, 127.5]))
        # rescale from [0, 1] to [0, 255]
        self.transformer.set_raw_scale('data', 255)
        # swap channels from RGB to BGR
        self.transformer.set_channel_swap('data', (2, 1, 0))

    def reindetify(self, testImagePath, train_image_dir):
        """Return the training class most similar to the test image.

        Descriptors are computed for every image returned by
        ``getImages(train_image_dir)`` and for ``testImagePath``; the
        training image with the highest cosine similarity wins.

        Args:
            testImagePath: path of the image to classify.
            train_image_dir: directory handed to ``getImages``.

        Returns:
            tuple: ``(class_name, similarity)`` where ``class_name`` is the
            name of the directory containing the best-matching training
            image and ``similarity`` is its cosine similarity to the test
            image.

        Raises:
            ValueError: if no training descriptors could be computed.
        """
        testImage = caffe.io.load_image(testImagePath)

        # Central crop window (start row/col, end row/col) sized to the net
        # input. Cropping is currently disabled, so the window is only
        # forwarded to get_descriptor, which ignores it.
        input_shape = self.net.blobs['data'].data.shape
        center = np.array([testImage.shape[0], testImage.shape[1]]) / 2.0
        crop_dims = np.array([input_shape[2], input_shape[3]])
        crop = np.tile(center, (1, 2))[0] + np.concatenate(
            [-crop_dims / 2.0, crop_dims / 2.0])
        crop = crop.astype(int)
        isCropNeeded = False

        trainImages = getImages(train_image_dir)
        print(trainImages)
        trainDescriptors = self.getDescriptors(
            self.args, trainImages, caffe, self.net, self.transformer,
            crop if isCropNeeded else None)
        if not trainDescriptors:
            raise ValueError(
                'No training images found in {}'.format(train_image_dir))

        print('Results:')
        descriptor = self.get_descriptor(self.args, caffe, crop,
                                         testImagePath, self.net,
                                         self.transformer)

        # Materialise paths and vectors in matching order: dict views are
        # neither indexable nor array-like on Python 3.
        train_paths = list(trainDescriptors)
        train_vectors = [trainDescriptors[path] for path in train_paths]
        similar = cosine_similarity(
            np.array(descriptor).reshape(1, -1), train_vectors)[0]
        print(similar)

        best = similar.argsort()[::-1][0]
        # The class is the name of the directory holding the best match.
        class_name = os.path.basename(os.path.dirname(train_paths[best]))
        return class_name, similar[best]

    def getDescriptors(self, args, images, caffe, net, transformer, crop=None):
        """Compute a descriptor for every image path in ``images``.

        Args:
            args: options object; ``verbose`` and ``descriptorLayer`` are
                read.
            images: mapping of folder -> iterable of image paths.
            caffe: forwarded unchanged to ``get_descriptor``.
            net: network used for the forward pass.
            transformer: input preprocessor for the ``data`` blob.
            crop: forwarded unchanged to ``get_descriptor``.

        Returns:
            dict: image path -> descriptor (list).
        """
        descriptors = {}
        for folder in images:
            for index, imagePath in enumerate(images[folder]):
                if args.verbose:
                    print(folder, index, imagePath)
                descriptors[imagePath] = self.get_descriptor(
                    args, caffe, crop, imagePath, net, transformer)
        return descriptors

    def get_descriptor(self, args, caffe, crop, imagePath, net, transformer):
        """Return the descriptor of one image as a list.

        The image is aligned with ``self.aligner``, scaled by 1/255,
        preprocessed by ``transformer`` and forwarded through ``net`` up to
        ``args.descriptorLayer``. ``caffe`` and ``crop`` are accepted for
        interface compatibility but are not used.
        """
        image = self.aligner.align(imagePath).astype(np.float32)
        image /= 255
        transformed_image = transformer.preprocess('data', image)

        # copy the image data into the memory allocated for the net
        net.blobs['data'].data[...] = transformed_image

        activations = net.forward(end=args.descriptorLayer)
        return activations[args.descriptorLayer][0].tolist()