def _update_sdk(path_info):
    LOG.info('Updating SDK and downloading required Android platform '
             '(about 90MB, may take some time)')

    APPROX_UPPER_BOUND_ON_ANDROID_OUTPUT = 60
    android_process = lib.PopenWithoutNewConsole(
        [path_info.android, "update", "sdk", "--no-ui",
         "--filter", "platform-tool,tool,android-8"],
        stdout=PIPE,
        stderr=STDOUT,
    )

    with ProgressBar('Installing Android SDK Components') as bar:
        finished = []

        def kill_adb_occasionally():
            """When updating the Android SDK, ADB occasionally holds a lock
            on some files, causing the update to fail. Killing it here helps
            the update succeed.
            """
            while not finished:
                time.sleep(5)
                try:
                    # XXX: still a time-of-check/time-of-use race here,
                    # but close enough
                    if not finished:
                        _kill_adb()
                except Exception:
                    pass

        adb_killing_thread = threading.Thread(target=kill_adb_occasionally)
        adb_killing_thread.daemon = True
        adb_killing_thread.start()

        for i, line in enumerate(iter(android_process.stdout.readline, '')):
            bar.progress(float(i) / APPROX_UPPER_BOUND_ON_ANDROID_OUTPUT)

        finished.append(True)
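# A minimal sketch of the _kill_adb() helper used above (an assumption, not
# the source's implementation): "adb kill-server" is the standard ADB command
# for stopping the daemon so that it releases any file locks it holds.
def _kill_adb_sketch():  # hypothetical name; the real helper is _kill_adb()
    lib.PopenWithoutNewConsole(["adb", "kill-server"]).wait()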
        tv.transforms.ToTensor(),
        tv.transforms.Normalize([0.5] * 3, [0.5] * 3)
    ]))
train_loader = t_data.DataLoader(dataset,
                                 batch_size=CONFIG["BATCH_SIZE"],
                                 shuffle=True)

# Constant targets for the WGAN critic: +1 for real samples, -1 for fakes
one = t.FloatTensor([1])
mone = -1 * one
one_var = t_auto.Variable(one.cuda() if CONFIG["GPU_NUMS"] > 0 else one)
mone_var = t_auto.Variable(mone.cuda() if CONFIG["GPU_NUMS"] > 0 else mone)

# Fixed noise, useful for tracking generator progress across epochs
fix_noise = t.FloatTensor(100, CONFIG["NOISE_DIM"]).normal_(0, 1)
fix_noise_var = t_auto.Variable(
    fix_noise.cuda() if CONFIG["GPU_NUMS"] > 0 else fix_noise)

bar = j_bar.ProgressBar(CONFIG["EPOCH"], len(train_loader),
                        "D Loss:%.3f;G Loss:%.3f")

for epoch in range(1, CONFIG["EPOCH"] + 1):
    for index, (image, label) in enumerate(train_loader):
        real = image
        real_var = t_auto.Variable(
            real.cuda() if CONFIG["GPU_NUMS"] > 0 else real)
        noise = t.randn(real_var.size(0), CONFIG["NOISE_DIM"])
        noise_var = t_auto.Variable(
            noise.cuda() if CONFIG["GPU_NUMS"] > 0 else noise)

        # Weight clipping keeps the critic approximately Lipschitz (WGAN)
        for param in NetD.parameters():
            param.data.clamp_(-CONFIG["CLAMP_NUM"], CONFIG["CLAMP_NUM"])

        NetD.zero_grad()
        D_real = NetD(real_var)
        D_real.backward(one_var)
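        # Hedged continuation sketch (not from the source): a WGAN critic is
        # also scored on generated samples and back-propagated with the
        # opposite sign, which is what mone_var above is for. NetG is a
        # hypothetical name for the generator paired with NetD.
        fake_var = NetG(noise_var).detach()  # keep generator gradients out
        D_fake = NetD(fake_var)
        D_fake.backward(mone_var)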
def run(file_path):
    # Init logging and database
    init_logging()
    client, file_col, schema_col, source_data_col = init_mongodb(config)

    # Set up counters and file index
    successfully_ingested_files = 0
    file_counter = 0
    file_list = DirLister.get_file_list_recursive(file_path)
    logging.info('Processing %d files from %s' % (len(file_list), file_path))

    for file in file_list:
        file_counter += 1
        ProgressBar.update_progress(file_counter / len(file_list),
                                    'Processing file %s' % file)

        # Get the file stats
        document = {
            'stats': FileStatter.stats(file),
            'filePath': file,
            '_id': file,
            'hash': FileStatter.sha1_from_file(file)
        }

        # Load the data; if loading fails, log the error and skip the file
        if file.lower().endswith('.mif'):
            try:
                data = MIFparser.to_dict(file)
            except ValueError as e:
                logging.error(e)
                continue
        elif file.lower().endswith('.mid'):
            # .mid files are processed along with their parent .mif file
            logging.debug('Skipping .mid file.')
            continue
        else:
            try:
                data = CSVparser.to_dict(file)
            except ValueError as e:
                logging.error('CSV parsing error on file %s: %s' % (file, e))
                continue

        # Generate the schema and try to ingest it
        try:
            schema_data = SchemaGenerator.generate_schema(data)
        except Exception as e:
            logging.error('Schema error on file %s: %s' % (file, e))
            continue

        schema_hash = FileStatter.sha1(schema_data)
        schema = {
            '_id': schema_hash,
            'schema': schema_data,
        }
        try:
            schema_col.insert_one(schema)
        except DuplicateKeyError:
            logging.debug('Schema %s was previously processed' % schema_hash)
        except Exception as e:
            logging.error('Ingest schema error on file %s: %s' % (file, e))
            continue

        # Store the source data
        source_data_doc_sha1 = FileStatter.sha1(data)
        source_data_doc = {'_id': source_data_doc_sha1, 'data': data}
        try:
            source_data_col.insert_one(document=source_data_doc)
        except DuplicateKeyError:
            logging.debug('Sourcedata with sha1 %s was previously processed'
                          % source_data_doc_sha1)
        except Exception as e:
            logging.error('Ingest source data error on file %s: %s'
                          % (file, e))
            continue

        # Finalize the file document with the data and schema references
        document['data'] = source_data_doc_sha1
        document['schema'] = schema['_id']
        try:
            file_col.insert_one(document=document)
        except DuplicateKeyError:
            logging.warning('File %s was previously processed, skipping'
                            % file)
            continue
        except Exception as e:
            logging.error('Ingest file metadata error on file %s: %s'
                          % (file, e))
            continue

        logging.debug('File %s was successfully ingested' % file)
        successfully_ingested_files += 1

    logging.info('Finished!')
    logging.info('Successfully ingested %d files of %d'
                 % (successfully_ingested_files, len(file_list)))
    client.close()
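# The dedup pattern used throughout run(), in isolation (a sketch with
# hypothetical names, not part of the source): using a content hash as
# MongoDB's _id lets the unique index on _id enforce "store each distinct
# payload exactly once".
def insert_once(col, payload_hash, payload):
    try:
        col.insert_one({'_id': payload_hash, 'data': payload})
        return True   # first time this content was seen
    except DuplicateKeyError:
        return False  # identical content was already stored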
os.mkdir("output") train_set = j_data.DataSetFromFolderForPix2Pix( os.path.join("/input/facades_fixed", "train")) test_set = j_data.DataSetFromFolderForPix2Pix( os.path.join("/input/facades_fixed", "test")) train_data_loader = t.utils.data.DataLoader(dataset=train_set, batch_size=CONFIG["BATCH_SIZE"], shuffle=True) test_data_loader = t.utils.data.DataLoader(dataset=test_set, batch_size=CONFIG["BATCH_SIZE"], shuffle=True) test_input, test_target = test_data_loader.__iter__().__next__() bar = j_bar.ProgressBar(CONFIG["EPOCH"], len(train_data_loader), "D loss:%.3f;G loss:%.3f") for epoch in range(1, CONFIG["EPOCH"] + 1): for i, (input, target) in enumerate(train_data_loader): x_ = t_auto.Variable(input.cuda() if CONFIG["GPU_NUM"] > 0 else input) y_ = t_auto.Variable( target.cuda() if CONFIG["GPU_NUM"] > 0 else target) # Train discriminator with real data D_real_decision = Net_D(x_, y_).squeeze() real_ = t_auto.Variable( t.ones(D_real_decision.size()).cuda( ) if CONFIG["GPU_NUM"] > 0 else t.ones(D_real_decision.size())) D_real_loss = BCE_loss(D_real_decision, real_) # Train discriminator with fake data gen_image = Net_G(x_)
''' HOCR '''
hocrFiles = [
    hocrFolder + f for f in os.listdir(hocrFolder) if f.find('.hocr') != -1
]
hocrFiles = sorted(hocrFiles)
HOCRs = {}
allGlyphs = []
print(hocrFiles)
for i, f in enumerate(hocrFiles):
    with open(f, "rb") as fp:
        # use the digits in the file name as the page number
        pageHOCR = re.findall(r'\d+', f.split("/")[-1])[0]
        HOCRs[pageHOCR] = BeautifulSoup(fp, "lxml")
        allGlyphs += HOCRs[pageHOCR].find_all(attrs={"class": u"ocrx_cinfo"})

# Step one: sort unsorted images which are in the root folder
Bar1 = ProgressBar(len(listOutputFolder), 30,
                   "Sort unsorted images (Step 1/2)")
for imgUnsorted in listOutputFolder:
    Bar1.update()
    if imgUnsorted.endswith('.png'):  # if this element is an image:
        # the glyph's char name is the first character of the file name
        glyphName = imgUnsorted[0]
        # create the dir if necessary, then move the image into it
        if glyphName == ".":  # work around "." as a directory name
            subprocess.call(["mkdir", "-p", outputFolder + ".point"])
            subprocess.call(
                ["mv", outputFolder + imgUnsorted, outputFolder + ".point/"])
        else:
            subprocess.call(["mkdir", "-p", outputFolder + glyphName])
            subprocess.call(
                ["mv", outputFolder + imgUnsorted, outputFolder + glyphName])
subprocess.call(
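# Portability note (a sketch, not from the source): the mkdir -p / mv pairs
# above can be done in-process instead of shelling out, e.g.
#
#     os.makedirs(os.path.join(outputFolder, glyphName), exist_ok=True)
#     shutil.move(outputFolder + imgUnsorted,
#                 os.path.join(outputFolder, glyphName))
#
# (requires `import shutil`), saving one subprocess per file and working on
# platforms without mkdir/mv binaries.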
    betas=(0, .9))


def generate_random_sample():
    """Yield random real batches forever, so the critic can draw samples
    without being tied to epoch boundaries."""
    while True:
        random_indexes = numpy.random.choice(len(dataset),
                                             size=CONFIG["BATCH_SIZE"],
                                             replace=False)
        batch = [dataset[i][0] for i in random_indexes]
        yield t.stack(batch, 0)


random_sample = generate_random_sample()

## Fitting model
bar = j_bar.ProgressBar(1, 5000, "D Loss%.3f;G Loss%.3f")
for i in range(1, 5000 + 1):
    for p in NetD.parameters():
        p.requires_grad = True

    # Several critic updates per generator update, as in the WGAN setup
    for j in range(5):
        ########################
        # (1) Update D network #
        ########################
        NetD.zero_grad()

        # Sample real data
        real_images = next(random_sample)
        real_images = real_images.cuda(
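        # Sampler sanity check (a sketch, not from the source): each draw
        # from the infinite generator above is an independent batch of real
        # images of shape (BATCH_SIZE, C, H, W), with no epoch bookkeeping:
        #
        #     batch = next(random_sample)
        #     assert batch.size(0) == CONFIG["BATCH_SIZE"]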
dataset = j_data.Cifar10DataSetForPytorch(
    train=True,
    transform=tv.transforms.Compose([
        tv.transforms.ToTensor(),
        # Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ]))
train_loader = t.utils.data.DataLoader(dataset,
                                       batch_size=BATCH_SIZE,
                                       shuffle=True)

noise = t.randn(100, NOISE_DIM)
noise_var = t_auto.Variable(noise.cuda() if GPU_NUMS > 0 else noise)

k = 0
proBar = j_bar.ProgressBar(EPOCHS, len(train_loader),
                           "D Loss:%.3f;G Loss:%.3f")
for epoch in range(1, EPOCHS + 1):
    for index, (images, _) in enumerate(train_loader):
        mini_batch = images.shape[0]

        noise = t_auto.Variable(
            t.FloatTensor(mini_batch, NOISE_DIM, 1, 1).cuda()
            if GPU_NUMS > 0 else t.FloatTensor(mini_batch, NOISE_DIM, 1, 1))
        real = t_auto.Variable(
            t.FloatTensor(mini_batch, IMAGE_CHANNEL, IMAGE_SIZE,
                          IMAGE_SIZE).cuda()
            if GPU_NUMS > 0 else
            t.FloatTensor(mini_batch, IMAGE_CHANNEL, IMAGE_SIZE, IMAGE_SIZE))
        label = t_auto.Variable(
            t.FloatTensor(1).cuda() if GPU_NUMS > 0 else t.FloatTensor(1))

        Net_D.zero_grad()
        real.data.resize_(images.size()).copy_(images)
if args.resize is not None:
    resize = args.resize
if args.tmp is not None:
    resizedFolder = args.tmp  # was args.temp, but the argument is args.tmp
if args.style is not None:
    fontStyles = args.style

subprocess.call(["mkdir", "-p", levelFolder])
subprocess.call(["mkdir", "-p", resizedFolder])

# e.g. resize = 0.5 gives "50.0%" for ImageMagick's -resize
percentResize = str(resize * 100) + "%"

for fontStyle in fontStyles:
    styleFolder = averageFolder + "/" + fontStyle
    images = [styleFolder + "/" + f for f in os.listdir(styleFolder)]
    BarGlyph = ProgressBar(len(images), 30, "Glyphs : ")
    outputFolder = levelFolder + "/" + fontStyle
    if not os.path.isdir(outputFolder):
        os.mkdir(outputFolder)
    for glyph in images:
        # get the name of the glyph (file name without extension)
        glyphName = glyph.split("/")[-1].split(".")[-2]
        # print(glyphName)
        # save a rescaled version
        if resize != 1:
            subprocess.call(["convert", glyph, "-resize", percentResize,
                             resizedFolder + glyphName + ".png"])
        for blur in blurs:
            for delta in deltas:
                for level in levels:
                    m = int(level) - delta / 2
# overwrite data from command line arguments
if args.output is not None:
    outputFolder = args.output
if args.target is not None:
    images2analysis = args.target
if args.lang is not None:
    lang = args.lang

# beginning
print("start {}\nanalysis from: {}\nto output folder: {}\nin language: {}\n"
      .format(__file__, images2analysis, outputFolder, lang))
subprocess.call(["mkdir", "-p", outputFolder])

if len(images2analysis) > 0:
    progressBar = ProgressBar(len(images2analysis), 30, "Analysis : ")
    for img in images2analysis:
        if os.path.isfile(img) and re.search(
                r"\.png|\.PNG|\.jpg|\.jpeg|\.JPG|\.JPEG|\.tif|\.TIF|\.jp2",
                img):
            outputName = img.split("/")[-1].split(".")[0:-1]
            # tessedit_create_hocr=1 makes tesseract emit an .hocr file;
            # hocr_char_boxes=1 adds the per-character ocrx_cinfo boxes
            # parsed later in the pipeline
            subprocess.call([
                "tesseract", str(img),
                str(outputFolder) + str(outputName[0]),
                "-l", lang, "--dpi", str(resolution),
                "-c", "tessedit_create_hocr=1",
                "-c", "hocr_char_boxes=1"
            ])
        else:
            print(" ----> invalid file found : {}".format(img))
if CONFIG["GPU_NUMS"] > 0: G = G.cuda() D = D.cuda() x = x.cuda() z = z.cuda() z_test = z_test.cuda() x = Variable(x) z = Variable(z) z_test = Variable(z_test) optimizerD = torch.optim.Adam(D.parameters(), lr=CONFIG["LEARNING_RATE"], betas=(0.5, 0.999), weight_decay=0) optimizerG = torch.optim.Adam(G.parameters(), lr=CONFIG["LEARNING_RATE"], betas=(0.5, 0.999), weight_decay=0) ## Fitting model bar = j_bar.ProgressBar(CONFIG["EPOCH"], len(dataset), "D loss:%.3f;G loss:%.3f") for epoch in range(1, CONFIG["EPOCH"] + 1): for i, data_batch in enumerate(dataset, 0): ######################## # (1) Update D network # ######################## for p in D.parameters(): p.requires_grad = True # Train with real data D.zero_grad() # We can ignore labels since they are all cats! images, labels = data_batch # Mostly necessary for the last one because if N might not be a multiple of batch_size
ifHTML = args.html
if args.style is not None:
    fontStyles = args.style
if levelValue != 0:
    images = [i for i in images if i.find(str(levelValue)) != -1]

subprocess.call(["mkdir", "-p", pnmFolder])
subprocess.call(["mkdir", "-p", vectorsFolder])

for style in fontStyles:
    outputFolder = vectorsFolder + "/" + style + "/"
    subprocess.call(["mkdir", "-p", outputFolder])
    styleFolder = configdata['levelsFolder'] + "/" + style + "/"
    images = [styleFolder + "/" + f for f in os.listdir(styleFolder)]
    Bar = ProgressBar(len(images), 30, "Vectorisation : ")
    print(images)
    for i in images:
        print(i)
        iName = i.split("/")[-1].split(".")[-2]
        # bitmap -> PNM -> SVG: potrace only reads bitmap formats,
        # and -s selects its SVG backend
        subprocess.call(["convert", i, pnmFolder + iName + ".pnm"])
        subprocess.call([
            "potrace", pnmFolder + iName + ".pnm", "-s", "-o",
            outputFolder + iName + ".svg"
        ])
        clearSvg = subprocess.check_output([
            "Toolbox/venv2/bin/python2.7",
            "Toolbox/extensionInkscape/applytransform.py",
            outputFolder + iName + ".svg"
        ])
        # print(clearSvg.decode("utf-8"))
    # imgs[pageHOCR] = Image.open(imageSources[(int(pageHOCR) - 9)])

if len(HOCRs) == len(imgs):
    for pageNumber in sorted(HOCRs):  # page by page
        hocrDocument = HOCRs[pageNumber]
        imgPage = imgs[pageNumber]
        # xml browsing
        firstPage = hocrDocument.find(attrs={"class": u"ocr_page"})
        if firstPage is not None:
            nodeGlyphs = firstPage.find_all(attrs={"class": u"ocrx_cinfo"})
            if len(nodeGlyphs) > 0:
                BarByPage = ProgressBar(len(nodeGlyphs), 30,
                                        'Extraction page ' + pageNumber)
                print(pageNumber)
                print(type(pageNumber))
                # unicodeChars = []
                coordsCorpList = []

                # find all elements matching each CSS selector, to recover
                # the style of each char
                stylised_nodes = {}
                for selector in cssSelectors:
                    stylised_nodes[cssSelectors[selector]] = []
                for selector in cssSelectors:
                    print(selector)
                    print(cssSelectors[selector])
                    stylised_nodes[cssSelectors[selector]] += \
                        hocrDocument.select(selector)
print(styleFolder)
if os.path.isdir(styleFolder):
    if not os.path.exists(outputFolder + "/" + fontStyle):
        os.mkdir(outputFolder + "/" + fontStyle)
    if len(specified_glyph) > 0:
        glyphsFolders = [styleFolder + "/" + specified_glyph]
        print(glyphsFolders)
        if not os.path.exists(glyphsFolders[0]):
            print("specified glyph doesn't exist")
            exit()
    else:
        glyphsFolders = [
            styleFolder + "/" + f for f in os.listdir(styleFolder)
            if os.path.isdir(styleFolder + "/" + f)
        ]
    bar = ProgressBar(len(glyphsFolders), 30, "Averaging :")
    for f in glyphsFolders:
        images = list_all_fullpath_images(f)
        glyphStr = f.split("/")[-1]
        glyphName = str2glyphName(glyphStr)
        # ImageMagick's -average writes the per-pixel mean of all inputs
        subprocess.call(["convert"] + images + [
            "-average",
            outputFolder + "/" + fontStyle + "/" + str(glyphName) + ".png"
        ])
        print(outputFolder + "/" + fontStyle + "/" + str(glyphName) + ".png")
        # bar.update()
else:
    print("no folder found")