class TopicModeler:
    """Incrementally trained LDA topic model over cleaned documents."""

    def __init__(self, **kwargs):
        """Store LDA construction options.

        kwargs are forwarded verbatim to gensim's
        ``models.ldamodel.LdaModel`` (num_topics, passes, alpha, eta,
        chunksize, random_state, ...).
        """
        self.cleaner = Cleaner()
        self.lda_model = None
        self.lda_kwargs = kwargs
        self.dictionary = None

    def update(self, docs):
        """Train on *docs*; the first call builds the model, later calls update it."""
        token_lists = [list(self.cleaner.clean(document)) for document in docs]
        # NOTE(review): the dictionary is rebuilt from only this batch, so
        # token ids from earlier batches are discarded -- confirm intended.
        self.dictionary = corpora.Dictionary(token_lists)
        bow_corpus = [self.dictionary.doc2bow(tokens) for tokens in token_lists]
        if self.lda_model is None:
            self.lda_model = models.ldamodel.LdaModel(
                bow_corpus, id2word=self.dictionary, **self.lda_kwargs)
        else:
            self.lda_model.update(bow_corpus, id2word=self.dictionary)

    def classify(self, doc):
        """Return the word distribution of *doc*'s highest-probability topic."""
        bow = self.dictionary.doc2bow(list(self.cleaner.clean(doc)))
        document_topics = self.lda_model.get_document_topics(bow)
        best_topic_id = max(document_topics, key=lambda pair: pair[1])[0]
        return self.lda_model.show_topic(best_topic_id)

    def print_topics(self):
        """Dump the top 3 words of up to 10 topics to stdout."""
        print(self.lda_model.print_topics(num_topics=10, num_words=3))
def clean_invalid_glyphs_and_remove_hinting(fontfile, hinting, output):
    """Strip invalid glyphs (and, per *hinting*, hint data) from *fontfile*
    and write the cleaned font to *output*."""
    ignorable_codepoints = get_whitespace_and_ignorable_list()
    font_cleaner = Cleaner(fontfile, hinting, ignorable_codepoints)
    font_cleaner.clean()
    # Compile cmap format 4 flattened (no idRangeOffset/glyphIdArray) so it
    # is a simple subset of format 12.
    change_method(_c_m_a_p.cmap_format_4, _cmap_format_4_compile, 'compile')
    font_cleaner.save(output)
    font_cleaner.close()
def build(self):
    """Build the database: clean and insert every post and comment of
    every subreddit."""
    reddit_client = Reddit()
    text_cleaner = Cleaner()
    for subreddit in reddit_client.get_subreddits():
        for post in reddit_client.get_posts(subreddit):
            self.database.insert(text_cleaner.clean(post))
            for comment in reddit_client.get_comments(post):
                self.database.insert(text_cleaner.clean(comment))
def clean_invalid_glyphs_and_remove_hinting(fontfile, hinting, output, verbose):
    """Clean *fontfile* and write the result to *output*.

    While saving, cmap format 4 is compiled flattened (no
    idRangeOffset/glyphIdArray) so it is a plain subset of format 12, and a
    custom format-12/13 compiler is swapped in; the original format-12/13
    compiler is restored before returning.
    """
    ignorable_codepoints = get_whitespace_and_ignorable_list()
    font_cleaner = Cleaner(fontfile, hinting, ignorable_codepoints)
    font_cleaner.clean(verbose)
    # do we still what this?  [sic -- kept from original]
    change_method(_c_m_a_p.cmap_format_4, _cmap_format_4_compile, "compile")
    original_compile = change_method(_c_m_a_p.cmap_format_12_or_13,
                                     _cmap_format_12_or_13_compile, "compile")
    font_cleaner.save(output)
    font_cleaner.close()
    # Put the stock format-12/13 compiler back so later saves are unaffected.
    change_method(_c_m_a_p.cmap_format_12_or_13, original_compile, "compile")
def perform_coverage_delete(self, files):
    """Delete the listed lines from each file, letting the Cleaner replace
    them with a no-coverage ``pass`` so surrounding blocks stay valid.

    Args:
        files: mapping of filename -> iterable of line indices to remove.
    """
    cleaner = Cleaner('pass # pragma: no cover\n')
    # items() avoids a second dict lookup per file (was keys() + files[filename]).
    for filename, line_numbers in files.items():
        content = self.read(filename)
        content = cleaner.clean(content, line_numbers)
        self.write(filename, content)
def clean_invalid_glyphs_and_remove_hinting(fontfile, hinting, output, verbose):
    """Clean *fontfile*, writing the cleaned font to *output*.

    During save, cmap format 4 is compiled flattened (a plain subset of
    format 12) and a custom format-12/13 compiler is temporarily installed,
    then restored afterwards.
    """
    ignorables = get_whitespace_and_ignorable_list()
    cleaner = Cleaner(fontfile, hinting, ignorables)
    cleaner.clean(verbose)
    # do we still what this?  [sic -- kept from original]
    change_method(_c_m_a_p.cmap_format_4, _cmap_format_4_compile, 'compile')
    saved_compile = change_method(_c_m_a_p.cmap_format_12_or_13,
                                  _cmap_format_12_or_13_compile, 'compile')
    cleaner.save(output)
    cleaner.close()
    # Restore the original format-12/13 compiler.
    change_method(_c_m_a_p.cmap_format_12_or_13, saved_compile, 'compile')
def main(endpoint_models: list):
    """
    Loop over endpoint models, search file directory for endpoints and
    compare to a standard endpoint config.
    """
    audit_results = []
    for model in tqdm(endpoint_models, desc="Looping over endpoint models..."):
        # Load the gold-standard config for this model...
        standard_config_file = get_standard_config(model)
        with open(standard_config_file, 'r') as standard_config:
            standard_config_json = json.load(standard_config)
        audit = Parser(standard_config_json)
        # ...then diff every endpoint config against it.
        for endpoint in tqdm(gather_endpoints(model),
                             desc="Looping over endpoint config files..."):
            with open(endpoint, 'r') as endpoint_file:
                endpoint_json = json.load(endpoint_file)
            config_diff = audit.compare(endpoint_json, endpoint.name)
            audit_results.append({f"{model}": Cleaner(config_diff).clean()})
    return audit_results
def clean_corpus(self, sentences):
    """Clean each sentence and join every resulting word into one
    space-separated string."""
    return " ".join(
        word
        for sentence in sentences
        for word in Cleaner.clean(sentence).split()
    )
def test_to_submission_format(self):
    """End-to-end: clean a test image and check the submission CSV row shape."""
    cleaner = Cleaner('./tests/data/models/ae3_213750_model.pkl')
    img = '../data/test/10.png'
    # Renamed local `id` -> `img_id`: it shadowed the builtin id().
    img, img_id = cleaner.clean(img)
    csv = cleaner.to_submission_format(img, img_id)
    row = csv[300].split(',')
    # Row ids are "<image id>_<class>_<1-based row index>".
    self.assertEqual(row[0], '%s_%d_%d' % (img_id, 1, 301))
    # Second column is a probability, so it must not exceed 1.0.
    self.assertTrue(float(row[1]) <= 1.0)
def do(task):
    """Run the cleaner on *task*; return its result, or False on failure.

    Args:
        task: mapping describing the work item; must contain an 'ID' key
              (used in the failure log).
    """
    # Lazy %-args: the message is only formatted if the level is enabled.
    logging.debug("Start doing task: %s", task)
    cleaner = Cleaner()
    try:
        return cleaner.clean(task)
    except Exception:  # was a bare except: don't swallow SystemExit/KeyboardInterrupt
        traceback.print_exc(file=sys.stderr)
        logging.critical('Failed while cleaning for task %s', task['ID'])
        return False
class CleanerTest(unittest.TestCase):
    """Behavior of Cleaner.clean when constructed with no replacement text:
    listed line indices are simply deleted."""

    def setUp(self):
        self.uut = Cleaner()

    def test_delete_nothing(self):  # renamed from "test_delte_nothing" (typo)
        lines = ['a', 'b', 'c', 'd', 'e']
        self.assertEqual(self.uut.clean(lines, []), lines)

    def test_delete_one_line(self):
        lines = ['a', 'b', 'c', 'd', 'e']
        self.assertEqual(self.uut.clean(lines, [0]), ['b', 'c', 'd', 'e'])

    def test_delete_multiple_line(self):
        lines = ['a', 'b', 'c', 'd', 'e']
        self.assertEqual(self.uut.clean(lines, [0, 2, 3]), ['b', 'e'])

    def test_delete_line_that_does_not_exist(self):
        # Out-of-range indices are ignored.
        lines = ['a', 'b', 'c', 'd', 'e']
        self.assertEqual(self.uut.clean(lines, [10, 20, 30]), lines)
class LinuxBackup:
    """Zip the configured directories, upload the archive to Drive, and
    prune expired local backups."""

    def __init__(self):
        self.config_parser = ConfigParser()
        # Backups land next to this script, in ./backups/.
        self.out_dir = os.path.join(os.path.dirname(__file__), "backups/")
        self.create_empty_dir()
        # Archive name, e.g. "alice-31-12-2024.zip".
        self.backup_name = "%s-%s.zip" % (os.getlogin(), time.strftime("%d-%m-%Y"))
        self.directorys = self.config_parser.directories_to_backup
        self.path = os.path.join(self.out_dir, self.backup_name)
        self.zip_creator = ZipCreator(self.path, self.directorys)
        self.drive_connector = DriveConnector(self.out_dir, self.config_parser)
        self.cleaner = Cleaner(self.out_dir, self.config_parser.get_clean_time())

    def create_empty_dir(self):
        """Ensure the output directory exists."""
        # makedirs(exist_ok=True) replaces the exists()/mkdir() pair,
        # removing the check-then-act race.
        os.makedirs(self.out_dir, exist_ok=True)

    def backup(self):
        """Create the zip, upload it, then delete expired local backups."""
        self.zip_creator.create()
        self.drive_connector.upload(self.path)
        self.cleaner.clean()
class CleanerWithReplacementTest(unittest.TestCase):
    """Behavior of Cleaner.clean when constructed with a replacement string:
    listed line indices are replaced (indentation kept, adjacent
    replacements collapsed)."""

    def setUp(self):
        self.uut = Cleaner('pass')

    def test_delete_nothing(self):  # renamed from "test_delte_nothing" (typo)
        lines = ['a', 'b', 'c', 'd', 'e']
        self.assertEqual(self.uut.clean(lines, []), lines)

    def test_replace_one_line(self):  # renamed from "test_replace_one_linestuff"
        lines = ['a', 'b', 'c', 'd', 'e']
        self.assertEqual(self.uut.clean(lines, [0]),
                         ['pass', 'b', 'c', 'd', 'e'])

    def test_replacing_respects_indentation(self):  # was "...intentation" (typo)
        lines = ['  a', 'b', '    c', 'd', 'e']
        self.assertEqual(self.uut.clean(lines, [0, 2]),
                         ['  pass', 'b', '    pass', 'd', 'e'])

    def test_collapses_two_pass_together(self):
        lines = ['a', 'b', 'c', 'd', 'e']
        self.assertEqual(self.uut.clean(lines, [0, 1]),
                         ['pass', 'c', 'd', 'e'])
def clean_comment(self, comment):
    """Return *comment* normalized by the shared Cleaner."""
    # TODO: move this onto the instance (self) instead of calling the
    # Cleaner class directly.
    return Cleaner.clean(comment)
# CLI entry point: run the Cleaner over a path taken from --path or, failing
# that, from config.yml.  (Python 2 -- uses the print statement.)
from os import environ as environment
import argparse, yaml
import logging
from cleaner import Cleaner

parser = argparse.ArgumentParser()
parser.add_argument("--path", help="path to run cleaner on", type=str)
args = parser.parse_args()
# logging.basicConfig(level=logging.DEBUG)

with open("config.yml") as sets:
    # NOTE(review): yaml.load without an explicit Loader executes arbitrary
    # tags on untrusted input -- prefer yaml.safe_load if config.yml is not
    # fully trusted.
    config = yaml.load(sets)

path = args.path
if not path:
    # Fall back to the configured default pattern when --path is omitted.
    path = config["cleaner"]["general_pattern"]

cleaner = Cleaner(config["cleaner"])
print "Cleaning path: " + str(path)
cleaner.clean(path, True)
def main(p): start = time.time() # 选择文件名以'json.gz'结尾的记录 file_name_list = filter(lambda x: x.endswith('json.gz'), os.listdir(p)) # TODO 添加文件是否是24个的判断(glob模块) for file_name in file_name_list: with open(os.path.join(p, file_name), 'r') as f: raw_json_file = gzip.GzipFile(fileobj=f) record_cleaner = Cleaner() record_grouper = Grouper(db) record_normalizer = Normalizer(db) mongo_helper = MongoHelper(db) counter = ActorCounter() evaluater = Evaluater() # 数据清洗 record_cleaner.set_dirty_data(raw_json_file) record_cleaner.clean() clean_record = record_cleaner.get_clean_data() log.log('clean record %s' % len(clean_record)) # 数据处理 # 分组 record_grouper.set_records(clean_record) record_grouper.group() record_actor_exist = record_grouper.get_group_1() record_actor_new = record_grouper.get_group_2() log.log('record_actor_exist: %s' % len(record_actor_exist)) log.log('record_actor_new: %s' % len(record_actor_new)) # 处理记录的actor已存在的记录 log.log('Begin processing actor-exist records...') # 只需要删掉记录的actor_attrs即可 for record in record_actor_exist: del record['actor_attributes'] log.log('Finished.') # 处理记录的actor不存在的记录 record_normalizer.set_records(record_actor_new) record_normalizer.normalize() record_actor_new = record_normalizer.get_record_actor_new() new_actors = record_normalizer.get_new_actors() # 把本地的今日新增的Actor更新到数据库 actors = new_actors.values() mongo_helper.insert_new_actors(actors) # 对新增的Actor, 改变Redis中相应的计数 counter.count_actor_list(actors) # 计算每条记录的val evaluater.set_records(record_actor_exist) evaluater.evaluate() val_actor_exist = evaluater.get_val_cache() evaluater.set_records(record_actor_new) evaluater.evaluate() val_actor_new = evaluater.get_val_cache() # 将记录插入数据库 mongo_helper.insert_new_reocrds(record_actor_new) mongo_helper.insert_new_reocrds(record_actor_exist) # 将今日用户新增的val更新到数据库 mongo_helper.update_val(val_actor_new) mongo_helper.update_val(val_actor_exist) record_cleaner.free_mem() del record_cleaner del record_grouper del record_normalizer del mongo_helper del counter 
del evaluater # 生成CSV文件 util.grcount2csv() end = time.time() log.log('total: %s s' % (end - start))
def main():
    """Entry point: build the config and CLI args, then run one clean pass."""
    cleaner = Cleaner(create_config(), parse_args())
    cleaner.clean()
# Timing harness: compare threaded vs. sequential HTML fetching for one RSS
# feed, then clean each article to plaintext.  (Python 2 -- print statements.)
import time

t1 = time.time()
fd = FeedDownloader("http://mybroadband.co.za/news/feed", "My Broadband")
wh = WebHelper()
cleaner = Cleaner()
articles = fd.parse()
print time.time() - t1, "setup complete"

# Fetch all article HTML using the threaded helper.
t1 = time.time()
wh.get_html_threaded(articles)
print time.time() - t1, "threaded download complete"

# Fetch the same articles one by one, for comparison.
t1 = time.time()
for a in articles:
    a.html = wh.attempt_get_html(a.url)
print time.time() - t1, "non threaded download complete"

# Clean whatever downloaded successfully and show a preview of each.
t1 = time.time()
for a in articles:
    if a.html:
        a.plaintext = cleaner.clean(a.html)
        print(a.plaintext[:300])
        print('')
print time.time() - t1, "cleaning complete"
def vocabulary_analysis(self, cleaned_corpus, text):
    """Tag the vocabulary of *text* against the keyword categories in
    self.meta_keywords and report, per category, the matched words, their
    share of the text, and an overall balance grade/score.

    Args:
        cleaned_corpus: *text* after Cleaner normalization (may be "").
        text: raw input text.
    Returns:
        dict keyed by category, plus 'other', 'grade' and 'score' entries.

    NOTE(review): uses dict.iteritems -- this is Python 2 code.
    """
    len_wrds = len(text.split())
    emos = []
    # One bucket of matched words per metadata category.
    balance = {}
    for all_keys in self.meta_data.keys():
        balance[all_keys] = []
    text = text.lower()
    words = text.split()
    covered = []
    for word in self.meta_keywords:
        # Whole-word match against the raw text first (spaces added so
        # boundary words still match)...
        if " " + word + " " in " " + text + " ":
            for tag in self.meta_keywords[word]:
                if word not in balance[tag]:
                    actual_count = self.count_occurrences(word, text)
                    for i in range(actual_count):
                        balance[tag].append(word)
                    if word in self.both:
                        emos.append(word)
                    covered.append(word)
        else:
            # ...otherwise retry with the cleaned keyword against the
            # cleaned corpus.
            cln_wrd = Cleaner.clean(word)
            if cln_wrd != "" and cleaned_corpus != "" and " " + cln_wrd + " " in " " + cleaned_corpus + " ":
                for tag in self.meta_keywords[word]:
                    if word not in balance[tag] and word.lower().strip() in text.lower().strip():
                        actual_count = self.count_occurrences(
                            cln_wrd, cleaned_corpus)
                        for i in range(actual_count):
                            balance[tag].append(word)
                        if word in self.both:
                            emos.append(word)
                        covered.append(word)
    visited = []
    # Collapse each category's word list into {values, percentage, count}.
    for key, values in balance.iteritems():
        vals = Counter(values)
        vals = sorted(vals.items(), key=operator.itemgetter(1), reverse=True)
        for ww in vals:
            if ww[0] not in visited:
                visited.append(ww[0])
        # Category share as a percentage of all words in the text.
        cnts = round((float(sum([x[1] for x in vals]) * 100) / len_wrds), 2)
        balance[key] = {}
        value_words = [
            "".join(a for a in x[0] if a not in punctuation) for x in vals
        ]
        balance[key]['values'] = list(set(value_words))
        balance[key]['percentage'] = cnts
        balance[key][key + '_count'] = sum([x[1] for x in vals])
    word_balance_grade, score = self.getWordBalanceGrade(balance)
    balance['grade'] = word_balance_grade
    balance['score'] = score
    # Words not matched by any category end up in 'other'.
    other = []
    cln_words = cleaned_corpus.split()
    for wd in words:
        wd = "".join(a for a in wd if a not in punctuation)
        if wd and wd not in covered:
            other.append(wd)
    # print other
    other_per = float(len(other)) * 100 / len_wrds
    # other_per = 100 - balance['common']['percentage'] - balance['uncommon']['percentage'] - balance['emotional']['percentage'] - balance['power']['percentage']
    # if other_per < 0:
    #     other_per = 0
    balance['other'] = {}
    balance['other']['percentage'] = other_per
    balance['other']['values'] = other
    # NOTE(review): stores the 'common' percentage under its count key --
    # looks inconsistent with the other categories; confirm intended.
    balance['common']['common_count'] = balance['common']['percentage']
    # if emos:
    #     balance['emotional']['values'].extend(emos)
    #     balance['emotional']['emotional_count'] += len(emos)
    return balance
def extract(raw_txt, logger):
    """Build an extractive summary of *raw_txt*.

    Pipeline: clean into sentences, build a sentence-similarity matrix,
    PageRank it, keep the top-ranked sentences (the kept fraction shrinks
    as the document grows), and cluster them back into paragraphs.

    Args:
        raw_txt: raw document text.
        logger: logger used for progress/debug output.
    Returns:
        Summary string; paragraphs separated by blank lines.
    """
    cleaner = Cleaner()
    cleaned_text_list = cleaner.clean(raw_txt)
    logger.info('Done cleaning')
    logger.debug(len(cleaned_text_list))
    logger.debug(cleaned_text_list)

    matrix_builder = MatrixBuilder()
    matrix = matrix_builder.build_sim_matrix(cleaned_text_list, logger)
    logger.info('Done building sim matrix')
    logger.debug('Dimensions: {}'.format(matrix.shape))
    logger.debug(matrix)

    grapher = Grapher()
    pageranks = grapher.graph(matrix)
    logger.info('Generated graph and got pageranks')
    logger.debug(pageranks)

    total_doc_size = len(cleaned_text_list)
    # Keep a smaller fraction of sentences the longer the document is.
    # Plain <= comparisons fix the old `in range(...)` tests, which left
    # gaps at exactly 300, 800 and 1500 sentences (those sizes silently
    # fell through to the 5% branch).
    if total_doc_size <= 300:
        summary_length = int(0.4 * total_doc_size)
    elif total_doc_size <= 800:
        summary_length = int(0.2 * total_doc_size)
    elif total_doc_size <= 1500:
        summary_length = int(0.1 * total_doc_size)
    else:
        summary_length = int(0.05 * total_doc_size)

    # Highest-ranked sentence indices, restored to document order.
    top_ranked = nlargest(summary_length, pageranks, key=pageranks.get)
    top_ranked.sort()

    clusterer = Cluster()
    top_ranked = clusterer.splitIntoParagraph(top_ranked, 7.5)
    logger.debug(top_ranked)

    result = ''
    for paragraph in top_ranked:
        for key in paragraph:
            result += '{}. '.format(cleaned_text_list[key])
        result += '\n\n'
    # (The old try/del block was removed: locals are released on return.)
    return result
def main():
    """Command-line entry point for PyReaper (Python 2, optparse).

    Parses options, hashes the given folders to find duplicate files, then
    deletes/moves/prints the duplicates according to the chosen action.
    Exits 0 if a clean pass ran, 1 otherwise.
    """
    parser = OptionParser(prog="reaper", version="0.1.0",
                          usage="%prog [options] <path to folder> "+
                                "[<path to folder...>]",
                          description="PyReaper is a small tool that detects " +
                                      "duplicated files by hashing them and then deletes " +
                                      "these duplicated files leaving just one of them",
                          epilog="CAUTION: handle with EXTREME CARE, " +
                                 "use -n option first if you are not sure of " +
                                 "what are you doing, this thing deletes stuff!!!")
    parser.add_option("-n", "--no-action", dest="noaction", action="store_true",
                      help="does not executes any file action")
    parser.add_option("-d", "--delete", dest="delete", action="store_true",
                      help="delete every duplicated file")
    parser.add_option("-m", "--move-to", dest="moveto", metavar="DIR",
                      help='Moves duplicated files instead of deleting them')
    parser.add_option("-p", "--print-rm-commands", dest="rmcommands", action="store_true",
                      help="skips delete process and prints a set of \"rm\" " +
                           "commands so you can delete the duplicate files yourself")
    parser.add_option("-i", "--interactive", dest="interactive", action="store_true",
                      help="interactive mode, will ask for each duplicate. " +
                           "By default it deletes every duplicate found but " +
                           "the first one")
    parser.add_option("-y", "--dont-ask-confirmation", dest="noconfirmation", action="store_true",
                      help="skips confirmation question. ")
    parser.add_option("-s", "--store-hashes", dest="storehash", action="store_true",
                      help="store and keep calculated hashes in .digest hidden files ")
    parser.add_option("-t", "--delete-empty-trees", dest="deletedirs", action="store_true",
                      help="deletes empty trees when finishes")
    parser.add_option("-e", "--ext", dest="extension", action="store",
                      help="only digests files with the given extension")
    parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
                      help="outputs much more information during process " +
                           "(sometimes even too much)")
    parser.add_option("", "--ignore-stored-hashes", dest="ignorehashes", action="store_true",
                      help="ignores stored calculated hashes in .digest " +
                           "hidden files, this means every hash will be " +
                           "recalculated")
    (options, args) = parser.parse_args()
    if not args:
        exit_with_error('', parser)
    # Walker hashes candidate files under each path and tracks collisions.
    br = Walker(options.extension, \
                options.storehash, \
                options.verbose, \
                options.ignorehashes)
    # Resolve the requested action: 'n' no-op, 'm' move, 'd' delete.
    action = None
    moveto = None
    rmcommands = False
    if options.noaction:
        action = 'n'
    elif options.moveto:
        action = 'm'
        moveto = options.moveto
        if not moveto:
            exit_with_error('No "move to" target provided', parser)
        elif not os.path.exists(moveto):
            exit_with_error('Path %s does not exists' % moveto, parser)
        elif not os.path.isdir(moveto):
            exit_with_error('Path %s is not a directory' % moveto, parser)
    elif options.delete:
        action = 'd'
        rmcommands = options.rmcommands
    if action is None:
        exit_with_error('No action selected', parser)
    # Hash every file under every given path.
    for path in args:
        if not os.path.exists(path):
            exit_with_error("path {0} does not exists".format(path), parser)
        br.digest(path)
    duplicates = br.collisions()
    clean = False
    if duplicates:
        print "Duplicates found, cleaning..."
        c = Cleaner(duplicates,
                    options.interactive,
                    options.verbose,
                    action,
                    rmcommands,
                    options.noconfirmation,
                    moveto)
        clean = c.clean()
    else:
        print "No duplicates found"
    # Unless the user asked to keep them, remove the .digest cache files.
    if not options.storehash:
        print "Deleting digest files..."
        c = Cleaner(verbose = options.verbose)
        c.delete(br.digestFiles(), -1, True)
    if options.deletedirs:
        # Optionally remove directory trees left empty by the clean-up.
        c = Cleaner(verbose = options.verbose)
        for path in args:
            empty_dirs = br.findEmptyDirs(path)
            for dir in empty_dirs:
                if options.rmcommands or options.noaction:
                    print "Keeping empty tree {0}".format(dir)
                else:
                    c.deleteDir(dir)
    # Exit status: 0 when a clean pass actually ran, 1 otherwise.
    if clean:
        sys.exit(0)
    else:
        sys.exit(1)
# Hangman-style game bootstrap: fetch a word from Wikipedia, clean it,
# persist it, then start the game.
import json
from finder_interface import FinderInterface
from cleaner import Cleaner
from writer_interface import WriterInterface
from cli_messages import Message

'''
Get the config object from config/config.txt
'''
# NOTE(review): file handle is never closed -- consider a with-block.
json_file = open("config/config.txt", "r")
config = json.load(json_file)

'''
Get the word from the wikipedia page, pass term from config
'''
wiki_finder = FinderInterface(config["wiki_term"])
word = wiki_finder.get_word()

'''
Clean the word of special characters and signs
'''
cleaner = Cleaner()
clean_word = cleaner.clean(word)

'''
Save the word and the validating word in the file / database
'''
writer = WriterInterface()
writer.save_word(clean_word)

'''
Start the game here
'''
echo = input("Are you ready to play? ")
print(" ")
name = input("What is your name darling? ")
message = Message(name)
if echo == "yes":
    # Choose the level
    level = input("What is your LEVEL (easy/medium/hard)? ")
    # Allowed turns scale with word length plus a per-level bonus.
    if level == "easy":
        max_turns = len(clean_word) + config["levels"]["easy"]
    elif level == "medium":
        max_turns = len(clean_word) + config["levels"]["medium"]
    elif level == "hard":
        # NOTE(review): the snippet under review is truncated here.
def collect():
    """Scrape boerse-frankfurt.de data tables for each stored constituent
    and insert new rows into the local database.

    NOTE(review): the loop ends with a bare ``break``, so only the first
    constituent is processed per run -- looks like a debugging leftover;
    confirm before relying on full coverage.
    """
    storage = Storage('constituents.db')
    try:
        result = storage.run_query('Select * from constituents')
    except:
        ## If constituents under DAX isnt present fetch it by calling another script.
        collect_constituent()
        result = storage.run_query('Select * from constituents')
    df = pd.DataFrame(result)
    df.columns = ['constituent_name','wkn']
    url = 'https://www.boerse-frankfurt.de/equity/{}?lang=en'
    options = webdriver.ChromeOptions()
    # options.add_argument('--headless')
    driver = webdriver.Chrome('../chromedriver',options = options)
    final_data = {}
    try:
        for index,row in df.iterrows():
            ## To loop over each constituent page.
            final_data = {}
            driver.get(url.format(row['wkn']))
            sleep(3)  # crude wait for the page to render
            tabs = driver.find_elements_by_xpath('//button[contains(@class,"data-menue-button")]')
            for tab in tabs:
                # Skip tabs that hold no tabular data.
                if tab.text in ['Charts','News','Company Details']:
                    continue
                tab.click()
                sleep(4)
            tables = driver.find_elements_by_xpath('//table')
            table_names = []
            for table in tables:
                try:
                    # The table's heading is a preceding-sibling <h2> of its parent.
                    table_name = table.find_element_by_xpath('./..//preceding-sibling::h2[contains(@class,"widget-table-headline")]').text
                except:
                    table_name = ''
                # Strip the constituent's own name from the heading, keeping
                # only the generic table title.
                if table_name.find(row['constituent_name']) != -1:
                    table_name = table_name[:table_name.find(row['constituent_name'])].strip()
                table_names.append(table_name)
            data = pd.read_html(driver.page_source)
            # Pair each parsed table with its heading; unnamed tables dropped.
            for each_df,table_name in zip(data,table_names):
                if not table_name:
                    continue
                final_data[table_name] = each_df
            ## Call cleaner to cleanse and format data.
            cleaner = Cleaner(final_data)
            final_data = cleaner.clean()
            collection_date = datetime.datetime.now().strftime('%d/%m/%y')
            for table in final_data:
                ## Get the dataframe and filter out rows that are already present in database.
                latest_date = storage.get_date(table,row['wkn'])
                # NOTE(review): dates are '%d/%m/%y' strings, so this string
                # comparison is not a true chronological comparison -- confirm.
                if not latest_date or collection_date > latest_date:
                    print('Collecting data for {} for constituent {}({})'.format(table,row['constituent_name'],row['wkn']))
                    try:
                        final_data[table]['collection_date'] = collection_date
                        final_data[table]['constituent_name'] = row['constituent_name']
                        final_data[table]['wkn'] = row['wkn']
                        ## Insert the data to database.
                        storage.insert_bulk(table,final_data[table])
                    except Exception as e:
                        print(e)
                else:
                    print('Already collected for {} for constituent {}({})'.format(table,row['constituent_name'],row['wkn']))
            break
    except Exception as e:
        print(e)
    finally:
        driver.quit()
def main(p): start = time.time() # 选择文件名以'json.gz'结尾的记录 file_name_list = filter(lambda x: x.endswith('json.gz'), os.listdir(p)) # TODO 添加文件是否是24个的判断(glob模块) for file_name in file_name_list: with open(os.path.join(p, file_name), 'r') as f: raw_json_file = gzip.GzipFile(fileobj=f) record_cleaner = Cleaner() record_grouper = Grouper(db) record_normalizer = Normalizer(db) mongo_helper = MongoHelper(db) counter = ActorCounter() evaluater = Evaluater() # 数据清洗 record_cleaner.set_dirty_data(raw_json_file) record_cleaner.clean() clean_record = record_cleaner.get_clean_data() log.log('clean record %s' % len(clean_record)) # 数据处理 # 分组 record_grouper.set_records(clean_record) record_grouper.group() record_actor_exist = record_grouper.get_group_1() record_actor_new= record_grouper.get_group_2() log.log('record_actor_exist: %s' % len(record_actor_exist)) log.log('record_actor_new: %s' % len(record_actor_new)) # 处理记录的actor已存在的记录 log.log('Begin processing actor-exist records...') # 只需要删掉记录的actor_attrs即可 for record in record_actor_exist: del record['actor_attributes'] log.log('Finished.') # 处理记录的actor不存在的记录 record_normalizer.set_records(record_actor_new) record_normalizer.normalize() record_actor_new = record_normalizer.get_record_actor_new() new_actors = record_normalizer.get_new_actors() # 把本地的今日新增的Actor更新到数据库 actors = new_actors.values() mongo_helper.insert_new_actors(actors) # 对新增的Actor, 改变Redis中相应的计数 counter.count_actor_list(actors) # 计算每条记录的val evaluater.set_records(record_actor_exist) evaluater.evaluate() val_actor_exist = evaluater.get_val_cache() evaluater.set_records(record_actor_new) evaluater.evaluate() val_actor_new = evaluater.get_val_cache() # 将记录插入数据库 mongo_helper.insert_new_reocrds(record_actor_new) mongo_helper.insert_new_reocrds(record_actor_exist) # 将今日用户新增的val更新到数据库 mongo_helper.update_val(val_actor_new) mongo_helper.update_val(val_actor_exist) record_cleaner.free_mem() del record_cleaner del record_grouper del record_normalizer del mongo_helper del counter del 
evaluater # 生成CSV文件 util.grcount2csv() end = time.time() log.log('total: %s s' % (end - start))
""" from matplotlib import pyplot as plt import numpy as np import cv2 from loader import Loader from cleaner import Cleaner from features import Features import sys from params import Params if __name__ == '__main__': params = Params() params.decode(sys.argv[1:]) loader = Loader() loader.loadReferenceAndTentative() cleaner = Cleaner() target, imgReference = cleaner.clean(loader.imgGrayReference, "reference") imgTentative = cleaner.cleanWithExpectedCount(loader.imgGrayTentative, "tentative", target) features = Features(imgReference, loader.imgColorReference, imgTentative, loader.imgColorTentative) features.extractFeatures(params) print("Offset is:" + str(features.offset) + ", angle is: " + str(features.angle) + ", value is:" + str(features.value) + ", scale is:" + str(features.scaleFactor)) sys.exit()
def use_cleaner(html_snippet):
    """Run Cleaner over *html_snippet* in place and return its string form."""
    snippet_cleaner = Cleaner(html_snippet)
    snippet_cleaner.clean()
    return str(snippet_cleaner)
def clean_invalid_glyphs_and_remove_hinting(fontfile, hinting, output):
    """Remove invalid glyphs (and, per *hinting*, hint data) from
    *fontfile*, saving the cleaned font to *output*."""
    font_cleaner = Cleaner(fontfile, hinting, get_whitespace_list())
    font_cleaner.clean()
    font_cleaner.save(output)
    font_cleaner.close()