def crawl_pages(subcats):
    """Collect the pages of every subcategory, using a per-subcategory cache.

    For each subcategory a directory below
    ``data/site/<wiki_lang>/<start_cat>/`` is created and its ``pages.txt``
    is read when it exists; otherwise the pages are fetched with
    ``get_subcat_pages`` and written to that file.

    Returns the collected pages that are not rejected by ``config.page_bl``
    and are accepted by ``lang.can``, with duplicates dropped and first-seen
    order kept.
    """
    base_dir = "data/site/%s/%s/" % (config.wiki_lang, config.start_cat)
    collected = []
    for position, subcat in enumerate(subcats, 1):
        # presumably a progress indicator: (items done, items total)
        pb.update(position, len(subcats))
        subcat_dir = base_dir + subcat + "/"
        misc.mkdir_p(subcat_dir)
        cache_file = subcat_dir + "pages.txt"
        if not os.path.exists(cache_file):
            fetched = get_subcat_pages(subcat)
            misc.write_file(cache_file, fetched)
        else:
            fetched = misc.read_file(cache_file)
        collected.extend(fetched)

    kept = []
    for page in collected:
        if config.page_bl(page):
            continue
        if lang.can(page):
            kept.append(page)

    # OrderedDict keys are unique and remember insertion order.
    return OrderedDict.fromkeys(kept).keys()
def read_parameter_file(path, name):
    """Load a parameter file and return its content parsed as a Python literal.

    The file is read via ``misc.read_file(path, name + filename_extension)``.
    If that raises IOError, the text ``"{}"`` is parsed instead, so an empty
    dict is returned.
    """
    file_name = name + filename_extension
    try:
        text = misc.read_file(path, file_name)
    except IOError:
        text = "{}"
    return ast.literal_eval(text)
def translator(target='', detail=0): from vivaldi_translator import test if target != '': test_input = read_file(target) test(test_input, test_set=False, detail=detail) else: path = 'test_set/' tests = read_file(path+'test_list') test_list = get_test_list(tests) for test_name in test_list: test_data = get_test_data(path+test_name, ['test_input','test_output','target']) print "TEST:", test_name flag = test(test_data, detail=detail) if flag == False: return False return True
def test_vi2cu_translator(target='', detail=0): from vi2cu_translator.main import test if target != '': test_input = read_file(target) flag = test(test_input, test_set=False, detail=detail) else: path = 'vi2cu_translator/test_set/' tests = read_file(path+'test_list') test_list = get_test_list(tests) for test_name in test_list: test_data = get_test_data(path+test_name, ['test_input','test_output','dtype_dict','return_dtype']) print "TEST:", test_name flag = test(test_data) if flag == False: return False return True
def preprocessing(target='', detail=0): from preprocessing.main import test if target != '': test_input = read_file(target) flag = test(test_input, test_set=False, detail=detail) else: path = 'preprocessing/test_set/' tests = read_file(path+'test_list') test_list = get_test_list(tests) for test_name in test_list: test_data = get_test_data(path+test_name, ['test_input','test_output']) print "TEST:", test_name flag = test(test_data) if flag == False: return False return True
def translator(target='', detail=0): from vivaldi_translator import test if target != '': test_input = read_file(target) test(test_input, test_set=False, detail=detail) else: path = 'test_set/' tests = read_file(path + 'test_list') test_list = get_test_list(tests) for test_name in test_list: test_data = get_test_data(path + test_name, ['test_input', 'test_output', 'target']) print "TEST:", test_name flag = test(test_data, detail=detail) if flag == False: return False return True
def crawl_subcats():
    """Return the usable subcategories of ``config.start_cat``, cached on disk.

    When ``data/site/<start_cat>/subcats.txt`` exists its content is returned.
    Otherwise the subcategories are fetched with ``get_subcats``, reduced to
    those accepted by ``lang.can_subcat``, written to that file and returned.
    """
    dirpath = "data/site/%s/" % config.start_cat
    # The cache must be looked up at the very path it is written to. The
    # lookup used to contain a literal, never-substituted "%s/" segment, so
    # the existence check could not find the file and the cache was never hit.
    filepath = dirpath + "subcats.txt"
    if os.path.exists(filepath):
        return misc.read_file(filepath)

    subcats = [
        subcat
        for subcat in get_subcats(config.start_cat)
        if lang.can_subcat(subcat)
    ]
    misc.write_file(filepath, subcats)
    return subcats
def get_test_vi2cu_translator(file_name):
    """Read ``file_name`` and return its content parsed as a Python literal.

    The text is evaluated with ``ast.literal_eval``, so only literal
    structures are accepted; anything else makes ``literal_eval`` raise.
    """
    # The former `if False:` debug dump could never run and was removed,
    # together with the placeholder dict that was always overwritten.
    return ast.literal_eval(read_file(file_name))
def parse_main(target='', detail=0): from parse_main.main import test if target != '': test_input = read_file(target) flag = test(test_input, test_set=False, detail=detail) else: path = 'parse_main/test_set/' tests = read_file(path + 'test_list') test_list = get_test_list(tests) for test_name in test_list: test_data = get_test_data(path + test_name, ['test_input', 'test_output']) print "TEST:", test_name flag = test(test_data, detail=detail) if flag == False: return False return True
def test_vi2cu_translator(target='', detail=0): from vi2cu_translator.main import test if target != '': test_input = read_file(target) flag = test(test_input, test_set=False, detail=detail) else: path = 'vi2cu_translator/test_set/' tests = read_file(path + 'test_list') test_list = get_test_list(tests) for test_name in test_list: test_data = get_test_data( path + test_name, ['test_input', 'test_output', 'dtype_dict', 'return_dtype']) print "TEST:", test_name flag = test(test_data) if flag == False: return False return True
def get_test(file_name):
    """Split a test file into its input and output sections.

    The input section is the text between the first ``test_input:`` marker
    and the first ``test_output:`` marker; the output section is everything
    after ``test_output:``. Both are passed through ``remove_first_endline``.

    Returns a dict with the keys ``'input'`` and ``'output'``.
    """
    input_tag = 'test_input:'
    output_tag = 'test_output:'

    text = read_file(file_name)
    input_at = text.find(input_tag)
    output_at = text.find(output_tag)

    raw_input = text[input_at + len(input_tag):output_at]
    raw_output = text[output_at + len(output_tag):]

    # remove first new line
    return {
        'input': remove_first_endline(raw_input),
        'output': remove_first_endline(raw_output),
    }
def get_test(file_name):
    """Split a test file into its input and output sections.

    The input section is the text between the first ``test_input:`` marker
    and the first ``test_output:`` marker; the output section is everything
    after ``test_output:``. Both are passed through ``remove_first_endline``.

    Returns a dict with the keys ``'input'`` and ``'output'``.
    """
    input_tag = 'test_input:'
    output_tag = 'test_output:'

    text = read_file(file_name)
    input_at = text.find(input_tag)
    output_at = text.find(output_tag)

    raw_input = text[input_at + len(input_tag):output_at]
    raw_output = text[output_at + len(output_tag):]

    # remove first new line
    return {
        'input': remove_first_endline(raw_input),
        'output': remove_first_endline(raw_output),
    }
def divide_line(): from functions.divide_line.divide_line import test path = 'functions/divide_line/test_set/' tests = read_file(path + 'test_list') test_list = tests.split('\n') for test_name in test_list: if test_name == '': continue test_data = get_test(path + test_name) print "TEST:", test_name test(test_data['input'], test_data['output']) return False
def divide_line(): from functions.divide_line.divide_line import test path = 'functions/divide_line/test_set/' tests = read_file(path+'test_list') test_list = tests.split('\n') for test_name in test_list: if test_name == '':continue test_data = get_test(path+test_name) print "TEST:", test_name test(test_data['input'], test_data['output']) return False
def code_to_line_list(): from functions.code_to_line_list.code_to_line_list import test path = 'functions/code_to_line_list/test_set/' tests = read_file(path + 'test_list') test_list = tests.split('\n') for test_name in test_list: if test_name == '': continue test_data = get_test(path + test_name) test_output = test_data['output'].split('\n') print "TEST:", test_name test(test_data['input'], test_output) return False
def code_to_line_list(): from functions.code_to_line_list.code_to_line_list import test path = 'functions/code_to_line_list/test_set/' tests = read_file(path+'test_list') test_list = tests.split('\n') for test_name in test_list: if test_name == '':continue test_data = get_test(path+test_name) test_output = test_data['output'].split('\n') print "TEST:", test_name test(test_data['input'], test_output) return False
def divide_line(target='',detail=0): from general.divide_line.divide_line import test path = 'general/divide_line/test_set/' tests = read_file(path+'test_list') test_list = get_test_list(tests) if target != '': test_list = [target] for test_name in test_list: test_data = get_test_data(path+test_name,['test_input','test_output']) print "TEST:", test_name flag = test(test_data, detail=detail) if flag == False: return False return True
def split_into_block_and_code(target=''): from functions.split_into_block_and_code.split_into_block_and_code import test path = 'functions/split_into_block_and_code/test_set/' tests = read_file(path+'test_list') test_list = tests.split('\n') if target != '': test_list = [target] for test_name in test_list: if len(test_name) > 0 and test_name[0] == '#':continue if test_name == '':continue test_data = get_test_data(path+test_name, ['test_input','test_output']) print "TEST:", test_name test(test_data) return True
def get_test_data(file_name, data_list=()):
    """Read a test file and split it into named sections.

    file_name: path of the test file.
    data_list: section markers to look for (e.g. ``'test_input'``). Each
        marker is located at its first occurrence in the file.

    A section's name is the text from its marker up to the next ``:``; its
    content runs from just after that colon to the start of the next located
    marker, or to the end of the file for the last one. Tabs in the content
    are replaced and the result is passed through
    ``remove_space_after_line``.

    Returns a dict mapping section name to section content. Markers that do
    not occur in the file produce no entry.
    """
    text = read_file(file_name)

    # Start offsets of the markers, in file order. str.find reports a missing
    # marker as -1; slicing from -1 would create a bogus entry, so skip those.
    starts = sorted(
        pos for pos in (text.find(marker) for marker in data_list)
        if pos != -1
    )

    test_data = {}
    for i, start in enumerate(starts):
        colon = text.find(':', start + 1)
        name = text[start:colon]
        # The last section extends to the end of the file.
        end = starts[i + 1] if i + 1 < len(starts) else len(text)
        test_data[name] = text[colon + 1:end]

    # tab to space, then remove space after line
    for name in test_data:
        test_data[name] = remove_space_after_line(
            test_data[name].replace('\t', ' '))
    return test_data
def split_into_block_and_code(target=''): from functions.split_into_block_and_code.split_into_block_and_code import test path = 'functions/split_into_block_and_code/test_set/' tests = read_file(path + 'test_list') test_list = tests.split('\n') if target != '': test_list = [target] for test_name in test_list: if len(test_name) > 0 and test_name[0] == '#': continue if test_name == '': continue test_data = get_test_data(path + test_name, ['test_input', 'test_output']) print "TEST:", test_name test(test_data) return True
def divide_line(target='', detail=0): from general.divide_line.divide_line import test path = 'general/divide_line/test_set/' tests = read_file(path + 'test_list') test_list = get_test_list(tests) if target != '': test_list = [target] for test_name in test_list: test_data = get_test_data(path + test_name, ['test_input', 'test_output']) print "TEST:", test_name flag = test(test_data, detail=detail) if flag == False: return False return True
def get_test_data(file_name, data_list=()):
    """Read a test file and split it into named sections.

    file_name: path of the test file.
    data_list: section markers to look for (e.g. ``'test_input'``). Each
        marker is located at its first occurrence in the file.

    A section's name is the text from its marker up to the next ``:``; its
    content runs from just after that colon to the start of the next located
    marker, or to the end of the file for the last one. Tabs in the content
    are replaced and the result is passed through
    ``remove_space_after_line``.

    Returns a dict mapping section name to section content. Markers that do
    not occur in the file produce no entry.
    """
    text = read_file(file_name)

    # Start offsets of the markers, in file order. str.find reports a missing
    # marker as -1; slicing from -1 would create a bogus entry, so skip those.
    starts = sorted(
        pos for pos in (text.find(marker) for marker in data_list)
        if pos != -1
    )

    test_data = {}
    for i, start in enumerate(starts):
        colon = text.find(':', start + 1)
        name = text[start:colon]
        # The last section extends to the end of the file.
        end = starts[i + 1] if i + 1 < len(starts) else len(text)
        test_data[name] = text[colon + 1:end]

    # tab to space, then remove space after line
    for name in test_data:
        test_data[name] = remove_space_after_line(
            test_data[name].replace('\t', ' '))
    return test_data
def create_node_interstage(definitions: dict, node: BlogPost) -> None:
    """Write the interstage file for ``node``.

    The interstage is the node's markdown with references to other nodes
    mixed in, using the syntax ``[word]({{ post: X }})``. It is read from
    ``node.markdown_path`` and written to ``node.interstage_path``.

    ``definitions`` maps a word to a ``(regex, target_node_id)`` pair. The
    entry whose word equals ``node.name`` is skipped.
    """
    text = read_file(node.markdown_path)

    for word, (regex, target_node_id) in definitions.items():
        if word == node.name:
            continue

        def link_or_keep(match):
            # A match whose group 1 is empty or missing is put back
            # unchanged; only a captured word is turned into a reference.
            captured = match.group(1)
            if captured:
                return rf"[{captured}]({{{{ post: {target_node_id} }}}})"
            return match.group(0)

        text = regex.sub(link_or_keep, text)

    write_file(node.interstage_path, text)
def spellcheck() -> None:
    """Print the words of every blog post that neither checker knows.

    Each line of a post's markdown is split into words (hyphens count as
    separators, every character outside ``a-z``/``A-Z`` is dropped). A word
    is reported when both the English and the German checker list it as
    unknown. ``done()`` is called once all posts have been processed.
    """
    english = SpellChecker(language="en")
    german = SpellChecker(language="de")

    for post in db.query(BlogPost):
        lines = read_file(post.markdown_path).splitlines()
        for line_number, line in enumerate(lines, start=1):
            candidates = []
            for token in line.replace("-", " ").split():
                letters = re.sub(r"[^a-zA-Z ]", "", token)
                # Tokens without any letters leave nothing to check.
                if letters:
                    candidates.append(letters)

            not_english = english.unknown(candidates)
            not_german = german.unknown(candidates)
            for unknown_word in not_english:
                if unknown_word in not_german:
                    print(f"In \"{post.name}\" (line {line_number}): Unknown word \"{unknown_word}\".")

    done()
def crawl_pages(subcats):
    """Collect the pages of every subcategory, using a per-subcategory cache.

    For each subcategory a directory below ``data/site/<start_cat>/`` is
    created and its ``pages.txt`` is read when it exists; otherwise the pages
    are fetched with ``get_subcat_pages`` and written to that file.

    Returns the collected pages accepted by ``lang.can_page``, with
    duplicates dropped and first-seen order kept.
    """
    base_dir = "data/site/%s/" % config.start_cat
    collected = []
    for position, subcat in enumerate(subcats, 1):
        # presumably a progress indicator: (items done, items total)
        pb.update(position, len(subcats))
        subcat_dir = base_dir + subcat + "/"
        misc.mkdir_p(subcat_dir)
        cache_file = subcat_dir + "pages.txt"
        if not os.path.exists(cache_file):
            fetched = get_subcat_pages(subcat)
            misc.write_file(cache_file, fetched)
        else:
            fetched = misc.read_file(cache_file)
        collected.extend(fetched)

    kept = []
    for page in collected:
        if lang.can_page(page):
            kept.append(page)

    # OrderedDict keys are unique and remember insertion order.
    return OrderedDict.fromkeys(kept).keys()
) print(f"回测图像输出到{gdir}") # return res_pac, gdir return wrapper_df else: # return res_pac return wrapper_df if __name__ == "__main__": constant.reset_params() constant.check_dir() start_time = constant.BEGIN_DATE end_time = constant.END_DATE # 读入对象 codes = read_file(constant.CODE_FILE) # 创建全局回测对象 # global_backtest_obj = GlobalBacktest(start_time=start_time, # end_time=end_time) # global_backtest_obj.run_backtest(global_index="000300.SH", # func=runbacktest, # key_params=dict(begin=start_time, end=end_time, # dir=constant.GRAPH)) for code in codes: runbacktest( begin=start_time, end=end_time, dir=constant.GRAPH, codename=code, )
def convert_markdown_for_post(post: BlogPost) -> None:
    """Render a post's interstage markdown to HTML.

    The text at ``post.interstage_path`` is run through
    ``pre_process_markdown``, converted with ``markdown.markdown`` and
    written to ``post.html_path``.
    """
    interstage_text = read_file(post.interstage_path)
    prepared = pre_process_markdown(interstage_text, post)
    enabled_extensions = ["sane_lists", "md_in_html", "extra"]
    rendered = markdown.markdown(prepared, extensions=enabled_extensions)
    write_file(post.html_path, rendered)