def test_permutation_decoder():
    gutenberg = open_data("EN-text/gutenberg.txt").read()
    flatland = open_data("EN-text/flatland.txt").read()

    pd = PermutationDecoder(canonicalize(gutenberg))
    assert pd.decode('aba') in ('ece', 'ete', 'tat', 'tit', 'txt')

    pd = PermutationDecoder(canonicalize(flatland))
    assert pd.decode('aba') in ('ded', 'did', 'ece', 'ele', 'eme', 'ere',
                                'eve', 'eye', 'iti', 'mom', 'ses', 'tat', 'tit')
def test_samples():
    story = open_data("EN-text/flatland.txt").read()
    story += open_data("EN-text/gutenberg.txt").read()
    wordseq = words(story)

    P1 = UnigramWordModel(wordseq)
    P2 = NgramWordModel(2, wordseq)
    P3 = NgramWordModel(3, wordseq)

    s1 = P1.samples(10)
    s2 = P2.samples(10)
    s3 = P3.samples(10)

    assert len(s1.split(' ')) == 10
    assert len(s2.split(' ')) == 10
    assert len(s3.split(' ')) == 10
def upload():
    if request.method == 'POST':
        training = request.files['training']
        predict = request.files['predict']
        if training and allowed_file(training.filename) and \
                predict and allowed_file(predict.filename):
            # Save the uploaded training and prediction files
            filename = secure_filename(training.filename)
            training.save(os.path.join(app.config['UPLOAD_FOLDER'], filename))
            predict_name = secure_filename(predict.filename)
            predict.save(os.path.join(app.config['UPLOAD_FOLDER'], predict_name))

            # Columns to drop: user-supplied labels plus the target and id columns
            useless_label = request.form['label'].strip().split(',')
            stock_id = request.form['id']
            target = request.form['target']
            if target not in useless_label:
                useless_label.append(target)
            if stock_id not in useless_label:
                useless_label.append(stock_id)

            train_data = utils.open_data(
                os.path.join(app.config['UPLOAD_FOLDER'], filename))
            predict_data = utils.open_data(
                os.path.join(app.config['UPLOAD_FOLDER'], predict_name))
            task_name = str(request.form['task'] + time.strftime("%m%d%Y%H%M")).strip()

            train_data_col = train_data.columns.tolist()
            predict_data_col = predict_data.columns.tolist()
            for i in useless_label:
                if ' ' not in i and len(i) > 0:
                    try:
                        train_data_col.remove(i)
                        predict_data_col.remove(i)
                    except ValueError:
                        pass
            if not utils.check_colums(train_data_col, predict_data_col):
                print("error")

            # Record the task and run training in a background thread
            new_training = trainning_state(taskName=task_name, state="still training")
            new_training.save()
            training = threading.Thread(target=process.main,
                                        args=(train_data, predict_data, target,
                                              train_data_col, task_name, stock_id))
            training.start()
            return redirect("/")
    return ""
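# `allowed_file` is used above but not shown here. A minimal sketch of the
# usual Flask-style extension check (an assumption: the extension set and the
# helper body are illustrative, not this repository's actual code):
ALLOWED_EXTENSIONS = {'csv', 'xls', 'xlsx'}

def allowed_file(filename):
    # Accept only filenames whose extension is in the whitelist
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS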
def __init__(self, examples=None, attrs=None, attrnames=None, target=-1,
             inputs=None, values=None, distance=mean_boolean_error,
             name='', source='', exclude=()):
    """Accepts any of DataSet's fields. Examples can also be a
    string or file from which to parse examples using parse_csv.
    Optional parameter: exclude, as documented in .setproblem().
    >>> DataSet(examples='1, 2, 3')
    <DataSet(): 1 examples, 3 attributes>
    """
    self.name = name
    self.source = source
    self.values = values
    self.distance = distance
    if values is None:
        self.got_values_flag = False
    else:
        self.got_values_flag = True

    # Initialize .examples from string or list or data directory
    if isinstance(examples, str):
        self.examples = parse_csv(examples)
    elif examples is None:
        self.examples = parse_csv(open_data(name + '.csv').read())
    else:
        self.examples = examples

    # Attrs are the indices of examples, unless otherwise stated.
    if attrs is None and self.examples is not None:
        attrs = list(range(len(self.examples[0])))
    self.attrs = attrs

    # Initialize .attrnames from string, list, or by default
    if isinstance(attrnames, str):
        self.attrnames = attrnames.split()
    else:
        self.attrnames = attrnames or attrs
    self.setproblem(target, inputs=inputs, exclude=exclude)
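# When `examples` is omitted, the constructor above falls back to
# open_data(name + '.csv'), so a dataset can be loaded by name alone.
# A short usage sketch (assuming an 'iris.csv' is reachable via open_data):
iris = DataSet(name='iris')     # parses iris.csv through open_data()
print(len(iris.examples))       # number of parsed rows
print(iris.attrs)               # attribute indices, e.g. [0, 1, 2, 3, 4]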
def test_viterbi_segmentation():
    flatland = open_data("EN-text/flatland.txt").read()
    wordseq = words(flatland)
    P = UnigramWordModel(wordseq)
    text = "itiseasytoreadwordswithoutspaces"

    s, p = viterbi_segment(text, P)
    assert s == ['it', 'is', 'easy', 'to', 'read',
                 'words', 'without', 'spaces']
def test_text_models():
    flatland = open_data("EN-text/flatland.txt").read()
    wordseq = words(flatland)
    P1 = UnigramWordModel(wordseq)
    P2 = NgramWordModel(2, wordseq)
    P3 = NgramWordModel(3, wordseq)

    # Test top
    assert P1.top(5) == [(2081, 'the'), (1479, 'of'), (1021, 'and'),
                         (1008, 'to'), (850, 'a')]

    assert P2.top(5) == [(368, ('of', 'the')), (152, ('to', 'the')),
                         (152, ('in', 'the')), (86, ('of', 'a')),
                         (80, ('it', 'is'))]

    assert P3.top(5) == [(30, ('a', 'straight', 'line')),
                         (19, ('of', 'three', 'dimensions')),
                         (16, ('the', 'sense', 'of')),
                         (13, ('by', 'the', 'sense')),
                         (13, ('as', 'well', 'as'))]

    # Test isclose
    assert isclose(P1['the'], 0.0611, rel_tol=0.001)
    assert isclose(P2['of', 'the'], 0.0108, rel_tol=0.01)
    assert isclose(P3['so', 'as', 'to'], 0.000323, rel_tol=0.001)

    # Test cond_prob.get
    assert P2.cond_prob.get(('went',)) is None
    assert P3.cond_prob['in', 'order'].dictionary == {'to': 6}

    # Test dictionary
    test_string = 'unigram'
    wordseq = words(test_string)
    P1 = UnigramWordModel(wordseq)
    assert P1.dictionary == {('unigram'): 1}

    test_string = 'bigram text'
    wordseq = words(test_string)
    P2 = NgramWordModel(2, wordseq)
    assert P2.dictionary == {('bigram', 'text'): 1}

    test_string = 'test trigram text here'
    wordseq = words(test_string)
    P3 = NgramWordModel(3, wordseq)
    assert ('test', 'trigram', 'text') in P3.dictionary
    assert ('trigram', 'text', 'here') in P3.dictionary
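# The isclose checks above compare against relative word frequencies:
# P1['the'] is roughly count('the') / total tokens (about 6.1% of Flatland).
# A minimal unigram sketch of that idea (illustrative only -- not necessarily
# how UnigramWordModel is implemented):
from collections import Counter

class SimpleUnigram:
    def __init__(self, wordseq):
        self.counts = Counter(wordseq)   # raw word counts
        self.total = len(wordseq)        # total number of tokens

    def __getitem__(self, word):
        # Probability of a single word under the unigram model
        return self.counts[word] / self.total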
def test_shift_decoding():
    flatland = open_data("EN-text/flatland.txt").read()
    ring = ShiftDecoder(flatland)
    msg = ring.decode('Kyzj zj r jvtivk dvjjrxv.')
    assert msg == 'This is a secret message.'
def test_text_models():
    flatland = open_data("EN-text/flatland.txt").read()
    wordseq = words(flatland)
    P1 = UnigramTextModel(wordseq)
    P2 = NgramTextModel(2, wordseq)
    P3 = NgramTextModel(3, wordseq)

    # The most frequent entries in each model
    assert P1.top(10) == [(2081, 'the'), (1479, 'of'), (1021, 'and'),
                          (1008, 'to'), (850, 'a'), (722, 'i'), (640, 'in'),
                          (478, 'that'), (399, 'is'), (348, 'you')]

    assert P2.top(10) == [(368, ('of', 'the')), (152, ('to', 'the')),
                          (152, ('in', 'the')), (86, ('of', 'a')),
                          (80, ('it', 'is')), (71, ('by', 'the')),
                          (68, ('for', 'the')), (68, ('and', 'the')),
                          (62, ('on', 'the')), (60, ('to', 'be'))]

    assert P3.top(10) == [(30, ('a', 'straight', 'line')),
                          (19, ('of', 'three', 'dimensions')),
                          (16, ('the', 'sense', 'of')),
                          (13, ('by', 'the', 'sense')),
                          (13, ('as', 'well', 'as')),
                          (12, ('of', 'the', 'circles')),
                          (12, ('of', 'sight', 'recognition')),
                          (11, ('the', 'number', 'of')),
                          (11, ('that', 'i', 'had')),
                          (11, ('so', 'as', 'to'))]

    assert isclose(P1['the'], 0.0611, rel_tol=0.001)
    assert isclose(P2['of', 'the'], 0.0108, rel_tol=0.01)
    assert isclose(P3['', '', 'but'], 0.0, rel_tol=0.001)
    assert isclose(P3['so', 'as', 'to'], 0.000323, rel_tol=0.001)

    assert P2.cond_prob.get(('went',)) is None
    assert P3.cond_prob['in', 'order'].dictionary == {'to': 6}

    test_string = 'unigram'
    wordseq = words(test_string)
    P1 = UnigramTextModel(wordseq)
    assert P1.dictionary == {('unigram'): 1}

    test_string = 'bigram text'
    wordseq = words(test_string)
    P2 = NgramTextModel(2, wordseq)
    assert (P2.dictionary == {('', 'bigram'): 1, ('bigram', 'text'): 1} or
            P2.dictionary == {('bigram', 'text'): 1, ('', 'bigram'): 1})

    test_string = 'test trigram text'
    wordseq = words(test_string)
    P3 = NgramTextModel(3, wordseq)
    assert ('', '', 'test') in P3.dictionary
    assert ('', 'test', 'trigram') in P3.dictionary
    assert ('test', 'trigram', 'text') in P3.dictionary
    assert len(P3.dictionary) == 3
def test_rot13_decoding():
    flatland = open_data("EN-text/flatland.txt").read()
    ring = ShiftDecoder(flatland)
    msg = ring.decode(rot13('Hello, world!'))
    assert msg == 'Hello, world!'
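# rot13 shifts every letter 13 places, so encoding once and then decoding with
# the shift decoder should recover the original text. A self-contained rot13
# sketch using a translation table (illustrative -- not necessarily the rot13
# helper used by the tests above):
import string

def rot13_example(text):
    # Map each letter to the letter 13 positions later, wrapping around
    table = str.maketrans(
        string.ascii_lowercase + string.ascii_uppercase,
        string.ascii_lowercase[13:] + string.ascii_lowercase[:13] +
        string.ascii_uppercase[13:] + string.ascii_uppercase[:13])
    return text.translate(table)

assert rot13_example('Hello, world!') == 'Uryyb, jbeyq!'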
def build():
    # STEP: clone FruityMod
    if not os.path.exists(mod_dir):
        print("Downloading {}".format("FruityMod"))
        fruity_url = r"https://github.com/gskleres/FruityMod-StS/archive/v0.6.2b.zip"
        utils.mkdir("cache")
        download_file = tempfile.NamedTemporaryFile(suffix=".zip", dir="cache",
                                                    delete=False).name
        with urllib.request.urlopen(fruity_url) as response, \
                open(download_file, "wb") as out_file:
            shutil.copyfileobj(response, out_file)
        utils.unzip(download_file, mod_dir, shift=1, remove=True)

    # STEP: fetch libs
    mod_jar = os.path.join(spire_dir, "ModTheSpire.jar")
    if not os.path.exists(mod_jar):
        print("Downloading ModTheSpire")
        download_file = tempfile.NamedTemporaryFile(suffix=".zip", dir="..",
                                                    delete=False).name
        urllib.request.urlretrieve(
            "https://github.com/kiooeht/ModTheSpire/releases/download/v2.6.0/ModTheSpire.zip",
            download_file)
        with zipfile.ZipFile(download_file, "r") as archive, \
                open(mod_jar, "wb") as file:
            jar_data = archive.read("ModTheSpire.jar")
            file.write(jar_data)
        os.remove(download_file)

    base_jar = os.path.join(spire_dir, "mods", "BaseMod.jar")
    if not os.path.exists(base_jar):
        print("Downloading BaseMod")
        urllib.request.urlretrieve(
            "https://github.com/daviscook477/BaseMod/releases/download/v2.9.1/BaseMod.jar",
            base_jar)

    from spire import name_id
    import textwrap
    import io
    import json

    print("Generating data")
    image_dir = os.path.join("assets", "images")
    if os.path.exists(os.path.join("cache", "DEBUG")):
        image_dir = os.path.join("todo", "images")

    # STEP: generate cards
    from engi_mod import cards
    with open(os.path.join("templates", "card.java"), encoding="utf-8") as file:
        card_template = file.read()
    for card in cards:
        with open(os.path.join(mod_dir,
                               *r"src\main\java\fruitymod\cards".split("\\"),
                               name_id(card["name"]) + ".java"),
                  "w", encoding="utf-8") as file:
            file.write(format(card_template, card))

    # STEP: patch code
    templates_cache = os.path.join("cache", "templates")
    if not os.path.exists(templates_cache):
        utils.mkdir(templates_cache)
        shutil.copy(
            os.path.join(mod_dir,
                         *r"src\main\java\fruitymod\FruityMod.java".split("\\")),
            os.path.join(templates_cache, "FruiyMod.java"))
        shutil.copy(
            os.path.join(mod_dir,
                         *r"src\main\java\fruitymod\characters\TheSeeker.java".split("\\")),
            os.path.join(templates_cache, "TheSeeker.java"))
        shutil.copy(
            os.path.join(mod_dir,
                         *r"src\main\resources\localization\FruityMod-CardStrings.json".split("\\")),
            os.path.join(templates_cache, "FruityMod-CardStrings.json"))

    image_code = io.StringIO()
    add_code = io.StringIO()
    unlock_code = io.StringIO()
    for card in cards:
        id = name_id(card["name"], upper=True).lower()
        image_file = os.path.join(image_dir, id + ".png")
        image_file = "cards/{}.png".format(
            id if os.path.exists(image_file) else "runic_binding")
        image_code.write(format(
            'public static final String {{ name_id(card["name"], upper=True) }} = "{{ image_file }}";') + "\n")
        if card["rarity"] != "special":
            add_code.write(format(
                'BaseMod.addCard(new {{ name_id(card["name"]) }}());') + "\n")
            unlock_code.write(format(
                'UnlockTracker.unlockCard("{{ card["name"] }}");') + "\n")

    with open(os.path.join(templates_cache, "FruiyMod.java"), encoding="utf-8") as file:
        fruity_lines = [line for line in file]
    for i, line in enumerate(fruity_lines):
        if "public static final String PHASE_COIL" in line:
            fruity_lines.insert(
                i + 1, "\n" + textwrap.indent(image_code.getvalue(), " " * 4))
            break
    for i, line in enumerate(fruity_lines):
        if "BaseMod.addCard(new Nexus())" in line:
            fruity_lines.insert(
                i + 1, "\n" + textwrap.indent(add_code.getvalue(), " " * 4 * 2))
            fruity_lines.insert(
                i + 2, "\n" + textwrap.indent(unlock_code.getvalue(), " " * 4 * 2))
            break
    with open(os.path.join(mod_dir,
                           *r"src\main\java\fruitymod\FruityMod.java".split("\\")),
              "w", encoding="utf-8") as file:
        file.write("".join(fruity_lines))

    with open(os.path.join(templates_cache, "TheSeeker.java"), encoding="utf-8") as file:
        seeker_lines = [line for line in file]

    # STEP: starting relic
    from engi_mod import relic
    for i, line in enumerate(seeker_lines):
        if "Arcanosphere" in line:
            del seeker_lines[i:i + 2]
            seeker_lines.insert(i, "\n{}\n\n".format(
                textwrap.indent(textwrap.dedent(format("""
                    retVal.add("{{ relic }}");
                    UnlockTracker.markRelicAsSeen("{{ relic }}");
                """)).strip(), " " * 4 * 2)))
            break

    # STEP: starting deck
    from engi_mod import deck
    if not deck:
        deck = [card["name"] for card in cards if card["rarity"] != "special"]
    for i, line in enumerate(seeker_lines):
        if "Strike_P" in line:
            for j, line in enumerate(seeker_lines):
                if "AstralHaze" in line:
                    break
            del seeker_lines[i:j + 1]
            seeker_lines.insert(i, "\n{}\n\n".format(
                textwrap.indent("\n".join('retVal.add("{}");'.format(card)
                                          for card in deck), " " * 4 * 2)))
            break
    with open(os.path.join(mod_dir,
                           *r"src\main\java\fruitymod\characters\TheSeeker.java".split("\\")),
              "w", encoding="utf-8") as file:
        file.write("".join(seeker_lines))

    card_strings = json.load(
        open(os.path.join(templates_cache, "FruityMod-CardStrings.json"),
             encoding="utf-8"))
    for card in cards:
        data = {
            "NAME": card["name"],
            "DESCRIPTION": card["desc"],
        }
        desc = card.get("upgrade_desc")
        if desc:
            data["UPGRADE_DESCRIPTION"] = desc
        card_strings[card["name"]] = data
    json.dump(
        card_strings,
        open(os.path.join(mod_dir,
                          *r"src\main\resources\localization\FruityMod-CardStrings.json".split("\\")),
             "w", encoding="utf-8"),
        sort_keys=True, indent=4)

    # STEP: generate powers
    from engi_mod import powers
    with open(os.path.join("templates", "power.java"), encoding="utf-8") as file:
        power_template = file.read()
    for power in powers:
        with open(os.path.join(mod_dir,
                               *r"src\main\java\fruitymod\powers".split("\\"),
                               power["id"] + ".java"),
                  "w", encoding="utf-8") as file:
            file.write(format(power_template, power))

    # STEP: generate actions
    from engi_mod import actions
    with open(os.path.join("templates", "action.java"), encoding="utf-8") as file:
        action_template = file.read()
    for action in actions:
        with open(os.path.join(mod_dir,
                               *r"src\main\java\fruitymod\actions\unique".split("\\"),
                               action["id"] + ".java"),
                  "w", encoding="utf-8") as file:
            file.write(format(action_template, action))

    # STEP: generate java files
    from engi_mod import javas
    with open(os.path.join("templates", "java.java"), encoding="utf-8") as file:
        java_template = file.read()
    for java in javas:
        with open(os.path.join(mod_dir, *r"src\main\java".split("\\"),
                               *java["package"], java["name"] + ".java"),
                  "w", encoding="utf-8") as file:
            file.write(format(java_template, java))

    # STEP: card images
    print("Generating images")
    import numpy as np
    portrait_masks = {}
    for type in "attack skill power".split():
        image = utils.open_data(
            os.path.join("templates", "1024Portraits_{}_mask.png".format(type)))
        image = image / 255
        image = np.repeat(image[:, :, :1], 4, axis=-1)
        portrait_masks[type] = image
    for card in cards:
        id = name_id(card["name"], upper=True).lower()
        image_file = os.path.join(image_dir, id + ".png")
        target_p_file = os.path.join(mod_dir,
                                     *r"src\main\resources\img\cards".split("\\"),
                                     id + "_p" + ".png")
        target_file = os.path.join(mod_dir,
                                   *r"src\main\resources\img\cards".split("\\"),
                                   id + ".png")
        if os.path.exists(target_p_file):
            continue
        if os.path.exists(image_file):
            image = utils.open_data(image_file)
            from skimage.transform import resize
            target = 500, 380
            r = image.shape[0] / image.shape[1]
            if r >= target[0] / target[1]:
                size = np.ceil(target[1] * r).astype("int"), target[1]
                x = np.round((size[0] - target[0]) / 2).astype("int")
                image = resize(image, size, mode="edge")[x:x + target[0]]
            else:
                size = target[0], np.ceil(target[0] / r).astype("int")
                image = resize(image, size, mode="edge")[:, :target[1]]
            image *= portrait_masks[card["type"]]
            from PIL import Image
            img = Image.fromarray(
                np.round(image * 255).astype("uint8").transpose((1, 0, 2)))
            img.save(target_p_file)
            target = 250, 190
            image = resize(image, target, mode="edge")
            img = Image.fromarray(
                np.round(image * 255).astype("uint8").transpose((1, 0, 2)))
            img.save(target_file)

    # STEP: card borders
    utils.sync(
        os.path.join("assets", "512"),
        os.path.join(mod_dir, *r"src\main\resources\img\512".split("\\")))
    utils.sync(
        os.path.join("assets", "1024"),
        os.path.join(mod_dir, *r"src\main\resources\img\1024".split("\\")))

    # STEP: keywords
    from engi_mod import keywords
    keyword_code = io.StringIO()
    for name, keyword in keywords.items():
        words = ", ".join('"{}"'.format(word)
                          for word in [name.lower()] + keyword["words"])
        keyword_code.write(format(
            'BaseMod.addKeyword(new String[] {"{{ name }}", {{ words }}}, "{{ keyword["desc"] }}");') + "\n")
    with open(os.path.join(mod_dir,
                           *r"src\main\java\fruitymod\FruityMod.java".split("\\")),
              encoding="utf-8") as file:
        fruity_lines = [line for line in file]
    for i, line in enumerate(fruity_lines):
        if '{"intangible", "Intangible"}, "All damage and HP loss you suffer is reduced to 1."' in line:
            fruity_lines.insert(
                i + 1, "\n" + textwrap.indent(keyword_code.getvalue(), " " * 4 * 2))
            break
    with open(os.path.join(mod_dir,
                           *r"src\main\java\fruitymod\FruityMod.java".split("\\")),
              "w", encoding="utf-8") as file:
        file.write("".join(fruity_lines))

    # STEP: mod info
    old_info = os.path.join(mod_dir,
                            *r"src\main\resources\ModTheSpire.config".split("\\"))
    if os.path.exists(old_info):
        os.remove(old_info)
    from engi_mod import info
    json.dump(
        info,
        open(os.path.join(mod_dir,
                          *r"src\main\resources\ModTheSpire.json".split("\\")),
             "w", encoding="utf-8"),
        indent=4)

    # STEP: maven project
    pom_template = os.path.join(templates_cache, "pom.xml")
    if not os.path.exists(pom_template):
        shutil.copy(os.path.join(mod_dir, "pom.xml"), pom_template)
    with open(pom_template, encoding="utf-8") as file:
        pom = file.read()
    pom = pom.replace(
        "${basedir}/../lib/ModTheSpire.jar",
        "/".join(spire_dir.split(os.path.sep) + ["ModTheSpire.jar"]))
    pom = pom.replace(
        "${basedir}/../lib/BaseMod.jar",
        "/".join(spire_dir.split(os.path.sep) + ["mods", "BaseMod.jar"]))
    pom = pom.replace(
        "${basedir}/../lib/desktop-1.0.jar",
        "/".join(spire_dir.split(os.path.sep) + ["desktop-1.0.jar"]))
    jar_file = os.path.join(spire_dir, "mods", "EngiMod.jar")
    pom = pom.replace("../_ModTheSpire/mods/FruityMod.jar",
                      "/".join(jar_file.split(os.path.sep)))
    with open(os.path.join(mod_dir, "pom.xml"), "w", encoding="utf-8") as file:
        file.write(pom)

    # STEP: compile
    if os.path.exists(jar_file):
        os.remove(jar_file)
    with utils.cd(mod_dir):
        os.system("mvn package")
    if not os.path.exists(jar_file):
        print("Compilation failed")
        return

    # STEP: test
    with utils.cd(spire_dir):
        os.system("ModTheSpire.jar")
def test_parse_csv():
    Iris = open_data('iris.csv').read()
    assert parse_csv(Iris)[0] == [5.1, 3.5, 1.4, 0.2, 'setosa']
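# parse_csv turns raw CSV text into a list of rows, converting numeric fields
# to numbers and leaving the rest as strings (hence the mixed row
# [5.1, 3.5, 1.4, 0.2, 'setosa'] above). A minimal sketch of that behaviour
# (illustrative, not necessarily the library's actual implementation):
def parse_csv_example(text, delim=','):
    def num_or_str(field):
        # Try int, then float, otherwise keep the stripped string
        for cast in (int, float):
            try:
                return cast(field)
            except ValueError:
                pass
        return field.strip()

    lines = [line for line in text.splitlines() if line.strip()]
    return [[num_or_str(field) for field in line.split(delim)] for line in lines]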
def __init__(self, board=None):
    if BoggleFinder.wordlist is None:
        BoggleFinder.wordlist = Wordlist(open_data("EN-text/wordlist.txt"))
    self.found = {}
    if board:
        self.set_board(board)
with open(path, 'w', encoding='utf-8') as f:
    for i in range(len(x)):
        content = x[i] + '\t' + y[i] + '\n'
        f.write(content)


if __name__ == '__main__':
    # # "One file per class" version
    # questions, classes = open_data('data_orig/train_data.txt')
    # for i in range(len(questions)):
    #     c = classes[i]
    #     with open('data/' + c + '.txt', 'a', encoding='utf-8') as f:
    #         f.write(questions[i] + '\n')

    # Load the raw data
    x_train, c_train = open_data('data_orig/train_data.txt')
    x_dev, c_dev = open_data('data_orig/dev_data.txt')
    x_test, c_test = open_data('data_orig/test_data.txt')

    # Collect all classes, build the class list and class->index dictionary,
    # and save the class list
    if not os.path.exists('data'):
        os.mkdir('data')
    class_list = list(set(c_train))
    with open('data/class.txt', 'w', encoding='utf-8') as f:
        f.writelines(content + '\n' for content in class_list)
    class_dict = {}
    for i, item in enumerate(class_list):
        class_dict[item] = str(i)

    # Convert classes to labels, shuffle, and save
    x_train, y_train = pro_data(x_train, c_train, 'train')