def load_all(dataset_dir, doc_ids=None, filter_types=None, filter_senses=None):
    """Load the whole CoNLL16st dataset, keyed by document id.

    Returns a tuple of per-document, per-token, and per-relation mappings
    extracted from the parse, raw-text, and gold-relation files found in
    ``dataset_dir``.
    """
    # Read the provided files without modification.
    parses = load_parses(dataset_dir, doc_ids=doc_ids)
    doc_ids = sorted(parses)
    raws = load_raws(dataset_dir, doc_ids=doc_ids)
    relations_gold = load_relations_gold(dataset_dir, doc_ids=doc_ids, with_senses=True, filter_types=filter_types, filter_senses=filter_senses)
    # Fall back to the sense-less relation file only when no sense-annotated
    # relations were found.
    relationsnos_gold = relations_gold or load_relations_gold(dataset_dir, doc_ids=doc_ids, with_senses=False, filter_types=filter_types, filter_senses=filter_senses)

    # Data keyed by document id and token id.
    words = get_words(parses)
    pos_tags = get_pos_tags(parses)
    word_metas = get_word_metas(parses, raws)

    # Data keyed by document id and token id pairs.
    dependencies = get_dependencies(parses)

    # Data keyed by document id.
    parsetrees = get_parsetrees(parses)

    # Data keyed by relation id.
    rel_parts = get_rel_parts(relationsnos_gold)
    rel_ids = sorted(rel_parts)
    rel_types = get_rel_types(relations_gold)
    rel_senses = get_rel_senses(relations_gold)

    # Attach relation type/sense tags onto the word metadata in place.
    add_relation_tags(word_metas, rel_types, rel_senses)

    return doc_ids, words, word_metas, pos_tags, dependencies, parsetrees, rel_ids, rel_parts, rel_types, rel_senses, relations_gold
def extract_svm_features(vidx, data):
    """Build a binary bag-of-words feature matrix for the SVM.

    One row per entry of ``data`` and one column per vocabulary word in
    ``vidx`` (word -> column index); cell (i, j) is 1.0 when word j occurs
    in ``data.BodyMarkdown[i]``.  Returns a CSR sparse matrix.
    """
    # LIL is efficient for incremental cell assignment; convert to CSR at the end.
    features = scipy.sparse.lil_matrix((len(data), len(vidx)))
    n_rows = len(data.BodyMarkdown)
    for row in range(n_rows):
        for token in get_words(data.BodyMarkdown[row]):
            col = vidx.get(token)
            if col is not None:
                features[row, col] = 1.0
    return scipy.sparse.csr_matrix(features)
def extract_words_from_row(row):
    """Turn a single data row into a flat list of words.

    Joins the row's Title, BodyMarkdown, and those of the Tag1..Tag5
    columns that pass ``istag``, then tokenizes the combined text with
    ``get_words``.
    """
    title = row["Title"]
    body = row["BodyMarkdown"]
    # Tag1..Tag5 may hold missing/placeholder values; istag filters them out.
    tags = " ".join(filter(istag, [row["Tag%d" % t] for t in range(1, 6)]))
    # NOTE(review): removed an unused local — the original read row["PostId"]
    # into `postid` but never used it.
    text = " ".join([title, body, tags])
    return get_words(text)
def get_svm_word_indices(vidx, text):
    """Return a 1 x len(vidx) sparse binary indicator row for the words of ``text``.

    Entry j is 1.0 iff vocabulary word j (per the word -> column mapping
    ``vidx``) occurs in the tokenization of ``text``.
    """
    # Fix: the dtype was written `numpy.double` while the array was created via
    # the `np` alias — `numpy` is not bound under that name, raising NameError.
    x = np.zeros(len(vidx), dtype=np.double)
    for w in get_words(text):
        if w in vidx:
            x[vidx[w]] = 1.0
    return scipy.sparse.csr_matrix(x)
#filename = 'scan\\Jablanica0096_crop.tif' #filename = 'scan\\paragraph_635.tif' #filename = 'scan\\problem_italic.tif' #filename = 'scan\\paragraph_265.tif' #filename = 'scan\\test1.tif' #filename = 'scan\\Dubrovnik0018_crop.tif' #filename = 'scan\\Jablanica0096_crop.tif' #filename = 'scan\\JuznaSrbija0052_crop.tif' #filename = 'scan\\Piva0015_crop.tif' #filename = 'scan\\Proscenje0064_crop.tif' #filename = 'scan\\Uskoci0080_crop.tif' #filename = 'scan\\Vasojevici0033_crop.tif' #filename = 'scan\\line_165.tif' #filename = 'scan\\line_177.tif' filename = 'scan\\Vasojevici0033_crop.tif' cl.clean_all() wp.get_words(filename) el.extract_letters(filename) rl.replace_diacritics(filename) cmp.compare(filename, 'initial') script_end = datetime.datetime.now() print(script_end - script_start)
# Console hangman game (user-facing strings are in Norwegian).
# NOTE(review): this snippet appears truncated — the final `if` has no body.
import os
import words

# Clear the terminal; command depends on the platform.
clear = lambda: os.system("cls" if os.name == "nt" else "clear")

# The word to guess, as a list of single-character strings.
secret_word = list(words.get_words())
# Display state: one "___" placeholder per letter, revealed as guessed.
hangman_word = ["___" for letter in secret_word]
wrong_guesses = []
num_guesses = 10

# Loop until the player runs out of guesses or reveals the whole word.
while num_guesses > 0 and hangman_word != secret_word:
    print(" ".join(hangman_word))
    print(f"Du har {num_guesses} forsøk igjen.")
    print("\nDu har brukt: " + " ".join(wrong_guesses))
    guessed_letter = input("Guess a letter: ").lower()
    # Ignore letters already revealed or already tried; re-prompt at no cost.
    if (guessed_letter in hangman_word) or (guessed_letter in wrong_guesses):
        continue
    # Reveal every occurrence of a correct guess.
    for index, letter in enumerate(secret_word):
        if guessed_letter == letter:
            hangman_word[index] = guessed_letter
    # A wrong guess is recorded and costs one attempt.
    if guessed_letter not in secret_word:
        wrong_guesses.append(guessed_letter)
        num_guesses -= 1
    clear()
if hangman_word == secret_word:
# Demo: several ways to print emoji in Python (escape codes, \N{} names,
# and a literal character).
#See more at https://www.geeksforgeeks.org/python-program-to-print-emojis/
import words

print(words.get_words())

# grinning face
print("\U0001f600")
# grimacing face (\N{...} looks the character up by its Unicode name)
print("\N{grimacing face}")
# crying face, as a literal character
print("😢")
# grinning squinting face
print("\U0001F606")
# rolling on the floor laughing
# Fix: removed a stray trailing backslash line-continuation after this call —
# it served no purpose and risked a SyntaxError.
print("\U0001F923")
#face with rolling eyes
print("\N{face with rolling eyes}")
# pleading face (u-prefix dropped: redundant in Python 3)
print("\U0001F97A")