def test_csv():
    """Exercise TextImporter against the two-field support file.

    Covers: initial import with malformed rows, re-import as update,
    tag preservation for unmapped tag fields, add-only mode, and
    duplicate-import mode.
    """
    deck = getEmptyDeck()
    path = unicode(os.path.join(testDir, "support/text-2fields.txt"))
    importer = TextImporter(deck, path)
    importer.initMapping()
    importer.run()
    # four problems - too many & too few fields, a missing front, and a
    # duplicate entry
    assert len(importer.log) == 5
    assert importer.total == 5
    # if we run the import again, it should update instead
    importer.run()
    assert len(importer.log) == 5
    assert importer.total == 5
    # but importing should not clobber tags if they're unmapped
    note = deck.getNote(deck.db.scalar("select id from notes"))
    note.addTag("test")
    note.flush()
    importer.run()
    note.load()
    assert note.tags == ['test']
    # if add-only mode, count will be 0
    importer.importMode = 1
    importer.run()
    assert importer.total == 0
    # and if dupes mode, will reimport everything
    assert deck.cardCount() == 5
    importer.importMode = 2
    importer.run()
    # includes repeated field
    assert importer.total == 6
    assert deck.cardCount() == 11
    deck.close()
def test_csv():
    """TextImporter round-trip test on the two-field support file.

    Differs from the sibling version only in expecting the log to
    accumulate (10 entries) after the second run.
    """
    col = getEmptyDeck()
    src = unicode(os.path.join(testDir, "support/text-2fields.txt"))
    imp = TextImporter(col, src)
    imp.initMapping()
    imp.run()
    # four problems - too many & too few fields, a missing front, and a
    # duplicate entry
    assert len(imp.log) == 5
    assert imp.total == 5
    # if we run the import again, it should update instead
    imp.run()
    assert len(imp.log) == 10
    assert imp.total == 5
    # but importing should not clobber tags if they're unmapped
    first_note = col.getNote(col.db.scalar("select id from notes"))
    first_note.addTag("test")
    first_note.flush()
    imp.run()
    first_note.load()
    assert first_note.tags == ['test']
    # if add-only mode, count will be 0
    imp.importMode = 1
    imp.run()
    assert imp.total == 0
    # and if dupes mode, will reimport everything
    assert col.cardCount() == 5
    imp.importMode = 2
    imp.run()
    # includes repeated field
    assert imp.total == 6
    assert col.cardCount() == 11
    col.close()
def vocabulous():
    """Write collected vocabulary to a CSV and import it into the Default deck.

    Uses add-only import mode (importMode = 1) with the "Basic" note type,
    then reports how many words were written and the new total card count.
    """
    csv_name = "vocabulous.csv"
    words_list, words_dict = get_entries()
    wordswritten = write_definitions(csv_name, words_list, words_dict)
    # select deck
    did = mw.col.decks.id("Default")
    mw.col.decks.select(did)
    # set note type for deck
    basic_model = mw.col.models.byName("Basic")
    default_deck = mw.col.decks.get(did)
    default_deck['mid'] = basic_model['id']
    mw.col.decks.save(default_deck)
    # import into the collection
    importer = TextImporter(mw.col, csv_name)
    importer.allowHTML = True
    importer.importMode = 1
    importer.initMapping()
    importer.run()
    # get the number of cards in the current collection, which is stored in
    # the main window
    cardCount = mw.col.cardCount()
    # show a message box
    showInfo("Added %d new words. Now %d words in total." % (wordswritten, cardCount))
def ImportToAnki(model_name, import_to_deck, *args, **kwargs):
    """Import a tab-separated text file into deck *import_to_deck*.

    The file comes from kwargs["file"] if given, otherwise a file picker
    is shown; returns early (None) if no file is chosen.  If the note
    model *model_name* does not exist, the bundled deck template file is
    imported once and the lookup retried.  Shows the importer's log in a
    text dialog when finished.

    kwargs:
        file  -- path of the file to import (optional)
        first -- mapping entry passed to TextImporter.mapping (optional)
    """
    # get file
    file = kwargs.get("file", None)
    if not file:
        file = getFile(mw, _("Import"), None, key="import", filter=Importers[0][0])
    if not file:
        return
    file = str(file)
    # check default model; fall back to importing the deck template once.
    # (Was a bare "except:": narrowed to Exception so SystemExit and
    # KeyboardInterrupt are no longer swallowed; fallback behavior is kept.)
    try:
        model = mw.col.models.byName(model_name)
        if not model:
            raise Exception("没有找到【{}】".format(model_name))
    except Exception:
        importFile(mw, settings.deck_template_file)
        try:
            model = mw.col.models.byName(model_name)
        except Exception:
            # NOTE(review): model may still be None here, in which case
            # `importer.model['did'] = did` below raises TypeError —
            # confirm the template file always provides the model.
            model = None
    importer = TextImporter(mw.col, file)
    importer.delimiter = "\t"
    importer.importMode = 0  # update mode
    importer.allowHTML = True
    importer.model = model
    # bind the target deck to both the current-deck config and the model
    did = mw.col.decks.id(import_to_deck)
    mw.col.conf['curDeck'] = did
    importer.model['did'] = did
    mw.col.decks.select(did)
    importer.mapping = [kwargs.get("first")]
    importer.run()
    mw.reset()
    txt = _("Importing complete.") + "\n"
    if importer.log:
        txt += "\n".join(importer.log)
    showText(txt)
def import_data():
    """Fetch a Google-Sheets index of decks, scrape each linked document,
    convert it to tab-separated HTML, and import it into Anki.

    For every row (deck name, title, import flag, doc URL) in the sheet:
    downloads the document, strips <script> tags, inlines CSS classes,
    parses it into importable rows, stores images in the media
    collection, and runs TextImporter in duplicate mode (importMode=2)
    against a "Basic"-model deck of that name.  Finishes by deduplicating
    and deleting empty cards, then shows a summary log.
    """
    mw.progress.start(immediate=True)
    mw.checkpoint(_("Importing..."))
    txt = ''
    url = GOOGLE_SHEETS_URL
    # Python 2 code: urllib2 / unicode().  The sheet is served as CSV.
    response = urllib2.urlopen(url)
    data = csv.reader(response)
    for entry in data:
        # assumes each CSV row has exactly 4 columns — TODO confirm
        deck_name, doc_title, import_flag, doc_url = entry
        # Exclude header row and rows explicitly flagged not to import
        if (deck_name == 'Deck Name') or (import_flag == 'FALSE'):
            continue
        # Update progress
        mw.checkpoint(_("Importing " + str(deck_name)))
        request = urllib2.urlopen(doc_url)
        soup = BeautifulSoup(request)
        # remove scripts
        for script in soup.findAll('script'):
            script.extract()
        # showText(unicode(soup))
        # inline_unicode_html = pynliner.fromString(str(soup))
        # Move class-based CSS into inline style attributes so formatting
        # survives the import.
        inline_html = ClaSS2Style(unicode(soup)).transform()
        # #replace undesirable style that hides bullet points
        # undesirable = "list-style-type:none"
        # cleaned_inline_html = string.replace(inline_unicode_html, undesirable , "")
        # clean html stling
        inline_soup = BeautifulSoup(inline_html)
        # showText(unicode(inline_soup))
        output = parse_html(inline_soup)
        if output.find("\t") == -1:
            # runs if there are no card rows detected for importing
            # (tab is the field delimiter, so no tab means no cards)
            continue
        # Write media to collection, write output to temp file
        output = write_img_to_media_col(output, doc_url)
        temp_html_path = write_output_to_html_file(output)
        # select deck by name (created on demand by decks.id)
        deck_id = mw.col.decks.id(deck_name)
        mw.col.decks.select(deck_id)
        # set model id
        model = mw.col.models.byName("Basic")
        deck = mw.col.decks.get(deck_id)
        deck['mid'] = model['id']
        mw.col.decks.save(deck)
        # import into the collection
        ti = TextImporter(mw.col, temp_html_path)
        ti.delimiter = '\t'
        ti.allowHTML = True
        ti.importMode = 2
        mw.pm.profile['importMode'] = 2
        # #check if deck model and TextImporter model matches
        if deck_id != ti.model['did']:
            ti.model['did'] = deck_id
            mw.col.models.save(ti.model)
        # run text importer
        ti.initMapping()
        ti.run()
        txt += "Import Complete for " + deck_name + ".\n"
        if ti.log:
            # manipulate log to show only necessary fields
            # (only keep "... added" lines; everything else is dropped)
            for i in ti.log:
                if i.find("added") != -1:
                    txt += i + "\n\n"
            # txt += "".join(ti.log) + "\n"
        # Cleanup: remove the per-document temp file and its directory
        os.remove(temp_html_path)
        temp_dir = os.path.dirname(temp_html_path)
        os.rmdir(temp_dir)
    mw.col.save()
    mw.reset()
    # Post-import maintenance: dedupe and drop empty cards, appending
    # their reports to the summary shown to the user.
    del_log = ''
    del_log += updated_oldest_card_and_remove_new_duplicates()
    del_log += delete_empty_cards()
    txt += del_log
    mw.progress.finish()
    showText(txt)
    mw.reset()
def do_import_notes(bcol, deck_name, data_file, note_type, delimiter="\t", import_mode=0):
    """Diff a tab-separated data file against an existing deck's notes and
    import only the new/changed ones via TextImporter.

    Args:
        bcol: wrapper object exposing .col (the Anki collection) and
            .bdecks (mapping of deck wrappers with .name and .queryNotes()).
        deck_name: name of the target deck; must exist in bcol.bdecks.
        data_file: path to a tab-separated file: subject<TAB>content[<TAB>tags].
        note_type: name of the note model to use.
        delimiter: NOTE(review): accepted but unused — the importer is
            hard-coded to "\\t" below.
        import_mode: TextImporter mode (0 update, 1 ignore, 2 add).

    Exits the process (sys.exit(1)) when a field is too long or when
    there is nothing to import.
    """
    col = bcol.col
    existingNotes = {}
    #load existing notes
    bdeck = None
    for dummy, bdk in sorted(bcol.bdecks.items()):
        if deck_name == bdk.name:
            bdeck = bdk
    assert bdeck
    notes = bdeck.queryNotes()
    for n in notes:
        note_id, note_subject, note_content, note_tags = n
        # fields are stored joined by \x1f; keep only the second field
        # for comparison — assumes a two-field note layout, TODO confirm
        note_content = note_content.split("\x1f")[1]
        note_tags = note_tags.strip()
        existingNotes[note_subject] = [
            note_id, note_subject, note_content, note_tags
        ]
    nochangeNotes = {}
    toBeImportNotes = {}
    toBeUpdatedNotes = {}
    #load data fiel
    fp = open(data_file, "r")
    for line in fp.readlines():
        line = line.strip()
        parts = line.split("\t")
        subject = parts[0]
        # NOTE(review): raises IndexError on a line with no tab — assumes
        # the data file is well-formed
        content = parts[1]
        if len(content) > int(131072 * 0.8):
            #131072 is limit of ANKI field; bail out at 80% of it
            logging.error("Content too long to import: %d, note: %s", len(content), subject)
            sys.exit(1)
        if len(parts) == 3:
            tags = parts[2]
            tags = tags.strip()
        else:
            tags = ""
        if subject in existingNotes:
            #compare content and tags
            exist_note = existingNotes[subject]
            if content == exist_note[2] and tags == exist_note[3]:
                #doesn't need to be updated
                nochangeNotes[subject] = True
                pass
            else:
                logging.info("Updated note: %s", subject)
                toBeUpdatedNotes[subject] = [
                    subject, content, tags, exist_note[0]
                ]
        else:
            logging.info("New note: %s", subject)
            toBeImportNotes[subject] = [subject, content, tags]
    fp.close()
    logging.info("%d notes wll be kept without any change", len(nochangeNotes))
    logging.info("%d notes need to be updated.", len(toBeUpdatedNotes))
    logging.info("%d notes need to be added.", len(toBeImportNotes))
    if not toBeUpdatedNotes and not toBeImportNotes:
        # nothing to do: close the collection and exit the process
        col.close()
        logging.info("No new note need to be imported! Bye!")
        sys.exit(1)
    # Rewrite the data file down to just the new/changed notes
    new_data_file = filter_import_data_file(data_file, toBeImportNotes, toBeUpdatedNotes)
    assert new_data_file
    #set current model
    logging.info("setting current deck name: %s", deck_name)
    deck_id = col.decks.id(deck_name)
    logging.info("setting current deck id: %s", deck_id)
    logging.info("setting note_type : %s", note_type)
    model = col.models.byName(note_type)
    #select deck
    col.decks.select(deck_id)
    #update deck: bind the model to the deck
    deck = col.decks.get(deck_id)
    deck['mid'] = model['id']
    col.decks.save(deck)
    #update model: bind the deck to the model and make it current
    model['did'] = deck_id
    col.models.save(model)
    col.models.setCurrent(model)
    logging.info("directly import: %s", new_data_file)
    ti = TextImporter(col, new_data_file)
    ti.allowHTML = True
    ti.needDelimiter = True
    ti.delimiter = "\t"
    ti.importMode = import_mode  #0, UPDATE_MODE; 1, IGNORE_MODE; 2, ADD_MODE
    ti.initMapping()
    ti.run()
    col.save()
    col.close()
    logging.info("Total %d imported,%d updated successfully." % (len(toBeImportNotes), len(toBeUpdatedNotes)))
    return