def foreignCards(self):
    """Build one ForeignCard per unique question found in ``test.txt``.

    The file is split into exams on lines matching ``Exam <letter>`` and
    each exam into questions on lines starting with ``QUESTION <1-3 digits>``.
    Every question chunk is passed through ``self.format_question``, whose
    result is used as a mapping with the keys ``question``, ``answers``,
    ``answer`` and ``explanation``.

    Questions whose text has already been seen (compared by SHA-1 digest)
    are skipped, so only the first occurrence produces a card.

    Returns:
        A list of ForeignCard objects with two fields each: the front
        (question text followed by every lettered answer) and the back
        (the answers selected by ``q['answer']`` followed by the
        explanation).  Newlines are converted to ``<br />`` and the text
        is decoded as UTF-8.
    """
    # NOTE(review): the input path is hard-coded -- confirm whether the
    # importer is expected to read from a configurable location instead.
    with open("test.txt", 'r') as f:
        text = f.read()

    # [1:] drops whatever precedes the first header in each split.
    exams = [
        chunk.strip()
        for chunk in re.split(r'^Exam [A-Z]$', text, flags=re.MULTILINE)[1:]
    ]
    exams = [
        re.split(r'^QUESTION [0-9][0-9]?[0-9]?', exam, flags=re.MULTILINE)[1:]
        for exam in exams
    ]
    exams = [[raw.strip() for raw in exam] for exam in exams]
    # All questions are formatted up front, before any card is created.
    exams = [[self.format_question(raw) for raw in exam] for exam in exams]

    n = '\n'
    br = '<br />'
    seen = set()  # digests of question texts already turned into cards
    cards = []

    for exam in exams:
        for q in exam:
            digest = hashlib.sha1(q['question']).digest()
            if digest in seen:
                continue
            seen.add(digest)

            answers = q['answers']

            def labelled(index):
                # self.orda is the code point of the first answer letter,
                # so index 0 maps to that letter, 1 to the next, and so on.
                return chr(index + self.orda) + '. ' + answers[index]

            front = (q['question'] + n * 3 + 'Answers:\n\n' +
                     (n * 2).join([labelled(i) for i in range(len(answers))]))
            back = ((n * 2).join([labelled(i) for i in q['answer']]) +
                    n * 3 + q['explanation'])

            card = ForeignCard()
            card.fields = [
                unicode(front.replace(n, br), "utf-8"),
                unicode(back.replace(n, br), "utf-8"),
            ]
            cards.append(card)
    return cards
def foreignCards(self):
    """Build one ForeignCard per unique question found in ``test.txt``.

    The file is split into exams on lines matching ``Exam <letter>`` and
    each exam into questions on lines starting with ``QUESTION <1-3 digits>``.
    Every question chunk is passed through ``self.format_question``, whose
    result is used as a mapping with the keys ``question``, ``answers``,
    ``answer`` and ``explanation``.

    Questions whose text has already been seen (compared by SHA-1 digest)
    are skipped, so only the first occurrence produces a card.

    Returns:
        A list of ForeignCard objects with two fields each: the front
        (question text followed by every lettered answer) and the back
        (the answers selected by ``q['answer']`` followed by the
        explanation).  Newlines are converted to ``<br />`` and the text
        is decoded as UTF-8.
    """
    # NOTE(review): the input path is hard-coded -- confirm whether the
    # importer is expected to read from a configurable location instead.
    with open("test.txt", 'r') as f:
        text = f.read()

    # [1:] drops whatever precedes the first header in each split.
    exams = [
        chunk.strip()
        for chunk in re.split(r'^Exam [A-Z]$', text, flags=re.MULTILINE)[1:]
    ]
    exams = [
        re.split(r'^QUESTION [0-9][0-9]?[0-9]?', exam, flags=re.MULTILINE)[1:]
        for exam in exams
    ]
    exams = [[raw.strip() for raw in exam] for exam in exams]
    # All questions are formatted up front, before any card is created.
    exams = [[self.format_question(raw) for raw in exam] for exam in exams]

    n = '\n'
    br = '<br />'
    seen = set()  # digests of question texts already turned into cards
    cards = []

    for exam in exams:
        for q in exam:
            digest = hashlib.sha1(q['question']).digest()
            if digest in seen:
                continue
            seen.add(digest)

            answers = q['answers']

            def labelled(index):
                # self.orda is the code point of the first answer letter,
                # so index 0 maps to that letter, 1 to the next, and so on.
                return chr(index + self.orda) + '. ' + answers[index]

            front = (q['question'] + n * 3 + 'Answers:\n\n' +
                     (n * 2).join([labelled(i) for i in range(len(answers))]))
            back = ((n * 2).join([labelled(i) for i in q['answer']]) +
                    n * 3 + q['explanation'])

            card = ForeignCard()
            card.fields = [
                unicode(front.replace(n, br), "utf-8"),
                unicode(back.replace(n, br), "utf-8"),
            ]
            cards.append(card)
    return cards
def foreignCards(self):
    """Return a list of foreign cards for importing.

    Reads lines from ``self.fh`` and runs them through a small state
    machine kept in ``self.curState``:

    * ``collecting_globals`` -- accepts blank lines and ``st: <tags>``
      lines; an ``m: <model>`` line switches to ``between_facts``.
    * ``between_facts`` -- accepts ``st:`` and ``m:`` lines; any other
      non-empty line starts a fact.
    * ``in_fact`` -- collects fields (single-line, or multi-line between
      a pair of lone backtick lines), ``t: <tags>`` lines, and a lone
      ``.`` that terminates the fact.

    A state name with the suffix ``_reinterpret`` means "process the line
    already in hand again under the state without the suffix" instead of
    reading a new line.

    Each call returns as soon as one fact has been completed, or when the
    input ends.  Text from ``#`` to end of line is stripped from every
    line read, as is trailing whitespace.

    Returns:
        ``None`` if a previous call already reached ``__END__``;
        otherwise ``self.cards``, holding the card completed by this call
        (empty if the input ended first).

    Raises:
        ImportFormatError: if the input ends inside a fact, an unexpected
            line appears before the first model line, a named model is
            not found in ``self.deck.models``, or the state is unknown.
    """
    if self.iHaveSeenTheEnd:
        return None

    # States in which running out of input is legitimate.
    restingStates = ('collecting_globals', 'between_facts')
    tagsPattern = r"^st: (?P<tags>.*)$"
    modelPattern = r"^m: (?P<modelname>.+)$"

    self.cards = []
    curCard = ForeignCard()

    while True:
        logger.debug("here we go (again); state is " + self.curState)
        match = re.search(r"_reinterpret$", self.curState)
        if match:
            # Keep the current line and only drop the marker from the state.
            logger.debug("it's a reinterpret, so, pre-sub: " + self.curState)
            self.curState = re.sub(r"_reinterpret", "", self.curState)
            logger.debug("post-sub: " + self.curState)
        else:
            line = self.fh.readline()
            logger.debug("pristine line: '''" + line + "'''")
            if line == "":
                # readline() returned nothing at all: physical end of input.
                if self.curState not in restingStates:
                    raise ImportFormatError(
                        type="systemError",
                        info="file terminated unexpectedly in state " +
                        self.curState)
                break
            line = re.sub(r"#.*$", "", line)
            logger.debug("line with comments removed: '''" + line + "'''")
            line = line.rstrip()
            logger.debug("stripped line: '''" + line + "'''")
            if line == "__END__":
                if self.curState not in restingStates:
                    raise ImportFormatError(
                        type="systemError",
                        info="file terminated unexpectedly in state " +
                        self.curState)
                # Return whatever we've gathered so far, but flag so that
                # we don't re-enter the state machine on the next call.
                self.iHaveSeenTheEnd = True
                break

        logger.debug("state is " + self.curState)

        if self.curState == 'collecting_globals':
            if re.search(r"^\s*$", line):
                continue
            match = re.search(tagsPattern, line)
            if match:
                self.sharedTags += " " + match.group('tags')
                continue
            match = re.search(modelPattern, line)
            if match:
                # The model lookup lives in the between_facts handler, so
                # hand the same line over to it.
                self.curState = 'between_facts_reinterpret'
                logger.debug("found an m: line; reinterpreting in state " +
                             self.curState)
                continue
            raise ImportFormatError(
                type="systemError",
                info="expected m: or st: but got " + line)

        if self.curState == 'between_facts':
            logger.debug("between facts handler")
            match = re.search(tagsPattern, line)
            if match:
                self.sharedTags += " " + match.group('tags')
                continue
            match = re.search(modelPattern, line)
            if match:
                logger.debug("found an m: line")
                wantedName = match.group('modelname')
                self.curModel = None
                for m in self.deck.models:
                    logger.debug("looking for model named '" + wantedName +
                                 "' ok")
                    if m.name == wantedName:
                        self.curModel = m
                        logger.debug("chose model " + self.curModel.name +
                                     " with " + str(self.numFields) +
                                     " fields, ok")
                        break
                if not self.curModel:
                    # List the names that *are* available.  The message
                    # must be built from strings only, otherwise building
                    # it fails before ImportFormatError can be raised.
                    available = ", ".join([m.name for m in self.deck.models])
                    raise ImportFormatError(
                        type="systemError",
                        info="model " + wantedName +
                        " doesn't exist; choose from " + available)
                continue
            if line != "":
                # Special case: go back around and reinterpret the same
                # line in a different context.
                logger.debug("non-empty, reinterpreting...")
                self.curState = 'in_fact_reinterpret'
            # Blank lines between facts are skipped.
            # NOTE(review): confirm blank lines here are meant to be
            # ignored rather than reported as an error.
            continue

        if self.curState == 'in_fact':
            logger.debug("in_fact handler")
            # Sub-state - are we in a multiline field?
            logger.debug("are we in multiline?")
            if self.endOfMultilineFieldMarker != "":
                logger.debug("we are in multiline")
                match = re.search(
                    re.escape(self.endOfMultilineFieldMarker) + "$", line)
                if match:
                    logger.debug("ending multiline")
                    self.endOfMultilineFieldMarker = ""
                else:
                    logger.debug("still collecting in multiline")
                    curCard.fields[-1] += line + self.multilineEOL
                # Either way this line is consumed; in particular the
                # closing marker must not be read as a new opening marker.
                continue

            logger.debug("not in multiline; perhaps terminating fact?")
            if line == ".":
                # Fact terminator
                logger.debug("we are terminating fact")
                # Not sure why but ForeignCard takes *both* the contents of
                # .tags *and* contents of a special member of .fields and
                # concatenates those to get overall tags.  If you don't
                # give the special field it gets angry.  So, provide an
                # empty one.  Default mapping will expect to find this in
                # the last field.
                curCard.tags += self.sharedTags
                curCard.fields = [unicode(nonUTF) for nonUTF in curCard.fields]
                # Pad out any remaining fields with empty strings.
                # Perhaps at a later point we'll support syntax for
                # specifying a field by "^<s>: " where s is some
                # unambiguous prefix of the field name, at which point
                # we'll need to specify our own mappings.
                logger.debug("purely simply collected fields: " +
                             str(curCard.fields))
                have = len(curCard.fields)
                want = len(self.curModel.fieldModels)
                logger.debug("numFields: %d, have: %d, want: %d" %
                             (self.numFields, have, want))
                # Appends (want - have) empty fields; a no-op when the
                # fact already has at least ``want`` fields.
                curCard.fields[have:want] = [u""] * (want - have)
                # special tags field
                curCard.fields.append(u"")
                logger.debug(
                    "full set of fields with trailing null strings and "
                    "empty tags: " + str(curCard.fields))
                self.numFields = len(curCard.fields)
                # Setting model computes mapping.
                self.model = self.curModel
                self.cards.append(curCard)
                self.curState = 'between_facts'
                # Fresh card
                curCard = ForeignCard()
                # Stop here and return what we have.
                break
            else:
                logger.debug("not terminating fact; perhaps starting multiline?")
                # Single-line field, or start of a multiline one.
                #   `
                #   this is a
                #   multiline string
                #   `
                # =>
                #   this is a\nmultiline string
                match = re.search(r"^(?P<eof>`)(?P<flags>[b]?)$", line)
                if match:
                    logger.debug("starting `multiline")
                    self.endOfMultilineFieldMarker = match.group('eof')
                    # FIXME: Anki seems to mangle this if not set to <br/>.
                    # The optional "b" flag is parsed but currently unused;
                    # the line separator is always a newline.
                    self.multilineEOL = "\n"
                    # Start off with new empty field, added to as we
                    # collect constituent lines.
                    curCard.fields.append("")
                    continue
                match = re.search(r"^t:(?P<facttags> .*)$", line)
                if match:
                    logger.debug("adding per-fact tags")
                    curCard.tags += match.group('facttags')
                    continue
                # Single-line.
                logger.debug("collecting single line into single field")
                curCard.fields.append(line)
                continue
            continue

        raise ImportFormatError(type="systemError",
                                info="ended up in unhandled state")

    logger.debug("stick a fork in me: ")
    logger.debug(self.cards)
    return self.cards
def foreignCards(self):
    """Return a list of foreign cards for importing.

    Reads lines from ``self.fh`` and runs them through a small state
    machine kept in ``self.curState``:

    * ``collecting_globals`` -- accepts blank lines and ``st: <tags>``
      lines; an ``m: <model>`` line switches to ``between_facts``.
    * ``between_facts`` -- accepts ``st:`` and ``m:`` lines; any other
      non-empty line starts a fact.
    * ``in_fact`` -- collects fields (single-line, or multi-line between
      a pair of lone backtick lines), ``t: <tags>`` lines, and a lone
      ``.`` that terminates the fact.

    A state name with the suffix ``_reinterpret`` means "process the line
    already in hand again under the state without the suffix" instead of
    reading a new line.

    Each call returns as soon as one fact has been completed, or when the
    input ends.  Text from ``#`` to end of line is stripped from every
    line read, as is trailing whitespace.

    Returns:
        ``None`` if a previous call already reached ``__END__``;
        otherwise ``self.cards``, holding the card completed by this call
        (empty if the input ended first).

    Raises:
        ImportFormatError: if the input ends inside a fact, an unexpected
            line appears before the first model line, a named model is
            not found in ``self.deck.models``, or the state is unknown.
    """
    if self.iHaveSeenTheEnd:
        return None

    # States in which running out of input is legitimate.
    restingStates = ('collecting_globals', 'between_facts')
    tagsPattern = r"^st: (?P<tags>.*)$"
    modelPattern = r"^m: (?P<modelname>.+)$"

    self.cards = []
    curCard = ForeignCard()

    while True:
        logger.debug("here we go (again); state is " + self.curState)
        match = re.search(r"_reinterpret$", self.curState)
        if match:
            # Keep the current line and only drop the marker from the state.
            logger.debug("it's a reinterpret, so, pre-sub: " + self.curState)
            self.curState = re.sub(r"_reinterpret", "", self.curState)
            logger.debug("post-sub: " + self.curState)
        else:
            line = self.fh.readline()
            logger.debug("pristine line: '''" + line + "'''")
            if line == "":
                # readline() returned nothing at all: physical end of input.
                if self.curState not in restingStates:
                    raise ImportFormatError(
                        type="systemError",
                        info="file terminated unexpectedly in state " +
                        self.curState)
                break
            line = re.sub(r"#.*$", "", line)
            logger.debug("line with comments removed: '''" + line + "'''")
            line = line.rstrip()
            logger.debug("stripped line: '''" + line + "'''")
            if line == "__END__":
                if self.curState not in restingStates:
                    raise ImportFormatError(
                        type="systemError",
                        info="file terminated unexpectedly in state " +
                        self.curState)
                # Return whatever we've gathered so far, but flag so that
                # we don't re-enter the state machine on the next call.
                self.iHaveSeenTheEnd = True
                break

        logger.debug("state is " + self.curState)

        if self.curState == 'collecting_globals':
            if re.search(r"^\s*$", line):
                continue
            match = re.search(tagsPattern, line)
            if match:
                self.sharedTags += " " + match.group('tags')
                continue
            match = re.search(modelPattern, line)
            if match:
                # The model lookup lives in the between_facts handler, so
                # hand the same line over to it.
                self.curState = 'between_facts_reinterpret'
                logger.debug("found an m: line; reinterpreting in state " +
                             self.curState)
                continue
            raise ImportFormatError(
                type="systemError",
                info="expected m: or st: but got " + line)

        if self.curState == 'between_facts':
            logger.debug("between facts handler")
            match = re.search(tagsPattern, line)
            if match:
                self.sharedTags += " " + match.group('tags')
                continue
            match = re.search(modelPattern, line)
            if match:
                logger.debug("found an m: line")
                wantedName = match.group('modelname')
                self.curModel = None
                for m in self.deck.models:
                    logger.debug("looking for model named '" + wantedName +
                                 "' ok")
                    if m.name == wantedName:
                        self.curModel = m
                        logger.debug("chose model " + self.curModel.name +
                                     " with " + str(self.numFields) +
                                     " fields, ok")
                        break
                if not self.curModel:
                    # List the names that *are* available.  The message
                    # must be built from strings only, otherwise building
                    # it fails before ImportFormatError can be raised.
                    available = ", ".join([m.name for m in self.deck.models])
                    raise ImportFormatError(
                        type="systemError",
                        info="model " + wantedName +
                        " doesn't exist; choose from " + available)
                continue
            if line != "":
                # Special case: go back around and reinterpret the same
                # line in a different context.
                logger.debug("non-empty, reinterpreting...")
                self.curState = 'in_fact_reinterpret'
            # Blank lines between facts are skipped.
            # NOTE(review): confirm blank lines here are meant to be
            # ignored rather than reported as an error.
            continue

        if self.curState == 'in_fact':
            logger.debug("in_fact handler")
            # Sub-state - are we in a multiline field?
            logger.debug("are we in multiline?")
            if self.endOfMultilineFieldMarker != "":
                logger.debug("we are in multiline")
                match = re.search(
                    re.escape(self.endOfMultilineFieldMarker) + "$", line)
                if match:
                    logger.debug("ending multiline")
                    self.endOfMultilineFieldMarker = ""
                else:
                    logger.debug("still collecting in multiline")
                    curCard.fields[-1] += line + self.multilineEOL
                # Either way this line is consumed; in particular the
                # closing marker must not be read as a new opening marker.
                continue

            logger.debug("not in multiline; perhaps terminating fact?")
            if line == ".":
                # Fact terminator
                logger.debug("we are terminating fact")
                # Not sure why but ForeignCard takes *both* the contents of
                # .tags *and* contents of a special member of .fields and
                # concatenates those to get overall tags.  If you don't
                # give the special field it gets angry.  So, provide an
                # empty one.  Default mapping will expect to find this in
                # the last field.
                curCard.tags += self.sharedTags
                curCard.fields = [unicode(nonUTF) for nonUTF in curCard.fields]
                # Pad out any remaining fields with empty strings.
                # Perhaps at a later point we'll support syntax for
                # specifying a field by "^<s>: " where s is some
                # unambiguous prefix of the field name, at which point
                # we'll need to specify our own mappings.
                logger.debug("purely simply collected fields: " +
                             str(curCard.fields))
                have = len(curCard.fields)
                want = len(self.curModel.fieldModels)
                logger.debug("numFields: %d, have: %d, want: %d" %
                             (self.numFields, have, want))
                # Appends (want - have) empty fields; a no-op when the
                # fact already has at least ``want`` fields.
                curCard.fields[have:want] = [u""] * (want - have)
                # special tags field
                curCard.fields.append(u"")
                logger.debug(
                    "full set of fields with trailing null strings and "
                    "empty tags: " + str(curCard.fields))
                self.numFields = len(curCard.fields)
                # Setting model computes mapping.
                self.model = self.curModel
                self.cards.append(curCard)
                self.curState = 'between_facts'
                # Fresh card
                curCard = ForeignCard()
                # Stop here and return what we have.
                break
            else:
                logger.debug("not terminating fact; perhaps starting multiline?")
                # Single-line field, or start of a multiline one.
                #   `
                #   this is a
                #   multiline string
                #   `
                # =>
                #   this is a\nmultiline string
                match = re.search(r"^(?P<eof>`)(?P<flags>[b]?)$", line)
                if match:
                    logger.debug("starting `multiline")
                    self.endOfMultilineFieldMarker = match.group('eof')
                    # FIXME: Anki seems to mangle this if not set to <br/>.
                    # The optional "b" flag is parsed but currently unused;
                    # the line separator is always a newline.
                    self.multilineEOL = "\n"
                    # Start off with new empty field, added to as we
                    # collect constituent lines.
                    curCard.fields.append("")
                    continue
                match = re.search(r"^t:(?P<facttags> .*)$", line)
                if match:
                    logger.debug("adding per-fact tags")
                    curCard.tags += match.group('facttags')
                    continue
                # Single-line.
                logger.debug("collecting single line into single field")
                curCard.fields.append(line)
                continue
            continue

        raise ImportFormatError(type="systemError",
                                info="ended up in unhandled state")

    logger.debug("stick a fork in me: ")
    logger.debug(self.cards)
    return self.cards