def getTextDBDictRepr(self):
    """
            Entry.getTextDBDictRepr

            Return a string representing the data in the TextDBDict format.

            The returned string is made of the following parts, in this order,
            joined with NEWLINE :
                (1) the title line (hierarchical level + title), only if a
                    title is defined;
                (2) the non-empty lines of the text following the title;
                (3) one empty line, only if the entry has no extract;
                (4) the TextDBDict representation of each extract.
    """
    res = []

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # title :
    title = self.entrydata.title

    if title is not None:

        # NOTE(review): the option key below is reproduced as found in the
        # source; confirm it matches the key declared in the options.
        hlevel = HierarchicalLevel(
            errors = self.errors,
            formatstr = logotheras.options.OPTIONS["textdbdict::HLEVELformatrst by writing"])
        hlevel.setData( self.entrydata.hlevel )

        # entry to-be-duplicated ?
        # If so, the symbols are put back before and after the string
        # to-be-duplicated :
        to_be_duplicated = self.entrydata.entry_to_be_duplicated

        if to_be_duplicated is not None:

            if self.entrydata.important_entry_to_be_dup:
                # 'important' symbol :
                dup_symbol = BODY_ARTICLE_TO_DUPLICATED_IMPORTANT
            else:
                # 'normal' symbol :
                dup_symbol = BODY_ARTICLE_TO_DUPLICATED_NOTIMPORTANT

            # Only the first occurrence is marked : Entry.initFromStr()
            # reports an error (ERR058) unless exactly one marked string is
            # found in the title, so marking every occurrence would produce
            # a title that can't be read back.
            title = title.replace( to_be_duplicated,
                                   dup_symbol + to_be_duplicated + dup_symbol,
                                   1 )

        # result added to <res> :
        res.append( "{0} {1}".format(hlevel.getTextDBDictRepr(), title) )

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # text after the title (empty lines are dropped) :
    res.extend( line for line in self.entrydata.text if line != "" )

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # if no extract, we add an empty line after the title and the text :
    if len(self.entrydata) == 0:
        res.append("")

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # extracts :
    for extractdata in self.entrydata:
        extract = Extract( errors = self.errors,
                           logotherasdata = self.logotherasdata )
        extract.setData( extractdata )
        res.append( extract.getTextDBDictRepr() )

    return NEWLINE.join(res)
def initFromStr(self, informationsdata, title_hlevel, title_pending_text, str_content, reading_position, srclanguage):
    """
            Entry.initFromStr()

            Reset the entry, fill it from the strings read in a source text
            and return <self>.

            informationsdata        : InformationsData object
            title_hlevel            : HierarchicalLevel object
            title_pending_text      : None or a string.
            str_content             : a list of strings.
            reading_position        : ReadingPosition object
            srclanguage             : passed unchanged to Extract.initFromStr()

            Lines of <str_content> starting with BODY_PREFIX_BEFORE_EXTRACT
            belong to an extract; a blank line closes the current extract;
            every other line is added to the text of the entry.
            Problems found in the title are reported through <self.errors>.
    """

    def store_extract(extract_lines):
        """
                Build an extract from <extract_lines> and add it to the
                entry, unless its freeness is lower than the
                "minimal freeness" option : in that case the extract is
                skipped and an info message is emitted.
        """
        new_extract = Extract( errors = self.errors,
                               logotherasdata = self.logotherasdata )
        new_extract.initFromStr( informationsdata = informationsdata,
                                 src = extract_lines,
                                 reading_position = reading_position,
                                 srclanguage = srclanguage )

        extract_freeness = \
            informationsdata.getTheMinFreenessOfAnExtract(new_extract.extractdata)

        if extract_freeness >= logotheras.options.OPTIONS["minimal freeness"]:
            self.entrydata.append( new_extract.extractdata )
        else:
            msg = "skipping an extract due to its freeness value; extract={0}; -> {1}"
            self.errors.info(msg.format(new_extract.extractdata, reading_position))

    self.reset()

    self.reading_position = reading_position

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # hierarchical level :
    self.entrydata.hlevel = title_hlevel.hleveldata

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # title :
    if title_pending_text is None:
        self.entrydata.title = None

    else:
        # links are forbidden in the entries' title :
        if LINK_START in title_pending_text:
            msg = "(ERR059) Link found in an entry's title; title={0} -> {1}"
            self.errors.error(msg.format(title_pending_text,
                                         self.reading_position))

        # to-be-duplicated entry ?
        has_normal_symbol = \
            BODY_ARTICLE_TO_DUPLICATED_NOTIMPORTANT in title_pending_text
        has_important_symbol = \
            BODY_ARTICLE_TO_DUPLICATED_IMPORTANT in title_pending_text

        if has_normal_symbol or has_important_symbol:

            # the 'normal' symbol takes precedence if both are present :
            if has_normal_symbol:
                dup_symbol = BODY_ARTICLE_TO_DUPLICATED_NOTIMPORTANT
            else:
                dup_symbol = BODY_ARTICLE_TO_DUPLICATED_IMPORTANT

            # only one result expected :
            tbm = TextBetweenMarkers(marker_start=dup_symbol,
                                     marker_end=dup_symbol)
            results = tbm.getExtracts( title_pending_text )

            if len(results) != 1:
                msg = "(ERR058) Wrong format : " \
                      "the to-be-duplicated entry can't be read; -> {0}"
                self.errors.error(msg.format(self.reading_position))
            else:
                self.entrydata.entry_to_be_duplicated = results[0].substring
                self.entrydata.important_entry_to_be_dup = \
                    (dup_symbol == BODY_ARTICLE_TO_DUPLICATED_IMPORTANT)

            # we erase the markers linked to the 'to-be-duplicated' entry,
            # even if the marked string couldn't be read :
            title_pending_text = title_pending_text.replace(dup_symbol, "")

        self.entrydata.title = title_pending_text.strip()

        # links in <self.entrydata.title> ?
        # NOTE(review): searched only when a title is defined; confirm this
        # nesting against the original layout of the file.
        self.entrydata.links = Link().getExtracts(
            source = self.entrydata.title,
            context = "entry.title+" + \
                      self.reading_position.getShortDescription())

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # text and extract(s) :
    in_extract = False      # True while the lines of an extract are being read
    current_extract = []    # list of strings (prefix removed)
    prefix_length = len(BODY_PREFIX_BEFORE_EXTRACT)

    for line in str_content:

        if line.strip() == "":
            # a blank line ends the current extract, if any :
            if in_extract:
                store_extract(current_extract)
                in_extract = False
                # a new list is created : the stored extract may still
                # refer to the previous one.
                current_extract = []

        elif not line.startswith(BODY_PREFIX_BEFORE_EXTRACT):
            # line belonging to the text of the entry :
            self.entrydata.text.append(line)

        else:
            # first or next line of an extract :
            in_extract = True
            current_extract.append( line[prefix_length:] )

    if in_extract:
        # we add the last extract, not followed by a blank line :
        store_extract(current_extract)

    return self