Пример #1
0
    def getTextDBDictRepr(self):
        """
                Entry.getTextDBDictRepr

                Build the TextDBDict representation of the entry and return it
                as a single string whose parts are joined with NEWLINE.

                The parts are, in this order :
                    * the title line (hierarchical level + title), if a title exists;
                    * every non-empty line of the text;
                    * one empty line if the entry holds no extract;
                    * the TextDBDict representation of each extract.
        """
        data = self.entrydata
        chunks = []

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # title line :
        heading = data.title

        if heading is not None:
            level = HierarchicalLevel(
                errors = self.errors,
                formatstr = logotheras.options.OPTIONS["textdbdict::HLEVELformatrst by writing"])
            level.setData( data.hlevel )

            # The markers around the to-be-duplicated string are not kept in
            # the stored title : they are put back here, around every occurrence
            # of that string.
            duplicated = data.entry_to_be_duplicated
            if duplicated is not None:
                marker = BODY_ARTICLE_TO_DUPLICATED_IMPORTANT \
                         if data.important_entry_to_be_dup \
                         else BODY_ARTICLE_TO_DUPLICATED_NOTIMPORTANT

                heading = heading.replace( duplicated,
                                           marker + duplicated + marker )

            chunks.append( "{0} {1}".format(level.getTextDBDictRepr(), heading) )

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # text following the title; empty lines are dropped :
        chunks.extend( text_line for text_line in data.text if text_line != "" )

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # without any extract, an empty line closes the title and the text :
        if len(data) == 0:
            chunks.append("")

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # extracts (iterating over the entry data yields them one by one) :
        for extractdata in data:
            writer = Extract(errors = self.errors,
                             logotherasdata = self.logotherasdata)
            writer.setData( extractdata )
            chunks.append( writer.getTextDBDictRepr() )

        return NEWLINE.join(chunks)
Пример #2
0
    def initFromStr(self,
                    informationsdata,
                    title_hlevel,
                    title_pending_text,
                    str_content,
                    reading_position,
                    srclanguage):
        """
                Entry.initFromStr()

                Reset the entry, then fill <self.entrydata> (hierarchical level,
                title, links, text and extracts) from the given source.

                informationsdata        : InformationsData object
                title_hlevel            : HierarchicalLevel object
                title_pending_text      : None or a string.
                str_content             : a list of strings.
                reading_position        : ReadingPosition object
                srclanguage             : passed unchanged to Extract.initFromStr()

                Problems found in the title are reported through <self.errors>;
                no exception is raised by this method itself.

                Return <self>.
        """
        self.reset()
        self.reading_position = reading_position

        def store_extract(extract_lines):
            """
                    Build an Extract from <extract_lines> and append its data to
                    <self.entrydata>, unless its freeness is below the
                    "minimal freeness" option (the skip is then only logged).
            """
            new_extract = Extract( errors = self.errors,
                                   logotherasdata = self.logotherasdata )
            new_extract.initFromStr( informationsdata = informationsdata,
                                     src = extract_lines,
                                     reading_position = reading_position,
                                     srclanguage = srclanguage)

            extract_freeness = \
                informationsdata.getTheMinFreenessOfAnExtract(new_extract.extractdata)

            if extract_freeness >= logotheras.options.OPTIONS["minimal freeness"]:
                self.entrydata.append( new_extract.extractdata )
            else:
                msg = "skipping an extract due to its freeness value; extract={0}; -> {1}"
                self.errors.info(msg.format(new_extract.extractdata, reading_position))

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # hierarchical level :
        self.entrydata.hlevel = title_hlevel.hleveldata

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # title :
        if title_pending_text is None:
            self.entrydata.title = None

        else:

            # links are forbidden in the entries' title : the error is reported
            # but the title is still processed.
            if LINK_START in title_pending_text:
                msg = "(ERR059) Link found in an entry's title; title={0} -> {1}"
                self.errors.error(msg.format(title_pending_text,
                                             self.reading_position))

            # to-be-duplicated entry ?
            if BODY_ARTICLE_TO_DUPLICATED_NOTIMPORTANT in title_pending_text or \
               BODY_ARTICLE_TO_DUPLICATED_IMPORTANT in title_pending_text:

                # NOTE(review): the 'normal' symbol is tested first; if it happens
                # to be a substring of the 'important' one, the 'important' case
                # could never be selected here - confirm against the constants.
                if BODY_ARTICLE_TO_DUPLICATED_NOTIMPORTANT in title_pending_text:
                    # 'normal' symbol :
                    dup_symbol = BODY_ARTICLE_TO_DUPLICATED_NOTIMPORTANT
                else:
                    # 'important' symbol :
                    dup_symbol = BODY_ARTICLE_TO_DUPLICATED_IMPORTANT

                # the same symbol opens and closes the marked string;
                # exactly one marked string is expected :
                tbm = TextBetweenMarkers(marker_start=dup_symbol,
                                         marker_end=dup_symbol)

                results = tbm.getExtracts( title_pending_text )

                if len(results) != 1:
                    msg = "(ERR058) Wrong format : " \
                          "the to-be-duplicated entry can't be read; -> {0}"
                    self.errors.error(msg.format(self.reading_position))
                else:
                    self.entrydata.entry_to_be_duplicated = results[0].substring

                    self.entrydata.important_entry_to_be_dup = \
                        (dup_symbol == BODY_ARTICLE_TO_DUPLICATED_IMPORTANT)

                # we erase the markers linked to the 'to-be-duplicated' entry,
                # even if the marked string could not be read :
                title_pending_text = title_pending_text.replace(dup_symbol, "")

            self.entrydata.title = title_pending_text.strip()

            # links in <self.entrydata.title> ?
            self.entrydata.links = Link().getExtracts(source = self.entrydata.title,
                                                      context = "entry.title+" + \
                                                      self.reading_position.getShortDescription())

        #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        # text and extract(s) :
        #
        # <current_extract> holds the lines of the extract being read, without
        # their prefix. An extract always owns at least one line, so a non-empty
        # list means "an extract is currently open".
        current_extract = []

        for line in str_content:

            if line.strip() == "":
                # a blank line closes the current extract, if any; blank lines
                # are never added to the text.
                if current_extract:
                    store_extract(current_extract)
                    current_extract = []

            elif line.startswith(BODY_PREFIX_BEFORE_EXTRACT):
                # first or next line of an extract :
                current_extract.append( line[len(BODY_PREFIX_BEFORE_EXTRACT):] )

            else:
                self.entrydata.text.append(line)

        if current_extract:
            # the source ended without a blank line : we add the last extract.
            store_extract(current_extract)

        return self