示例#1
0
    def test_Parse(self):
        text = """data_no_comments_here

save_comment
   _Saveframe_category  comment
   loop_
        _comment
        _every_flag
        _category

'#It has very upfield-shifted H5', H5" @ 3.935,4.012 ppm'
;
#######################
#  BOGUS              #
#######################

;

    BOGUS_CATEGORY

     stop_
save_
"""
        self.assertFalse(self.strf.parse(text = text))
        st = self.strf.star_text()
#            print "unparsed text:[" +st+ "]"

        exp = """data_no_comments_here
save_comment   _Saveframe_category  comment   loop_
        _comment
        _every_flag
        _category
;
#It has very upfield-shifted H5', H5" @ 3.935,4.012 ppm
;
;
#######################
#  BOGUS              #
#######################

;    BOGUS_CATEGORY     stop_ save_
"""
        self.assertTrue(Utils.equalIgnoringWhiteSpace(exp, st))
示例#2
0
文件: File.py 项目: VuisterLab/cing
    def parse (self, text='', nmrView_type = 0):
        """
        - Parses text into save frames and tagtables.
        - Input text should start at position given with non-white space character
        - Appends a list of datanodes(save frames or tagtables)
        """

        if self.verbosity > 2:
            nTdebug('Parsing STAR file: %s' % self.filename)

#        '"Begin at the beginning," the King said, gravely,
#        "and go on till you come to the end; then stop."' (LC)

#        print "DEBUG taking care of EOL variations"
        text = Utils.dos2unix(text)# \r\n -> \n
        text = Utils.mac2unix(text)# \r   -> \n

        text = comments_strip(text)

        ## Collapse the semicolon block for ease of parsing
        text = semicolon_block_collapse(text)


        ## For nmrView 'nmrStar' also compress {  } into {}
        ## Wim 05/03/2003
        if nmrView_type:
            text = nmrView_compress(text)

        ## TITLE
        match_data_tag = re.search(r'\s*data_(\S+)\s+', text, 0)
        if not match_data_tag:
            print "ERROR: found no 'data_title' string in "
            print "ERROR: file's text (first 100 chars):[%s] " % text[0:100]
            return 1
        self.title = match_data_tag.group(1)
        pos = match_data_tag.end()


        ## Four quick searches for possible continuations
        next_sf_begin   = None      # SAVE FRAME BEGIN
        next_sf_end     = None      # SAVE FRAME END
        next_free_tt    = None      # FREE TAGTABLE
        next_loop_tt    = None      # LOOP TAGTABLE
        sf_open         = None      # When a saveframe is open
        text_length     = len(text)

        ## Only break when parsed to the eof
        while pos < text_length:
            if self.verbosity >= 9:
                print 'Parse text from position:%s : [%s]' % (
                    pos, text[pos:pos+10])

            match_save_begin_nws = pattern_save_begin_nws.search(text, pos, pos+len('save_1'))
            if match_save_begin_nws:
                if match_save_begin_nws.start() == pos:
                    next_sf_begin = 1
            if not next_sf_begin:
                match_save_end_nws = pattern_save_end_nws.search(text, pos, pos+len('save_ '))
                if match_save_end_nws:
                    if match_save_end_nws.start() == pos:
                        next_sf_end = 1
            if not (next_sf_begin or next_sf_end):
                match_tag_name_nws = pattern_tag_name_nws.search(text, pos, pos+len(' _X'))
                if match_tag_name_nws:
                    if match_tag_name_nws.start() == pos:
                        next_free_tt = 1
            if not (next_sf_begin or next_sf_end or next_free_tt):
                match_tagtable_loop_nws = pattern_tagtable_loop_nws.search(text, pos, pos+len('loop_ '))
                if match_tagtable_loop_nws:
                    if match_tagtable_loop_nws.start() == pos:
                        next_loop_tt = 1

            ## Just checking
            if not (next_sf_begin or next_sf_end or next_free_tt or next_loop_tt):
                nTerror(' No new item found in data_nodes_parse.')
                print 'Items looked for are a begin or end of a saveframe, or'
                print 'a begin of a tagtable(free or looped).'
                print
                print "At text (before pos=" , pos , "):"
                start = pos-70
                if start < 0:
                    start = 0
                print "[" + text[start:pos] + "]"
                print "At text (starting pos=" , pos , "):"
                print "[" + text[pos:pos+70]+ "]"
                return None

            ## SAVE FRAME BEGIN
            if next_sf_begin:
                if sf_open:
                    print "ERROR: Found the beginning of a saveframe but"
                    print "ERROR: saveframe before is still open(not closed;-)"
                    return None
                match_save_begin = pattern_save_begin.search(text, pos)
                if not match_save_begin:
                    print "ERROR: Code error (no second match on sf begin)"
                    return None
                if match_save_begin.start() != pos:
                    print "ERROR: Code error (wrong second match on sf begin)"
                    return None
                self.datanodes.append(SaveFrame(tagtables    = [])) # Need resetting ?
                self.datanodes[-1].title = match_save_begin.group(1)
                sf_open         = 1
                next_sf_begin   = None
                pos             = match_save_begin.end()
                continue

            ## SAVE FRAME END
            if next_sf_end:
                if not sf_open:
                    print "ERROR: Found the end of a saveframe but"
                    print "ERROR: saveframe was not open"
                    return None
                match_save_end = pattern_save_end.search(text, pos)
                if not match_save_end:
                    print "ERROR: Code error (no second match on sf end)"
                    return None
                if match_save_end.start() != pos:
                    print "ERROR: Code error (wrong second match on sf end)"
                    return None
                sf_open     = None
                next_sf_end = None
                pos         = match_save_end.end()
                continue

            ## FREE or LOOP TAGTABLE
            if next_free_tt:
                free            = 1
                next_free_tt    = None
            else: # next_loop_tt must be true as this was checked before
                if not next_loop_tt:
                    nTerror(' code bug in File.parse()')
                    return None
                free            = None
                next_loop_tt    = None

                match_tagtable_loop = pattern_tagtable_loop.search(text, pos)
                if not match_tagtable_loop:
                    nTerror(' Code error, no second match on tagtable_loop')
                    return None
                if match_tagtable_loop.start() != pos:
                    print "ERROR: Code error (wrong second match on tagtable_loop)"
                    return None
                pos = match_tagtable_loop.end()

            if sf_open:
                dn = self.datanodes[-1].tagtables # Insert in last saveframes' tagtables
            else:
                dn = self.datanodes

            dn.append(
                    TagTable(free      = free,
                                tagnames  = [],
                                tagvalues = [],
                                verbosity = self.verbosity))
            tt = dn[-1] # Just to be explicit for the beloved reader
            pos = tt.parse(text=text, pos=pos)

            if pos ==  None:
                print "ERROR: In parsing tagtable"
                return None
            if self.verbosity >=9:
                print 'Parsed tagtable up to pos: [%s]' % pos

        if self.verbosity > 2:
            print 'DEBUG Parsed: [%s] datanodes (top level count only)' % \
                  len(self.datanodes)

        if self.check_integrity(recursive = 0):
            print "ERROR: integrity not ok"
            return 1

        # Save some memory
        text = ''
        return 0