def test_Parse(self):
    # Round-trip check: '#' characters that occur inside a quoted value or
    # inside a semicolon-delimited value are data, so they must still be
    # present in the text regenerated by star_text() after parsing.
    # NOTE(review): the line breaks inside both STAR fixtures below were
    # reconstructed; confirm them against the version-controlled original.
    star_input = """data_no_comments_here
save_comment
_Saveframe_category comment
loop_
_comment
_every_flag
_category
'#It has very upfield-shifted H5', H5" @ 3.935,4.012 ppm'
;
#######################
# BOGUS #
#######################
;
BOGUS_CATEGORY
stop_
save_
"""
    # parse() reports success with a false value.
    parse_status = self.strf.parse(text=star_input)
    self.assertFalse(parse_status)

    regenerated = self.strf.star_text()
    expected = """data_no_comments_here
save_comment
_Saveframe_category comment
loop_
_comment
_every_flag
_category
;
#It has very upfield-shifted H5', H5" @ 3.935,4.012 ppm
;
;
#######################
# BOGUS #
#######################
;
BOGUS_CATEGORY
stop_
save_
"""
    # Layout is allowed to differ; only the non-whitespace content counts.
    same_content = Utils.equalIgnoringWhiteSpace(expected, regenerated)
    self.assertTrue(same_content)
def test_Parse(self):
    # Parses a STAR snippet whose values contain '#' characters (one in a
    # single-quoted value, several in a semicolon-delimited value) and checks
    # that the text written back by star_text() still carries them.
    # NOTE(review): the line breaks inside both STAR fixtures below were
    # reconstructed; confirm them against the version-controlled original.
    source_star = """data_no_comments_here
save_comment
_Saveframe_category comment
loop_
_comment
_every_flag
_category
'#It has very upfield-shifted H5', H5" @ 3.935,4.012 ppm'
;
#######################
# BOGUS #
#######################
;
BOGUS_CATEGORY
stop_
save_
"""
    expected_star = """data_no_comments_here
save_comment
_Saveframe_category comment
loop_
_comment
_every_flag
_category
;
#It has very upfield-shifted H5', H5" @ 3.935,4.012 ppm
;
;
#######################
# BOGUS #
#######################
;
BOGUS_CATEGORY
stop_
save_
"""
    # A false return value from parse() means the text was accepted.
    self.assertFalse(self.strf.parse(text=source_star))

    # Compare content only; whitespace differences are ignored.
    written_star = self.strf.star_text()
    self.assertTrue(Utils.equalIgnoringWhiteSpace(expected_star, written_star))
def parse(self, text='', nmrView_type=0):
    """
    Parse STAR text into save frames and tag tables.

    The text is normalised first (end-of-line conversion, comment
    stripping, semicolon-block collapsing and, when requested, nmrView
    brace compression).  The 'data_' title is stored in self.title and the
    remainder is walked from left to right.  At every position the text
    must start a save frame, end a save frame, or start a free or looped
    tag table.  New SaveFrame objects are appended to self.datanodes; tag
    tables are appended to the currently open save frame, or to
    self.datanodes when no save frame is open.

    Parameters:
    - text:         the STAR text to parse.
    - nmrView_type: when true, the text is additionally passed through
                    nmrView_compress() before parsing.

    Returns 0 on success and 1 on any error.  Every error path returns a
    true value so that callers testing the result for truth reliably
    detect failures.
    """
    if self.verbosity > 2:
        nTdebug('Parsing STAR file: %s' % self.filename)

    # '"Begin at the beginning," the King said, gravely,
    # "and go on till you come to the end; then stop."' (LC)

    # Take care of end-of-line variations before any pattern matching.
    text = Utils.dos2unix(text)  # \r\n -> \n
    text = Utils.mac2unix(text)  # \r -> \n

    text = comments_strip(text)

    ## Collapse the semicolon block for ease of parsing
    text = semicolon_block_collapse(text)

    ## For nmrView 'nmrStar' also compress {  } into {}
    ## Wim 05/03/2003
    if nmrView_type:
        text = nmrView_compress(text)

    # NOTE: messages are written with single-argument print(...) calls so
    # that the emitted text is the same whether print is a statement or a
    # function.

    ## TITLE
    match_data_tag = re.search(r'\s*data_(\S+)\s+', text, 0)
    if not match_data_tag:
        print("ERROR: found no 'data_title' string in ")
        print("ERROR: file's text (first 100 chars):[%s] " % text[0:100])
        return 1
    self.title = match_data_tag.group(1)
    pos = match_data_tag.end()

    def starts_at(pattern, position, sample):
        # True when `pattern` matches exactly at `position`; the search is
        # limited to a window as long as `sample` to keep the test cheap.
        found = pattern.search(text, position, position + len(sample))
        return found is not None and found.start() == position

    sf_open = None  # True while a save frame is open
    text_length = len(text)

    ## Only break when parsed to the eof
    while pos < text_length:
        if self.verbosity >= 9:
            print('Parse text from position:%s : [%s]' % (
                pos, text[pos:pos + 10]))

        ## SAVE FRAME BEGIN
        if starts_at(pattern_save_begin_nws, pos, 'save_1'):
            if sf_open:
                print("ERROR: Found the beginning of a saveframe but")
                print("ERROR: saveframe before is still open(not closed;-)")
                return 1
            match_save_begin = pattern_save_begin.search(text, pos)
            if not match_save_begin:
                print("ERROR: Code error (no second match on sf begin)")
                return 1
            if match_save_begin.start() != pos:
                print("ERROR: Code error (wrong second match on sf begin)")
                return 1
            self.datanodes.append(SaveFrame(tagtables=[]))  # Need resetting ?
            self.datanodes[-1].title = match_save_begin.group(1)
            sf_open = 1
            pos = match_save_begin.end()
            continue

        ## SAVE FRAME END
        if starts_at(pattern_save_end_nws, pos, 'save_ '):
            if not sf_open:
                print("ERROR: Found the end of a saveframe but")
                print("ERROR: saveframe was not open")
                return 1
            match_save_end = pattern_save_end.search(text, pos)
            if not match_save_end:
                print("ERROR: Code error (no second match on sf end)")
                return 1
            if match_save_end.start() != pos:
                print("ERROR: Code error (wrong second match on sf end)")
                return 1
            sf_open = None
            pos = match_save_end.end()
            continue

        ## FREE or LOOP TAGTABLE
        if starts_at(pattern_tag_name_nws, pos, ' _X'):
            # A free tag table starts right at the tag name, so pos is
            # left untouched for TagTable.parse().
            free = 1
        elif starts_at(pattern_tagtable_loop_nws, pos, 'loop_ '):
            free = None
            match_tagtable_loop = pattern_tagtable_loop.search(text, pos)
            if not match_tagtable_loop:
                nTerror(' Code error, no second match on tagtable_loop')
                return 1
            if match_tagtable_loop.start() != pos:
                print("ERROR: Code error (wrong second match on tagtable_loop)")
                return 1
            # Skip the loop_ keyword itself.
            pos = match_tagtable_loop.end()
        else:
            ## Nothing recognisable here: show the text around pos.
            nTerror(' No new item found in data_nodes_parse.')
            print('Items looked for are a begin or end of a saveframe, or')
            print('a begin of a tagtable(free or looped).')
            print('')
            print("At text (before pos= %s ):" % pos)
            start = max(0, pos - 70)
            print("[" + text[start:pos] + "]")
            print("At text (starting pos= %s ):" % pos)
            print("[" + text[pos:pos + 70] + "]")
            return 1

        if sf_open:
            dn = self.datanodes[-1].tagtables  # Insert in last saveframes' tagtables
        else:
            dn = self.datanodes

        tt = TagTable(free=free,
                      tagnames=[],
                      tagvalues=[],
                      verbosity=self.verbosity)
        dn.append(tt)
        # TagTable.parse() hands back the position to continue from, or
        # None when it failed.
        pos = tt.parse(text=text, pos=pos)
        if pos is None:
            print("ERROR: In parsing tagtable")
            return 1
        if self.verbosity >= 9:
            print('Parsed tagtable up to pos: [%s]' % pos)

    if self.verbosity > 2:
        print('DEBUG Parsed: [%s] datanodes (top level count only)' %
              len(self.datanodes))

    if self.check_integrity(recursive=0):
        print("ERROR: integrity not ok")
        return 1

    return 0