def all_nodes(text):
    """Collect every node name that occurs in the model text.

    The text is tokenized with the module tokenizer and the node
    names are extracted from the resulting token stream.
    """
    return tokenizer.get_nodes(tokenizer.tokenize(text))
def __init__(self, mode, text):
    """
    Main parser baseclass for all models.

    Tokenizes the model ``text``, partitions the tokens into
    initializer and update rules, and groups the update rules by
    rank so that subclasses can implement different updating
    strategies.

    Parameters:
        mode: one of VALID_MODES; selects the updating semantics.
        text: the model description to be tokenized.
    """
    # check the validity of modes
    if mode not in VALID_MODES:
        util.error('mode parameter must be one of %s' % VALID_MODES)

    # initialize the parser (no table files, no debug output)
    self.parser = yacc.yacc(write_tables=0, debug=0)

    # set the mode
    self.parser.mode = mode

    # optimization: this check is used very often
    self.parser.sync = (self.parser.mode == SYNC or self.parser.mode == TIME)

    # define default functions for reading/writing node state
    def get_value(state, name, p):
        "Default rule: read attribute `name` from the state."
        return getattr(state, name)

    def set_value(state, name, value, p):
        "Default rule: write `value` to attribute `name` on the state."
        setattr(state, name, value)
        return value

    #
    # setting the default rules (may be overridden by users)
    #
    self.parser.RULE_AND = lambda a, b, p: a and b
    self.parser.RULE_OR = lambda a, b, p: a or b
    self.parser.RULE_NOT = lambda a, p: not a
    self.parser.RULE_SETVALUE = set_value
    self.parser.RULE_GETVALUE = get_value
    self.parser.RULE_START_ITERATION = lambda index, model: index

    #
    # internally we'll maintain a full list of tokens
    #
    self.tokens = tokenizer.tokenize(text)
    self.nodes = tokenizer.get_nodes(self.tokens)

    # isolate various types of tokens
    self.init_tokens = tokenizer.init_tokens(self.tokens)
    self.update_tokens = tokenizer.update_tokens(self.tokens)
    self.label_tokens = tokenizer.label_tokens(self.update_tokens)
    self.async_tokens = tokenizer.async_tokens(self.update_tokens)

    # finding the initial and update nodes
    self.init_nodes = tokenizer.get_nodes(self.init_tokens)
    self.update_nodes = tokenizer.get_nodes(self.update_tokens)

    # find uninitialized nodes (updated but never initialized)
    self.uninit_nodes = self.update_nodes - self.init_nodes

    # populate the initializer lines
    # FIX: materialize as a list; in Python 3 map() returns a one-shot
    # iterator that would be silently exhausted after the first traversal
    self.init_lines = list(map(tokenizer.tok2line, self.init_tokens))

    # populate the body by the ranks; unlabeled (async) rules get rank 1
    labelmap = {}
    for tokens in self.async_tokens:
        labelmap.setdefault(1, []).append(tokens)

    # overwrite the label token's value in nolabel modes
    # for all PLDE, SYNC and ASYNC modes all ranks will be set to 1
    if self.parser.mode in NOLABEL_MODE:
        for token in self.label_tokens:
            token[0].value = 1

    # group the labeled rules under their (possibly rewritten) rank,
    # dropping the leading label token from each rule
    for tokens in self.label_tokens:
        rank = tokens[0].value
        short = tokens[1:]
        labelmap.setdefault(rank, []).append(short)

    # will iterate over the ranks in order
    # (sorted() already returns a new list; no extra list() needed)
    self.ranks = sorted(labelmap.keys())

    # build another parseable text, as lines stored for rank keys
    # by shuffling, sorting or reorganizing this body we can
    # implement various updating rule selection strategies
    self.update_lines = {}
    for key, values in labelmap.items():
        self.update_lines.setdefault(key, []).extend(
            map(tokenizer.tok2line, values))
def all_nodes(text):
    """Returns all the nodes in the text.

    Tokenizes the model text with the module tokenizer and extracts
    the node names from the resulting tokens.
    """
    tokens = tokenizer.tokenize(text)
    return tokenizer.get_nodes(tokens)
def __init__(self, mode, text ):
    """
    Main parser baseclass for all models.

    Tokenizes the model text, separates initializer tokens from
    update-rule tokens, and groups the update rules by rank so that
    subclasses can implement different updating strategies.
    """
    # check the validity of modes
    if mode not in VALID_MODES:
        util.error( 'mode parameter must be one of %s' % VALID_MODES)

    # initialize the parsers (no table files, no debug output)
    self.parser = yacc.yacc( write_tables=0, debug=0 )

    # set the mode
    self.parser.mode = mode

    # optimization: this check is used very often
    self.parser.sync = (self.parser.mode == SYNC or self.parser.mode == TIME)

    # define default functions for reading/writing node state
    def get_value(state, name, p):
        # default rule: read attribute `name` from the state
        return getattr( state, name )

    def set_value(state, name, value, p):
        # default rule: write `value` to attribute `name` on the state
        setattr( state, name, value )
        return value

    #
    # setting the default rules
    #
    self.parser.RULE_AND = lambda a, b, p: a and b
    self.parser.RULE_OR = lambda a, b, p: a or b
    self.parser.RULE_NOT = lambda a, p: not a
    self.parser.RULE_SETVALUE = set_value
    self.parser.RULE_GETVALUE = get_value
    self.parser.RULE_START_ITERATION = lambda index, model: index

    #
    # internally we'll maintain a full list of tokens
    #
    self.tokens = tokenizer.tokenize( text )
    self.nodes = tokenizer.get_nodes( self.tokens )

    # isolate various types of tokens
    self.init_tokens = tokenizer.init_tokens( self.tokens )
    self.update_tokens = tokenizer.update_tokens( self.tokens )
    self.label_tokens = tokenizer.label_tokens( self.update_tokens )
    self.async_tokens = tokenizer.async_tokens( self.update_tokens )

    # finding the initial and update nodes
    self.init_nodes = tokenizer.get_nodes( self.init_tokens )
    self.update_nodes = tokenizer.get_nodes( self.update_tokens )

    # find uninitialized nodes (updated but never initialized)
    self.uninit_nodes = self.update_nodes - self.init_nodes

    # populate the initializer lines
    # NOTE(review): under Python 3 map() is a one-shot iterator;
    # confirm init_lines is only traversed once, or wrap in list()
    self.init_lines = map( tokenizer.tok2line, self.init_tokens )

    # populate the body by the ranks; unlabeled (async) rules get rank 1
    labelmap = {}
    for tokens in self.async_tokens:
        labelmap.setdefault( 1, []).append( tokens )

    # overwrite the label token's value in nolabel modes
    # for all PLDE, SYNC and ASYNC modes all ranks will be set to 1
    if self.parser.mode in NOLABEL_MODE:
        for token in self.label_tokens:
            token[0].value = 1

    # group the labeled rules under their (possibly rewritten) rank,
    # dropping the leading label token from each rule
    for tokens in self.label_tokens:
        rank = tokens[0].value
        short = tokens[1:]
        labelmap.setdefault( rank, []).append( short )

    # will iterate over the ranks in order
    self.ranks = list(sorted(labelmap.keys()))

    # build another parseable text, as lines stored for rank keys
    # by shuffling, sorting or reorganizing this body we can
    # implement various updating rule selection strategies
    self.update_lines = {}
    for key, values in labelmap.items():
        self.update_lines.setdefault(key, []).extend( map(tokenizer.tok2line, values))