Пример #1
0
    def parse(self, filename):
        """Parse a phylogenetic stats file and derive substitution-model
        parameters.

        Reads *filename*, extracts (model, lnl, alpha) via ``self.common``,
        then fills in base frequencies and exchange rates according to the
        detected model.

        :param filename: path to the stats file
        :return: tuple ``(model, alpha, lnl, freq, rates)``; any entry that
            could not be parsed stays ``None``.
        """
        model = None
        alpha = None
        lnl = None
        freq = None
        rates = None
        # BUGFIX: tstv was previously unbound when its sub-parse failed,
        # raising NameError instead of returning None rates.
        tstv = None

        with open(filename) as fl:
            s = fl.read()

        try:
            model, lnl, alpha = self.common.parseString(s).asList()

        except ParseException as err:
            logger.error(err)

        if model == 'JC69':
            # Jukes-Cantor: equal frequencies, equal rates.
            freq = [0.25, 0.25, 0.25, 0.25]
            rates = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

        elif model == 'K80':
            freq = [0.25, 0.25, 0.25, 0.25]
            try:
                tstv = self.tstv.parseString(s).asList()
            except ParseException as err:
                logger.error(err)

            if tstv is not None:
                rates = [1.0, tstv[0], 1.0, 1.0, tstv[0], 1.0]

        elif model == 'F81':
            try:
                freq = self.freq.parseString(s).asList()
            except ParseException as err:
                logger.error(err)
            rates = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

        elif model == 'F84' or model == 'HKY85' or model == 'TN93':
            parser = Group(self.tstv) + Group(self.freq)
            try:
                tstv, freq = parser.parseString(s).asList()
            except ParseException as err:
                logger.error(err)
            if tstv is not None:
                # TN93 has two transition/transversion ratios; the others
                # reuse a single ratio for both transitions.
                if model == 'TN93':
                    rates = [1.0, tstv[0], 1.0, 1.0, tstv[1], 1.0]
                else:
                    rates = [1.0, tstv[0], 1.0, 1.0, tstv[0], 1.0]

        elif model == 'GTR':
            parser = Group(self.freq) + Group(self.rates)
            try:
                freq, rates = parser.parseString(s).asList()
            except ParseException as err:
                logger.error(err)

        return model, alpha, lnl, freq, rates
Пример #2
0
def parseReactions(reaction):
    """Parse a reaction string like 'A() + B() -> C() k1' into
    [reactant-names, product-names] (the trailing rate is discarded)."""
    # One side of the reaction: a species "name()" optionally joined
    # to a second one by '+'.
    species = Word(alphanums) + Suppress('()')
    side = Group(species + Optional(Suppress('+') + species))
    # Trailing rate expression, consumed and dropped.
    rate = Suppress(Word(alphanums + "()"))

    grammar = side + Suppress("->") + side + rate
    return grammar.parseString(reaction).asList()
Пример #3
0
def _read_solution(scenario, log, task_to_id, id_to_resource):
    """Extract the solver plan embedded in *log* and write start times and
    resource assignments back onto the scenario's tasks."""
    # parse output
    from pyparsing import Keyword, Literal, Word, alphas, nums, printables, OneOrMore, ZeroOrMore, dblQuotedString, Group

    # One plan row: "<task>,<resource>,<start>;"
    integer = Word(nums)
    comma = Literal(",").suppress()
    row = Group(integer + comma + integer + comma +
                integer + Literal(";").suppress())
    plan = Group(Group(ZeroOrMore(row)))

    # The plan is bracketed by marker strings in the raw solver output.
    start_str, end_str = '##START_SOLUTION##', '##END_SOLUTION##'
    begin = log.index(start_str) + len(start_str)
    finish = log.index(end_str)
    int_plan = plan.parseString(log[begin:finish])[0][0]

    # Collect start times and resource assignments per task id.
    starts = dict()
    assign = dict()
    for entry in int_plan:
        task_id = int(entry[0])
        starts[task_id] = int(entry[2])
        assign.setdefault(task_id, list()).append(int(entry[1]))

    # Write the solution back onto the scenario's tasks.
    for T in scenario.tasks():
        T.start = starts[task_to_id[T]]
        if T.resources is None:
            T.resources = list()
        T.resources += [id_to_resource[j] for j in assign[task_to_id[T]]]
Пример #4
0
    def _get_infos_from_file(self, info):
        """Parse an objdump-style sections listing from *info*.content.

        Returns the full ParseResults tree (named 'contents'); raises
        ParseException naming the offending file on malformed input.
        """
        # {object}:  file format elf32-xtensa-le
        object_line = SkipTo(':').setResultsName('object') + Suppress(restOfLine)

        # 'Sections:' banner, then the column-header row (both discarded).
        table_start = Suppress(Literal('Sections:'))
        table_header = Suppress(OneOrMore(Word(alphas)))

        # 00 {section} 0000000 ...
        #              CONTENTS, ALLOC, ....
        table_row = Suppress(Word(nums)) + SkipTo(' ') + Suppress(restOfLine) + \
            Suppress(ZeroOrMore(Word(alphas) + Literal(',')) + Word(alphas))

        # One object file's block: header line plus its section rows.
        per_object = Group(object_line + table_start + table_header +
                           Group(OneOrMore(table_row)).setResultsName('sections'))
        parser = Group(ZeroOrMore(per_object)).setResultsName('contents')

        try:
            return parser.parseString(info.content, parseAll=True)
        except ParseException as p:
            raise ParseException('Unable to parse section info file ' + info.filename + '. ' + p.msg)
Пример #5
0
    def _get_infos_from_file(self, info):
        """Run an objdump sections-table grammar over *info*.content.

        Returns the ParseResults tree (named 'contents'); raises
        ParseException naming the offending file on malformed input.
        """
        # Object file line: '{object}:  file format elf32-xtensa-le'
        obj_line = (Fragment.ENTITY.setResultsName("object") +
                    Literal(":").suppress() +
                    Literal("file format elf32-xtensa-le").suppress())

        # Column headers of the sections table (all discarded).
        table_header = Suppress(Literal("Sections:") + Literal("Idx") +
                                Literal("Name") + Literal("Size") +
                                Literal("VMA") + Literal("LMA") +
                                Literal("File off") + Literal("Algn"))
        # One row: index, section name, then hex columns and flag words.
        table_row = (Word(nums).suppress() + Fragment.ENTITY +
                     Suppress(OneOrMore(Word(alphanums, exact=8)) +
                              Word(nums + "*") +
                              ZeroOrMore(Word(alphas.upper()) +
                                         Optional(Literal(",")))))

        # Content is the object file line plus its sections table.
        per_object = Group(obj_line + table_header +
                           Group(ZeroOrMore(table_row)).setResultsName("sections"))
        parser = Group(ZeroOrMore(per_object)).setResultsName("contents")

        try:
            return parser.parseString(info.content)
        except ParseException as p:
            raise ParseException("Unable to parse section info file " + info.filename + ". " + p.msg)
Пример #6
0
def parse_showhdinfo(stdout, stderr):
    """Parse the output of `VBoxManage showhdinfo` into a dict.

    :param stdout: captured standard output of the command
    :param stderr: captured standard error (unused)
    :return: dict with keys uuid, accessible, logical_size, current_size,
        type, storage_format, format_variant, location
    """
    # NOTE(review): Word('UUID:') matches any run of the characters
    # {U, I, D, :} rather than the literal label; it happens to consume
    # the prefix here, so the pattern is left unchanged.
    uuid_prefix = Suppress(Word('UUID:'))
    id_uuid = Word(alphanums + '-').setResultsName('uuid')
    accessible_prefix = Suppress(Word('Accessible:'))
    id_accessible = Word(alphas).setResultsName('accessible')
    logical_size_prefix = Suppress(Word('Logical size:'))
    id_logical_size = Word(alphanums + ' ').setResultsName('logical_size')
    current_size_prefix = Suppress(Word('Current size on disk:'))
    id_current_size = Word(alphanums + ' ').setResultsName('current_size')
    type_prefix = Suppress(Word('Type:'))
    id_type = Word(alphas + ' ()').setResultsName('type')
    prefix_storage_format = Suppress(Word('Storage format:'))
    id_storage_format = Word(alphas).setResultsName('storage_format')
    prefix_format_variant = Suppress(Word('Format variant:'))
    id_format_variant = Word(alphanums + ' ').setResultsName('format_variant')
    prefix_location = Suppress(Word('Location:'))
    id_location = Word(alphanums + ' /.').setResultsName('location')

    # One 'label: value' line per field, each terminated by EOL.
    hd_info = Group(uuid_prefix + id_uuid + EOL + accessible_prefix +
            id_accessible + EOL + logical_size_prefix + id_logical_size + EOL +
            current_size_prefix + id_current_size + EOL + type_prefix +
            id_type + EOL + prefix_storage_format + id_storage_format + EOL +
            prefix_format_variant + id_format_variant + EOL + prefix_location +
            id_location + EOL)
    out = hd_info.parseString(stdout)[0]

    return {'uuid': out.uuid, 'accessible': out.accessible,
            'logical_size': out.logical_size, 'current_size': out.current_size,
            'type': out.type, 'storage_format': out.storage_format,
            # BUGFIX: was out.storage_variant, which is not a defined
            # results name and therefore always yielded ''.
            'format_variant': out.format_variant, 'location': out.location}
Пример #7
0
def process(path):
    """Read a rules+messages input file and count messages matching the
    part-2 grammar: one-or-more rule-42 matches followed by one-or-more
    rule-31 matches, with strictly fewer 31s than 42s.

    :param path: path to the input file
    """
    print("Input:", path)
    with open(path) as f:
        rules = []
        messages = []
        for line in f:
            if line.strip():
                spec = Line.parseString(line)
                # Rule lines carry a 'rule' results name; everything
                # else is a message.
                if "rule" in spec:
                    rules.append(spec)
                else:
                    messages.append(spec[0])
    grammar = build_grammar(rules)
    valid = []
    invalid = []
    part_2_grammar = Group(OneOrMore(Group(grammar[42]))) + Group(
        OneOrMore(Group(grammar[31])))
    for message in messages:
        try:
            result = part_2_grammar.parseString(message, parseAll=True)
            # result[0] holds the 42-matches, result[1] the 31-matches.
            if len(result[1]) < len(result[0]):
                valid.append(message)
        # BUGFIX: bare 'except:' also swallowed SystemExit and
        # KeyboardInterrupt; narrow it to ordinary exceptions.
        except Exception:
            invalid.append(message)
    print("\tNumber Valid:", len(valid))
    print(valid)
    print(invalid)
Пример #8
0
def parse_connection_str(connstr):
    """Parse a pipeline string 'head(outs) -> mid(ins|outs) -> tail(ins)'.

    Multiple connections are separated by ';'. Returns the validated
    pyparsing result.
    """
    # Identifiers may contain letters, digits, and the extras _.@
    ident = Word(alphas + "0123456789" + "_.@")
    nodename = ident.setResultsName('nodename')

    outputnames = delimitedList(ident).setResultsName('outputnames')
    inputnames = delimitedList(ident).setResultsName('inputnames')

    # A middle node declares inputs and, optionally, outputs.
    middlenode = Group(nodename + Suppress('(') + inputnames +
                       Optional("|" + outputnames) +
                       Suppress(")")).setResultsName('middlenode')
    # The head declares only outputs; the tail declares only inputs.
    headnode = (nodename + Suppress("(") + outputnames +
                Suppress(")")).setResultsName('headnode')
    tailnode = (nodename + Suppress("(") + inputnames +
                Suppress(")")).setResultsName('tailnode')

    # head -> [middle ->]* tail
    middle_chain = Group(ZeroOrMore(Suppress("->") + middlenode +
                                    FollowedBy("->"))).setResultsName('middlenodes')
    connect = Group(headnode + middle_chain +
                    Suppress("->") + tailnode).setResultsName('nodes')

    connectlist = Group(connect + ZeroOrMore(Suppress(";") +
                        connect)).setResultsName('connects')

    parsed = connectlist.parseString(connstr)
    check_numconnections(parsed)
    return parsed
class ListedVerses(BaseFormatClass):
    """Formatter for files whose verses are listed numerically; the
    leading number is used as each line's identifier."""

    def __init__(self, stream):
        BaseFormatClass.__init__(self, stream)
        logging.debug("Initializing ListedVerses...")
        # NOTE: self.verse_word is defined in the base class
        # NOTE: self.verse is defined in the base class
        # Leading verse number, possibly dotted (e.g. "1.").
        self.number = Word(nums + ".")
        # Main working grammar: [number-group, verse-group] per line.
        self.verse_reference = Group(self.number) + Group(self.verse)

    def get_result(self):
        """Yield a result dict (body/identifier/extras) per input line."""
        for line in self.stream:
            logging.debug("Parsing line {}".format(line))
            number_part, verse_part = self.verse_reference.parseString(line)
            self.result['body'] = verse_part
            logging.debug("Body for this line is: {}".format(
                self.result['body']))
            # The identifier is the number prepended to the verse.
            self.result['identifier'] = ''.join(number_part)
            logging.debug("Identifier for this line is {}".format(
                self.result['identifier']))
            # This format carries no extra attributes.
            self.result['extras'] = {}
            yield self.result
Пример #10
0
def parse_connection_str(connstr):
    """Grammar-driven parser for node connection strings.

    Syntax: head(outs) -> mid(ins|outs) -> ... -> tail(ins)[; ...].
    Returns the parsed result after validating connection counts.
    """
    extra_chars = "0123456789" + "_.@"
    identifier = Word(alphas + extra_chars)
    node_name = identifier.setResultsName('nodename')

    out_names = delimitedList(identifier).setResultsName('outputnames')
    in_names = delimitedList(identifier).setResultsName('inputnames')

    # middle nodes have both inputs and outputs
    middle = Group(node_name + Suppress('(') + in_names +
                   Optional("|" + out_names) +
                   Suppress(")")).setResultsName('middlenode')
    # first node has only outputs
    head = (node_name + Suppress("(") + out_names +
            Suppress(")")).setResultsName('headnode')
    # last node has only inputs
    tail = (node_name + Suppress("(") + in_names +
            Suppress(")")).setResultsName('tailnode')

    # connect head -> [middle ->] tail
    middles = Group(ZeroOrMore(Suppress("->") + middle +
                               FollowedBy("->"))).setResultsName('middlenodes')
    single = Group(head + middles +
                   Suppress("->") + tail).setResultsName('nodes')
    chain = Group(single + ZeroOrMore(Suppress(";") +
                  single)).setResultsName('connects')

    parsed = chain.parseString(connstr)
    check_numconnections(parsed)
    return parsed
Пример #11
0
class OfcParser:
    """Dirt-simple OFC parser for interpreting OFC documents."""

    def __init__(self, debug=False):
        # An aggregate is a tag whose body holds nested aggregates and/or
        # leaf content; Forward() enables the recursive definition below.
        aggregate = Forward().setResultsName("OFC")
        aggregate_open_tag, aggregate_close_tag = self._tag()
        content_open_tag = self._tag(closed=False)
        # Leaf node: <TAG>value, where value runs until '<' or end of line.
        content = Group(content_open_tag + CharsNotIn("<\r\n"))
        # <TAG> (aggregate | content)+ </TAG>; Dict keys children by tag name.
        aggregate << Group(aggregate_open_tag \
            + Dict(OneOrMore(aggregate | content)) \
            + aggregate_close_tag)

        self.parser = Group(aggregate).setResultsName("document")
        if (debug):
            # Trace every parse attempt/success/failure via ofxtools hooks.
            self.parser.setDebugActions(ofxtools._ofxtoolsStartDebugAction,
                                        ofxtools._ofxtoolsSuccessDebugAction,
                                        ofxtools._ofxtoolsExceptionDebugAction)

    def _tag(self, closed=True):
        """Generate parser definitions for OFX tags.

        Returns (openTag, closeTag) when *closed* is true; otherwise
        returns just the open-tag expression.
        """
        openTag = Literal("<").suppress() + Word(alphanums + ".") \
            + Literal(">").suppress()
        if (closed):
            # The whole close tag (plus trailing whitespace) is suppressed.
            closeTag = Group("</" + Word(alphanums + ".") + ">" +
                             ZeroOrMore(White())).suppress()
            return openTag, closeTag
        else:
            return openTag

    def parse(self, ofc):
        """Parse a string argument and return a tree structure representing
        the parsed document."""
        return self.parser.parseString(ofc).asDict()
Пример #12
0
    def _get_infos_from_file(self, info):
        """Parse an objdump sections listing from *info*.content.

        :param info: object with ``content`` (text) and ``filename`` attrs
        :return: ParseResults tree under the 'contents' results name
        :raises ParseException: naming the offending file on parse failure
        """
        # Object file line: '{object}:  file format elf32-xtensa-le'
        object = Fragment.ENTITY.setResultsName("object") + Literal(":").suppress() + Literal("file format elf32-xtensa-le").suppress()

        # Sections table column headers (all discarded).
        header = Suppress(Literal("Sections:") + Literal("Idx") + Literal("Name") + Literal("Size") + Literal("VMA") +
                          Literal("LMA") + Literal("File off") + Literal("Algn"))
        # One row: index, name, fixed-width hex columns, then flag words.
        entry = Word(nums).suppress() + Fragment.ENTITY + Suppress(OneOrMore(Word(alphanums, exact=8)) +
                                                                   Word(nums + "*") + ZeroOrMore(Word(alphas.upper()) +
                                                                   Optional(Literal(","))))

        # Content is object file line + sections table
        content = Group(object + header + Group(ZeroOrMore(entry)).setResultsName("sections"))

        parser = Group(ZeroOrMore(content)).setResultsName("contents")

        sections_info_text = info.content
        results = None

        try:
            results = parser.parseString(sections_info_text)
        except ParseException as p:
            # BUGFIX: ParseException has no 'message' attribute (that was
            # Python-2-era); accessing it raised AttributeError and masked
            # the real parse error. The attribute is 'msg'.
            raise ParseException("Unable to parse section info file " + info.filename + ". " + p.msg)

        return results
Пример #13
0
def parseReactions(reaction):
    """Parse a BNGL-style species string into nested token lists.

    A species is dot-separated molecules; each molecule may carry a
    parenthesised, comma-separated component list, and each component
    may have a '~state' and/or a '!bond' suffix.
    """
    name = Word(alphanums + "_")
    state = Optional(Group('~' + Word(alphanums + "_")))
    bond = Optional(Group('!' + Word(alphanums + '+?')))
    component = name + state + bond

    component_list = (Suppress('(') + Group(component) +
                      ZeroOrMore(Suppress(',') + Group(component)) +
                      Suppress(')'))
    molecule = name + Optional(component_list)

    species = Group(molecule) + ZeroOrMore(Suppress('.') + Group(molecule))

    return species.parseString(reaction).asList()
Пример #14
0
def read_tgf(path):
    """Generates an alias.ArgumentationFramework from a Trivial Graph Format (.tgf) file.

    Trivial Graph Format (TGF) is a simple text-based file format for describing graphs. \
    It consists of a list of node definitions, which map node IDs to labels, followed by \
    a list of edges, which specify node pairs and an optional edge label. \
    Node IDs can be arbitrary identifiers, whereas labels for both nodes and edges are plain strings.

    Parameters
    ----------
    path : file or string
        File, directory or filename to be read.

    Returns
    -------
    framework : alias ArgumentationFramework

    Examples
    --------

    References
    ----------
    http://en.wikipedia.org/wiki/Trivial_Graph_Format 
    """

    try:
        from pyparsing import Word, alphanums, ZeroOrMore, White, Suppress, Group, ParseException, Optional
    except ImportError:
        raise ImportError("read_tgf requires pyparsing")

    if not isinstance(path, str):
        return

    # Define tgf grammar
    s = White(" ")
    tag = Word(alphanums)
    arg = Word(alphanums)
    # An attack line: "arg arg [label]"
    att = Group(arg + Suppress(s) + arg + Optional(Suppress(s) + tag))
    nl = Suppress(White("\n"))

    # Arguments, a '#' separator line, then the attack list.
    graph = Group(ZeroOrMore(arg + nl)) + Suppress("#") + nl + Group(ZeroOrMore(att + nl) + ZeroOrMore(att))

    # BUGFIX: use a context manager instead of a leaked file handle.
    with open(path, 'r') as fh:
        f = fh.read()

    head, tail = ntpath.split(path)
    framework = al.ArgumentationFramework(tail)

    try:
        parsed = graph.parseString(f)
    # BUGFIX: 'except ParseException, e:' is Python-2-only syntax and a
    # SyntaxError under Python 3.
    except ParseException as e:
        raise al.ParsingException(e)
    # NOTE(review): 'parsed' is never attached to 'framework' and the
    # function implicitly returns None here -- this example looks
    # truncated; confirm against the original source.
def parseReactions(reaction):
    """Parse a BNGL-style species string into nested token lists.

    A species is dot-separated molecules; each molecule may carry an
    optional parenthesised, comma-separated component list, and each
    component may have a '~state' and/or a '!bond' suffix.
    """
    components = (Word(alphanums + "_") + Optional(Group('~' + Word(alphanums+"_")))
    + Optional(Group('!' + Word(alphanums+'+?'))))
    # BUGFIX: previously only the opening '(' was Optional while the
    # closing ')' was mandatory, so a bare molecule name without a
    # component list failed to parse. Make the whole "( ... )" group
    # optional, consistent with the sibling parseReactions variants.
    molecule = (Word(alphanums + "_")
    + Optional(Suppress('(') + Group(components) + ZeroOrMore(Suppress(',') + Group(components))
    + Suppress(')')))

    species = Group(molecule) + ZeroOrMore(Suppress('.') + Group(molecule))

    result = species.parseString(reaction).asList()

    return result
Пример #16
0
def parseReactions(reaction):
    """Split a dot-separated species string into per-molecule token lists."""
    ident = Word(alphanums + "_")
    # A component: name, optional ~state, optional !bond.
    component = (ident +
                 Optional(Group("~" + Word(alphanums + "_"))) +
                 Optional(Group("!" + Word(alphanums + "+?"))))
    # A molecule: name with an optional "(comp, comp, ...)" list.
    molecule = ident + Optional(
        Suppress("(") + Group(component) +
        ZeroOrMore(Suppress(",") + Group(component)) + Suppress(")"))

    species = Group(molecule) + ZeroOrMore(Suppress(".") + Group(molecule))

    return species.parseString(reaction).asList()
Пример #17
0
class Outline(BaseFormatClass):
    """Formatter for outline-style files where each line is keyed by a
    point identifier such as 'I.', 'a.' or '1.'."""

    def __init__(self, stream):
        BaseFormatClass.__init__(self, stream)
        logging.debug("Initializing Outline...")
        # Grammars defined here
        self.roman_letters = "IVX"
        self.roman_letters_small = 'ivx'
        # A point label: roman numerals (either case), letters or digits.
        self.points = Word(self.roman_letters + alphas + nums +
                           self.roman_letters_small)
        self.point_identifier = self.points + "."

        # A 'chapter:verse[-verse]' reference with optional ';' or ','.
        self.chapter_and_verse = Word(nums) + \
                                 ":" + \
                                 Word(nums) + \
                                 Optional("-") + \
                                 Optional(Word(nums)) + \
                                 Optional(Word(";,"))
        # A bare verse number with optional trailing separator.
        self.listed_verse_grammer = Word(nums) + Optional(Word(";,"))
        # Abbreviated book name, e.g. 'Cor.'.
        self.abbriv = Word(alphas + ".")
        self.verse_list = OneOrMore(self.listed_verse_grammer
                                    ^ self.chapter_and_verse)
        # A full reference: optional book number, abbreviation, verses.
        self.verses = Optional(Word(nums)) + \
                      self.abbriv + \
                      self.verse_list

        # Grammar to match one outline line:
        # [leading whitespace][point id] body [- verse references]
        # NOTE: self.verse is defined in the base class.
        self.line_grammer = Group(Optional(White())) + \
                            Group(Optional(self.point_identifier)) + \
                            Group(self.verse) + \
                            Optional("-") + \
                            Group(ZeroOrMore(self.verses))

    def get_result(self):
        """Yield a result dict (body/identifier/extras) per input line."""
        for line in self.stream:
            logging.debug("Parsing line {}".format(line))
            parsed_line = self.line_grammer.parseString(line)
            # Group 2 is the verse body text.
            self.result['body'] = parsed_line[2]
            logging.debug("Body for this line is: {}".format(
                self.result['body']))
            # The identifier for each line when this file format is
            # used is the point identifier
            self.result['identifier'] = ''.join(parsed_line[1])
            logging.debug("Identifier for this line is {}".format(
                self.result['identifier']))
            # Extras carry the trailing references and the leading indent.
            self.result['extras'] = {
                'verse_references': parsed_line[-1],
                'leading_white': ''.join(parsed_line[0])
            }
            logging.debug("Extras for this line are: {}".format(
                self.result['extras']))
            yield self.result
Пример #18
0
def _extra_deps(value, entry, depends_target, exclude=None):
    """Collect bare target names from ``value[entry]`` into *depends_target*.

    Each dependency label like '@repo//pkg/path:name' or '//pkg:name' is
    reduced to its final 'name' component.

    :param value: mapping that may contain *entry*
    :param entry: key whose list of dependency labels is scanned
    :param depends_target: set updated in place with extracted names
    :param exclude: name to skip (filters out self references)
    """
    if entry in value:
        # Strip the optional '@repo', '//', package path and ':' prefixes,
        # keeping only the trailing target name.
        pattern = Group(
            Optional(Group(Literal('@') + Word(alphanums + '_-')).suppress()) +
            Optional(Literal('//').suppress()) +
            Optional(Word(alphanums + '_-/').suppress()) +
            Optional(Literal(':').suppress()) + Word(alphanums + '_-'))

        for dep in value.get(entry):
            try:
                extract_name = pattern.parseString(dep)[0][0]
                if extract_name != exclude:  # exclude self references
                    depends_target.update({extract_name})
            except Exception:
                # BUGFIX: use lazy %-args instead of eager interpolation
                # and drop the unused exception binding.
                logger.warning('No valid Build content %s', dep)
Пример #19
0
def parse_startvm(stdout, stderr):
    """Parse `VBoxManage startvm` output.

    Returns {'uuid': <uuid of the successfully started VM>}.
    """
    # 'Waiting for VM "<uuid>" to power on...'
    waiting = (Word('Waiting for VM') + DBLQUOTE +
               UUID_STRING.setResultsName('waiting_uuid') + DBLQUOTE +
               Word('to power on...') + EOL)
    # 'VM "<uuid>" has been successfully started.'
    started = (Word('VM') + DBLQUOTE +
               UUID_STRING.setResultsName('success_uuid') + DBLQUOTE +
               Word("has been successfully started."))
    out = Group(waiting + started).parseString(stdout)[0]

    return {'uuid': out.success_uuid}
Пример #20
0
def parse_config_file(filepath):
    """Parse a NetScaler configuration file into a list of token lists.

    :param filepath: path to the NetScaler input configuration
    :return: list of parsed commands; each command is a token list with a
        trailing ['line_no', <n>] entry appended
    """

    EOL = LineEnd().suppress()
    comment = Suppress("#") + Suppress(restOfLine) + EOL
    SOL = LineStart().suppress()
    blank_line = SOL + EOL
    result = []
    # A value token runs up to the next '-'-prefixed option key.
    not_hyphen_sign = ''.join(c for c in printables if c != '-')
    text = Word(not_hyphen_sign, printables)
    # Option keys start with '-'; the leading hyphen is stripped.
    key = Word('-',
               printables).setParseAction(lambda t: t[0].replace('-', '', 1))
    val = originalTextFor(Optional(ZeroOrMore(text), default=None))
    option = Group(key + val)
    multi_word_names = quotedString
    # q{...} quoted objects are kept verbatim.
    q_obj = originalTextFor(Keyword('q{') + SkipTo(Keyword("}")))
    command = Group(
        OneOrMore(q_obj | multi_word_names | text) + ZeroOrMore(option))
    command.ignore(comment | blank_line)
    msg = "Parsing started..."  # hoisted: loop-invariant progress label
    with open(filepath) as infile:
        print("Parsing Input Configuration...")
        lines = infile.readlines()
        total_lines = len(lines)
        for line_no, line in enumerate(lines, start=1):
            try:
                tokens = command.parseString(line).asList()
                if tokens:
                    tokens[0].append(['line_no', str(line_no)])
                result += tokens
            except Exception:
                LOG.error("Parsing error: " + line)
            # Progress is reported for the *next* line, matching the
            # original post-increment counter behavior.
            if line_no + 1 <= total_lines:
                ns_util.print_progress_bar(line_no + 1,
                                           total_lines,
                                           msg,
                                           prefix='Progress',
                                           suffix='')
        return result
Пример #21
0
def parse_createvm(stdout, stderr):
    """Parse `VBoxManage createvm` output into name/uuid/file_path."""
    quote = Suppress(Literal('\''))
    # Virtual machine '<name>' is created and registered.
    name_line = (Suppress(Word('Virtual machine')) + quote +
                 Word(alphanums).setResultsName('name') + quote +
                 Suppress(Word('is created and registered.')) + EOL)
    # UUID: <uuid>
    uuid_line = (Suppress(Word('UUID:')) +
                 Word(srange(r"[a-zA-Z0-9_\-]")).setResultsName('uuid') + EOL)
    # Settings file: '<path>'
    file_line = (Suppress(Word('Settings file:')) + quote +
                 Word(alphanums + " /.").setResultsName('file_path') +
                 quote + EOL)

    vm_info = Group(name_line + uuid_line + file_line)
    out = vm_info.parseString(stdout)[0]
    return {'name': out.name, 'uuid': out.uuid, 'file_path': out.file_path}
Пример #22
0
def parse_p4(spec):
    """Parse a P4 program description and bucket the parsed groups into
    headers, parsers, and actions.

    :param spec: P4 source text
    :return: dict with keys 'headers', 'parsers', 'actions'
    :raises Exception: if a parsed group matches none of the categories
    """
    p4header = p4_header()
    p4parser, p4starter = p4_parser()
    p4action = p4_action()
    # '&' composes an Each: headers/actions/parsers in any order, plus an
    # optional start declaration.
    p4 = Group(ZeroOrMore(p4header) & ZeroOrMore(p4action) & ZeroOrMore(p4parser) & Optional(p4starter))
    parsed = p4.parseString(spec)[0]
    grouped = {'headers':[], 'parsers':[], 'actions':[]}
    for group in parsed:
        # NOTE(review): membership is tested on the ParseResults itself
        # for 'header' but on .asList() for the others -- presumably
        # deliberate given the sub-grammars' token layout; confirm.
        if 'header' in group:
            grouped['headers'].append(group)
        elif 'parser' in group.asList():
            # Drop the leading 'parser' keyword token.
            grouped['parsers'].append(group[1:])
        elif 'action' in group.asList():
            # Drop the leading 'action' keyword token.
            grouped['actions'].append(group[1:])
        else:
            raise Exception("Part of P4 description not categoriased!")
    return grouped
Пример #23
0
def parsing(input):
    """Split one log line into [timestamp, id, event] strings.

    Example input:
        [2016-05-14 17:25:01.642065713+03:00][4281]:pdaemon is already running.
    Returns a list of three strings: the date/time, the unique number,
    and the event text (each group's last token truncated to 15 chars).
    """
    # Rule for the timestamp: "[YYYY-MM-DD" then "HH:MM:SS.frac+tz]".
    time_rule = Suppress("[") + Word("[" + "-" +
                                     nums) + Word(nums + ":" + "." +
                                                  "+") + Suppress("]")
    # Rule for the unique number: "[<digits>]:".
    number_rule = Suppress("[") + Word(nums) + Suppress("]") + Suppress(
        Word(":"))
    # Event made only of words (e.g. "pdaemon is already running").
    words_only_event = OneOrMore(Word(alphas))
    # Event containing paths/uuids
    # (e.g. "/var/www/.../task exists: 9eb35974-...").
    mooc_event = Word("/" + alphas + "-" +
                      ":") + Word(alphas + ":") + Word(alphas + nums + "-")
    # An event of either kind.
    event_rule = ZeroOrMore(words_only_event) + ZeroOrMore(mooc_event)
    # Final rule: date parts, number, and event grouped separately.
    line_rule = Group(time_rule) + Group(number_rule) + Group(event_rule)
    parse_result = line_rule.parseString(input)

    # Truncate the last token of every group to 15 characters.
    for elem in parse_result:
        elem[len(elem) - 1] = elem[len(elem) - 1][:15]

    # Re-insert the spaces lost during parsing, then join each group's
    # tokens back into a single string and collect the three strings.
    single_list = []
    for elem in parse_result:
        for i in range(len(elem) - 1):
            elem[i] += " "
        complete_string = ''
        for token in elem:
            complete_string += token
        single_list.append(complete_string)
    return single_list
Пример #24
0
class CommandLineParser(object):
    """Parses 'command [positional | -key:value | --flag] ...' strings."""

    def __init__(self):
        dashes = Word("-", max=2)
        assign_op = oneOf(": =")

        # Values are either quoted strings (quotes removed) or bare
        # word/path tokens.
        quoted_value = quotedString.setParseAction(removeQuotes)
        bare_value = Regex(r"[a-zA-Z0-9_\./]+")
        value = quoted_value | bare_value

        positional = value
        key_value = Combine(dashes + Word(alphas) + assign_op + value)
        flag = Combine(dashes + Word(alphas))

        args = ZeroOrMore(positional | key_value | flag)
        self.parser = Group(Word(alphas) + args).setResultsName("command")

    def parseString(self, querystring):
        """Run the command grammar over *querystring*."""
        return self.parser.parseString(querystring)
Пример #25
0
def parse_config_file(filepath):
    """Parse a NetScaler configuration file into a list of token lists.

    :param filepath: path of netscalar input configuration
    :return: list of parsed commands; each is a token list with a
        trailing ['line_no', <n>] entry appended
    """

    EOL = LineEnd().suppress()
    comment = Suppress("#") + Suppress(restOfLine) + EOL
    SOL = LineStart().suppress()
    blank_line = SOL + EOL
    result = []
    hyphen = Literal("-")
    # A value token runs up to the next '-'-prefixed option key.
    not_hyphen_sign = ''.join(c for c in printables if c != '-')
    text = Word(not_hyphen_sign, printables)
    # Option keys start with '-'; the leading hyphen is stripped.
    key = Word('-', printables).setParseAction(
        lambda t: t[0].replace('-', '', 1))
    val = originalTextFor(Optional(ZeroOrMore(text), default=None))
    option = Group(key + val)
    multi_word_names = quotedString
    q_obj = originalTextFor(Keyword('q{')+SkipTo(Keyword("}")))
    command = Group(OneOrMore(q_obj | multi_word_names | text) + ZeroOrMore(option))
    command.ignore(comment | blank_line)
    with open(filepath) as infile:
        line_no = 1
        # BUGFIX: Python-2 print statement was a SyntaxError under
        # Python 3; the parenthesised form works on both.
        print("Parsing Input Configuration...")
        lines = infile.readlines()
        total_lines = len(lines)
        for line in lines:
            try:
                tmp = command.parseString(line)
                tokens = tmp.asList()
                if tokens:
                    tokens[0].append(['line_no', str(line_no)])
                result += tokens
                line_no += 1
            except Exception:
                line_no += 1
                LOG.error("Parsing error: " + line)
            msg = "Parsing started..."
            if line_no <= total_lines:
                ns_util.print_progress_bar(line_no, total_lines, msg, prefix='Progress',
                                 suffix='')
        return result
Пример #26
0
def _read_solution(scenario, log, task_to_id, id_to_resource, msg=0):
    """Parse the solver log and write start times / resource assignments
    onto the scenario's tasks.

    :param scenario: scenario whose tasks are updated in place
    :param log: raw solver output containing the solution markers
    :param task_to_id: mapping task -> numeric solver id
    :param id_to_resource: mapping numeric solver id -> resource
    :param msg: when truthy, print an error message if no solution found
    :return: 1 on success, 0 if the log contains no solution markers
    """
    S = scenario

    # parse output
    from pyparsing import Keyword, Literal, Word, alphas, nums, printables, OneOrMore, ZeroOrMore, dblQuotedString, Group
    INT = Word(nums)
    # One plan row: "<task>,<resource>,<start>;"
    int_row = Group( INT + Literal(",").suppress() + \
                            INT + Literal(",").suppress() + \
       INT + Literal(";").suppress() )
    plan = Group(Group(ZeroOrMore(int_row)))

    try:
        start_str, end_str = '##START_SOLUTION##', '##END_SOLUTION##'
        start_i, end_i = log.index(start_str) + len(start_str), log.index(
            end_str)
    # BUGFIX: bare 'except:' also swallowed SystemExit/KeyboardInterrupt;
    # str.index raises ValueError when a marker is missing.
    except ValueError:
        print(log)
        if msg:
            print('ERROR: no solution found')
        return 0

    opl_plan = plan.parseString(log[start_i:end_i])
    int_plan = opl_plan[0][0]

    # get starts and resource assignments
    starts = dict()
    assign = dict()
    for row in int_plan:
        task_id = int(row[0])
        starts[task_id] = int(row[2])
        if task_id not in assign:
            assign[task_id] = list()
        assign[task_id].append(int(row[1]))

    # add to scenario
    for T in S.tasks():
        T.start_value = starts[task_to_id[T]]
        if T.resources is None:
            T.resources = list()
        T.resources = [id_to_resource[j] for j in assign[task_to_id[T]]]

    return 1
Пример #27
0
class ReferencedVerses(BaseFormatClass):
    """Format handler for lines of the form ``<verse text> - <reference>``.

    The reference looks like ``[book number] <book name> <chapter>:<verse>``
    (e.g. ``1 John 3:16``); ``get_result`` yields one result dict per line.
    """

    def __init__(self, stream):
        # stream: iterable of input lines consumed later by get_result()

        BaseFormatClass.__init__(self, stream)
        logging.debug("Initializing ReferencedVerses...")
        # Grammars defined here
        #NOTE: self.verse_word is defined in the base class
        #NOTE: self.verse is defined in the base class
        self.book_num = Optional(Word(nums, exact=1))
        self.book_name = Word(alphas)
        self.book_chapter = Word(nums)
        self.book_verse = Word(nums)
        self.reference = (self.book_num + self.book_name +
                          Group(self.book_chapter + ":" + self.book_verse))
        # This is the dataset we will mainly be operating on, since
        # it will contain the verse and the reference seperated out
        self.verse_reference = Group(self.verse) + Suppress('-') + Group(
            self.reference)

    # Must return a dataset with these
    def get_result(self):
        """Yield a result dict per input line with 'body', 'identifier' and
        'extras' keys.

        NOTE(review): the same ``self.result`` dict object (presumably created
        in the base class — confirm) is mutated and re-yielded on every
        iteration; consumers must copy it to keep data across lines.
        """
        # Reading input stream line by line
        for line in self.stream:
            logging.debug("Parsing line: {}".format(line))
            parsed_line = self.verse_reference.parseString(line)
            # parsed_line[0] is the grouped verse body,
            # parsed_line[1] the grouped reference tokens
            self.result['body'] = parsed_line[0]
            logging.debug("Body for this line is: {}".format(
                self.result['body']))
            # The identifier for each line when this file format is used
            # is the verse reference
            self.result['identifier'] = "{} {}".format(
                ' '.join(parsed_line[1][:-1]), ''.join(parsed_line[1][-1]))
            logging.debug("Identifier for this line is {}".format(
                self.result['identifier']))
            # The extra attribute 'identifier_position' is so that the
            # consumer has a way of knowing if the identifer should
            # go before the line starts, or after the line starts.
            self.result['extras'] = {'identifier_position': 'off'}
            # NOTE(review): this is a one-element *set* literal; a dict or a
            # plain value may have been intended — confirm with consumers.
            self.result['extras']['verse_references'] = {
                self.result['identifier']
            }
            yield self.result
Пример #28
0
    def parse_pabl(self, raw_pabl):
        """Parse a PABL document into a pyparsing parse tree.

        The grammar is indentation-sensitive: the INDENT/UNDENT markers drive
        the ``check_sub_indent`` / ``check_peer_indent`` / ``unindent``
        callbacks on ``self`` to track nesting.

        :param raw_pabl: raw PABL source text
        :return: the pyparsing ParseResults tree for the whole document
        """
        # INDENT fires at a line break where the next line is indented deeper
        INDENT = lineEnd.suppress() + empty + empty.copy().setParseAction(
            self.check_sub_indent)
        UNDENT = FollowedBy(empty).setParseAction(self.check_unindent)
        # NOTE(review): setParseAction *replaces* the action attached on the
        # previous line, so check_unindent never runs; addParseAction would
        # append instead. Confirm whether this replacement is intended.
        UNDENT.setParseAction(self.unindent)

        terminator = Literal(';').suppress()
        comment = Literal('#') + restOfLine
        item_name = Word(alphas, alphanums + '_')
        variable = Word(alphas, alphanums + '_.')
        variable_as = (variable + 'as' + item_name)

        stmt = Forward()
        # a suite is one or more statements at the same indentation level
        suite = Group(
            OneOrMore(empty + stmt.setParseAction(self.check_peer_indent)))
        suite.ignore(comment)

        item_start = Literal('@item').suppress()
        item_end = Literal(':').suppress()
        permission_start = Literal('@permissions')

        # "@item <name>:" followed by an indented suite
        item_decl = (item_start + item_name.setResultsName('item') + item_end)
        item_defn = Group(item_decl + INDENT + suite + UNDENT)

        # "@permissions a, b, c:" followed by an indented suite
        permission_decl = (permission_start + Group(
            delimitedList(item_name).setResultsName('permissions')) + item_end)
        permission_defn = Group(permission_decl + INDENT + suite + UNDENT)

        # comma-separated field list terminated by ';', e.g. "a.b as c, d;"
        fieldList = delimitedList(
            Group(variable_as) | variable
        ).setResultsName('fields') + terminator

        stmt << (item_defn | fieldList | Group(permission_defn))

        parseTree = suite.parseString(raw_pabl)

        return parseTree
    def read_tgf(path):
        """Read an argumentation framework from a Trivial Graph Format file.

        The file lists argument names (one per line), a '#' separator, then
        attack pairs "attacker target [label]".

        :param path: path to the .tgf file; silently returns None for
            non-string input (preserved from the original behaviour)
        :return: an ArgumentationFramework named after the file's basename
        :raises ImportError: if pyparsing is not installed
        :raises ParseException: if the file does not match the TGF grammar
        """
        try:
            from pyparsing import Word, alphanums, ZeroOrMore, White, Suppress, Group, ParseException, Optional
        except ImportError:
            raise ImportError("read_tgf requires pyparsing")

        if not isinstance(path, str):
            return

        # Define tgf grammar
        s = White(" ")
        tag = Word(alphanums)
        arg = Word(alphanums)
        # attack: "attacker target" with an optional trailing label
        att = Group(arg + Suppress(s) + arg + Optional(Suppress(s) + tag))
        nl = Suppress(White("\n"))

        graph = Group(ZeroOrMore(arg + nl)) + Suppress("#") + nl + Group(
            ZeroOrMore(att + nl) + ZeroOrMore(att))

        # with-statement guarantees the handle is closed
        # (the original opened the file and never closed it)
        with open(path, 'r') as handle:
            content = handle.read()

        framework = ArgumentationFramework(ntpath.basename(path))

        # let ParseException propagate unchanged (the original caught it
        # only to immediately re-raise)
        parsed = graph.parseString(content)

        for arg in parsed[0]:
            framework.add_argument(arg)

        for att in parsed[1]:
            framework.add_attack(att[0], att[1])

        return framework
Пример #30
0
class JSONCommandLineParser(object):
    """Split a query string into a shell-like command part and the trailing
    JSON payload (everything from the first '{' or '[' onwards)."""

    def __init__(self):
        """Build the pyparsing grammar for the command-line prefix."""
        dash = Word("-", max=2)
        operator = oneOf(": =")

        argValueType1 = quotedString.setParseAction(removeQuotes)
        # raw string: '\.' in a non-raw literal is an invalid escape sequence
        # (SyntaxWarning on modern Python); the pattern itself is unchanged
        argValueType2 = Regex(r"[a-zA-Z0-9_\./]+")

        positionalArgument = (argValueType1 | argValueType2)
        regularArgument = Combine(dash + Word(alphas) + operator + (argValueType1 | argValueType2))
        novalueArgument = Combine(dash + Word(alphas))

        arguments = ZeroOrMore(positionalArgument | regularArgument | novalueArgument)

        self.parser = Group(Word(alphas) + arguments).setResultsName("command") + SkipTo(Word("{[")).setParseAction(self.jsonData)

    def jsonData(self, commandtext, location, tokens):
        """Parse action: return the raw tail of the input that follows the
        matched command tokens (i.e. the untouched JSON payload)."""
        startindex = location + len("".join(tokens))
        return commandtext[startindex:]

    def parseString(self, querystring):
        """Parse *querystring* into {'command': ..., 'filedata': ...}."""
        result = self.parser.parseString(querystring)
        return {"command": result.command,
                "filedata": result[1]}
Пример #31
0
class SExpressionParser(object):
    """Parser for s-expressions built on pyparsing.

    ``parse_expression`` parses a single expression (atom or parenthesised
    composite); ``parse_expressions`` parses a whole sequence of them.
    """

    def __init__(self):
        self.lpar = Literal('(')
        self.rpar = Literal(')')

        # every printable character except the parentheses may appear in a word
        self.word_chars = ''.join(ch for ch in printables if ch not in '()')
        self.word = Word(self.word_chars) | quotedString
        self.atom = self.word

        self.expression = Forward()

        composite = (
            Suppress(self.lpar) +
            ZeroOrMore(self.expression) +
            Suppress(self.rpar))('composite_expression')
        composite.addParseAction(self._composite_expression_to_tuple)
        self.composite_expression = composite

        self.expression << (self.atom | self.composite_expression)

        self.expressions = Group(ZeroOrMore(self.expression))('expressions')
        self.expressions.addParseAction(self._expressions_to_tuple)

    def parse_expression(self, instring):
        """Parse exactly one s-expression and return its converted form."""
        return self.expression.parseString(instring, parseAll=True)[0]

    def parse_expressions(self, instring):
        """Parse a sequence of s-expressions covering the whole input."""
        return self.expressions.parseString(instring, parseAll=True)[0]

    @staticmethod
    def _composite_expression_to_tuple(toks):
        """Wrap the tokens of a composite expression in an SExpression."""
        return SExpression(toks.composite_expression)

    @staticmethod
    def _expressions_to_tuple(toks):
        """Wrap the top-level token sequence in an SExpressionList."""
        return SExpressionList(toks.expressions)
Пример #32
0
class OfcParser:
    """Dirt-simple OFC parser for interpreting OFC documents."""

    def __init__(self, debug=False):
        """Build the recursive pyparsing grammar for OFC aggregates.

        :param debug: when true, attach the ofxtools debug actions.
        """
        aggregate = Forward().setResultsName("OFC")
        aggregate_open_tag, aggregate_close_tag = self._tag()
        content_open_tag = self._tag(closed=False)
        # a leaf node: <TAG>value, the value running up to '<' or end of line
        content = Group(content_open_tag + CharsNotIn("<\r\n"))
        aggregate << Group(aggregate_open_tag
                           + Dict(OneOrMore(aggregate | content))
                           + aggregate_close_tag)

        self.parser = Group(aggregate).setResultsName("document")
        if debug:
            self.parser.setDebugActions(ofxtools._ofxtoolsStartDebugAction,
                                        ofxtools._ofxtoolsSuccessDebugAction,
                                        ofxtools._ofxtoolsExceptionDebugAction)

    def _tag(self, closed=True):
        """Generate parser definitions for OFX tags.

        :return: (openTag, closeTag) when *closed*, otherwise just openTag.
        """
        openTag = Literal("<").suppress() + Word(alphanums + ".") \
            + Literal(">").suppress()
        if closed:
            closeTag = Group("</" + Word(alphanums + ".") + ">" + ZeroOrMore(White())).suppress()
            return openTag, closeTag
        else:
            return openTag

    def parse(self, ofc):
        """Parse a string argument and return a tree structure representing
        the parsed document."""
        ofc = self.remove_inline_closing_tags(ofc)
        try:
            return self.parser.parseString(ofc).asDict()
        except ParseException:
            # retry once after applying the heavier clean-up heuristics
            fixed_ofc = self.fix_ofc(ofc)
            return self.parser.parseString(fixed_ofc).asDict()

    def remove_inline_closing_tags(self, ofc):
        """
        Fix an OFC, by removing inline closing 'tags'
        """
        return re.compile(r'(\w+.*)<\/\w+>', re.UNICODE).sub(r'\1', ofc)

    def fix_ofc(self, ofc):
        """
        Do some magic to fix an bad OFC
        """
        ofc = self._remove_bad_tags(ofc)
        ofc = self._fill_dummy_tags(ofc)
        return self._inject_tags(ofc)

    def _remove_bad_tags(self, ofc):
        # TRNRS and CLTID blocks are not part of the grammar; strip them
        ofc_without_trnrs = re.sub(r'<[/]*TRNRS>', '', ofc)
        return re.sub(r'<[/]*CLTID>\w+', '', ofc_without_trnrs)

    def _fill_dummy_tags(self, ofc):
        # give empty FITID/CHKNUM leaves a dummy '0' value so they parse
        expression = r'(<%s>)[^\w+]'
        replacement = r'<%s>0\n'
        ofc = re.sub(expression % 'FITID', replacement % 'FITID', ofc)
        filled_ofc = re.sub(expression % 'CHKNUM', replacement % 'CHKNUM', ofc)

        return filled_ofc

    def _inject_tags(self, ofc):
        # wrap a bare <OFC> document in the ACCTSTMT/ACCTFROM boilerplate
        tags = "<OFC>\n<ACCTSTMT>\n<ACCTFROM>\n<BANKID>0\n<ACCTID>0\n<ACCTTYPE>0\n</ACCTFROM>\n"
        if not re.findall(r'<OFC>\w*\s*<ACCTSTMT>', ofc):
            return ofc.replace('<OFC>', tags).replace('</OFC>', '</ACCTSTMT>\n</OFC>')
        # BUG FIX: the original fell through and returned None when the
        # document already contained <ACCTSTMT>; return it unchanged instead.
        return ofc
Пример #33
0
class OfcParser:
    """Dirt-simple OFC parser for interpreting OFC documents."""

    def __init__(self, debug=False):
        """Build the recursive pyparsing grammar for OFC aggregates.

        :param debug: when true, attach the ofxtools debug actions.
        """
        aggregate = Forward().setResultsName("OFC")
        aggregate_open_tag, aggregate_close_tag = self._tag()
        content_open_tag = self._tag(closed=False)
        # a leaf node: <TAG>value, the value running up to '<' or end of line
        content = Group(content_open_tag + CharsNotIn("<\r\n"))
        aggregate << Group(aggregate_open_tag
                           + Dict(OneOrMore(aggregate | content))
                           + aggregate_close_tag)

        self.parser = Group(aggregate).setResultsName("document")
        if debug:
            self.parser.setDebugActions(ofxtools._ofxtoolsStartDebugAction,
                                        ofxtools._ofxtoolsSuccessDebugAction,
                                        ofxtools._ofxtoolsExceptionDebugAction)

    def _tag(self, closed=True):
        """Generate parser definitions for OFX tags.

        :return: (openTag, closeTag) when *closed*, otherwise just openTag.
        """
        openTag = Literal("<").suppress() + Word(alphanums + ".") \
            + Literal(">").suppress()
        if closed:
            closeTag = Group("</" + Word(alphanums + ".") + ">" +
                             ZeroOrMore(White())).suppress()
            return openTag, closeTag
        else:
            return openTag

    def parse(self, ofc):
        """Parse a string argument and return a tree structure representing
        the parsed document."""
        ofc = self.add_zero_to_empty_ledger_tag(ofc)
        ofc = self.remove_inline_closing_tags(ofc)
        ofc = ofxtools.util.strip_empty_tags(ofc)
        ofc = self._translate_chknum_to_checknum(ofc)
        # the recursive grammar can exhaust the default recursion limit on
        # large documents; XXX: needs better solution
        import sys
        sys.setrecursionlimit(5000)
        try:
            return self.parser.parseString(ofc).asDict()
        except ParseException:
            # retry once after applying the heavier clean-up heuristics
            fixed_ofc = self.fix_ofc(ofc)
            return self.parser.parseString(fixed_ofc).asDict()

    def add_zero_to_empty_ledger_tag(self, ofc):
        """
        Fix an OFC, by adding zero to LEDGER blank tag
        """
        return re.compile(r'<LEDGER>(\D*\n)',
                          re.UNICODE).sub(r'<LEDGER>0\1', ofc)

    def remove_inline_closing_tags(self, ofc):
        """
        Fix an OFC, by removing inline closing 'tags'
        """
        return re.compile(r'(\w+.*)<\/\w+>', re.UNICODE).sub(r'\1', ofc)

    def fix_ofc(self, ofc):
        """
        Do some magic to fix an bad OFC
        """
        ofc = self._remove_bad_tags(ofc)
        ofc = self._fill_dummy_tags(ofc)
        return self._inject_tags(ofc)

    def _remove_bad_tags(self, ofc):
        # TRNRS and CLTID blocks are not part of the grammar; strip them
        ofc_without_trnrs = re.sub(r'<[/]*TRNRS>', '', ofc)
        return re.sub(r'<[/]*CLTID>\w+', '', ofc_without_trnrs)

    def _fill_dummy_tags(self, ofc):
        # give empty FITID/CHECKNUM leaves a dummy '0' value so they parse
        expression = r'(<%s>)[^\w+]'
        replacement = r'<%s>0\n'
        ofc = re.sub(expression % 'FITID', replacement % 'FITID', ofc)
        filled_ofc = re.sub(expression % 'CHECKNUM', replacement % 'CHECKNUM',
                            ofc)

        return filled_ofc

    def _translate_chknum_to_checknum(self, ofc):
        """
        Some banks put an CHKNUM instead of CHECKNUM. this method translates
        CHKNUM to CHECKNUM in order to parse this information correctly
        """
        return re.sub('CHKNUM', 'CHECKNUM', ofc)

    def _inject_tags(self, ofc):
        # wrap a bare <OFC> document in the ACCTSTMT/ACCTFROM boilerplate
        tags = "<OFC>\n<ACCTSTMT>\n<ACCTFROM>\n<BANKID>0\n<ACCTID>0\n<ACCTTYPE>0\n</ACCTFROM>\n"
        if not re.findall(r'<OFC>\w*\s*<ACCTSTMT>', ofc):
            return ofc.replace('<OFC>', tags).replace('</OFC>',
                                                      '</ACCTSTMT>\n</OFC>')
        # BUG FIX: the original fell through and returned None when the
        # document already contained <ACCTSTMT>; return it unchanged instead.
        return ofc
Пример #34
0
 def import_file(self, file):
     """Read triple statements ("<s> <p> <o> .") from *file*, one per line,
     and add each parsed triple to this store."""
     statement = Group(_literal + _literal + _literal + Literal('.').suppress())

     for raw_line in file:
         parsed = statement.parseString(raw_line)
         self.add_triples(*(tuple(item) for item in parsed))
Пример #35
0
class Parser(object):

    r"""A parser class for solving simple
    data accesses and super-indexing data

    :param data: Trace Object
    :type data: instance of :mod:`trappy.ftrace.BareTrace` or a child
        class (like :mod:`trappy.ftrace.FTrace`)

    :param pvars: A dictionary of variables that need to be
        accessed from within the grammar
    :type pvars: dict

    :param method: The method to be used for reindexing data
        This can be one of the standas :mod:`pandas.DataFrame`
        methods (eg. pad, bfill, nearest). The default is pad
        or use the last valid observation.
    :type method: str

    :param limit: The number of indices a value will be propagated
        when reindexing. The default is None
    :type limit: int

    :param fill: Whether to fill the NaNs in the data.
        The default value is True.
    :type fill: bool

    :param window: A window of time in which to apply the data
        accesses.  By default the data accesses happen accross the
        whole trace.  With the window parameter you can limit it to a
        window of time inside the trace.  The first element of the
        tuple is the starting time and the second the ending time (set
        to None for end of trace).

    :type window: tuple

    :param filters: Restrict the parsing to the rows that match the
        specified criteria. For Example:
        ::

            filters =
                    {
                        "pid": 3338,
                        "cpu": [0, 2, 4],
                    }

        will only consider rows whose pid column is 3338 and cpu is
        either 0, 2 or 4.
    :type filters: dict

    - **Operators**

        +----------------+----------------------+---------------+
        | Operation      |      operator        | Associativity |
        +================+======================+===============+
        | Exponentiation | \*\*                 |    Left       |
        +----------------+----------------------+---------------+
        |Unary           | \-                   |    Right      |
        +----------------+----------------------+---------------+
        | Multiply/Divide| \*, /, //, %         |    Left       |
        +----------------+----------------------+---------------+
        | Add/Subtract   | +, \-,               |    Left       |
        +----------------+----------------------+---------------+
        | Comparison     | >, <, >=, <=, ==, != |    Left       |
        +----------------+----------------------+---------------+
        | Logical        | &&, ||, \|, &        |    Left       |
        +----------------+----------------------+---------------+

    - **Data Accessors**

        Since the goal of the grammar is to provide an
        easy language to access and compare data
        from a :mod:`trappy.trace.FTrace` object. The parser provides
        a simple notation to access this data.

        *Statically Defined Events*
        ::

            import trappy
            from trappy.stats.grammar import Parser

            trace = trappy.FTrace("path/to/trace/file")
            parser = Parser(trace)
            parser.solve("trappy.thermal.Thermal:temp * 2")

        *Aliasing*
        ::

            import trappy
            from trappy.stats.grammar import Parser

            pvars = {"THERMAL": trappy.thermal.Thermal}
            trace = trappy.FTrace("path/to/trace/file")
            parser = Parser(trace, pvars=pvars)
            parser.solve("THERMAL:temp * 2")

        *Using Event Name*
        ::

            import trappy
            from trappy.stats.grammar import Parser
            trace = trappy.FTrace("path/to/trace/file")
            parser = Parser(trace)
            parser.solve("thermal:temp * 2")

        The event :mod:`trappy.thermal.Thermal` is aliased
        as **thermal** in the grammar

        *Dynamic Events*
        ::

            import trappy
            from trappy.stats.grammar import Parser

            # Register Dynamic Event
            cls = trappy.register_dynamic_ftrace("my_unique_word", "event_name")

            pvars = {"CUSTOM": cls}
            trace = trappy.FTrace("path/to/trace/file")
            parser = Parser(trace, pvars=pvars)
            parser.solve("CUSTOM:col * 2")

        .. seealso:: :mod:`trappy.dynamic.register_dynamic_ftrace`

    """

    def __init__(self, data, pvars=None, window=(0, None), filters=None, **kwargs):
        """Set up the accessor grammar and the aggregation state."""
        if pvars is None:
            pvars = {}

        self.data = data
        self._pvars = pvars
        # Both expressions share the "<event>:<column>" shape but run
        # different parse actions (aggregation vs. inspection).
        self._accessor = Group(
            FUNC_NAME + COLON + IDENTIFIER).setParseAction(self._pre_process)
        self._inspect = Group(
            FUNC_NAME + COLON + IDENTIFIER).setParseAction(self._parse_for_info)
        self._parse_expr = get_parse_expression(
            self._parse_func, self._parse_var_id)
        self._agg_df = pd.DataFrame()
        self._pivot_set = set()
        self._limit = kwargs.get("limit", StatConf.REINDEX_LIMIT_DEFAULT)
        self._method = kwargs.get("method", StatConf.REINDEX_METHOD_DEFAULT)
        self._fill = kwargs.get("fill", StatConf.NAN_FILL_DEFAULT)
        self._window = window
        self._filters = filters

    def solve(self, expr):
        """Parses and solves the input expression

        :param expr: The input expression
        :type expr: str

        :return: The return type may vary depending on
            the expression. For example:

            **Vector**
            ::

                import trappy
                from trappy.stats.grammar import Parser

                trace = trappy.FTrace("path/to/trace/file")
                parser = Parser(trace)
                parser.solve("trappy.thermal.Thermal:temp * 2")

            **Scalar**
            ::

                import trappy
                from trappy.stats.grammar import Parser

                trace = trappy.FTrace("path/to/trace/file")
                parser = Parser(trace)
                parser.solve("numpy.mean(trappy.thermal.Thermal:temp)")

            **Vector Mask**
            ::

                import trappy
                from trappy.stats.grammar import Parser

                trace = trappy.FTrace("path/to/trace/file")
                parser = Parser(trace)
                parser.solve("trappy.thermal.Thermal:temp > 65000")
        """

        # Pre-process accessors for indexing
        self._accessor.searchString(expr)
        return self._parse_expr.parseString(expr)[0]

    def _pivot(self, cls, column):
        """Pivot Data for concatenation"""

        data_frame = self._get_data_frame(cls)
        if data_frame.empty:
            raise ValueError("No events found for {}".format(cls.name))

        data_frame = handle_duplicate_index(data_frame)
        new_index = self._agg_df.index.union(data_frame.index)

        if hasattr(cls, "pivot") and cls.pivot:
            pivot = cls.pivot
            pivot_vals = list(np.unique(data_frame[pivot].values))
            data = {}

            # one column per pivot value, reindexed onto the merged index
            for val in pivot_vals:
                data[val] = data_frame[data_frame[pivot] == val][[column]]
                if len(self._agg_df):
                    data[val] = data[val].reindex(
                        index=new_index,
                        method=self._method,
                        limit=self._limit)

            return pd.concat(data, axis=1).swaplevel(0, 1, axis=1)

        if len(self._agg_df):
            data_frame = data_frame.reindex(
                index=new_index,
                method=self._method,
                limit=self._limit)

        return pd.concat({StatConf.GRAMMAR_DEFAULT_PIVOT: data_frame[
                         [column]]}, axis=1).swaplevel(0, 1, axis=1)

    def _pre_process(self, tokens):
        """Pre-process accessors for super-indexing"""

        params = tokens[0]
        # column already aggregated: reuse the cached data
        if params[1] in self._agg_df.columns:
            return self._agg_df[params[1]]

        event = params[0]
        column = params[1]

        if event in self._pvars:
            cls = self._pvars[event]
        elif event in self.data.class_definitions:
            cls = self.data.class_definitions[event]
        else:
            try:
                cls = str_to_attr(event)
            except KeyError:
                raise ValueError(
                    "Can't find parser class for event {}".format(event))

        data_frame = self._pivot(cls, column)
        self._agg_df = pd.concat(
            [self._agg_df, data_frame], axis=1)

        if self._fill:
            # forward-fill NaNs; equivalent to fillna(method="pad"), which
            # is deprecated in recent pandas
            self._agg_df = self._agg_df.ffill()

        return self._agg_df[params[1]]

    def _parse_for_info(self, tokens):
        """Parse Action for inspecting data accessors"""

        params = tokens[0]
        cls = params[0]
        column = params[1]
        info = {}
        info["pivot"] = None
        info["pivot_values"] = None

        if cls in self._pvars:
            cls = self._pvars[cls]
        elif cls in self.data.class_definitions:
            cls = self.data.class_definitions[cls]
        else:
            cls = str_to_attr(cls)

        data_frame = self._get_data_frame(cls)

        info["class"] = cls
        info["length"] = len(data_frame)
        if hasattr(cls, "pivot") and cls.pivot:
            info["pivot"] = cls.pivot
            info["pivot_values"] = list(np.unique(data_frame[cls.pivot]))
        info["column"] = column
        info["column_present"] = column in data_frame.columns
        return info

    def _parse_var_id(self, tokens):
        """A function to parse a variable identifier:
        a numeric literal, a pvars name, or an aggregated column.
        """

        params = tokens[0]
        try:
            return float(params)
        except (ValueError, TypeError):
            try:
                return self._pvars[params]
            except KeyError:
                return self._agg_df[params[1]]

    def _parse_func(self, tokens):
        """A function to parse a function string"""

        params = tokens[0]
        func_name = params[0]
        if func_name in self._pvars and isinstance(
                self._pvars[func_name],
                types.FunctionType):
            func = self._pvars[func_name]
        else:
            func = str_to_attr(params[0])
        return func(*params[1])

    def _get_data_frame(self, cls):
        """Get the data frame from the BareTrace object, applying the window
        and the filters"""

        data_frame = getattr(self.data, cls.name).data_frame

        if data_frame.empty:
            return data_frame
        elif self._window[1] is None:
            data_frame = data_frame.loc[self._window[0]:]
        else:
            data_frame = data_frame.loc[self._window[0]:self._window[1]]

        if self._filters:
            criterion = pd.Series([True] * len(data_frame),
                                  index=data_frame.index)

            # .items() replaces the Python-2-only dict.iteritems()
            for filter_col, wanted_vals in self._filters.items():
                try:
                    dfr_col = data_frame[filter_col]
                except KeyError:
                    continue

                criterion &= dfr_col.isin(listify(wanted_vals))

            data_frame = data_frame[criterion]

        return data_frame

    def ref(self, mask):
        """Reference super indexed data with a boolean mask

        :param mask: A boolean :mod:`pandas.Series` that
            can be used to reference the aggregated data in
            the parser
        :type mask: :mod:`pandas.Series`

        :return: aggregated_data[mask]
        """

        return self._agg_df[mask]

    def inspect(self, accessor):
        """A function to inspect the accessor for information

        :param accessor: A data accessor of the format
            <event>:<column>
        :type accessor: str

        :return: A dictionary of information
        """
        return self._inspect.parseString(accessor)[0]
Пример #36
0
class DefinitionParser(object):
    _str = set([str, unicode])

    lb = "["
    rb = "]"
    lp = "("
    rp = ")"
    left_defa = '<'
    right_defa = '>'

    clause_sep = ","
    part_sep = ";"
    prime = "'"
    hyphen = "-"
    langspec_pre = "$"  # starts langspec deep case
    unary_p = re.compile("^[a-z_#\-/0-9]+(/[0-9]+)?$")
    binary_p = re.compile("^[A-Z_0-9]+(/[0-9]+)?$")

    def __init__(self, plur_dict):
        self.plur_dict = plur_dict
        self.init_parser()

    @classmethod
    def _is_binary(cls, s):
        return ((type(s) in cls._str and cls.binary_p.match(s)) or
                (type(s) is list and s[0] == deep_pre and s[1] == "REL"))

    @classmethod
    def _is_unary(cls, s):
        return ((type(s) in cls._str and cls.unary_p.match(s) is not None) or
                (type(s) is list and (
                    (s[0] == deep_pre) or
                    (s[0] == cls.langspec_pre) or
                    (s[0] == enc_pre) or
                    (s[0] == cls.left_defa)
                )))

    @classmethod
    def _is_deep_case(cls, s):
        return s in deep_cases

    def init_parser(self):
        self.lb_lit = Literal(DefinitionParser.lb)
        self.rb_lit = Literal(DefinitionParser.rb)
        self.lp_lit = Literal(DefinitionParser.lp)
        self.rp_lit = Literal(DefinitionParser.rp)
        self.left_defa_lit = Literal(DefinitionParser.left_defa)
        self.right_defa_lit = Literal(DefinitionParser.right_defa)

        self.clause_sep_lit = Literal(DefinitionParser.clause_sep)
        self.part_sep_lit = Literal(DefinitionParser.part_sep)
        self.prime_lit = Literal(DefinitionParser.prime)
        self.hyphen_lit = Literal(DefinitionParser.hyphen)
        self.enc_pre_lit = Literal(enc_pre)
        self.deep_pre_lit = Literal(deep_pre)
        self.avm_pre_lit = Literal(avm_pre)
        self.langspec_pre_lit = Literal(DefinitionParser.langspec_pre)
        self.id_sep_lit = Literal(id_sep)

        self.disambig_id = self.id_sep_lit + Word(nums)

        self.deep_cases = Group(self.deep_pre_lit + Word(string.uppercase))

        self.unary = Forward()
        self.unary << (Combine(Optional("-") +
                       Word(string.lowercase + "_" + nums) +
                       Optional(self.disambig_id))
                       | self.deep_cases
                       | Group(self.langspec_pre_lit +
                               Word(string.uppercase + "_"))
                       | Group(self.avm_pre_lit +
                               Word(string.ascii_letters + "_"))
                       | Group(self.enc_pre_lit + Word(alphanums + "_-"))
                       | Group(self.left_defa_lit + self.unary +
                               self.right_defa_lit))

        self.binary = (Combine(Word(string.uppercase + "_" + nums) +
                       Optional(self.disambig_id))
                       | Group(self.deep_pre_lit + 'REL'))
        self.dontcare = SkipTo(LineEnd())

        # main expression
        self.expression = Forward()
        self.binexpr = Forward()
        self.unexpr = Forward()
        self.argexpr = Forward()

        # "enumerable expression"
        # D -> E | E, D
        self.definition = Group(delimitedList(self.expression,
                                delim=DefinitionParser.clause_sep))

        self.expression << Group(
            # E -> UE
            (self.unexpr) ^

            # E -> BE
            (self.binexpr) ^

            # E -> U ( E )
            (self.unary + self.lp_lit + self.expression + self.rp_lit) ^

            # E -> < E >
            (self.left_defa_lit + self.expression + self.right_defa_lit)
        )

        self.binexpr << Group(
            # BE -> A B
            (self.argexpr + self.binary) ^

            # BE -> B A
            (self.binary + self.argexpr) ^

            # BE -> A B A
            (self.argexpr + self.binary + self.argexpr) ^

            # BE -> B [ E; E ]
            (self.binary + self.lb_lit + self.expression + self.part_sep_lit
             + self.expression + self.rb_lit)
        )

        self.unexpr << Group(
            # UE -> U
            (self.unary) ^

            # UE -> U [ D ]
            (self.unary + self.lb_lit + self.definition + self.rb_lit) ^

            # UE -> U ( U )
            (self.unary + self.lp_lit + self.unary + self.rp_lit)
        )

        self.argexpr << Group(
            # A -> UE
            (self.unexpr) ^

            # A -> [ D ]
            (self.lb_lit + self.definition + self.rb_lit) ^

            # A -> < A >
            (self.left_defa_lit + self.argexpr + self.right_defa_lit) ^

            # A -> '
            (self.prime_lit)
        )

        self.hu, self.pos, self.en, self.lt, self.pt = (
            Word(alphanums + "#-/_.'"),) * 5
        self.defid = Word(nums)
        self.word = Group(self.hu + self.pos + self.en + self.lt + self.pt)

        # S -> W : D | W : D % _
        #self.sen = self.definition + LineEnd()

    def parse(self, s):
        return self.definition.parseString(s, parseAll=True).asList()

    def create_machine(self, name, partitions):
        # lists are accepted because of ["=", "AGT"]
        if type(name) is list:
            name = "".join(name)

        # HACK until we find a good solution for defaults
        name = name.strip('<>')

        is_plur = name in self.plur_dict
        if is_plur:
            name = self.plur_dict[name]

        m = Machine(decode_from_proszeky(name),
                    ConceptControl(), partitions)
        if is_plur:
            m.append(self.create_machine('more', 1), 0)

        return m

    def unify(self, machine):
        """Merge machines in *machine*'s graph that share a printname.

        Machines are grouped by (printname, has-"other"-child); each group
        is collapsed into one machine that takes over all non-"other"
        children of the merged copies, and every occurrence in the graph is
        replaced by the unified machine.

        NOTE(review): uses dict.iteritems -- Python 2 code.
        """
        def __collect_machines(m, machines, is_root=False):
            # cut the recursion
            key = m.printname(), __has_other(m)
            if (key in machines and m in machines[key]):
                return

            # the root itself is never registered as a merge candidate
            if not is_root:
                machines[m.printname(), __has_other(m)].append(m)
            for partition in m.partitions:
                for m_ in partition:
                    __collect_machines(m_, machines)

        def __has_other(m):
            # True iff partition 0 holds a machine printnamed "other"
            for m_ in m.partitions[0]:
                if m_.printname() == "other":
                    return True
            return False

        def __get_unified(machines, res=None):
            # if nothing to unify, don't
            if len(machines) == 1:
                return machines[0]

            # if a return machine is given, don't create a new one
            if res is None:
                prototype = machines[0]
                res = self.create_machine(prototype.printname(),
                                          len(prototype.partitions))
            for m in machines:
                # if the same machine, don't add anything
                if id(m) == id(res):
                    continue

                # move every non-"other" child over to the unified machine,
                # re-pointing the parent links as we go
                for p_i, p in enumerate(m.partitions):
                    for part_m in p:
                        if part_m.printname() != "other":
                            res.partitions[p_i].append(part_m)

                            part_m.del_parent_link(m, p_i)
                            part_m.add_parent_link(res, p_i)

            return res

        def __replace(where, for_what, is_other=False, visited=None):
            # depth-first walk; 'visited' (ids) guards against graph cycles
            if visited is None:
                visited = set()

            if id(where) in visited:
                return

            visited.add(id(where))

            pn = for_what.printname()
            for p_i, p in enumerate(where.partitions):
                # change the partition machines
                for part_m_i, part_m in enumerate(p):
                    if part_m.printname() == pn and __has_other(
                            part_m) == is_other:
                        where.partitions[p_i][part_m_i] = for_what
                        for_what.add_parent_link(where, p_i)
                    __replace(where.partitions[p_i][part_m_i],
                              for_what, is_other, visited)

                # unification if there is a machine more than once on the same
                # partition
                where.partitions[p_i] = list(set(p))

        machines = defaultdict(list)
        __collect_machines(machine, machines, is_root=True)
        for k, machines_to_unify in machines.iteritems():

            # machines with more than one partition are left untouched
            if len(machines_to_unify[0].partitions) > 1:
                continue

            printname, is_other = k
            #if unification affects the root (machine),
            #be that the result machine
            if printname == machine.printname():
                unified = __get_unified(machines_to_unify, machine)
            else:
                unified = __get_unified(machines_to_unify)
            __replace(machine, unified, is_other)

    def __parse_expr(self, expr, root, loop_to_defendum=True,
                     three_parts=False):
        """
        creates machines from a parse node and its children
        there should be one handler for every rule

        Returns a list of Machine instances (usually length one);
        raises ParserException for node shapes no handler recognizes.
        """

        logging.debug("Parsing expression: {0}".format(expr))

        # name shortening for classmethods
        cls = DefinitionParser

        is_binary = cls._is_binary
        is_unary = cls._is_unary
        is_tree = lambda r: type(r) == list

        # when binaries carry a 0th partition (three_parts), every
        # partition index shifts up by one
        left_part = 0 + int(three_parts)
        right_part = 1 + int(three_parts)
        most_part = 2 + int(three_parts)

        if (len(expr) == 1):
            # UE -> U
            if (is_unary(expr[0])):
                logging.debug("Parsing {0} as a unary.".format(expr[0]))
                return [self.create_machine(expr[0], 1)]

            # E -> UE | BE, A -> UE
            if (is_tree(expr[0])):
                logging.debug("Parsing {0} as a tree.".format(expr[0]))
                return self.__parse_expr(expr[0], root, loop_to_defendum,
                                         three_parts)

        if (len(expr) == 2):
            # BE -> A B
            if (is_tree(expr[0]) and
                    is_binary(expr[1])):
                m = self.create_machine(expr[1], most_part)
                if expr[0] != ["'"]:
                    m.append_all(
                        self.__parse_expr(expr[0], root, loop_to_defendum,
                                          three_parts),
                        left_part)
                if loop_to_defendum:
                    m.append(root, right_part)
                return [m]

            # BE -> B A
            if (is_binary(expr[0]) and
                    is_tree(expr[1])):
                m = self.create_machine(expr[0], most_part)
                if expr[1] != ["'"]:
                    m.append_all(
                        self.__parse_expr(expr[1], root, loop_to_defendum,
                                          three_parts),
                        right_part)
                if loop_to_defendum:
                    m.append(root, left_part)
                return [m]

            # BE -> 'B
            if (expr[0] == ["'"] and
                    is_binary(expr[1])):
                m = self.create_machine(expr[1], most_part)
                #m.append(parent, 1)
                if loop_to_defendum:
                    m.append(root, right_part)
                return [m]

            # BE -> B'
            if (is_binary(expr[0]) and
                    expr[1] == ["'"]):
                m = self.create_machine(expr[0], most_part)
                # m.append(parent, 0)
                if loop_to_defendum:
                    m.append(root, left_part)
                return [m]

            # U -> =AGT
            if expr[0] == deep_pre:
                return [self.create_machine(deep_pre + expr[1], 1)]

            # U -> $HUN_FROM
            if (expr[0] == cls.langspec_pre):
                return [self.create_machine(cls.langspec_pre + expr[1], 1)]

            # U -> #AVM
            if (expr[0] == avm_pre):
                return [self.create_machine(avm_pre + expr[1], 1)]

            # U -> @External_url
            if (expr[0] == enc_pre):
                return [self.create_machine(enc_pre + expr[1], 1)]

        if (len(expr) == 3):
            # UB -> A B A
            if (is_tree(expr[0]) and
                    is_binary(expr[1]) and
                    is_tree(expr[2])):
                m = self.create_machine(expr[1], most_part)
                logging.debug(expr[1])
                if expr[0] != [DefinitionParser.prime]:
                    logging.debug(expr[0])
                    m.append_all(
                        self.__parse_expr(expr[0], root, loop_to_defendum,
                                          three_parts),
                        left_part)
                if expr[2] != [DefinitionParser.prime]:
                    m.append_all(
                        self.__parse_expr(expr[2], root, loop_to_defendum,
                                          three_parts),
                        right_part)
                return [m]

            # A -> [ D ]
            if (expr[0] == "[" and
                    is_tree(expr[1]) and
                    expr[2] == "]"):
                logging.debug(
                    "Parsing expr {0} as an embedded definition".format(expr))
                res = list(
                    self.__parse_definition(expr[1], root, loop_to_defendum,
                                            three_parts))
                return res

            # E -> < E >, U -> < U >
            if expr[0] == '<' and expr[2] == '>':
                logging.debug('E -> < E >' + str(expr[1]))
                return list(self.__parse_expr(expr[1], root, loop_to_defendum,
                                              three_parts))

        if (len(expr) == 4):
            # UE -> U ( U )
            # E -> U ( BE ) provisional
            if (is_unary(expr[0]) and
                    expr[1] == "(" and
                    expr[3] == ")"):
                logging.debug('X -> U ( Y )')
                if is_unary(expr[2]):
                    m = self.create_machine(expr[2], 1)
                else:
                    m = self.__parse_expr(expr[2], root, loop_to_defendum,
                                          three_parts)[0]
                    if not three_parts:
                        logging.warning(
                            "for 0th partition of binary machines, " +
                            "set three_parts=True, "+str(expr))
                m.append(self.create_machine(expr[0], 1), 0)
                return [m]

            # UE -> U [ D ]
            if (is_unary(expr[0]) and
                    expr[1] == "[" and
                    is_tree(expr[2]) and
                    expr[3] == "]"):
                m = self.create_machine(expr[0], 1)
                for parsed_expr in self.__parse_definition(expr[2], root,
                                                           loop_to_defendum,
                                                           three_parts):
                    m.append(parsed_expr, 0)
                return [m]

            logging.warning('machine cannot be built '+str(expr))

        if (len(expr) == 6):
            # BE -> B [E; E]
            if (is_binary(expr[0]) and
                    expr[1] == "[" and
                    is_tree(expr[2]) and
                    expr[3] == ";" and
                    is_tree(expr[4]) and
                    expr[5] == "]"):
                m = self.create_machine(expr[0], 2)
                # BUGFIX: the recursive calls used to pass both 'm' and
                # 'root' positionally -- one argument too many for this
                # method's signature, so this branch raised TypeError
                # whenever it ran.  'root' is forwarded, matching every
                # other branch.
                m.append_all(
                    self.__parse_expr(expr[2], root, loop_to_defendum,
                                      three_parts),
                    0)
                m.append_all(
                    self.__parse_expr(expr[4], root, loop_to_defendum,
                                      three_parts),
                    1)
                return [m]

        pe = ParserException(
            "Unknown expression in definition: {0} (len={1})".format(
                expr,
                len(expr)))
        logging.debug(str(pe))
        logging.debug(expr)
        raise pe

    def __parse_definition(self, definition, root, loop_to_defendum=True,
                           three_parts=False):
        """Yield the first machine produced for each clause of *definition*."""
        logging.debug(str(definition))
        for clause in definition:
            machines = self.__parse_expr(
                clause, root, loop_to_defendum, three_parts)
            yield machines[0]

    def parse_into_machines(self, string, printname_index=0, add_indices=False,
                            loop_to_defendum=True, three_parts=False):
        """Parse one tab-separated lexicon line into a Machine graph.

        The line is expected to carry the printname at *printname_index*
        and (id, urob, pos, definition, comment) in fields 4..8.  The
        definition field, when non-empty, is parsed and appended to the
        head machine's partition 0; finally the graph is unified.

        Raises ValueError when the line does not have the expected number
        of fields (was a bare ``except:`` re-raising generic Exception).
        """
        # split once instead of three times
        fields = string.split('\t')
        printname = fields[printname_index]
        try:
            id_, urob, pos, def_, comment = fields[4:]
        except ValueError:
            # any other field count means a malformed lexicon line
            raise ValueError(
                "expected 9 tab-separated fields, got {0}: {1}".format(
                    len(fields), fields))

        machine = self.create_machine(printname.lower(), 1)
        #TODO =AGT -> partition 1, =PAT -> partition 2, =TO -> ?

        if add_indices:
            machine.printname_ = machine.printname() + id_sep + id_

        if def_ != '':
            logging.debug(def_)
            parsed = self.parse(def_)
            logging.debug(parsed)
            for parsed_expr in self.__parse_definition(
                    parsed[0], machine, loop_to_defendum, three_parts):
                machine.append(parsed_expr, 0)

        self.unify(machine)
        return machine
Пример #37
0
    curCol = col(l,s)
    if not(curCol < indentStack[-1] and curCol <= indentStack[-2]):
        raise ParseException(s,l,"not an unindent")

def doUnindent():
    # pop one level off the shared indentation stack when an UNDENT matches
    indentStack.pop()
    
# INDENT fires after a line break when the next line starts deeper than the
# current level; UNDENT matches without consuming input when it is shallower.
# NOTE(review): the second setParseAction call below REPLACES checkUnindent
# set on the previous line -- addParseAction was probably intended; confirm.
INDENT = lineEnd.suppress() + empty + empty.copy().setParseAction(checkSubIndent)
UNDENT = FollowedBy(empty).setParseAction(checkUnindent)
UNDENT.setParseAction(doUnindent)

# a suite is one or more statements at the same (peer) indentation level
stmt = Forward()
suite = Group( OneOrMore( empty + stmt.setParseAction( checkPeerIndent ) )  )

# toy Python-like grammar: function defs, calls, assignments
identifier = Word(alphas, alphanums)
funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
funcDef = Group( funcDecl + INDENT + suite + UNDENT )

rvalue = Forward()
funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
rvalue << (funcCall | identifier | Word(nums))
assignment = Group(identifier + "=" + rvalue)
stmt << ( funcDef | assignment | identifier )

# 'data' (the source text to parse) is defined elsewhere in the file
print(data)
parseTree = suite.parseString(data)

import pprint
pprint.pprint( parseTree.asList() )

Пример #38
0
class Parser(object):

    """A parser class for solving simple
    data accesses and super-indexing data

    :param data: Trace Object
    :type data: instance of :mod:`trappy.ftrace.BareTrace` or a child
        class (like :mod:`trappy.ftrace.FTrace`)

    :param pvars: A dictionary of variables that need to be
        accessed from within the grammar
    :type pvars: dict

    :param method: The method to be used for reindexing data
        This can be one of the standas :mod:`pandas.DataFrame`
        methods (eg. pad, bfill, nearest). The default is pad
        or use the last valid observation.
    :type method: str

    :param limit: The number of indices a value will be propagated
        when reindexing. The default is None
    :type limit: int

    :param fill: Whether to fill the NaNs in the data.
        The default value is True.
    :type fill: bool

    :param window: A window of time in which to apply the data
        accesses.  By default the data accesses happen accross the
        whole trace.  With the window parameter you can limit it to a
        window of time inside the trace.  The first element of the
        tuple is the starting time and the second the ending time (set
        to None for end of trace).

    :type window: tuple

    - **Operators**

        +----------------+----------------------+---------------+
        | Operation      |      operator        | Associativity |
        +================+======================+===============+
        | Exponentiation | \*\*                 |    Left       |
        +----------------+----------------------+---------------+
        |Unary           | \-                   |    Right      |
        +----------------+----------------------+---------------+
        | Multiply/Divide| \*, /, //, %         |    Left       |
        +----------------+----------------------+---------------+
        | Add/Subtract   | +, \-,               |    Left       |
        +----------------+----------------------+---------------+
        | Comparison     | >, <, >=, <=, ==, != |    Left       |
        +----------------+----------------------+---------------+
        | Logical        | &&, ||, \|, &        |    Left       |
        +----------------+----------------------+---------------+

    - **Data Accessors**

        Since the goal of the grammar is to provide an
        easy language to access and compare data
        from a :mod:`trappy.trace.FTrace` object. The parser provides
        a simple notation to access this data.

        *Statically Defined Events*
        ::

            import trappy
            from trappy.stats.grammar import Parser

            trace = trappy.FTrace("path/to/trace/file")
            parser = Parser(trace)
            parser.solve("trappy.thermal.Thermal:temp * 2")

        *Aliasing*
        ::

            import trappy
            from trappy.stats.grammar import Parser

            pvars = {}
            pvars["THERMAL"] = trappy.thermal.Thermal
            trace = trappy.FTrace("path/to/trace/file")
            parser = Parser(trace)
            parser.solve("THERMAL:temp * 2")

        *Using Event Name*
        ::

            import trappy
            from trappy.stats.grammar import Parser
            trace = trappy.FTrace("path/to/trace/file")
            parser = Parser(trace)
            parser.solve("thermal:temp * 2")

        The event :mod:`trappy.thermal.Thermal` is aliased
        as **THERMAL** in the grammar

        *Dynamic Events*
        ::

            import trappy
            from trappy.stats.grammar import Parser

            # Register Dynamic Event
            cls = trappy.register_dynamic_ftrace("my_unique_word", "event_name")

            pvars = {}
            pvars["CUSTOM"] = cls
            trace = trappy.FTrace("path/to/trace/file")
            parser = Parser(trace)
            parser.solve("CUSTOM:col * 2")

        .. seealso:: :mod:`trappy.dynamic.register_dynamic_ftrace`

    """

    def __init__(self, data, pvars=None, window=(0, None), **kwargs):
        if pvars is None:
            pvars = {}

        self.data = data
        self._pvars = pvars
        # accessor grammar has a side effect: it super-indexes the data
        self._accessor = Group(
            FUNC_NAME + COLON + IDENTIFIER).setParseAction(self._pre_process)
        # inspect grammar only reports information, no aggregation
        self._inspect = Group(
            FUNC_NAME + COLON + IDENTIFIER).setParseAction(self._parse_for_info)
        self._parse_expr = get_parse_expression(
            self._parse_func, self._parse_var_id)
        self._agg_df = pd.DataFrame()
        self._pivot_set = set()
        self._limit = kwargs.get("limit", StatConf.REINDEX_LIMIT_DEFAULT)
        self._method = kwargs.get("method", StatConf.REINDEX_METHOD_DEFAULT)
        self._fill = kwargs.get("fill", StatConf.NAN_FILL_DEFAULT)
        self._window = window

    def solve(self, expr):
        """Parses and solves the input expression

        :param expr: The input expression
        :type expr: str

        :return: The return type may vary depending on
            the expression. For example:

            **Vector**
            ::

                import trappy
                from trappy.stats.grammar import Parser

                trace = trappy.FTrace("path/to/trace/file")
                parser = Parser(trace)
                parser.solve("trappy.thermal.Thermal:temp * 2")

            **Scalar**
            ::

                import trappy
                from trappy.stats.grammar import Parser

                trace = trappy.FTrace("path/to/trace/file")
                parser = Parser(trace)
                parser.solve("numpy.mean(trappy.thermal.Thermal:temp)")

            **Vector Mask**
            ::

                import trappy
                from trappy.stats.grammar import Parser

                trace = trappy.FTrace("path/to/trace/file")
                parser = Parser(trace)
                parser.solve("trappy.thermal.Thermal:temp > 65000")
        """

        # Pre-process accessors for indexing
        self._accessor.searchString(expr)
        return self._parse_expr.parseString(expr)[0]
        # BUGFIX: two unreachable, duplicated copies of the docstring tail
        # and of the two statements above (dead code after the return)
        # were removed here.

    def _pivot(self, cls, column):
        """Pivot Data for concatenation"""

        data_frame = self._get_data_frame(cls)
        data_frame = handle_duplicate_index(data_frame)
        new_index = self._agg_df.index.union(data_frame.index)

        if hasattr(cls, "pivot") and cls.pivot:
            pivot = cls.pivot
            pivot_vals = list(np.unique(data_frame[pivot].values))
            data = {}

            # one column of data per pivot value, reindexed onto the
            # union index so all pivots line up
            for val in pivot_vals:
                data[val] = data_frame[data_frame[pivot] == val][[column]]
                if len(self._agg_df):
                    data[val] = data[val].reindex(
                        index=new_index,
                        method=self._method,
                        limit=self._limit)

            return pd.concat(data, axis=1).swaplevel(0, 1, axis=1)

        if len(self._agg_df):
            data_frame = data_frame.reindex(
                index=new_index,
                method=self._method,
                limit=self._limit)

        return pd.concat({StatConf.GRAMMAR_DEFAULT_PIVOT: data_frame[
                         [column]]}, axis=1).swaplevel(0, 1, axis=1)

    def _pre_process(self, tokens):
        """Pre-process accessors for super-indexing"""

        params = tokens[0]
        # already aggregated: reuse the existing column
        if params[1] in self._agg_df.columns:
            return self._agg_df[params[1]]

        cls = params[0]
        column = params[1]

        # resolve the event: alias in pvars, trace-defined event name,
        # or a fully-qualified attribute path
        if cls in self._pvars:
            cls = self._pvars[cls]
        elif cls in self.data.class_definitions:
            cls = self.data.class_definitions[cls]
        else:
            cls = str_to_attr(cls)

        data_frame = self._pivot(cls, column)
        self._agg_df = pd.concat(
            [self._agg_df, data_frame], axis=1)

        if self._fill:
            self._agg_df = self._agg_df.fillna(method="pad")

        return self._agg_df[params[1]]

    def _parse_for_info(self, tokens):
        """Parse Action for inspecting data accessors"""

        params = tokens[0]
        cls = params[0]
        column = params[1]
        info = {}
        info["pivot"] = None
        info["pivot_values"] = None

        # same three-way event resolution as in _pre_process
        if cls in self._pvars:
            cls = self._pvars[cls]
        elif cls in self.data.class_definitions:
            cls = self.data.class_definitions[cls]
        else:
            cls = str_to_attr(cls)

        data_frame = self._get_data_frame(cls)

        info["class"] = cls
        info["length"] = len(data_frame)
        if hasattr(cls, "pivot") and cls.pivot:
            info["pivot"] = cls.pivot
            info["pivot_values"] = list(np.unique(data_frame[cls.pivot]))
        info["column"] = column
        info["column_present"] = column in data_frame.columns
        return info

    def _parse_var_id(self, tokens):
        """A function to parse a variable identifier
        """

        params = tokens[0]
        try:
            return float(params)
        except (ValueError, TypeError):
            try:
                return self._pvars[params]
            except KeyError:
                # NOTE(review): params is a string here, so params[1] is its
                # second character -- looks suspicious, but preserved; confirm
                # against the accessor token shape
                return self._agg_df[params[1]]

    def _parse_func(self, tokens):
        """A function to parse a function string"""

        params = tokens[0]
        func_name = params[0]
        # pvars-registered functions take precedence over attribute lookup
        if func_name in self._pvars and isinstance(
                self._pvars[func_name],
                types.FunctionType):
            func = self._pvars[func_name]
        else:
            func = str_to_attr(params[0])
        return func(*params[1])

    def _get_data_frame(self, cls):
        """Get the data frame from the BareTrace object, applying the window
        if set"""

        data_frame = getattr(self.data, cls.name).data_frame

        if self._window[1] is None:
            data_frame = data_frame.loc[self._window[0]:]
        else:
            data_frame = data_frame.loc[self._window[0]:self._window[1]]

        return data_frame

    def ref(self, mask):
        """Reference super indexed data with a boolean mask

        :param mask: A boolean :mod:`pandas.Series` that
            can be used to reference the aggregated data in
            the parser
        :type mask: :mod:`pandas.Series`

        :return: aggregated_data[mask]
        """

        return self._agg_df[mask]

    def inspect(self, accessor):
        """A function to inspect the accessor for information

        :param accessor: A data accessor of the format
            <event>:<column>
        :type accessor: str

        :return: A dictionary of information
        """
        return self._inspect.parseString(accessor)[0]
Пример #39
0
class QifParser:
    """A pyparsing-based parser for QIF (Quicken Interchange Format) files.

    Builds one grammar per record type (bank/cash/credit-card/liability/
    investment transactions, account lists, categories, classes) from the
    field-code -> field-name tables below, then combines them into
    ``self.parser``.  NOTE: uses dict.iteritems -- Python 2 code.
    """

    def __init__(self, debug=False):
        account_items       = { 'N' : "Name",
                                'T' : "AccountType",
                                'D' : "Description",
                                'L' : "CreditLimit",
                                'X' : "UnknownField",
                                'B' : "Balance",
                                '/' : "BalanceDate",
                                '$' : "Balance" }

        noninvestment_items = { 'D' : "Date",
                                'T' : "Amount",
                                'U' : "Amount2",
                                'C' : "Cleared",
                                'N' : "Number",
                                'P' : "Payee",
                                'M' : "Memo",
                                'L' : "Category",
                                'A' : "Address",
                                'S' : "SplitCategory",
                                'E' : "SplitMemo",
                                '$' : "SplitAmount",
                                '-' : "NegativeSplitAmount" }

        investment_items    = { 'D' : "Date",
                                'N' : "Action",
                                'Y' : "Security",
                                'I' : "Price",
                                'Q' : "Quantity",
                                'T' : "Amount",
                                'C' : "Cleared",
                                'P' : "Text",
                                'M' : "Memo",
                                'O' : "Commission",
                                'L' : "TransferAccount",
                                '$' : "TransferAmount" }

        category_items      = { 'N' : "Name",
                                'D' : "Description",
                                'T' : "TaxRelated",
                                'I' : "IncomeCategory",
                                'E' : "ExpenseCategory",
                                'B' : "BudgetAmount",
                                'R' : "TaxSchedule" }

        class_items         = { 'N' : "Name",
                                'D' : "Description" }

        options   = Group(CaselessLiteral('!Option:') + restOfLine).suppress()

        banktxns  = Group(CaselessLiteral('!Type:Bank').suppress() +
                          ZeroOrMore(Or([self._items(noninvestment_items),
                                         options]))
                          ).setResultsName("BankTransactions")

        cashtxns  = Group(CaselessLiteral('!Type:Cash').suppress() +
                          ZeroOrMore(Or([self._items(noninvestment_items),
                                         options]))
                          ).setResultsName("CashTransactions")

        ccardtxns = Group(Or([CaselessLiteral('!Type:CCard').suppress(),
                              CaselessLiteral('!Type!CCard').suppress()]) +
                          ZeroOrMore(Or([self._items(noninvestment_items),
                                         options]))
                          ).setResultsName("CreditCardTransactions")

        # NOTE(review): shares the "CreditCardTransactions" results name
        # with ccardtxns -- possibly intentional aggregation; confirm
        liabilitytxns = Group(CaselessLiteral('!Type:Oth L').suppress() +
                          ZeroOrMore(Or([self._items(noninvestment_items),
                                         options]))
                          ).setResultsName("CreditCardTransactions")

        invsttxns = Group(CaselessLiteral('!Type:Invst').suppress() +
                          ZeroOrMore(self._items(investment_items))
                          ).setResultsName("InvestmentTransactions")

        acctlist  = Group(CaselessLiteral('!Account').suppress() +
                          ZeroOrMore(Or([self._items(account_items, name="AccountInfo")]))
                          ).setResultsName("AccountList")

        category  = Group(CaselessLiteral('!Type:Cat').suppress() +
                          ZeroOrMore(self._items(category_items))
                          ).setResultsName("CategoryList")

        # BUGFIX: was built from category_items; class_items was defined
        # but never used
        classlist = Group(CaselessLiteral('!Type:Class').suppress() +
                          ZeroOrMore(self._items(class_items))
                          ).setResultsName("ClassList")

        self.parser = Group(ZeroOrMore(White()).suppress() +
                            ZeroOrMore(acctlist).suppress() +
                            OneOrMore(ccardtxns | cashtxns | banktxns | liabilitytxns | invsttxns) +
                            ZeroOrMore(White()).suppress()
                            ).setResultsName("QifStatement")

        if (debug):
            self.parser.setDebugActions(ofxtools._ofxtoolsStartDebugAction,
                                        ofxtools._ofxtoolsSuccessDebugAction,
                                        ofxtools._ofxtoolsExceptionDebugAction)

    def _items(self, items, name="Transaction"):
        """Build the grammar for one record: any of the given fields,
        a currency/record terminator, then end of line.

        BUGFIX: the loop variable was also called 'name', clobbering the
        parameter, so the record's results name became whichever field
        name the dict yielded last instead of the intended one
        ("Transaction", "AccountInfo", ...).
        """
        item_list = []
        for (code, field_name) in items.iteritems():
            item = self._item(code, field_name)
            item_list.append(item)
        return Group(OneOrMore(Or(item_list)) +
                     oneOf('^EUR ^').setResultsName('Currency') +
                     LineEnd().suppress()
                     ).setResultsName(name)

    def _item(self, code, name):
        """Grammar for one field line: the code letter, the rest of the
        line captured under *name*, then end of line."""
        return CaselessLiteral(code).suppress() + \
               restOfLine.setResultsName(name) + \
               LineEnd().suppress()

    def parse(self, qif):
        """Parse a QIF document string and return the pyparsing results."""
        return self.parser.parseString(qif)
Пример #40
0
# grammar for the remaining KiCad PCB s-expression statements
ViaTok = _paren_stmt(
    "via", OptionalList(AtTok, Size1DTok, DrillTok, LayersTok, NetNumberTok))
SegmentTok = _paren_stmt(
    "segment",
    OptionalList(StartTok, EndTok, WidthTok, LayerTok, NetNumberTok, TStamp))

# any element that may appear at the top level of a kicad_pcb file
PCBElementTok = GR_CircleTok | GR_ArcTok | ModuleTok | ViaTok | SegmentTok | NetTok | Net_ClassTok | \
    PageTok | LayerListTok | GeneralSettingsTok | SetupTok
PCBElements = ZeroOrMore(PCBElementTok)

# the whole-file grammar: (kicad_pcb (version ...) (host ...) elements...)
VersionTok = _uint_param("version")
HostTok = _paren_stmt("host", AnystringTok("name"), AnystringTok("version"))
KiCAD_PCBTok = _paren_stmt("kicad_pcb", VersionTok, HostTok, PCBElements)

if __name__ == "__main__":
    # smoke tests for the individual token grammars
    result = StartTok.parseString("(start 123.456 789)")
    print(result)
    print(result.x, result.y)
    result = FP_LineTok.parseString(
        "(fp_line (start 1 1) (end 1 2) (layer F.Cu) (width 0.1))")
    print(result)
    print(result.start, result.end, result.layer, result.width)

    test_str = """(pad 1 thru_hole rect (at -0.95 0) (size 0.7 1.3) (drill 0.3) (layers *.Cu *.Mask) (net 2 VO))"""
    result = PadTok.parseString(test_str)
    print(result)

    test_str = """(via (at 150.7 106.1) (size 0.6) (drill 0.4) (layers F.Cu B.Cu) (net 3))"""
    result = ViaTok.parseString(test_str)
    print(result)
Пример #41
0
class DefinitionParser(object):
    """Parse textual machine definitions into Machine graphs.

    The pyparsing grammar (built in init_parser, documented with BNF-style
    comments inline) recognizes unary concepts, binary relations and the
    bracket/angle/paren combinators; __parse_expr turns the resulting parse
    trees into Machine instances and unify() merges duplicate sub-machines.

    NOTE: Python 2 code (`unicode`, `string.uppercase`, `iteritems`).
    """
    # printname types accepted in parse trees (Py2 str / unicode)
    _str = set([str, unicode])

    lb = "["
    rb = "]"
    lp = "("
    rp = ")"
    left_defa = '<'
    right_defa = '>'

    clause_sep = ","
    part_sep = ";"
    prime = "'"
    hyphen = "-"
    langspec_pre = "$"  # starts langspec deep case
    unary_p = re.compile("^[a-z_#\-/0-9]+(/[0-9]+)?$")
    binary_p = re.compile("^[A-Z_0-9]+(/[0-9]+)?$")

    def __init__(self, plur_dict):
        """plur_dict -- maps plural printnames to their singular form."""
        self.plur_dict = plur_dict
        self.init_parser()

    @classmethod
    def _is_binary(cls, s):
        """Return True if s (a token or a parse subtree) is a binary
        relation: an UPPERCASE token or the ["=", "REL"] subtree."""
        return ((type(s) in cls._str and cls.binary_p.match(s)) or
                (type(s) is list and s[0] == deep_pre and s[1] == "REL"))

    @classmethod
    def _is_unary(cls, s):
        """Return True if s (a token or a parse subtree) is a unary
        concept: a lowercase token or a prefixed/defaulted subtree."""
        return ((type(s) in cls._str and cls.unary_p.match(s) is not None) or
                (type(s) is list and (
                    (s[0] == deep_pre) or
                    (s[0] == cls.langspec_pre) or
                    (s[0] == enc_pre) or
                    (s[0] == cls.left_defa)
                )))

    @classmethod
    def _is_deep_case(cls, s):
        """Return True if s is a known deep case name."""
        return s in deep_cases

    def init_parser(self):
        """Build all pyparsing grammar rules; see the BNF-style comments
        for the production each rule implements."""
        self.lb_lit = Literal(DefinitionParser.lb)
        self.rb_lit = Literal(DefinitionParser.rb)
        self.lp_lit = Literal(DefinitionParser.lp)
        self.rp_lit = Literal(DefinitionParser.rp)
        self.left_defa_lit = Literal(DefinitionParser.left_defa)
        self.right_defa_lit = Literal(DefinitionParser.right_defa)

        self.clause_sep_lit = Literal(DefinitionParser.clause_sep)
        self.part_sep_lit = Literal(DefinitionParser.part_sep)
        self.prime_lit = Literal(DefinitionParser.prime)
        self.hyphen_lit = Literal(DefinitionParser.hyphen)
        self.enc_pre_lit = Literal(enc_pre)
        self.deep_pre_lit = Literal(deep_pre)
        self.avm_pre_lit = Literal(avm_pre)
        self.langspec_pre_lit = Literal(DefinitionParser.langspec_pre)
        self.id_sep_lit = Literal(id_sep)

        # optional "/123" disambiguation suffix on tokens
        self.disambig_id = self.id_sep_lit + Word(nums)

        self.deep_cases = Group(self.deep_pre_lit + Word(string.uppercase))

        self.unary = Forward()
        self.unary << (Combine(Optional("-") +
                       Word(string.lowercase + "_" + nums) +
                       Optional(self.disambig_id))
                       | self.deep_cases
                       | Group(self.langspec_pre_lit +
                               Word(string.uppercase + "_"))
                       | Group(self.avm_pre_lit +
                               Word(string.ascii_letters + "_"))
                       | Group(self.enc_pre_lit + Word(alphanums + "_-"))
                       | Group(self.left_defa_lit + self.unary +
                               self.right_defa_lit))

        self.binary = (Combine(Word(string.uppercase + "_" + nums) +
                       Optional(self.disambig_id))
                       | Group(self.deep_pre_lit + 'REL'))
        self.dontcare = SkipTo(LineEnd())

        # main expression
        self.expression = Forward()
        self.binexpr = Forward()
        self.unexpr = Forward()
        self.argexpr = Forward()

        # "enumerable expression"
        # D -> E | E, D
        self.definition = Group(delimitedList(self.expression,
                                delim=DefinitionParser.clause_sep))

        self.expression << Group(
            # E -> UE
            (self.unexpr) ^

            # E -> BE
            (self.binexpr) ^

            # E -> U ( E )
            (self.unary + self.lp_lit + self.expression + self.rp_lit) ^

            # E -> < E >
            (self.left_defa_lit + self.expression + self.right_defa_lit)
        )

        self.binexpr << Group(
            # BE -> A B
            (self.argexpr + self.binary) ^

            # BE -> B A
            (self.binary + self.argexpr) ^

            # BE -> A B A
            (self.argexpr + self.binary + self.argexpr) ^

            # BE -> B [ E; E ]
            (self.binary + self.lb_lit + self.expression + self.part_sep_lit
             + self.expression + self.rb_lit)
        )

        self.unexpr << Group(
            # UE -> U
            (self.unary) ^

            # UE -> U [ D ]
            (self.unary + self.lb_lit + self.definition + self.rb_lit) ^

            # UE -> U ( U )
            (self.unary + self.lp_lit + self.unary + self.rp_lit)
        )

        self.argexpr << Group(
            # A -> UE
            (self.unexpr) ^

            # A -> [ D ]
            (self.lb_lit + self.definition + self.rb_lit) ^

            # A -> < A >
            (self.left_defa_lit + self.argexpr + self.right_defa_lit) ^

            # A -> '
            (self.prime_lit)
        )

        # NOTE: all five word fields share one Word instance
        self.hu, self.pos, self.en, self.lt, self.pt = (
            Word(alphanums + "#-/_.'"),) * 5
        self.defid = Word(nums)
        self.word = Group(self.hu + self.pos + self.en + self.lt + self.pt)

        # S -> W : D | W : D % _
        #self.sen = self.definition + LineEnd()

    def parse(self, s):
        """Parse definition string s and return the tree as nested lists.

        Raises pyparsing.ParseException on malformed input (parseAll)."""
        return self.definition.parseString(s, parseAll=True).asList()

    def create_machine(self, name, partitions):
        """Create a Machine for printname `name` with `partitions`
        partitions, resolving plural forms via self.plur_dict."""
        # lists are accepted because of ["=", "AGT"]
        if type(name) is list:
            name = "".join(name)

        # HACK until we find a good solution for defaults
        name = name.strip('<>')

        is_plur = name in self.plur_dict
        if is_plur:
            name = self.plur_dict[name]

        m = Machine(decode_from_proszeky(name),
                    ConceptControl(), partitions)
        if is_plur:
            # mark plurality with an extra 'more' child on partition 0
            m.append(self.create_machine('more', 1), 0)

        return m

    def unify(self, machine):
        """Merge machines below `machine` that share a printname (and the
        same has-"other" status), re-linking parents to the merged one."""
        def __collect_machines(m, machines, is_root=False):
            # cut the recursion
            key = m.printname(), __has_other(m)
            if (key in machines and m in machines[key]):
                return

            if not is_root:
                machines[m.printname(), __has_other(m)].append(m)
            for partition in m.partitions:
                for m_ in partition:
                    __collect_machines(m_, machines)

        def __has_other(m):
            # True if partition 0 contains a machine named "other"
            for m_ in m.partitions[0]:
                if m_.printname() == "other":
                    return True
            return False

        def __get_unified(machines, res=None):
            # if nothing to unify, don't
            if len(machines) == 1:
                return machines[0]

            # if a return machine is given, don't create a new one
            if res is None:
                prototype = machines[0]
                res = self.create_machine(prototype.printname(),
                                          len(prototype.partitions))
            for m in machines:
                # if the same machine, don't add anything
                if id(m) == id(res):
                    continue

                for p_i, p in enumerate(m.partitions):
                    for part_m in p:
                        if part_m.printname() != "other":
                            res.partitions[p_i].append(part_m)

                            part_m.del_parent_link(m, p_i)
                            part_m.add_parent_link(res, p_i)

            return res

        def __replace(where, for_what, is_other=False, visited=None):
            # `visited` guards against cycles in the machine graph
            if visited is None:
                visited = set()

            if id(where) in visited:
                return

            visited.add(id(where))

            pn = for_what.printname()
            for p_i, p in enumerate(where.partitions):
                # change the partition machines
                for part_m_i, part_m in enumerate(p):
                    if part_m.printname() == pn and __has_other(
                            part_m) == is_other:
                        where.partitions[p_i][part_m_i] = for_what
                        for_what.add_parent_link(where, p_i)
                    __replace(where.partitions[p_i][part_m_i],
                              for_what, is_other, visited)

                # unification if there is a machine more than once on the same
                # partition
                where.partitions[p_i] = list(set(p))

        machines = defaultdict(list)
        __collect_machines(machine, machines, is_root=True)
        for k, machines_to_unify in machines.iteritems():

            # only unify machines with a single partition
            if len(machines_to_unify[0].partitions) > 1:
                continue

            printname, is_other = k
            #if unification affects the root (machine),
            #be that the result machine
            if printname == machine.printname():
                unified = __get_unified(machines_to_unify, machine)
            else:
                unified = __get_unified(machines_to_unify)
            __replace(machine, unified, is_other)

    def __parse_expr(self, expr, root, loop_to_defendum=True,
                     three_parts=False):
        """
        creates machines from a parse node and its children
        there should be one handler for every rule
        """

        logging.debug("Parsing expression: {0}".format(expr))

        # name shortening for classmethods
        cls = DefinitionParser

        is_binary = cls._is_binary
        is_unary = cls._is_unary
        is_tree = lambda r: type(r) == list

        # partition indices shift by one when binaries get a 0th partition
        left_part = 0 + int(three_parts)
        right_part = 1 + int(three_parts)
        most_part = 2 + int(three_parts)

        if (len(expr) == 1):
            # UE -> U
            if (is_unary(expr[0])):
                logging.debug("Parsing {0} as a unary.".format(expr[0]))
                return [self.create_machine(expr[0], 1)]

            # E -> UE | BE, A -> UE
            if (is_tree(expr[0])):
                logging.debug("Parsing {0} as a tree.".format(expr[0]))
                return self.__parse_expr(expr[0], root, loop_to_defendum,
                                         three_parts)

        if (len(expr) == 2):
            # BE -> A B
            if (is_tree(expr[0]) and
                    is_binary(expr[1])):
                m = self.create_machine(expr[1], most_part)
                if expr[0] != ["'"]:
                    m.append_all(
                        self.__parse_expr(expr[0], root, loop_to_defendum,
                                          three_parts),
                        left_part)
                if loop_to_defendum:
                    m.append(root, right_part)
                return [m]

            # BE -> B A
            if (is_binary(expr[0]) and
                    is_tree(expr[1])):
                m = self.create_machine(expr[0], most_part)
                if expr[1] != ["'"]:
                    m.append_all(
                        self.__parse_expr(expr[1], root, loop_to_defendum,
                                          three_parts),
                        right_part)
                if loop_to_defendum:
                    m.append(root, left_part)
                return [m]

            # BE -> 'B
            if (expr[0] == ["'"] and
                    is_binary(expr[1])):
                m = self.create_machine(expr[1], most_part)
                #m.append(parent, 1)
                if loop_to_defendum:
                    m.append(root, right_part)
                return [m]

            # BE -> B'
            if (is_binary(expr[0]) and
                    expr[1] == ["'"]):
                m = self.create_machine(expr[0], most_part)
                # m.append(parent, 0)
                if loop_to_defendum:
                    m.append(root, left_part)
                return [m]

            # U -> =AGT
            if expr[0] == deep_pre:
                return [self.create_machine(deep_pre + expr[1], 1)]

            # U -> $HUN_FROM
            if (expr[0] == cls.langspec_pre):
                return [self.create_machine(cls.langspec_pre + expr[1], 1)]

            # U -> #AVM
            if (expr[0] == avm_pre):
                return [self.create_machine(avm_pre + expr[1], 1)]

            # U -> @External_url
            if (expr[0] == enc_pre):
                return [self.create_machine(enc_pre + expr[1], 1)]

        if (len(expr) == 3):
            # UB -> A B A
            if (is_tree(expr[0]) and
                    is_binary(expr[1]) and
                    is_tree(expr[2])):
                m = self.create_machine(expr[1], most_part)
                logging.debug(expr[1])
                if expr[0] != [DefinitionParser.prime]:
                    logging.debug(expr[0])
                    m.append_all(
                        self.__parse_expr(expr[0], root, loop_to_defendum,
                                          three_parts),
                        left_part)
                if expr[2] != [DefinitionParser.prime]:
                    m.append_all(
                        self.__parse_expr(expr[2], root, loop_to_defendum,
                                          three_parts),
                        right_part)
                return [m]

            # A -> [ D ]
            if (expr[0] == "[" and
                    is_tree(expr[1]) and
                    expr[2] == "]"):
                logging.debug(
                    "Parsing expr {0} as an embedded definition".format(expr))
                res = list(
                    self.__parse_definition(expr[1], root, loop_to_defendum,
                                            three_parts))
                return res

            # E -> < E >, U -> < U >
            if expr[0] == '<' and expr[2] == '>':
                logging.debug('E -> < E >' + str(expr[1]))
                return list(self.__parse_expr(expr[1], root, loop_to_defendum,
                                              three_parts))

        if (len(expr) == 4):
            # UE -> U ( U )
            # E -> U ( BE ) provisional
            if (is_unary(expr[0]) and
                    expr[1] == "(" and
                    expr[3] == ")"):
                logging.debug('X -> U ( Y )')
                if is_unary(expr[2]):
                    m = self.create_machine(expr[2], 1)
                else:
                    m = self.__parse_expr(expr[2], root, loop_to_defendum,
                                          three_parts)[0]
                    if not three_parts:
                        logging.warning(
                            "for 0th partition of binary machines, " +
                            "set three_parts=True, "+str(expr))
                m.append(self.create_machine(expr[0], 1), 0)
                return [m]

            # UE -> U [ D ]
            if (is_unary(expr[0]) and
                    expr[1] == "[" and
                    is_tree(expr[2]) and
                    expr[3] == "]"):
                m = self.create_machine(expr[0], 1)
                for parsed_expr in self.__parse_definition(expr[2], root,
                                                           loop_to_defendum,
                                                           three_parts):
                    m.append(parsed_expr, 0)
                return [m]

            # E -> U ( BE )
            #if (is_unary(expr[0]) and
            #        expr[1] == "(" and
            #        is_tree(expr[2]) and
            #        expr[3] == ")"):
            #    ms = self.__parse_expr(expr[2], root, loop_to_defendum,
            #                           three_parts)
            #    # if BE was an expression with an apostrophe, then
            #    # return of __parse_expr() is None
            #    if len(ms) != 0:
            #        ms[0].append(self.create_machine(expr[0], 1), 0)
            #    # if len(ms) == 3 and ms[0] == '<':
            #    #        ms = ms[1]
            #    if len(ms) != 1:
            #        logging.warning("0th partition of binary machines " +
            #                        "is not implemented "+str(ms))
            #    return ms
            logging.warning('machine cannot be built '+str(expr))

        if (len(expr) == 6):
            # BE -> B [E; E]
            if (is_binary(expr[0]) and
                    expr[1] == "[" and
                    is_tree(expr[2]) and
                    expr[3] == ";" and
                    is_tree(expr[4]) and
                    expr[5] == "]"):
                m = self.create_machine(expr[0], 2)
                # BUGFIX: __parse_expr takes (expr, root, loop_to_defendum,
                # three_parts); the previous calls passed an extra `m`
                # argument, raising TypeError whenever this rule fired.
                m.append_all(
                    self.__parse_expr(expr[2], root, loop_to_defendum,
                                      three_parts),
                    0)
                m.append_all(
                    self.__parse_expr(expr[4], root, loop_to_defendum,
                                      three_parts),
                    1)
                return [m]

        pe = ParserException(
            "Unknown expression in definition: {0} (len={1})".format(
                expr,
                len(expr)))
        logging.debug(str(pe))
        logging.debug(expr)
        raise pe

    def __parse_definition(self, definition, root, loop_to_defendum=True,
                           three_parts=False):
        """Yield the first machine built from each clause of `definition`."""
        logging.debug(str(definition))
        for d in definition:
            yield self.__parse_expr(d, root, loop_to_defendum, three_parts)[0]

    def parse_into_machines(self, string, printname_index=0, add_indices=False,
                            loop_to_defendum=True, three_parts=False):
        """Parse one tab-separated dictionary line into a Machine.

        The line's field at printname_index is the headword; fields 4..8
        are id, urob, pos, definition and comment.  Returns the unified
        Machine for the headword.
        """
        printname = string.split('\t')[printname_index]
        try:
            id_, urob, pos, def_, comment = string.split('\t')[4:]
        except:
            # re-raise with the offending fields for easier debugging
            raise Exception(string.split('\t'))

        machine = self.create_machine(printname.lower(), 1)
        #TODO =AGT -> partition 1, =PAT -> partition 2, =TO -> ?

        if add_indices:
            machine.printname_ = machine.printname() + id_sep + id_

        if def_ != '':
            logging.debug(def_)
            parsed = self.parse(def_)
            logging.debug(parsed)
            for parsed_expr in self.__parse_definition(
                    parsed[0], machine, loop_to_defendum, three_parts):
                machine.append(parsed_expr, 0)

        self.unify(machine)
        return machine
Пример #42
0
class QifParser:
    """Pyparsing-based parser for QIF (Quicken Interchange Format) files.

    The grammar recognizes an optional account list, one or more
    transaction sections (bank / cash / credit-card / liability /
    investment) and optional category / class lists.
    """
    def __init__(self, debug=False):
        """Build the QIF grammar.  With debug=True, attach the ofxtools
        debug actions to the top-level parser."""
        # Each *_items dict maps a one-character QIF field code to the
        # results name used for that field.
        account_items       = { 'N' : "Name",
                                'T' : "AccountType",
                                'D' : "Description",
                                'L' : "CreditLimit",
                                'X' : "UnknownField",
                                'B' : "Balance",
                                '/' : "BalanceDate",
                                '$' : "Balance" }

        noninvestment_items = { 'D' : "Date",
                                'T' : "Amount",
                                'U' : "Amount2",
                                'C' : "Cleared",
                                'N' : "Number",
                                'P' : "Payee",
                                'M' : "Memo",
                                'L' : "Category",
                                'A' : "Address",
                                'S' : "SplitCategory",
                                'E' : "SplitMemo",
                                '$' : "SplitAmount",
                                '-' : "NegativeSplitAmount" }

        investment_items    = { 'D' : "Date",
                                'N' : "Action",
                                'Y' : "Security",
                                'I' : "Price",
                                'Q' : "Quantity",
                                'T' : "Amount",
                                'C' : "Cleared",
                                'P' : "Text",
                                'M' : "Memo",
                                'O' : "Commission",
                                'L' : "TransferAccount",
                                '$' : "TransferAmount" }

        category_items      = { 'N' : "Name",
                                'D' : "Description",
                                'T' : "TaxRelated",
                                'I' : "IncomeCategory",
                                'E' : "ExpenseCategory",
                                'B' : "BudgetAmount",
                                'R' : "TaxSchedule" }

        class_items         = { 'N' : "Name",
                                'D' : "Description" }

        options   = Group(CaselessLiteral('!Option:') + restOfLine).suppress()

        banktxns  = Group(CaselessLiteral('!Type:Bank').suppress() +
                          ZeroOrMore(Or([self._items(noninvestment_items),
                                         options]))
                          ).setResultsName("BankTransactions")

        cashtxns  = Group(CaselessLiteral('!Type:Cash').suppress() +
                          ZeroOrMore(Or([self._items(noninvestment_items),
                                         options]))
                          ).setResultsName("CashTransactions")

        ccardtxns = Group(Or([CaselessLiteral('!Type:CCard').suppress(),
                              CaselessLiteral('!Type!CCard').suppress()]) +
                          ZeroOrMore(Or([self._items(noninvestment_items),
                                         options]))
                          ).setResultsName("CreditCardTransactions")

        # NOTE(review): this results name duplicates the one used by
        # ccardtxns above; it was probably meant to be
        # "LiabilityTransactions", but callers may rely on the current
        # key -- confirm before renaming.
        liabilitytxns = Group(CaselessLiteral('!Type:Oth L').suppress() +
                          ZeroOrMore(Or([self._items(noninvestment_items),
                                         options]))
                          ).setResultsName("CreditCardTransactions")

        invsttxns = Group(CaselessLiteral('!Type:Invst').suppress() +
                          ZeroOrMore(self._items(investment_items))
                          ).setResultsName("InvestmentTransactions")

        acctlist  = Group(CaselessLiteral('!Account').suppress() +
                          ZeroOrMore(Or([self._items(account_items, name="AccountInfo")]))
                          ).setResultsName("AccountList")

        category  = Group(CaselessLiteral('!Type:Cat').suppress() +
                          ZeroOrMore(self._items(category_items))
                          ).setResultsName("CategoryList")

        # BUGFIX: this section was built from category_items, leaving
        # class_items defined but never used.
        classlist = Group(CaselessLiteral('!Type:Class').suppress() +
                          ZeroOrMore(self._items(class_items))
                          ).setResultsName("ClassList")

        self.parser = Group(ZeroOrMore(White()).suppress() +
                            ZeroOrMore(acctlist).suppress() +
                            OneOrMore(ccardtxns | cashtxns | banktxns | liabilitytxns | invsttxns) +
                            ZeroOrMore(category | classlist).suppress() +
                            ZeroOrMore(White()).suppress()
                            ).setResultsName("QifStatement")

        if (debug):
            self.parser.setDebugActions(_ofxtoolsStartDebugAction,
                                        _ofxtoolsSuccessDebugAction,
                                        _ofxtoolsExceptionDebugAction)

    def _items(self, items, name="Transaction"):
        """Build a parser for one record: one or more coded fields
        followed by a '^' (or '^EUR') terminator line.

        items -- dict mapping a single-char field code to a results name.
        name  -- results name for the whole record group.
        """
        # BUGFIX: the loop variable used to be called `name`, shadowing the
        # parameter and making the group's results name whichever field
        # happened to come last out of the dict.
        item_list = []
        for (code, field_name) in items.items():
            item = self._item(code, field_name)
            item_list.append(item)
        return Group(OneOrMore(Or(item_list)) +
                     oneOf('^EUR ^').setResultsName('Currency') +
                     LineEnd().suppress()
                     ).setResultsName(name)

    def _item(self, code, name):
        """Match one single-letter QIF field: code prefix, value, newline."""
        return CaselessLiteral(code).suppress() + \
               restOfLine.setResultsName(name) + \
               LineEnd().suppress()

    def parse(self, qif):
        """Parse a QIF document string into a ParseResults tree."""
        return self.parser.parseString(qif)
Пример #43
0
class MyHL(object):
    def __init__(self):
        identifier = Word(alphas + alphanums)
        identifier_list = identifier + ZeroOrMore(Word(',') + identifier)
        self.number_exp = Group(Word(alphanums) + OneOrMore(Word("+-/*%", exact=1) + Word(alphanums)))
        expression = self.number_exp | Group(Literal('"') + Word(printables, excludeChars='"') + Literal('"')) | identifier
        datatype = Literal('number') | Literal('word')
        self.print_statement = "print" + identifier + ";"
        self.read_statement = "read" + identifier + ";"
        self.assignment_statement = identifier + "=" + expression + ";"
        self.variable_declaration = identifier_list + "use as" + datatype + ";"
        self.variable_stack = {}

        # with open('input.uy', 'r') as inputFile:
        #     code = [i.strip() for i in inputFile.readlines()]

        # self.run(code)

    def run(self, code, parent=None):

        code = self.clean_code(code)
        self.variable_stack = {}

        if code[0] != "begin vars":
            print "Code should begin with 'begin vars' statement."
            err_line = -1
        elif code[-1] != "end vars" and code[-1] != "end statements":
            print "Code should end with 'end vars' or 'end statements' statement."
            err_line = -1
        else:
            end_vars = False
            begin_stmt = False
            error = False
            for i in code[1:-1]:
                if not i.startswith('//'):
                    if i == "end vars":
                        end_vars = True
                    elif i == "begin statements":
                        if end_vars:
                            var_type = "program"
                            begin_stmt = True
                        else:
                            print "Formal declaration of variables must end with 'end vars' statement."
                            error = True
                            err_line = -1
                            break
                    elif end_vars and not begin_stmt:
                        print "Program statements should begin with 'begin statements' statement."
                        error = True
                        err_line = -1
                        break
                    else:
                        success, err_code = self.check_and_execute(i, begin_stmt, parent)
                        if not success:
                            error = True
                            if err_code == 1:
                                print "Syntax Error at line ", code.index(i) + 1
                                err_line = code.index(i)
                            elif err_code == 2:
                                print "Undeclared variable at line ", code.index(i) + 1
                                err_line = code.index(i)
                            elif err_code == 3:
                                print "Type mismatch error at line ", code.index(i) + 1
                                err_line = code.index(i)
                            elif err_code == 4:
                                print "Execution halted at line ", code.index(i) + 1
                                err_line = code.index(i)
                            break

        if error:
            if err_line >= 0:
                parent.setLineFormat(err_line)

    def check_and_execute(self, statement, begin_stmt, parent=None):
        if not begin_stmt:
            try:
                variables = self.variable_declaration.parseString(statement, parseAll=True)[0::2]
                for v in variables[:-1]:
                    self.variable_stack[v] = [None, variables[-1]]
            except:
                return False, 1
        else:
            if statement.startswith('print'):
                try:
                    identifier = self.print_statement.parseString(statement, parseAll=True)[1]
                    if identifier in self.variable_stack:
                        print self.variable_stack[identifier][0]
                    else:
                        return False, 2
                except:
                    return False, 1
            elif statement.startswith('read'):
                try:
                    identifier = self.read_statement.parseString(statement, parseAll=True)[1]
                    if identifier in self.variable_stack:
                        if self.variable_stack[identifier][1] == 'number':
                            val, ok = parent.getInt(identifier)
                        else:
                            val, ok = parent.getString(identifier)

                        # if not parent:
                        #     self.variable_stack[identifier][0] = raw_input("enter value for variable %s:" % identifier )
                        # else:
                        
                        if ok:
                            self.variable_stack[identifier][0] = val
                        else:
                            return False, 4
                    else:
                        return False, 2
                except:
                    return False, 1 
            else:
                try:
                    ass_stmt = self.assignment_statement.parseString(statement, parseAll=True)
                    try: 
                        self.number_exp.parseString("".join(ass_stmt[-2]), parseAll=True)
                        for i,v in enumerate(ass_stmt[-2]):
                            if not v in ['+', '-', '/' , '*', '%']:
                                try:
                                    int(v)
                                except:
                                    if v in self.variable_stack:
                                        ass_stmt[-2][i] = str(self.variable_stack[v][0])
                                    else:
                                        return False, 2
                        expression = self.arithmetic_ops("".join(ass_stmt[-2]))
                    except:
                        expression = ass_stmt[-2]
                    finally:
                        if not ass_stmt[0] in self.variable_stack:
                            return False, 2

                        if self.variable_stack[ass_stmt[0]][1] == 'number':
                            try:
                                self.variable_stack[ass_stmt[0]][0] = int(expression)
                            except:
                                return False, 3
                        else:
                            if expression[0] == '"' and expression[-1] == '"':
                                self.variable_stack[ass_stmt[0]][0] = expression[1]
                            else:
                                return False, 3

                except:
                    return False, 1 

        return True, 0


    def arithmetic_ops(self, statement):
        return eval(statement)

    def clean_code(self, code):
        ccode = []
        for c in code:
            if c != '':
                if not c.startswith('//'):
                    ccode.append(c.split('//')[0].strip())
                else:
                    ccode.append(c)

        return ccode
Пример #44
0
class OfcParser:
    """Dirt-simple OFC parser for interpreting OFC documents.

    Builds a recursive pyparsing grammar for SGML-style OFC aggregates
    (nested <TAG>...</TAG> blocks) and leaf content tags, and applies a
    series of regex-based cleanups to tolerate malformed bank output.
    """
    def __init__(self, debug=False):
        # An aggregate is <TAG> ... </TAG> containing more aggregates and/or
        # leaf content tags; Forward() lets the rule refer to itself.
        aggregate = Forward().setResultsName("OFC")
        aggregate_open_tag, aggregate_close_tag = self._tag()
        content_open_tag = self._tag(closed=False)
        # A leaf is an open tag followed by its text value on the same line.
        content = Group(content_open_tag + CharsNotIn("<\r\n"))
        aggregate << Group(aggregate_open_tag \
            + Dict(OneOrMore(aggregate | content)) \
            + aggregate_close_tag)

        self.parser = Group(aggregate).setResultsName("document")
        if (debug):
            self.parser.setDebugActions(ofxtools._ofxtoolsStartDebugAction,
                                        ofxtools._ofxtoolsSuccessDebugAction,
                                        ofxtools._ofxtoolsExceptionDebugAction)

    def _tag(self, closed=True):
        """Generate parser definitions for OFX tags.

        Returns (openTag, closeTag) when *closed* is True, else just openTag.
        """
        openTag = Literal("<").suppress() + Word(alphanums + ".") \
            + Literal(">").suppress()
        if (closed):
            closeTag = Group("</" + Word(alphanums + ".") + ">" + ZeroOrMore(White())).suppress()
            return openTag, closeTag
        else:
            return openTag

    def parse(self, ofc):
        """Parse a string argument and return a tree structure representing
        the parsed document."""
        ofc = self.add_zero_to_empty_ledger_tag(ofc)
        ofc = self.remove_inline_closing_tags(ofc)
        ofc = ofxtools.util.strip_empty_tags(ofc)
        ofc = self._translate_chknum_to_checknum(ofc)
        # if you don't have a good stomach, skip this part
        # XXX:needs better solution — the recursive grammar can blow the
        # default recursion limit on deeply nested documents.
        import sys
        sys.setrecursionlimit(5000)
        try:
            return self.parser.parseString(ofc).asDict()
        except ParseException:
            # Retry once after applying the heavier repair pass.
            fixed_ofc = self.fix_ofc(ofc)
            return self.parser.parseString(fixed_ofc).asDict()

    def add_zero_to_empty_ledger_tag(self, ofc):
        """
        Fix an OFC, by adding zero to LEDGER blank tag
        """
        return re.compile(r'<LEDGER>(\D*\n)', re.UNICODE).sub(r'<LEDGER>0\1', ofc)

    def remove_inline_closing_tags(self, ofc):
        """
        Fix an OFC, by removing inline closing 'tags'
        """
        return re.compile(r'(\w+.*)<\/\w+>', re.UNICODE).sub(r'\1', ofc)

    def fix_ofc(self, ofc):
        """
        Do some magic to fix an bad OFC
        """
        ofc = self._remove_bad_tags(ofc)
        ofc = self._fill_dummy_tags(ofc)
        return self._inject_tags(ofc)

    def _remove_bad_tags(self, ofc):
        # TRNRS wrappers and CLTID entries confuse the grammar; strip them.
        ofc_without_trnrs = re.sub(r'<[/]*TRNRS>', '', ofc)
        return re.sub(r'<[/]*CLTID>\w+', '', ofc_without_trnrs)

    def _fill_dummy_tags(self, ofc):
        # Give value-less FITID/CHECKNUM tags a placeholder '0' value so the
        # leaf-content rule (which requires text after the tag) can match.
        expression = r'(<%s>)[^\w+]'
        replacement = r'<%s>0\n'
        ofc = re.sub(expression % 'FITID', replacement % 'FITID' , ofc)
        filled_ofc = re.sub(expression % 'CHECKNUM', replacement % 'CHECKNUM' , ofc)

        return filled_ofc

    def _translate_chknum_to_checknum(self, ofc):
        """
        Some banks put an CHKNUM instead of CHECKNUM. this method translates
        CHKNUM to CHECKNUM in order to parse this information correctly
        """
        return re.sub('CHKNUM', 'CHECKNUM', ofc)

    def _inject_tags(self, ofc):
        """Wrap the document in the ACCTSTMT/ACCTFROM scaffolding the grammar
        expects, when it is missing. Returns *ofc* unchanged otherwise."""
        tags ="<OFC>\n<ACCTSTMT>\n<ACCTFROM>\n<BANKID>0\n<ACCTID>0\n<ACCTTYPE>0\n</ACCTFROM>\n"
        if not re.findall(r'<OFC>\w*\s*<ACCTSTMT>', ofc):
            return ofc.replace('<OFC>', tags).replace('</OFC>', '</ACCTSTMT>\n</OFC>')
        # BUG FIX: previously fell through and returned None when the
        # scaffolding was already present, breaking fix_ofc()'s pipeline.
        return ofc
Пример #45
0
def parse_parsers(parsers):
    """Parse a string of P4 parser definitions and return the combined result.

    Accepts the start rule followed by one or more parser definitions, in
    any interleaving (the '&' Each-combinator), and returns the first (and
    only) group of the parse.
    """
    single_parser, start_rule = p4_parser()
    grammar = Group(start_rule & OneOrMore(single_parser))
    return grammar.parseString(parsers)[0]