Exemplo n.º 1
0
    def process(self, query, region, meta, match_offset, match_length):
        """Turn one exported region into a concordance line and store it.

        The region is converted with ``cow_region_to_conc`` and split into
        left context, match and right context.  The resulting dict (keys
        ``"meta"``, ``"left"``, ``"match"``, ``"right"``) is appended to
        ``self.concordance``.

        Args:
            query: Object providing ``references`` (names paired with
                ``meta``) and ``attributes`` (names paired with the fields
                of each token).
            region: Raw region passed through to ``cow_region_to_conc``.
            meta: Meta values for this line, zipped with
                ``query.references``.
            match_offset: Position of the first matched token, counted in
                true tokens (entries not matched by ``self.rex``).
            match_length: Number of true tokens in the match.

        Returns:
            None.  If the match does not fit into the exported region, a
            warning is printed and nothing is appended.
        """
        # Turn the raw region into a usable structure: a sequence of entries
        # whose first field is the token (or structure tag) string.
        line = cow_region_to_conc(region, self.has_attributes)

        # Find true tokens via indices (not structs) for separating match
        # from context.
        indices = [i for i, s in enumerate(line) if not self.rex.match(s[0])]

        # If someone does not search within <x/> but exports just x, part of
        # the match might be cut off.  Skip the concordance line in those
        # situations.  This check has to run BEFORE any lookup in `indices`,
        # otherwise an out-of-range offset raises IndexError first.
        last_offset = match_offset + match_length - 1
        if match_offset >= len(indices) or last_offset >= len(indices):
            print("Index anomaly! You just lost a concordance line.")
            print(
                "Are you querying matches that might exceed the exported container?"
            )
            # Blank separator line (the former bare `print` was a no-op
            # under Python 3).
            print("")
            return

        match_start = indices[match_offset]
        match_end = indices[last_offset]

        # Pick how a single entry of `line` is rendered in the output.
        if self.full_structure:

            def render(token):
                # Entries starting with '<' stay plain strings; everything
                # else becomes an attribute-name -> value dict.
                if re.match(r'<', token[0], re.UNICODE):
                    return str(token[0])
                return dict(zip(query.attributes, token))
        else:

            def render(token):
                # Flat export: all fields of the entry joined by '|'.
                return '|'.join(token)

        # Build concordance line and add to output list.
        concline = {
            "meta": dict(zip(query.references, meta)),
            "left": [render(token) for token in line[:match_start]],
            "match": [
                render(token) for token in line[match_start:match_end + 1]
            ],
            "right": [render(token) for token in line[match_end + 1:]],
        }
        self.concordance.append(concline)
Exemplo n.º 2
0
  def process(self, query, region, meta, match_offset, match_length):
    """Write one concordance line to ``self.handle`` as tab-separated text.

    Output layout: the ``meta`` values joined by tabs, then the left
    context, the match and the right context, each followed by a tab
    (the last one by a newline).  Within a context field, entries are
    separated by spaces and the fields of one entry by '|'.

    ``match_offset`` and ``match_length`` are counted in true tokens,
    i.e. entries whose first field is not matched by ``self.rex``.
    """
    # Convert the raw region into a sequence of entries and echo each one.
    line = cow_region_to_conc(region, self.has_attributes)
    for entry in line:
        print(entry)

    # Positions in `line` that hold true tokens rather than structures.
    token_positions = [
        pos for pos, entry in enumerate(line)
        if not self.rex.match(entry[0])
    ]
    match_start = token_positions[match_offset]
    match_end = token_positions[match_offset + match_length - 1]

    # Meta columns first (written unencoded, as a plain string).
    self.handle.write('\t'.join(meta) + '\t')

    # Then left context, match and right context, each UTF-8 encoded and
    # closed by its own terminator.
    segments = (
        (line[:match_start], '\t'),
        (line[match_start:match_end + 1], '\t'),
        (line[match_end + 1:], '\n'),
    )
    for tokens, terminator in segments:
        text = ' '.join(['|'.join(token) for token in tokens])
        self.handle.write((text + terminator).encode('utf-8'))
Exemplo n.º 3
0
    def process(self, query, region, meta, match_offset, match_length):
        """Build a tree of nodes from one exported region and export it.

        Every true token of the region (entries not matched by
        ``self.rex``) becomes a ``Node``; an artificial ``TOP`` node named
        ``"0"`` is put in front.  Each token node is attached to the first
        node whose ``name`` equals its ``head`` value.  Depending on the
        instance flags the tree is printed, written through
        ``self.exporter`` and/or saved as a ``.dot`` or ``.png`` file.

        Args:
            query: Object providing ``references`` (names paired with
                ``meta``) and ``attributes`` (names for extra node
                attributes, selected via ``self.attribs``).
            region: Raw region passed through to ``cow_region_to_conc``.
            meta: Meta values for this region; also used to build the
                image file name.
            match_offset: Unused here.
            match_length: Unused here.

        Returns:
            None.  Returns early, without exporting, when ``self.filtre``
            rejects the structure.
        """
        # Turn the raw region into a usable structure.
        line = cow_region_to_conc(region, self.has_attributes)

        # Find true tokens via indices (not structs).
        indices = [i for i, s in enumerate(line) if not self.rex.match(s[0])]

        # Turn everything into nodes already - to be linked into a tree in
        # the next step.  The artificial TOP node carries the meta data.
        top = Node("0",
                   token="TOP",
                   relation="",
                   head="",
                   linear=0,
                   meta=dict(zip(query.references, meta)))
        nodes = [top] + [
            Node(make_token_safe(line[x][self.column_index]),
                 token=line[x][self.column_token],
                 relation=line[x][self.column_relation],
                 head=line[x][self.column_head],
                 linear=int(line[x][self.column_index]),
                 **{query.attributes[a]: line[x][a] for a in self.attribs})
            for x in indices
        ]

        # Build tree from top: the parent is the first node whose name
        # equals this node's head (None if there is no such node).
        for n in nodes[1:]:
            n.parent = next((x for x in nodes if x.name == n.head), None)

        # If a descendant implements the filter, certain structures can be
        # discarded.
        if not self.filtre(nodes, line):
            return

        # Export as desired. Three independent formats.
        if self.printtrees:
            for pre, _, node in RenderTree(nodes[0]):
                print("%s%s (%s)" % (pre, node.token, node.name))

        if self.savejson:
            self.exporter.write(nodes[0], self.writer)

        if self.saveimage:
            fnam = self.fileprefix + '_' + meta[self.imagemetaid1]
            if self.imagemetaid2:
                fnam = fnam + '_' + meta[self.imagemetaid2]
            # Compare by value: `is` on string literals tests object
            # identity and is not guaranteed to hold for equal strings.
            if self.saveimage == 'dot':
                DotExporter(nodes[0]).to_dotfile(fnam + '.dot')
            elif self.saveimage == 'png':
                DotExporter(nodes[0],
                            edgeattrfunc=edgeattrfunc,
                            nodenamefunc=nodenamefunc).to_picture(fnam +
                                                                  '.png')