Exemplo n.º 1
0
    def __init__(self, session, config, parent):
        """Compile the configured XSLT stylesheet and read its parameters.

        The stylesheet location is read from the 'xsltPath' path; a
        relative location is joined onto the 'defaultPath' path.

        The optional 'parameter' setting is a space-separated list of
        ``name:value`` pairs. Each value is stored in ``self.params``
        wrapped in double quotes. ``self.params`` stays ``None`` when the
        setting is absent or empty.

        Raises ConfigFileException if 'xsltPath' is missing or if a
        parameter entry has no ':' separator.
        """
        Transformer.__init__(self, session, config, parent)
        xfrPath = self.get_path(session, "xsltPath")
        if xfrPath is None:
            raise ConfigFileException("Missing path 'xsltPath' for "
                                      "{0}.".format(self.id))

        if os.path.isabs(xfrPath):
            path = xfrPath
        else:
            dfp = self.get_path(session, "defaultPath")
            path = os.path.join(dfp, xfrPath)

        # Register myTimeFn as 'now' in the Cheshire3 XSLT function
        # namespace
        ns = etree.FunctionNamespace(
            'http://www.cheshire3.org/ns/function/xsl/')
        ns['now'] = myTimeFn
        self.functionNamespace = ns
        self.parsedXslt = etree.parse(path)
        self.txr = etree.XSLT(self.parsedXslt)

        self.params = None
        parameter = self.get_setting(session, 'parameter', None)
        if parameter:
            self.params = {}
            for pair in parameter.split(' '):
                if not pair:
                    # Repeated or trailing spaces produce empty tokens
                    continue
                # Split on the first ':' only, so values may contain ':'
                (k, sep, v) = pair.partition(':')
                if not sep:
                    raise ConfigFileException(
                        "Malformed 'parameter' entry '{0}' for {1}; "
                        "expected name:value.".format(pair, self.id))
                # Quoted form; presumably so the XSLT processor reads the
                # value as a string literal -- confirm against the caller
                self.params[k] = '"%s"' % v
Exemplo n.º 2
0
 def __init__(self, session, config, parent):
     """Resolve the extractor and tokenizer and read the 'eidXpath' setting.

     Also keeps a reference to the session and compiles a pattern that
     matches one or more whitespace characters.
     """
     Transformer.__init__(self, session, config, parent)
     self.session = session
     self.extractor = self.get_path(session, 'extractor')
     self.rfot = self.get_path(session, 'tokenizer')
     # Raw string: \s must reach the regex engine, not Python's string
     # escape processing
     self.regexp = re.compile(r'[\s]+')
     self.eidXpath = self.get_setting(session, 'eidXpath')
Exemplo n.º 3
0
 def __init__(self, session, config, parent):
     """Set up the extractor, tokenizer, whitespace pattern and 'eidXpath'.

     The session passed in is stored on the instance as ``self.session``.
     """
     Transformer.__init__(self, session, config, parent)
     self.session = session
     self.extractor = self.get_path(session, 'extractor')
     self.rfot = self.get_path(session, 'tokenizer')
     # Pattern for runs of whitespace; written as a raw string so the
     # backslash is not treated as a Python string escape
     self.regexp = re.compile(r'[\s]+')
     self.eidXpath = self.get_setting(session, 'eidXpath')
 def __init__(self, session, config, parent):
     """Compile the configured XSLT stylesheet and read its parameters.

     The stylesheet location comes from the 'xsltPath' path; when it is
     not absolute it is joined onto the 'defaultPath' path.

     The optional 'parameter' setting holds space-separated
     ``name:value`` pairs, stored in ``self.params`` with each value
     wrapped in double quotes. ``self.params`` is ``None`` when the
     setting is absent or empty.

     Raises ConfigFileException if 'xsltPath' is missing or if a
     parameter entry has no ':' separator.
     """
     Transformer.__init__(self, session, config, parent)
     xfrPath = self.get_path(session, "xsltPath")
     if xfrPath is None:
         raise ConfigFileException("Missing path 'xsltPath' for "
                                   "{0}.".format(self.id))

     if os.path.isabs(xfrPath):
         path = xfrPath
     else:
         dfp = self.get_path(session, "defaultPath")
         path = os.path.join(dfp, xfrPath)

     # Make myTimeFn callable from stylesheets as 'now'
     ns = etree.FunctionNamespace(
         'http://www.cheshire3.org/ns/function/xsl/'
     )
     ns['now'] = myTimeFn
     self.functionNamespace = ns
     self.parsedXslt = etree.parse(path)
     self.txr = etree.XSLT(self.parsedXslt)

     self.params = None
     parameter = self.get_setting(session, 'parameter', None)
     if parameter:
         self.params = {}
         for pair in parameter.split(' '):
             if not pair:
                 # Skip empty tokens left by repeated/trailing spaces
                 continue
             # Only the first ':' separates name from value
             (k, sep, v) = pair.partition(':')
             if not sep:
                 raise ConfigFileException(
                     "Malformed 'parameter' entry '{0}' for {1}; "
                     "expected name:value.".format(pair, self.id))
             self.params[k] = '"%s"' % v
Exemplo n.º 5
0
 def __init__(self, session, config, parent):
     """Resolve the 'extractor' and 'tokenizer' paths and read 'eidXpath'.

     Stores the session on the instance and compiles a pattern matching
     one or more whitespace characters.
     """
     Transformer.__init__(self, session, config, parent)
     self.session = session
     self.extractor = self.get_path(session, "extractor")
     self.rfot = self.get_path(session, "tokenizer")
     # Raw string so that \s is a regex escape rather than an (invalid)
     # Python string escape
     self.regexp = re.compile(r"[\s]+")
     self.eidXpath = self.get_setting(session, "eidXpath")
Exemplo n.º 6
0
 def __init__(self, session, config, parent):
     """Resolve the tokenizer and genia normalizer; compile the patterns.

     Stores the session on the instance, compiles three regular
     expressions used by this transformer and sets ``debug`` to 0.
     """
     Transformer.__init__(self, session, config, parent)
     self.session = session
     self.rfot = self.get_path(session, 'tokenizer')
     self.tupg = self.get_path(session, 'geniaNormalizer')
     # All patterns are raw strings so regex escapes (\W, \[) are not
     # subject to Python string-escape processing.
     # Non-space run ending in one of - / * = ( ` followed by a space
     self.dashre = re.compile(r"""([^ ]+[-/*=(`] )""")
     # Non-word run, one of - ( ' `, then optional non-word chars to end
     self.enddashre = re.compile(r"""\W+[-('`]\W*$""")
     # Bracketed anchor markup: "[ a ...[/a]" (shortest match)
     self.urlre = re.compile(r'\[[ ]*a .*?\[/a\]')
     self.debug = 0
Exemplo n.º 7
0
 def __init__(self, session, config, parent):
     """Look up the tokenizer and genia normalizer and compile patterns.

     The session is kept as ``self.session``; ``self.debug`` starts at 0.
     """
     Transformer.__init__(self, session, config, parent)
     self.session = session
     self.rfot = self.get_path(session, 'tokenizer')
     self.tupg = self.get_path(session, 'geniaNormalizer')
     # Raw-string literals: the backslashes below belong to the regex
     # syntax and must not be interpreted as Python string escapes.
     # dashre: non-space characters, then one of - / * = ( ` and a space
     self.dashre = re.compile(r"""([^ ]+[-/*=(`] )""")
     # enddashre: trailing non-word run containing one of - ( ' `
     self.enddashre = re.compile(r"""\W+[-('`]\W*$""")
     # urlre: "[", optional spaces, "a ", anything (lazy), "[/a]"
     self.urlre = re.compile(r'\[[ ]*a .*?\[/a\]')
     self.debug = 0
Exemplo n.º 8
0
 def __init__(self, session, config, parent):
     """Resolve selector/extractor and load the template text.

     The template is read from the file named by the 'templatePath'
     path (joined onto 'defaultPath') when that path is configured;
     otherwise it is taken from the 'template' setting.

     Raises ConfigFileException if neither source provides a template.
     """
     Transformer.__init__(self, session, config, parent)
     self.selector = self.get_path(session, 'selector')
     self.extractor = self.get_path(session, 'extractor')
     tmplPath = self.get_path(session, "templatePath")
     if tmplPath is not None:
         dfp = self.get_path(session, "defaultPath")
         path = os.path.join(dfp, tmplPath)
         with open(path, 'r') as fh:
             # NOTE(review): unicode() on file contents uses the default
             # codec -- confirm templates are expected to be ASCII only
             self.template = unicode(fh.read())
     else:
         tmpl = self.get_setting(session, 'template', '')
         if not tmpl:
             # Without this check a missing setting was silently turned
             # into the text of unicode(None)
             raise ConfigFileException("{0} requires either a "
                                       "'templatePath' path or a "
                                       "'template' setting."
                                       "".format(self.id))
         self.template = unicode(tmpl)
Exemplo n.º 9
0
 def __init__(self, session, config, parent):
     """Set up the selector, extractor and template text.

     When the 'templatePath' path is configured, the template is read
     from that file (relative to 'defaultPath'); otherwise the
     'template' setting supplies it.

     Raises ConfigFileException if no template is configured either way.
     """
     Transformer.__init__(self, session, config, parent)
     self.selector = self.get_path(session, 'selector')
     self.extractor = self.get_path(session, 'extractor')
     tmplPath = self.get_path(session, "templatePath")
     if tmplPath is not None:
         dfp = self.get_path(session, "defaultPath")
         path = os.path.join(dfp, tmplPath)
         with open(path, 'r') as fh:
             # NOTE(review): conversion relies on the default codec;
             # verify non-ASCII templates are not expected here
             self.template = unicode(fh.read())
     else:
         tmpl = self.get_setting(session, 'template', '')
         if not tmpl:
             # Fail loudly rather than storing unicode(None) as template
             raise ConfigFileException("{0} requires either a "
                                       "'templatePath' path or a "
                                       "'template' setting."
                                       "".format(self.id))
         self.template = unicode(tmpl)
Exemplo n.º 10
0
    def __init__(self, session, config, parent):
        """Read the label source, vector index and term thresholds.

        The class label source is the first non-empty one of the
        settings 'label', 'labelXPath' and 'labelXPathProcessor'; the
        last is resolved to an object through the current database.

        Every threshold setting is stored on an attribute of the same
        name, defaulting to -1 (counts) or -1.0 (percentages).

        Raises ConfigFileException if no label source is configured.
        """
        Transformer.__init__(self, session, config, parent)
        # Fetch the database first: it is needed both for the label
        # XPath processor lookup and for totalRecords below. It used to
        # be assigned only at the end, so the lookup hit an unassigned
        # local and raised UnboundLocalError.
        db = session.server.get_object(session, session.database)

        self.label = self.get_setting(session, 'label', '')
        if not self.label:
            self.labelXPath = self.get_setting(session, 'labelXPath', '')
            if not self.labelXPath:
                lxpo = self.get_setting(session, 'labelXPathProcessor', '')
                if not lxpo:
                    raise ConfigFileException(
                        "No label (class) source set for %s" % (self.id))
                # Will raise if not found
                self.labelXPathProcessor = db.get_object(session, lxpo)

        # And now get vector index
        self.vectorIndex = self.get_path(session, 'vectorIndex')

        # (setting name, default); attribute name == setting name.
        # Driving this from one table keeps attribute and setting names
        # in step: previously the 'maxNGlobalFreqPct'/'maxNGlobalOccsPct'
        # values overwrote maxNGlobalFreq/maxNGlobalOccs.
        thresholds = (
            ('minGlobalFreq', -1),
            ('maxGlobalFreq', -1),
            ('minGlobalOccs', -1),
            ('maxGlobalOccs', -1),
            ('minLocalFreq', -1),
            ('maxLocalFreq', -1),
            ('minGlobalFreqPct', -1.0),
            ('maxGlobalFreqPct', -1.0),
            ('minGlobalOccsPct', -1.0),
            ('maxGlobalOccsPct', -1.0),
            ('minPropGlobalFreqPct', -1.0),
            ('maxPropGlobalFreqPct', -1.0),
            ('minPropGlobalOccsPct', -1.0),
            ('maxPropGlobalOccsPct', -1.0),
            ('minLocalFreqPct', -1.0),
            ('maxLocalFreqPct', -1.0),
            ('maxNGlobalFreq', -1),
            ('maxNGlobalOccs', -1),
            ('maxNGlobalFreqPct', -1.0),
            ('maxNGlobalOccsPct', -1.0),
            ('minNLocalFreq', -1),
            ('maxNLocalFreq', -1),
        )
        for (name, default) in thresholds:
            setattr(self, name, self.get_setting(session, name, default))

        self.idxMetadata = self.vectorIndex.fetch_metadata(session)
        self.totalRecords = db.totalRecords

        self.clear(session)
Exemplo n.º 11
0
    def __init__(self, session, config, parent):
        """Read the label source, vector index and term thresholds.

        The class label source is the first non-empty one of the
        settings 'label', 'labelXPath' and 'labelXPathProcessor'; the
        last is resolved to an object through the current database.

        Every threshold setting is stored on an attribute of the same
        name, defaulting to -1 (counts) or -1.0 (percentages).

        Raises ConfigFileException if no label source is configured.
        """
        Transformer.__init__(self, session, config, parent)
        # The database is needed for the label XPath processor lookup
        # as well as for totalRecords, so resolve it before either use.
        # (It was previously assigned after its first use, which raised
        # UnboundLocalError.)
        db = session.server.get_object(session, session.database)

        self.label = self.get_setting(session, 'label', '')
        if not self.label:
            self.labelXPath = self.get_setting(session, 'labelXPath', '')
            if not self.labelXPath:
                lxpo = self.get_setting(session, 'labelXPathProcessor', '')
                if not lxpo:
                    raise ConfigFileException(
                        "No label (class) source set for %s" % (self.id))
                # Will raise if not found
                self.labelXPathProcessor = db.get_object(session, lxpo)

        # And now get vector index
        self.vectorIndex = self.get_path(session, 'vectorIndex')

        # (setting name, default); each value lands on the attribute of
        # the same name. The two maxN...Pct settings used to be written
        # onto maxNGlobalFreq/maxNGlobalOccs, clobbering those values.
        thresholds = (
            ('minGlobalFreq', -1),
            ('maxGlobalFreq', -1),
            ('minGlobalOccs', -1),
            ('maxGlobalOccs', -1),
            ('minLocalFreq', -1),
            ('maxLocalFreq', -1),
            ('minGlobalFreqPct', -1.0),
            ('maxGlobalFreqPct', -1.0),
            ('minGlobalOccsPct', -1.0),
            ('maxGlobalOccsPct', -1.0),
            ('minPropGlobalFreqPct', -1.0),
            ('maxPropGlobalFreqPct', -1.0),
            ('minPropGlobalOccsPct', -1.0),
            ('maxPropGlobalOccsPct', -1.0),
            ('minLocalFreqPct', -1.0),
            ('maxLocalFreqPct', -1.0),
            ('maxNGlobalFreq', -1),
            ('maxNGlobalOccs', -1),
            ('maxNGlobalFreqPct', -1.0),
            ('maxNGlobalOccsPct', -1.0),
            ('minNLocalFreq', -1),
            ('maxNLocalFreq', -1),
        )
        for (name, default) in thresholds:
            setattr(self, name, self.get_setting(session, name, default))

        self.idxMetadata = self.vectorIndex.fetch_metadata(session)
        self.totalRecords = db.totalRecords

        self.clear(session)
 def __init__(self, session, config, parent):
     """Resolve selector/extractor and load the template text.

     The template comes from the file at 'templatePath' (joined onto
     'defaultPath') when that path is configured, otherwise from the
     'template' setting.

     Raises ConfigFileException if neither yields a template.
     """
     Transformer.__init__(self, session, config, parent)
     self.selector = self.get_path(session, 'selector')
     self.extractor = self.get_path(session, 'extractor')
     tmplPath = self.get_path(session, "templatePath")
     if tmplPath is None:
         # No template file configured: fall back to the inline setting
         inline = self.get_setting(session, 'template', '')
         if not inline:
             message = ("{0} requires either a 'templatePath' path or a "
                        "'template' setting.")
             raise ConfigFileException(message.format(self.id))
         self.template = unicode(inline)
     else:
         base = self.get_path(session, "defaultPath")
         with open(os.path.join(base, tmplPath), 'r') as source:
             content = source.read()
         self.template = unicode(content)
Exemplo n.º 13
0
 def __init__(self, session, config, parent):
     """Set up the selector, extractor and template text.

     A configured 'templatePath' (relative to 'defaultPath') is read
     from disk; without one, the 'template' setting is used instead.

     Raises ConfigFileException when no template source is configured.
     """
     Transformer.__init__(self, session, config, parent)
     self.selector = self.get_path(session, 'selector')
     self.extractor = self.get_path(session, 'extractor')
     tmplPath = self.get_path(session, "templatePath")
     if tmplPath is None:
         # Inline template from settings
         text = self.get_setting(session, 'template', '')
         if not text:
             fmt = ("{0} requires either a 'templatePath' path or a "
                    "'template' setting.")
             raise ConfigFileException(fmt.format(self.id))
         self.template = unicode(text)
     else:
         # Template stored in a file below the default path
         root = self.get_path(session, "defaultPath")
         location = os.path.join(root, tmplPath)
         with open(location, 'r') as handle:
             raw = handle.read()
         self.template = unicode(raw)
Exemplo n.º 14
0
    def __init__(self, session, config, parent):
        """Read the vector index, frequency limits and term-id lists.

        The 'stopwords' and 'reqdwords' settings are space-separated
        word lists. Each word is looked up in the vector index and the
        resulting term ids are stored in ``ignoreTermids`` and
        ``mandatoryTermids`` respectively. Words whose lookup raises
        ValueError are skipped.
        """
        Transformer.__init__(self, session, config, parent)
        self.vectorIndex = self.get_path(session, 'vectorIndex')

        # Attribute name and setting name coincide for these limits
        for limit in ('minGlobalFreq', 'maxGlobalFreq',
                      'minGlobalOccs', 'maxGlobalOccs',
                      'minLocalFreq', 'maxLocalFreq'):
            setattr(self, limit, self.get_setting(session, limit, -1))

        self.prox = self.get_setting(session, 'proxElement', 0)
        self.matches = self.get_setting(session, 'matchesOnly', 0)
        self.stripMatch = self.get_setting(session, 'stripMatch', 0)

        def term_ids_for(setting_name):
            # Map each non-empty word of the setting to its term id
            found = []
            words = self.get_setting(session, setting_name, '')
            for word in words.split(' '):
                if not word:
                    continue
                try:
                    (term_id, _second, _third) = self.vectorIndex.fetch_term(
                        session, word, summary=True)
                except ValueError:
                    # Term doesn't exist
                    continue
                found.append(term_id)
            return found

        self.ignoreTermids = term_ids_for('stopwords')
        self.mandatoryTermids = term_ids_for('reqdwords')
        self._clear(session)
Exemplo n.º 15
0
    def __init__(self, session, config, parent):
        """Read the vector index, frequency limits and term-id lists.

        Words from the space-separated 'stopwords' and 'reqdwords'
        settings are looked up in the vector index; the term ids found
        are kept in ``ignoreTermids`` and ``mandatoryTermids``. A word
        whose lookup raises ValueError is ignored.
        """
        Transformer.__init__(self, session, config, parent)
        self.vectorIndex = self.get_path(session, 'vectorIndex')

        # These six settings are stored under their own names
        for key in ('minGlobalFreq', 'maxGlobalFreq',
                    'minGlobalOccs', 'maxGlobalOccs',
                    'minLocalFreq', 'maxLocalFreq'):
            setattr(self, key, self.get_setting(session, key, -1))

        self.prox = self.get_setting(session, 'proxElement', 0)
        self.matches = self.get_setting(session, 'matchesOnly', 0)
        self.stripMatch = self.get_setting(session, 'stripMatch', 0)

        def resolve(setting_name):
            # Collect the term id of every word listed in the setting
            ids = []
            listed = self.get_setting(session, setting_name, '')
            for token in listed.split(' '):
                if token:
                    try:
                        summary = self.vectorIndex.fetch_term(session,
                                                              token,
                                                              summary=True)
                        (tid, _b, _c) = summary
                    except ValueError:
                        # term doesn't exist
                        pass
                    else:
                        ids.append(tid)
            return ids

        self.ignoreTermids = resolve('stopwords')
        self.mandatoryTermids = resolve('reqdwords')

        self._clear(session)
Exemplo n.º 16
0
 def __init__(self, session, config, parent):
     """Read the highlight tag, its attributes and the break elements.

     Without a 'highlightTag' setting the tag is 'c3:highlight' and the
     attributes start with the matching 'xmlns:c3' declaration;
     otherwise the configured tag is used with no initial attributes.

     'tagAttrList' is a space-separated list of ``name="value"`` items
     that are added to the attributes. 'breakElementsList' is split on
     single spaces into ``self.breakElements``.

     Raises ConfigFileException if a 'tagAttrList' item has no '='.
     """
     Transformer.__init__(self, session, config, parent)
     htag = self.get_setting(session, 'highlightTag', None)
     if htag is None:
         self.highlightTag = 'c3:highlight'
         self.attrs = {
             'xmlns:c3': "http://www.cheshire3.org/schemas/highlight/"
         }
     else:
         self.highlightTag = htag
         self.attrs = {}

     tagAttrs = self.get_setting(session, 'tagAttrList', None)
     if tagAttrs is not None:
         for attr in tagAttrs.split(' '):
             if not attr:
                 # Repeated spaces give empty items; these used to
                 # raise IndexError
                 continue
             (k, sep, v) = attr.partition('=')
             if not sep:
                 raise ConfigFileException(
                     "Malformed 'tagAttrList' entry '{0}' for {1}; "
                     "expected name=\"value\".".format(attr, self.id))
             # Strip surrounding quotes only when they are present, so
             # an unquoted value does not lose its first and last chars
             if len(v) >= 2 and v[0] == v[-1] and v[0] in ('"', "'"):
                 v = v[1:-1]
             self.attrs[k] = v

     self.breakElements = self.get_setting(session,
                                           'breakElementsList',
                                           '').split(' ')
Exemplo n.º 17
0
 def __init__(self, session, config, parent):
     """Compile the configured XSLT stylesheet and read its parameters.

     The stylesheet location comes from the 'xsltPath' path; when it is
     not absolute it is joined onto the 'defaultPath' path.

     The optional 'parameter' setting holds space-separated
     ``name:value`` pairs, stored in ``self.params`` with each value
     wrapped in double quotes. ``self.params`` is ``None`` when the
     setting is absent or empty.

     Raises ConfigFileException if 'xsltPath' is missing or if a
     parameter entry has no ':' separator.
     """
     Transformer.__init__(self, session, config, parent)
     xfrPath = self.get_path(session, "xsltPath")
     if xfrPath is None:
         # Report the missing path clearly instead of failing inside
         # os.path.join
         raise ConfigFileException("Missing path 'xsltPath' for "
                                   "{0}.".format(self.id))

     if os.path.isabs(xfrPath):
         # An absolute location does not need 'defaultPath' at all
         path = xfrPath
     else:
         dfp = self.get_path(session, "defaultPath")
         path = os.path.join(dfp, xfrPath)

     # Make myTimeFn callable from stylesheets as 'now'
     ns = etree.FunctionNamespace('http://www.cheshire3.org/ns/function/xsl/')
     ns['now'] = myTimeFn
     self.functionNamespace = ns
     self.parsedXslt = etree.parse(path)
     self.txr = etree.XSLT(self.parsedXslt)

     self.params = None
     parameter = self.get_setting(session, 'parameter', None)
     if parameter:
         self.params = {}
         for pair in parameter.split(' '):
             if not pair:
                 # Skip empty tokens left by repeated/trailing spaces
                 continue
             # Only the first ':' separates name from value
             (k, sep, v) = pair.partition(':')
             if not sep:
                 raise ConfigFileException(
                     "Malformed 'parameter' entry '{0}' for {1}; "
                     "expected name:value.".format(pair, self.id))
             self.params[k] = '"%s"' % v
Exemplo n.º 18
0
    def __init__(self, session, config, parent):
        """Read the highlight tag, its attributes and the break elements.

        Without a 'highlightTag' setting the tag is 'c3:highlight' and
        the attributes start with an 'xmlns:c3' declaration taken from
        ``self.HIGHLIGHT_NS``; otherwise the configured tag is used
        with no initial attributes.

        'tagAttrList' is a space-separated list of ``name="value"``
        items that are added to the attributes. 'breakElementsList' is
        split on single spaces into ``self.breakElements``.

        Raises ConfigFileException if a 'tagAttrList' item has no '='.
        """
        Transformer.__init__(self, session, config, parent)
        htag = self.get_setting(session, 'highlightTag', None)
        if htag is None:
            self.highlightTag = 'c3:highlight'
            self.attrs = {'xmlns:c3': self.HIGHLIGHT_NS}
        else:
            self.highlightTag = htag
            self.attrs = {}

        tagAttrs = self.get_setting(session, 'tagAttrList', None)
        if tagAttrs is not None:
            for attr in tagAttrs.split(' '):
                if not attr:
                    # Empty item from repeated spaces; previously an
                    # IndexError
                    continue
                (k, sep, v) = attr.partition('=')
                if not sep:
                    raise ConfigFileException(
                        "Malformed 'tagAttrList' entry '{0}' for {1}; "
                        "expected name=\"value\".".format(attr, self.id))
                # Remove the surrounding quotes when present; leave an
                # unquoted value intact rather than trimming its ends
                if len(v) >= 2 and v[0] == v[-1] and v[0] in ('"', "'"):
                    v = v[1:-1]
                self.attrs[k] = v

        self.breakElements = self.get_setting(session, 'breakElementsList',
                                              '').split(' ')
Exemplo n.º 19
0
    def __init__(self, session, config, parent):
        """Compile the configured XSLT stylesheet and read its parameters.

        The stylesheet location is read from the 'xsltPath' path; a
        relative location is joined onto the 'defaultPath' path.

        The optional 'parameter' setting is a space-separated list of
        ``name:value`` pairs. Each value is stored in ``self.params``
        wrapped in double quotes. ``self.params`` stays ``None`` when the
        setting is absent or empty.

        Raises ConfigFileException if 'xsltPath' is missing or if a
        parameter entry has no ':' separator.
        """
        Transformer.__init__(self, session, config, parent)
        xfrPath = self.get_path(session, "xsltPath")
        if xfrPath is None:
            # Fail with a configuration error rather than inside
            # os.path.join
            raise ConfigFileException("Missing path 'xsltPath' for "
                                      "{0}.".format(self.id))

        if os.path.isabs(xfrPath):
            # Absolute locations are used as-is, without 'defaultPath'
            path = xfrPath
        else:
            dfp = self.get_path(session, "defaultPath")
            path = os.path.join(dfp, xfrPath)

        # Register myTimeFn as 'now' in the Cheshire3 XSLT function
        # namespace
        ns = etree.FunctionNamespace(
            'http://www.cheshire3.org/ns/function/xsl/')
        ns['now'] = myTimeFn
        self.functionNamespace = ns
        self.parsedXslt = etree.parse(path)
        self.txr = etree.XSLT(self.parsedXslt)

        self.params = None
        parameter = self.get_setting(session, 'parameter', None)
        if parameter:
            self.params = {}
            for pair in parameter.split(' '):
                if not pair:
                    # Repeated or trailing spaces produce empty tokens
                    continue
                # Split on the first ':' only, so values may contain ':'
                (k, sep, v) = pair.partition(':')
                if not sep:
                    raise ConfigFileException(
                        "Malformed 'parameter' entry '{0}' for {1}; "
                        "expected name:value.".format(pair, self.id))
                self.params[k] = '"%s"' % v
Exemplo n.º 20
0
 def __init__(self, session, config, parent):
     """Create empty ``maps`` and ``tagset``, then run the base initialiser.

     NOTE(review): the attributes are set before Transformer.__init__,
     presumably because base initialisation fills them while reading
     the configuration -- confirm before reordering.
     """
     self.maps, self.tagset = [], ""
     Transformer.__init__(self, session, config, parent)
Exemplo n.º 21
0
 def __init__(self, session, config, parent):
     """Start with no maps and an empty tagset, then initialise the base.

     NOTE(review): both attributes exist before Transformer.__init__ is
     called; presumably the base initialiser relies on them while
     processing the configuration -- verify before changing the order.
     """
     self.maps, self.tagset = [], ""
     Transformer.__init__(self, session, config, parent)
Exemplo n.º 22
0
 def __init__(self, session, config, parent):
     """Initialise the base transformer and resolve the 'workflow' path."""
     Transformer.__init__(self, session, config, parent)
     workflow = self.get_path(session, 'workflow')
     self.workflow = workflow
 def __init__(self, session, config, parent):
     """Initialise the base transformer and keep the session."""
     Transformer.__init__(self, session, config, parent)
     # Stored so the session passed at construction stays reachable
     # from the instance
     self.session = session
 def __init__(self, session, config, parent):
     """Run the base initialiser, then look up the 'workflow' path object."""
     Transformer.__init__(self, session, config, parent)
     resolved = self.get_path(session, 'workflow')
     self.workflow = resolved
Exemplo n.º 25
0
 def __init__(self, session, config, parent):
     """Run the base initialiser and store the session on the instance."""
     Transformer.__init__(self, session, config, parent)
     # Keep a reference to the session supplied at construction time
     self.session = session