示例#1
0
    def handle(self, req):
        """Pick a serialization for the request and fetch the Resource Map.

        Form fields read: ``strict``, ``mimeType``, ``extension`` and
        ``aggregation``.  The serialization is chosen from, in order, the
        ``mimeType`` field, the ``extension`` field, the ``Accept`` header,
        and finally the ``rdf.xml`` default.

        Every path in this block returns ``None``; problems are reported to
        the client through ``self.error`` and the instructions page through
        ``self.send``.  In the code visible here the fetched document, the
        serializer and the ``strict`` flag are computed but not used further.

        Raises:
            ValueError: if ``extension`` is given but is not a key of
                ``srlzHash``.
        """
        # Everything after the first five characters of the URI is treated as
        # the target URI (presumably a fixed mount prefix -- TODO confirm).
        path = req.uri[5:]
        form = FieldStorage(req)

        # Any of the listed "falsy" spellings switches strict parsing off.
        strict = form.get('strict', True)
        if strict in ['false', 'False', '0', None, '']:
            strict = False

        # NOTE(review): presumably restores '+' characters that URL decoding
        # turned into spaces (e.g. "application/rdf+xml") -- confirm.
        mt = form.get('mimeType', '').replace(' ', '+')

        if not mt:
            xtn = form.get('extension', '')
            if xtn:
                if xtn not in srlzHash:
                    # can't continue
                    raise ValueError(xtn)
                mt = srlzHash[xtn].mimeType

        if not mt:
            # Content negotiation is best-effort: a missing or unparsable
            # Accept header simply falls through to the default below.
            try:
                wanted = req.headers_in['Accept']
                mts = conneg.parse(wanted)
                mt = conneg.best(mts, mimeList)
            except Exception:
                mt = ''

        if mt:
            xtn = mimeHash[str(mt)]
        else:
            # default to rdf/xml
            xtn = "rdf.xml"

        srlz = srlzHash[xtn]

        if 'aggregation' in form:
            uri = form.get('aggregation')
        else:
            uri = path

        if not uri:
            data = '<html><body>Instructions etc. goes here</body></html>'
            self.send(data, req, ct="text/html")
            return
        elif not protoUriRe.match(uri):
            self.error("Resource Map URI must be a protocol based URI", req)
            return

        try:
            # fetch
            rd = ReMDocument(uri)
        except Exception as e:
            # ``message`` is empty or absent on many exception types, so fall
            # back to str(e) to keep the report informative.
            reason = getattr(e, 'message', '') or str(e)
            self.error(
                "Could not retrieve Resource Map from '%s': %s" % (uri, reason),
                req)
            return
示例#2
0
def handler(req):
    """mod_python entry point: serve static assets or dispatch to a sub-handler.

    The second path component of ``req.uri`` selects the behaviour:

    * ``css``, ``js``, ``img`` or ``images``: the file under
      ``<cheshirePath>/clic/www`` matching the URI is sent as-is.
    * anything else: the ``operation`` form field selects ``SearchHandler``
      (``search``) or ``BrowseHandler`` (``browse``); any other value sends
      ``dickensInterface.html``.

    Returns:
        ``apache.HTTP_NOT_FOUND`` when the URI has no second path component,
        ``apache.HTTP_INTERNAL_SERVER_ERROR`` (after writing a cgitb report
        to the response) when handling raises, otherwise ``apache.OK``.
    """
    form = FieldStorage(req)
    try:
        section = req.uri[1:].rsplit('/')[1]
    except IndexError:
        return apache.HTTP_NOT_FOUND

    if section in ['css', 'js', 'img', 'images']:
        # NOTE(review): req.uri is concatenated into a filesystem path; this
        # relies on the server having normalised the URI (no '..' segments)
        # before the handler runs -- confirm.
        req.sendfile(os.path.join(cheshirePath, 'clic', 'www' + req.uri))
        return apache.OK

    try:
        # get the remote host's IP for logging
        remote_host = req.get_remote_host(apache.REMOTE_NOLOOKUP)
        # cd to where html fragments are
        os.chdir(htmlPath)
        lgr = FileLogger(logPath, remote_host)

        # Determine whether to use a sub-handler
        operation = form.get('operation', None)
        if operation == 'search':
            sub_handler = SearchHandler(lgr)
        elif operation == 'browse':
            sub_handler = BrowseHandler(lgr)
        else:
            req.content_type = "text/html"
            req.write(read_file('dickensInterface.html'))
            return apache.OK

        # Handle request
        try:
            sub_handler.handle(req)
        finally:
            # Flushing the log is best-effort: a logging failure must not
            # mask the outcome of the request itself.
            try:
                lgr.flush()
            except Exception:
                pass
            # Delete handler to ensure no state info is retained
            del lgr, sub_handler
    except Exception:
        # Top-level boundary: report the traceback to the client.
        req.content_type = "text/html"
        cgitb.Hook(file=req).handle()
        return apache.HTTP_INTERNAL_SERVER_ERROR
    return apache.OK
示例#3
0
 def handle(self, req):
     """Build the page selected by the ``mode`` form field and send it as XML.

     Recognised modes are ``compare``, ``index``, ``statstable`` and ``tfp``.
     Any other value (including a missing field) sends nothing.
     """
     form = FieldStorage(req)
     # (mode value, page-building method, argument that method receives)
     routes = (
         ('compare', 'compareIndexes', req),
         ('index', 'getIndexList', req),
         ('statstable', 'getStatsTable', req),
         ('tfp', 'create_TFP', form),
     )
     requested = form.get('mode', None)
     for mode_name, builder_name, builder_arg in routes:
         if requested == mode_name:
             page = getattr(self, builder_name)(builder_arg)
             self.send_xml(page, req)
             break
示例#4
0
 def handle(self, req):
     """Route the request by its ``mode`` form field and send the result.

     Each known mode maps to a page-building method and to the sender used
     for its output (XML, plain text or HTML).  An unknown or missing mode
     sends the static ``search.html`` page as HTML.
     """
     form = FieldStorage(req)
     # (mode value, page-building method, its argument, sender method)
     routes = (
         ('search', 'search', req, 'send_xml'),
         ('collocates', 'collocates', form, 'send_xml'),
         ('exportkwic', 'exportkwic', form, 'send_txt'),
         ('concordance', 'concordance', form, 'send_xml'),
         ('arm', 'arm', form, 'send_xml'),
         ('armtable', 'armTable', form, 'send_xml'),
         ('article', 'articleDisplay', form, 'send_html'),
         ('browse', 'articleBrowse', form, 'send_html'),
         ('sort', 'sort', form, 'send_xml'),
         ('filter', 'filter', form, 'send_xml'),
         ('cfp', 'create_cfp', form, 'send_xml'),
     )
     requested = form.get('mode', None)
     for mode_name, builder_name, builder_arg, sender_name in routes:
         if requested == mode_name:
             page = getattr(self, builder_name)(builder_arg)
             getattr(self, sender_name)(page, req)
             return
     # No recognised mode: fall back to the search form.
     page = read_file('search.html')
     self.send_html(page, req)
 def handle(self, req):
     """Serve the admin interface for the last path segment of ``req.uri``.

     * ``*.js``: sends the matching file from ``../js/`` and returns
       ``apache.OK``.
     * ``admin``: issues a temporary redirect to ``admin/``.
     * ``users.html``: runs the action named by the ``operation`` form
       field; ``findcontacts`` is answered directly as XML.
     * anything else, or an unknown/missing operation: the admin menu.

     Apart from the ``.js`` case the method returns ``None``.  HTML results
     are substituted for ``%CONTENT%`` in the base template before sending.
     """
     form = FieldStorage(req, True)
     page = unicode(read_file(self.baseTemplatePath))
     title = ' :: '.join(self.htmlTitle)
     navbar = ' '.join(self.htmlNav)
     page = multiReplace(page, {
         "%TITLE%": title,
         "%NAVBAR%": navbar,
         "%USERNAME%": session.user.username,
         "%REALNAME%": session.user.realName
     })
     # Only the final path segment decides what is served.
     leaf = req.uri[1:].rsplit('/', 1)[-1]
     operation = form.get('operation', None)

     if leaf.endswith('.js'):
         script = read_file(abspath('../js/{0}'.format(leaf)))
         self.send_response(script,
                            req,
                            content_type='text/javascript',
                            code=200
                            )
         return apache.OK

     if leaf == 'admin':
         # redirect to make sure later relative links work correctly
         redirect(req, 'admin/',
                  permanent=False, # TODO: make me True
                  text=("To prevent URL collisions caused by internal "
                        "relative, this service must be accessed at "
                        "admin/"
                        )
                  )
         return

     if leaf == 'users.html' and operation:
         if operation == 'findcontacts':
             self.send_xml(self.get_contactDetails(form), req)
             return
         # (accepted operation names, method that produces the page body)
         actions = (
             (('adduser',), 'add_user'),
             (('addinstitution',), 'add_inst'),
             (('editinstitution',), 'edit_inst'),
             (('deleteinst',), 'delete_inst'),
             (('deleteuser',), 'delete_user'),
             (('edit', 'edituser'), 'edit_user'),
         )
         for names, method_name in actions:
             if operation in names:
                 body = getattr(self, method_name)(form)
                 break
         else:
             body = self.show_adminMenu()
     else:
         body = self.show_adminMenu()

     # send the display
     self.send_html(page.replace('%CONTENT%', body), req)
示例#6
0
    def compareIndexes(self, req):
        """Build the table rows of term frequencies for one or more indexes.

        Form fields read from ``req``:

        * ``id`` (default ``data_grid``): used to form the response id.
        * ``offset`` (default 0): start position; a negative value pages
          backwards.  A value of the form ``N.M`` is split into a start
          position ``N`` and an ``adjustValue`` ``M``.
        * ``page_size`` (default 100): number of rows wanted.
        * ``index``: one index name, or a list of names to compare.

        Returns an ``<ajax-response>`` XML string whose ``<rows>`` element
        holds the generated ``<tr>`` elements.

        Relies on names defined outside this function: ``db``, ``session``,
        ``qf``, ``indexForStats``, ``normalizationBase``, ``tfp``, ``z``,
        ``zstatSig`` and ``zscore``.

        NOTE(review): if ``index`` is missing, ``indexStrings`` is ``None``
        and the ``.find`` call in the single-index branch raises
        ``AttributeError``.
        """
        self.logger.log('comparing indexes')
        start = time.time()
        form = FieldStorage(req)
        id = form.get('id','data_grid')
        offset = str(form.get('offset', 0))
        # "N.M" offsets: N is the start position, M the adjust value.
        # adjustValue is only referenced by the commented-out slice near the
        # end of this function.
        if offset.find('.') != -1:
            startNum = int(offset[:offset.find('.')])
            adjustValue = int(offset[offset.find('.')+1:])
        else :
            startNum = int(offset)
            adjustValue = 0       
        howMany = int(form.get('page_size', 100))
        indexStrings = form.get('index', None)
        # NOTE(review): corpusSize is computed but not used again below.
        baseIdx = db.get_object(session, indexForStats)
        corpusSize = baseIdx.fetch_metadata(session)['nOccs']
        indexList = []
        addTfp = False
        # A list means we are comparing indexes; otherwise it is just one.
        # Each indexList entry is (index object, index whose 'nOccs' total is
        # used for normalisation).  For names without 'gram' that is the index
        # itself; for 'gram' names it is 'sentence-idx' when the name (minus
        # '-idx') has no '-', else the name up to its last '-' plus '-idx'.
        if (indexStrings.__class__ == list):
            # The tfp column is only offered when the first index is not a
            # 'gram' index.
            if (indexStrings[0].find('gram') == -1):
                addTfp = True
            for i in range(0, len(indexStrings)):
                if indexStrings[i].find('gram') == -1:
                    compareIndex = db.get_object(session, '%s' % indexStrings[i])
                else: 
                    if indexStrings[i].replace('-idx', '').find('-') == -1:
                        compareIndex = db.get_object(session, 'sentence-idx')
                    else:                   
                        compareIndex = db.get_object(session, '%s-idx' % indexStrings[i][:indexStrings[i].replace('-idx', '').rfind('-')])
                indexList.append((db.get_object(session, '%s' % indexStrings[i]), compareIndex))  
        else :
            if (indexStrings.find('gram') == -1):
                addTfp = True
                compareIndex = db.get_object(session, '%s' % indexStrings)
            else:
                if indexStrings.replace('-idx', '').find('-') == -1:
                    compareIndex = db.get_object(session, 'sentence-idx')
                else:
                    compareIndex = db.get_object(session, '%s-idx' % indexStrings[:indexStrings.replace('-idx', '').rfind('-')])
            indexList.append((db.get_object(session, '%s' % indexStrings), compareIndex))  

#            indexList.append(db.get_object(session, '%s' % indexStrings))
        # 
        output = []
        firstIndex = indexList[0][0]
        
        firstTotal = indexList[0][1].fetch_metadata(session)['nOccs']
        # Placeholder query; its term value is overwritten for every word
        # before it is used to scan the other indexes.
        q = qf.get_query(session, 'idx-foo any "bar"')
        appending = True
        # A negative start position means "page backwards": remember the
        # direction and continue with the absolute value.
        if startNum < 0 :
            appending = False
            startNum = startNum/-1
        
        idxLength = firstIndex.fetch_metadata(session)['nTerms']
        completed = False
        cycles = 0
        firstStart = startNum
        # Keep fetching pages of terms until enough rows have been produced
        # (rows can be dropped by the z-score filter below).
        # NOTE(review): `completed` is only ever set when paging forwards;
        # confirm the loop terminates when paging backwards and fewer than
        # howMany rows qualify.
        while len(output) < howMany and completed == False:     
            if appending:
                startNum = int(firstStart+(howMany*cycles))
            else:
                startNum = int(startNum-(howMany*cycles))
            cycles += 1
            # Forward paging: this is the last page once the remaining terms
            # fit within one page.
            if appending and idxLength-(startNum) <= howMany:
                completed = True
            if appending:
                termList = firstIndex.fetch_termFrequencies(session, 'occ', startNum, min(howMany, idxLength-(startNum)), '>')
            else:
                termList = firstIndex.fetch_termFrequencies(session, 'occ', startNum, min(howMany, startNum), '<')         
            for i, t in enumerate(termList):                
                cells = []
                # presumably t[1] is the term id and t[2] its occurrence
                # count -- verify against fetch_termFrequencies
                word = firstIndex.fetch_termById(session, t[1])
                q.term.value = word
                # Occurrences scaled by normalizationBase relative to the
                # first comparison total, rounded to two decimals.
                percentage = round((float(t[2]) / float(firstTotal) * normalizationBase), 2)
                firstIndexName = indexList[0][0].id[:indexList[0][0].id.find('-idx')]
                
                # First cell: row number, counting up or down with direction.
                if appending:
                    cells.append('<td>%d</td>' % (i + 1 + startNum))
                else:                   
                    cells.append('<td>%d</td>' % (startNum + 1 - i))
                # This try/except/else deals with whether we are viewing one
                # index or more than one
                # NOTE(review): `word` is interpolated into HTML/JavaScript
                # below without escaping -- confirm terms cannot contain
                # quotes or markup.
                try:
                    indexList[1]
                except:
                    # A single index     
                    if addTfp == True and tfp == True:                                       
                        cells.append('<td><a href="javascript:searchFor(\'%s\', \'%s\')">%s</a></td><td><a href="javascript:tfpFor(\'%s\', \'%s\')">tfp</a></td><td>%s</td>' % (word, firstIndexName, word, word, firstIndexName, percentage))                     
                    else :
                        cells.append('<td><a href="javascript:searchFor(\'%s\', \'%s\')">%s</a></td><td>%s</td>' % (word, firstIndexName, word, percentage))                     
                    cells.append('<td>%s</td>' % t[2])     
                # more than one index
                else:                    
                    # NOTE(review): unlike the single-index branch, the '<' of
                    # the anchor tags is written as '&lt;' here -- confirm
                    # this is intended.
                    if addTfp == True and tfp == True:
                        cells.append('<td>&lt;a href="javascript:searchFor(\'%s\', \'%s\')">%s&lt;/a></td><td>&lt;a href="javascript:tfpFor(\'%s\', \'%s\')">tfp&lt;/a></td><td>%s</td>' % (word, firstIndexName, word, word, firstIndexName, percentage))                     
                    else :
                        cells.append('<td>&lt;a href="javascript:searchFor(\'%s\', \'%s\')">%s&lt;/a></td><td>%s</td>' % (word, firstIndexName, word, percentage))                     
                    othersTotal = 0
                    othersHits = 0 
                    self.logger.log(cells)              
                    # One cell per additional index: the word's normalised
                    # count there, or 0 when the scan does not land on it.
                    for j in range(1, len(indexList)):
                        total = indexList[j][1].fetch_metadata(session)['nOccs']
                        othersTotal += total
                        occs = indexList[j][0].scan(session, q, 1)    
                        
                        if (occs[0][0] == word):
                            othersHits += occs[0][1][2]
                            # add each cell
                            normalisedOccs = round((float(occs[0][1][2]) / float(total) * normalizationBase), 2)
                            cells.append('<td>%s</td>' % normalisedOccs)
                        else :
                            cells.append('<td>0</td>')                        
                    # With z enabled, the row is kept only if its z-score
                    # reaches zstatSig; otherwise it is skipped entirely.
                    if z :
                        zstat = zscore(othersHits, t[2], othersTotal, indexList[0][1].fetch_metadata(session)['nOccs'])
                        if zstat >= zstatSig:
                            cells.append('<td>%s</td>' % zstat)
                        else :
                            continue      
                output.append('<tr>%s</tr>' % ''.join(cells))

            # NOTE(review): when paging backwards the whole accumulated list
            # is reversed on every cycle of the while loop -- confirm this is
            # intended when more than one cycle runs.
            if not appending:
                output.reverse()
           # output = output[adjustValue:]
        # Only the seconds remainder of the elapsed time is logged.
        (mins, secs) = divmod(time.time()-start, 60)
        self.logger.log('scanning complete: %s' % secs) 
        return '<ajax-response><response type="object" id="%s_updater"><rows update_ui="true">%s</rows></response></ajax-response>' % (id, ''.join(output))
示例#7
0
 def search(self, req):
     """Run (or reuse) a search and return a summary, or KWIC lines for a result set.

     Form fields read: ``type``, ``terms``, ``book`` (default ``all``),
     ``caseSensitive``, ``id``, ``span``, ``windowsize``, ``gid``,
     ``context``, ``offset`` and ``page_size``.

     * If ``id`` is supplied, its first ten characters are dropped and the
       result of ``self.kwicDisplay`` for that result set is returned.
     * Else, if ``gid`` is supplied, ``self.kwicDisplay`` is called with it
       unchanged.
     * Otherwise a result-set id is built from the search parameters.  A
       stored result set with that id is reused; if none exists the query
       is executed and its result set stored.  A ``<results>`` XML summary
       string is returned.

     Raises:
         ValueError: if ``span``, ``windowsize``, ``offset`` or
             ``page_size`` cannot be converted to ``int``.
     """
     self.logger.log('search called')
     started = time.time()
     form = FieldStorage(req)
     type_ = form.get('type', None)
     terms = form.get('terms', None)
     book = form.get('book', 'all')
     # Any non-empty checkbox value selects case-sensitive ("s") matching.
     caseSensitive = "s" if form.get('caseSensitive', None) else "i"
     id_ = form.get('id', None)
     span = int(form.get('span', 0))
     wordWindow = int(form.get('windowsize', 10))
     gid = form.get('gid', None)

     if id_:
         # remove the 'kwic_grid_' that comes from LiveGrid id
         self.logger.log('ID SUPPLIED DISPLAYING LINES')
         offset = int(form.get('offset', 0))
         howmany = int(form.get('page_size', 100))
         return self.kwicDisplay(id_[10:], offset, howmany)
     if gid is not None:
         offset = int(form.get('offset', 0))
         howmany = int(form.get('page_size', 100))
         return self.kwicDisplay(gid, offset, howmany)

     if terms is None:
         # NOTE(review): only logged; `terms` is still passed to
         # multiReplace below -- confirm how a missing `terms` field should
         # be answered.
         self.logger.log('no terms')

     # The result-set id encodes every parameter that affects the search,
     # so an identical request finds the stored result set.
     safeTerms = multiReplace(terms, {'"' : '*', ' ' : '_', '<' : '(', '>' : ')'})
     id_ = '%s|%s|%s|%d|%d|%s|%s|' % (form.get('context', None), type_, safeTerms, span, wordWindow, caseSensitive, book)
     try:
         rs = resultSetStore.fetch_resultSet(session, id_)
     except cheshire3.exceptions.ObjectDoesNotExistException:
         if type_ == 'CQL':
             queryString = terms
         else:
             (queryString, _idx) = self.build_query(id_)

         query = qf.get_query(session, queryString)
         (mins, secs) = divmod(time.time() - started, 60)
         self.logger.log('%s\nquery parsed: %s' % (queryString, secs))
         rs = db.search(session, query)

         (mins, secs) = divmod(time.time() - started, 60)
         self.logger.log('db searched: %s' % secs)

         # Save ResultSet
         resultSetStore.begin_storing(session)
         rs.id = id_
         resultSetStore.store_resultSet(session, rs)
         resultSetStore.commit_storing(session)

     # The occurrence total is optional: a result set that cannot supply it,
     # or that reports zero, is shown as 'unavailable'.
     try:
         totalOccs = rs.totalOccs
     except Exception:
         totalOccs = 'unavailable'
     if totalOccs == 0:
         totalOccs = 'unavailable'

     (mins, secs) = divmod(time.time() - started, 60)
     (hours, mins) = divmod(mins, 60)
     self.logger.log('search complete: %d:%d:%d' % (hours, mins, secs))
     # NOTE(review): id_ contains request-supplied text that is not
     # XML-escaped (e.g. '&', or '<' in `context`/`book`) -- confirm the
     # consumer tolerates this.
     return '<results><rsid>%s</rsid><totalDocs>%i</totalDocs><totalOccs>%s</totalOccs></results>' % (id_, len(rs), str(totalOccs))
    def handle_workflowRequest(self, config, req):
        """Run the workflow described by the posted ``requestxml`` over each input.

        The ``requestxml`` form field must contain a ``workflow`` element
        (turned into a ``SimpleWorkflow``) and an ``inputs`` element.  Each
        element child of ``inputs`` names an object type in its ``type``
        attribute.  For ``document.StringDocument`` the data comes from the
        uploaded ``file`` field, otherwise from the XML of the input's first
        child.  The named class is instantiated with that data and passed to
        the workflow.

        Progress and errors are reported by writing ``ERROR : ...`` lines to
        ``req``; the method always returns ``None``.  ``config`` is not used
        by this method.
        """
        postdata = FieldStorage(req)

        xmlstr = postdata.get('requestxml', None)
        if not xmlstr:
            time.sleep(1)
            req.write('ERROR : No request XML submitted\n')
            return

        doc = document.StringDocument(xmlstr)
        rec = mdp.process_document(session, doc)

        self.log.write('rec.get_xml():\n%s\n' % rec.get_xml(session))

        dom = rec.get_dom(session)
        # Look the elements up without indexing first: indexing an empty
        # result would raise IndexError before the "Junk XML" errors below
        # could ever be reported.
        workflowNodes = dom.getElementsByTagName('workflow')
        inputsNodes = dom.getElementsByTagName('inputs')
        wfobj = None
        if workflowNodes:
            wfConf = self._generateWorkflowConfigNode(workflowNodes[0])
            wfobj = SimpleWorkflow(session, wfConf, serv)

        time.sleep(1)
        if not wfobj:
            req.write('ERROR : Junk XML - must contain workflow element\n')
            return
        elif not inputsNodes:
            req.write('ERROR : Junk XML - must contain inputs element\n')
            return
        inputs = inputsNodes[0]

        # 1-based position of the current input element, used in messages.
        iCount = 0
        self.log.write('inputs:%s\n' % inputs.toxml())
        for node in inputs.childNodes:
            self.log.write('input:%s\n' % node.toxml())
            if node.nodeType != elementType:
                # Skip anything that is not an element node.
                continue
            iCount += 1
            objectType = node.getAttribute('type')

            if objectType == 'document.StringDocument':
                # A missing upload yields None, which has no ``value``.
                try:
                    data = postdata.get('file', None).value
                except AttributeError:
                    data = None
            else:
                firstChild = node.firstChild
                data = firstChild.toxml() if firstChild is not None else None

            if not data:
                req.write('ERROR : No input data provided\n')
                return

            # NOTE(review): a five second pause per input; purpose not
            # evident from this code -- confirm it is still required.
            time.sleep(5)
            # NOTE(review): the module and class are chosen by the request
            # XML, i.e. by the client -- confirm the permitted types are
            # restricted elsewhere.
            modName, _sep, clName = objectType.rpartition('.')
            mod = dynamic.globalImport(modName, [])
            cl = getattr(mod, clName)
            inobj = cl(data)
            try:
                wfobj.process(session, inobj)
            except ObjectAlreadyExistsException as e:
                time.sleep(1)
                req.write(
                    'ERROR : One or more records in input %d already exist in the database : %s\n'
                    % (iCount, e))
            except Exception as e:
                # Report the failure and carry on with the next input.
                time.sleep(1)
                req.write(
                    'ERROR : Something went wrong while processing input %d : %s\n'
                    % (iCount, e))
示例#9
0
    def handle_workflowRequest(self, config, req):
        """Run the workflow described by the posted ``requestxml`` over each input.

        The ``requestxml`` form field must contain a ``workflow`` element
        (turned into a ``SimpleWorkflow``) and an ``inputs`` element.  Each
        element child of ``inputs`` names an object type in its ``type``
        attribute.  For ``document.StringDocument`` the data comes from the
        uploaded ``file`` field, otherwise from the XML of the input's first
        child.  The named class is instantiated with that data and passed to
        the workflow.

        Progress and errors are reported by writing ``ERROR : ...`` lines to
        ``req``; the method always returns ``None``.  ``config`` is not used
        by this method.
        """
        postdata = FieldStorage(req)

        xmlstr = postdata.get('requestxml', None)
        if not xmlstr:
            time.sleep(1)
            req.write('ERROR : No request XML submitted\n')
            return

        doc = document.StringDocument(xmlstr)
        rec = mdp.process_document(session, doc)

        self.log.write('rec.get_xml():\n%s\n' % rec.get_xml(session))

        dom = rec.get_dom(session)
        # Look the elements up without indexing first: indexing an empty
        # result would raise IndexError before the "Junk XML" errors below
        # could ever be reported.
        workflowNodes = dom.getElementsByTagName('workflow')
        inputsNodes = dom.getElementsByTagName('inputs')
        wfobj = None
        if workflowNodes:
            wfConf = self._generateWorkflowConfigNode(workflowNodes[0])
            wfobj = SimpleWorkflow(session, wfConf, serv)

        time.sleep(1)
        if not wfobj:
            req.write('ERROR : Junk XML - must contain workflow element\n')
            return
        elif not inputsNodes:
            req.write('ERROR : Junk XML - must contain inputs element\n')
            return
        inputs = inputsNodes[0]

        # 1-based position of the current input element, used in messages.
        iCount = 0
        self.log.write('inputs:%s\n' % inputs.toxml())
        for node in inputs.childNodes:
            self.log.write('input:%s\n' % node.toxml())
            if node.nodeType != elementType:
                # Skip anything that is not an element node.
                continue
            iCount += 1
            objectType = node.getAttribute('type')

            if objectType == 'document.StringDocument':
                # A missing upload yields None, which has no ``value``.
                try:
                    data = postdata.get('file', None).value
                except AttributeError:
                    data = None
            else:
                firstChild = node.firstChild
                data = firstChild.toxml() if firstChild is not None else None

            if not data:
                req.write('ERROR : No input data provided\n')
                return

            # NOTE(review): a five second pause per input; purpose not
            # evident from this code -- confirm it is still required.
            time.sleep(5)
            # NOTE(review): the module and class are chosen by the request
            # XML, i.e. by the client -- confirm the permitted types are
            # restricted elsewhere.
            modName, _sep, clName = objectType.rpartition('.')
            mod = dynamic.globalImport(modName, [])
            cl = getattr(mod, clName)
            inobj = cl(data)
            try:
                wfobj.process(session, inobj)
            except ObjectAlreadyExistsException as e:
                time.sleep(1)
                req.write(
                    'ERROR : One or more records in input %d already exist in the database : %s\n'
                    % (iCount, e))
            except Exception as e:
                # Report the failure and carry on with the next input.
                time.sleep(1)
                req.write(
                    'ERROR : Something went wrong while processing input %d : %s\n'
                    % (iCount, e))
示例#10
0
    def handle(self, req):
        """Fetch a Resource Map, parse it and send it back re-serialized.

        Form fields read: ``strict``, ``mimeType``, ``extension`` and
        ``aggregation``.  The output serialization is chosen from, in order,
        the ``mimeType`` field, the ``extension`` field, the ``Accept``
        header, and finally the ``rdf.xml`` default.

        Fetch, parse and serialization failures are reported to the client
        through ``self.error``; on success the serialized data is sent with
        the serializer's MIME type.  The method returns ``None``.

        Raises:
            ValueError: if ``extension`` is given but is not a key of
                ``srlzHash``.
        """
        def describe(exc):
            # ``message`` is empty or absent on many exception types, so
            # fall back to str() to keep the report informative.
            return getattr(exc, 'message', '') or str(exc)

        # Everything after the first five characters of the URI is treated as
        # the target URI (presumably a fixed mount prefix -- TODO confirm).
        path = req.uri[5:]
        form = FieldStorage(req)

        # Any of the listed "falsy" spellings switches strict parsing off.
        strict = form.get('strict', True)
        if strict in ['false', 'False', '0', None, '']:
            strict = False

        # NOTE(review): presumably restores '+' characters that URL decoding
        # turned into spaces (e.g. "application/rdf+xml") -- confirm.
        mt = form.get('mimeType', '').replace(' ', '+')

        if not mt:
            xtn = form.get('extension', '')
            if xtn:
                if xtn not in srlzHash:
                    # can't continue
                    raise ValueError(xtn)
                mt = srlzHash[xtn].mimeType

        if not mt:
            # Content negotiation is best-effort: a missing or unparsable
            # Accept header simply falls through to the default below.
            try:
                wanted = req.headers_in['Accept']
                mts = conneg.parse(wanted)
                mt = conneg.best(mts, mimeList)
            except Exception:
                mt = ''

        if mt:
            xtn = mimeHash[str(mt)]
        else:
            # default to rdf/xml
            xtn = "rdf.xml"

        srlz = srlzHash[xtn]

        if 'aggregation' in form:
            uri = form.get('aggregation')
        else:
            uri = path

        if not uri:
            data = '<html><body>Instructions etc. goes here</body></html>'
            self.send(data, req, ct="text/html")
            return
        elif not protoUriRe.match(uri):
            self.error("Resource Map URI must be a protocol based URI", req)
            return

        try:
            # fetch
            rd = ReMDocument(uri)
        except Exception as e:
            self.error(
                "Could not retrieve Resource Map from '%s': %s" %
                (uri, describe(e)), req)
            return

        try:
            # parse
            if rd.format == 'atom':
                parser = ap
            elif rd.format == 'rdfa':
                parser = rdfap
            else:
                parser = p
            if not strict:
                parser.strict = False
            try:
                rem = parser.parse(rd)
            finally:
                # The parser objects come from outside this method, so
                # strict mode is always restored, whether or not parsing
                # succeeded.
                parser.strict = True
        except OreException as e:
            # get exception message
            self.error("Resource Map Invalid: %s" % describe(e), req)
            return
        except SAXParseException as e:
            self.error(
                "Could not parse XML: %s (line %s, column %s)" %
                (e.getMessage(), e.getLineNumber(), e.getColumnNumber()), req)
            return

        try:
            # serialize
            rem2 = rem._aggregation_.register_serialization(
                srlz, 'http://foresite.cheshire3.org/%s#rem' % req.uri)
            serialized = rem2.get_serialization()
            data = serialized.data
            if srlz == srlzHash['rdfa.html']:
                # RDFa output is wrapped in a minimal XHTML page.
                data = '<xhtml xmlns="http://www.w3.org/1999/xhtml"><body><i>Invisible RDFa resource map follows, it must have validated okay. [view source] :)</i>' + data + "</body></xhtml>"
        except Exception as e:
            self.error(
                "Could not serialize Aggregation to Resource Map: %s" %
                describe(e), req)
            return

        self.send(data, req, ct=srlz.mimeType)