示例#1
0
def _get_uptox_items(x, queue):
    """Collect up to ``x`` unprocessed files from ``queue`` into one payload.

    Filenames are taken from ``queue`` until ``x`` files have been read or
    the queue reports itself empty. A file for which
    ``PaperParser.paperExists`` returns a paper is deleted from disk,
    reported to the watcher, and does not count towards ``x``.

    Returns the list of ``(filename, file_bytes)`` tuples, pickled with
    ``cPickle``, compressed with ``zlib`` and wrapped in ``Binary``.
    """
    work = []
    i = 0

    # Boolean ``and`` rather than bitwise ``&``: short-circuits, so the queue
    # is only probed while there is still room in the batch.
    # NOTE(review): empty() followed by a blocking get() can stall if another
    # consumer drains the queue in between - confirm this is the only reader.
    while i < x and not queue.empty():
        filename = queue.get()

        p = PaperParser()

        paper = p.paperExists(filename)

        if paper is not None:
            logger.info("Paper already exists for %s, skipping...", filename)
            os.unlink(filename)
            inform_watcher(logger, filename, exists=True, paperObj=paper)
            continue

        with open(filename, 'rb') as f:
            data = f.read()

        # Only files that were actually read count towards the batch size.
        i += 1
        work.append((filename, data))

    return Binary(zlib.compress(cPickle.dumps(work)))
示例#2
0
    def paperExists(self, infile):
        """Return True if paper with same authors and title already in db.

        The lookup is delegated to ``PaperParser.paperExists``. As a side
        effect, when a paper is found the watcher is informed (with
        ``exists=True`` and the found paper object) before returning.
        """
        paper = PaperParser().paperExists(infile)

        # Identity check: ``None`` is the "not found" marker, and ``is`` does
        # not depend on how the paper object implements comparison.
        if paper is None:
            return False

        inform_watcher(self.logger, infile, exists=True, paperObj=paper)
        return True
示例#3
0
    def run(self):
        """Main event loop for the result handler.

        Sets ``self.running`` and, while it stays set, polls ``self.queue``
        without blocking. A ``PreprocessingException`` result is logged,
        reported to the watcher and sent as an error report; any other result
        is passed to ``self._process_paper``. ``self.paper_files`` is reset
        for every result and ``self.cleanupFiles`` is called afterwards with
        the paper object (``None`` when processing failed). When the queue is
        empty the loop sleeps for one second.
        """
        self.running = True

        while self.running:
            try:
                self.paper_files = []
                item = self.queue.get(block=False)
                paperObj = None

                if not isinstance(item, PreprocessingException):
                    # Regular result: hand it to the paper processor.
                    try:
                        paperObj = self._process_paper(item)
                    except Exception as err:
                        tb = sys.exc_info()[2]
                        self.logger.error("Error processing paper %s: %s",
                                          item[0], err)

                        # Log the traceback one formatted entry at a time.
                        for entry in traceback.format_tb(tb):
                            self.logger.error(entry)

                        inform_watcher(self.logger, item[0], exception=err)
                else:
                    # Preprocessing failed: get exception information and
                    # dump it to the user.
                    self.logger.error("Error processing paper %s: %s",
                                      item.paper, item)

                    self.paper_files.append((item.paper, 'keep'))

                    if item.pdf:
                        # The matching PDF lives in the watch directory under
                        # the same base name as the failed paper.
                        stem = os.path.splitext(
                            os.path.basename(item.paper))[0]
                        pdf_path = os.path.join(self.watchdir, stem + ".pdf")
                        self.paper_files.append((pdf_path, 'move'))

                    inform_watcher(self.logger, item.paper, exception=item)

                    # Sending the error report is best effort; a failure is
                    # only logged.
                    try:
                        send_error_report(item, item.traceback, item.files)
                    except Exception as err:
                        self.logger.error("ERROR SENDING EMAIL: %s", err)

                self.cleanupFiles(paperObj)

            except Empty:
                self.logger.debug("No work to do.. going back to sleep")
                time.sleep(1)
示例#4
0
    def handleProcessingException(self, result):
        """Method for handling processing exceptions.

        Logs the failure, informs the watcher, tries to send an error report
        with the failed paper attached, and finally calls ``self.cleanup`` on
        the paper. A failure to send the report is logged and otherwise
        ignored.

        ``result`` is the exception object; ``result.paper`` and
        ``result.traceback`` are read from it.
        """
        # get exception information and dump to user
        self.logger.error("Error processing paper %s: %s", result.paper,
                          result)

        inform_watcher(self.logger, result.paper, exception=result)

        # Diagnostic only: sent to the debug log instead of stdout, and kept
        # outside the try below so that a result without a ``files``
        # attribute cannot prevent the error report from being sent.
        self.logger.debug("Files for failed paper %s: %s", result.paper,
                          getattr(result, 'files', None))

        try:
            # send the error report
            send_error_report(result, result.traceback, [result.paper])
        except Exception as e:
            self.logger.error("ERROR SENDING EMAIL: %s", e)

        self.cleanup(result.paper)
示例#5
0
    def store(self, result):
        """Once a document has been handled, store it in file.

        ``result`` is either a ``PreprocessingException`` or a
        ``(filename, outfile, timetaken)`` tuple.

        For a tuple, the paper is stored and classified, its related files
        are saved, the watcher is informed, the paper is optionally tweeted
        (``TWITTER_ENABLED`` in ``config``) and the preprocessing stats
        ``(average time, sample count)`` are updated and saved.

        Returns the stored paper object, or ``None`` when ``result`` is a
        ``PreprocessingException`` or the paper already exists.
        """
        if isinstance(result, PreprocessingException):
            self.handleProcessingException(result)
            # Diagnostic only; the debug log replaces a stray print to stdout.
            self.logger.debug("Files for failed paper: %s",
                              getattr(result, 'files', None))
            return None

        filename, outfile, timetaken = result

        if self.paperExists(outfile):
            inform_watcher(
                self.logger,
                filename,
                exception=PaperExistsException("Paper Already Exists"))

            self.cleanup(filename)
            return None

        # store the paper object in database
        paperObj = self.storePaperData(outfile)

        # add paper classification to database
        paperObj = self.classifyPaper(paperObj)

        filenames = [filename, outfile]

        # A PDF with the same base name in the watch directory belongs to
        # this paper as well.
        name, _ = os.path.splitext(os.path.basename(filename))
        pdf = os.path.join(self.watchdir, name + ".pdf")

        if os.path.exists(pdf):
            filenames.append(pdf)

        # add the related files to the db
        self.savePaperFiles(filenames, paperObj)

        self.logger.info("Paper has been added successfully")

        # Notifying the watcher and tweeting are best effort: failures are
        # logged but do not undo the successful store.
        try:
            inform_watcher(self.logger, filename, paperObj=paperObj)
        except Exception as e:
            self.logger.warning(
                "Failed to inform watcher about paper success: %s", e)

        if config.get('TWITTER_ENABLED'):
            try:
                tweet_paper(paperObj)
            except Exception as e:
                self.logger.warning("Could not tweet about paper %s", e)

        # finally update stats
        average = self.stats[0]
        total = self.stats[1] + 1

        if average == 0.0:
            # An average of exactly 0.0 is treated as "no samples yet".
            self.stats = (timetaken, total)
        else:
            # Incremental mean: divide by the NEW sample count. Dividing by
            # the previous count over-weights the latest sample (the second
            # sample would replace the mean outright) and can divide by zero.
            self.stats = (average + ((timetaken - average) / total), total)

        # save the preprocessing stats to disk
        save_pp_stats(self.stats, self.outdir)

        return paperObj