Exemplo n.º 1
0
class dblpHandler(handler.ContentHandler):
    """SAX handler that collects paper records and passes them to Redis.

    A record is any element whose tag is listed in ``paperLabels``.  For each
    record the handler gathers the conference token (second ``/``-separated
    segment of the ``key`` attribute), the text of every ``<author>`` child
    and the text of the ``<year>`` child, then calls
    ``RedisHelper.addPaperItem(authors, paperId, conf, year)`` when the record
    element closes.

    ``paperId`` is a running counter that is advanced once per ``<title>``
    seen inside a record (starting at 0 for the first one).
    """

    def __init__(self):
        # Explicit base call (not super()) so this also works where
        # ContentHandler is an old-style class.
        handler.ContentHandler.__init__(self)
        self.mRedis = RedisHelper()
        self.isPaperTag = False
        self.conf = ''
        self.isTitleTag = False
        self.paperId = -1
        self.isAuthorTag = False
        self.authors = list()
        self.isYearTag = False
        self.year = ''
        # Text chunks of the <author>/<year> element currently being read.
        self._textParts = []

    def startDocument(self):
        """Log that parsing has begun."""
        logging.info('Document start...')

    def endDocument(self):
        """Log that parsing has finished."""
        logging.info('Document End...')

    def startElement(self, name, attrs):
        """Open a record, or arm the flag for a field inside an open record.

        :param name: tag name of the element being opened.
        :param attrs: SAX attributes object of that element.
        """
        if name in paperLabels:
            # A missing key, or one without a '/', yields an empty conf
            # instead of aborting the whole parse.
            keyParts = (attrs.get('key') or '').split('/')
            self.conf = keyParts[1] if len(keyParts) > 1 else ''
            self.isPaperTag = True
        if self.isPaperTag:
            if name == 'title':
                self.isTitleTag = True
            elif name == 'author':
                self.isAuthorTag = True
                self._textParts = []
            elif name == 'year':
                self.isYearTag = True
                self._textParts = []

    def endElement(self, name):
        """Finalise a buffered field, or flush the record when it closes.

        :param name: tag name of the element being closed.
        """
        if name == 'author':
            if self.isAuthorTag:
                self.isAuthorTag = False
                author = ''.join(self._textParts)
                self._textParts = []
                # An empty <author/> contributes nothing.
                if author:
                    self.authors.append(author)
        elif name == 'year':
            if self.isYearTag:
                self.isYearTag = False
                self.year = ''.join(self._textParts)
                self._textParts = []
        elif name in paperLabels:
            if self.isPaperTag:
                self.isPaperTag = False
                self.mRedis.addPaperItem(self.authors, self.paperId, self.conf,
                                         self.year)
                # Progress marker every 1000 ids.
                if self.paperId % 1000 == 0:
                    logging.info(self.paperId)
                self.conf = ''
                self.authors = []
                self.year = ''

    def characters(self, content):
        """Consume one chunk of character data.

        The parser may split a single text node into several calls (for
        example around entity references), so author/year text is only
        buffered here and assembled in ``endElement``.

        :param content: the chunk of text reported by the parser.
        """
        if self.isTitleTag:
            # Only the first chunk of a title advances the counter.
            self.paperId += 1
            self.isTitleTag = False
        if self.isYearTag or self.isAuthorTag:
            self._textParts.append(content)
Exemplo n.º 2
0
class dblpHandler(handler.ContentHandler):
    """SAX handler that collects paper records and passes them to Redis.

    A record is any element whose tag is listed in ``paperLabels``.  For each
    record the handler gathers the conference token (second ``/``-separated
    segment of the ``key`` attribute), the text of every ``<author>`` child
    and the text of the ``<year>`` child, then calls
    ``RedisHelper.addPaperItem(authors, paperId, conf, year)`` when the record
    element closes.

    ``paperId`` is a running counter that is advanced once per ``<title>``
    seen inside a record (starting at 0 for the first one).
    """

    def __init__(self):
        # Explicit base call (not super()) so this also works where
        # ContentHandler is an old-style class.
        handler.ContentHandler.__init__(self)
        self.mRedis = RedisHelper()
        self.isPaperTag = False
        self.conf = ''
        self.isTitleTag = False
        self.paperId = -1
        self.isAuthorTag = False
        self.authors = list()
        self.isYearTag = False
        self.year = ''
        # Text chunks of the <author>/<year> element currently being read.
        self._textParts = []

    def startDocument(self):
        """Log that parsing has begun."""
        logging.info('Document start...')

    def endDocument(self):
        """Log that parsing has finished."""
        logging.info('Document End...')

    def startElement(self, name, attrs):
        """Open a record, or arm the flag for a field inside an open record.

        :param name: tag name of the element being opened.
        :param attrs: SAX attributes object of that element.
        """
        if name in paperLabels:
            # A missing key, or one without a '/', yields an empty conf
            # instead of aborting the whole parse.
            keyParts = (attrs.get('key') or '').split('/')
            self.conf = keyParts[1] if len(keyParts) > 1 else ''
            self.isPaperTag = True
        if self.isPaperTag:
            if name == 'title':
                self.isTitleTag = True
            elif name == 'author':
                self.isAuthorTag = True
                self._textParts = []
            elif name == 'year':
                self.isYearTag = True
                self._textParts = []

    def endElement(self, name):
        """Finalise a buffered field, or flush the record when it closes.

        :param name: tag name of the element being closed.
        """
        if name == 'author':
            if self.isAuthorTag:
                self.isAuthorTag = False
                author = ''.join(self._textParts)
                self._textParts = []
                # An empty <author/> contributes nothing.
                if author:
                    self.authors.append(author)
        elif name == 'year':
            if self.isYearTag:
                self.isYearTag = False
                self.year = ''.join(self._textParts)
                self._textParts = []
        elif name in paperLabels:
            if self.isPaperTag:
                self.isPaperTag = False
                self.mRedis.addPaperItem(self.authors, self.paperId, self.conf, self.year)
                # Progress marker every 1000 ids.
                if self.paperId % 1000 == 0:
                    logging.info(self.paperId)
                self.conf = ''
                self.authors = []
                self.year = ''

    def characters(self, content):
        """Consume one chunk of character data.

        The parser may split a single text node into several calls (for
        example around entity references), so author/year text is only
        buffered here and assembled in ``endElement``.

        :param content: the chunk of text reported by the parser.
        """
        if self.isTitleTag:
            # Only the first chunk of a title advances the counter.
            self.paperId += 1
            self.isTitleTag = False
        if self.isYearTag or self.isAuthorTag:
            self._textParts.append(content)