Пример #1
0
    def test_id_parser(self):
        """Check arxiv_id_parser on an accepted input and a rejected one.

        The FILE_SOURCE_PDF / FILE_NAME_PDF input must yield the identifier
        u'1305.5767v1'; the FILE_SOURCE_TXT / FILE_NAME_TXT input must make
        the parser raise.
        """
        self.identifier = arxiv_id_parser.arxiv_id_parser(FILE_SOURCE_PDF, FILE_NAME_PDF)
        self.assertEqual(self.identifier, u'1305.5767v1')

        # assertRaises needs the exception *class* plus the callable and its
        # arguments, so that the call happens inside the assertion. Calling
        # the parser beforehand would let the exception escape the test.
        # NOTE(review): the original named Exception('NonArxivPdf'); confirm
        # the parser's message if it should be asserted as well.
        self.assertRaises(
            Exception,
            arxiv_id_parser.arxiv_id_parser,
            FILE_SOURCE_TXT,
            FILE_NAME_TXT)
Пример #2
0
    def test_id_parser(self):
        """Check arxiv_id_parser on an accepted input and a rejected one.

        The FILE_SOURCE_PDF / FILE_NAME_PDF input must yield the identifier
        u'1305.5767v1'; the FILE_SOURCE_TXT / FILE_NAME_TXT input must make
        the parser raise.
        """
        self.identifier = arxiv_id_parser.arxiv_id_parser(
            FILE_SOURCE_PDF, FILE_NAME_PDF)
        self.assertEqual(self.identifier, u'1305.5767v1')

        # assertRaises takes the exception *class* followed by the callable
        # and its arguments; the parser must be invoked by assertRaises
        # itself, otherwise the expected exception aborts the test first.
        # NOTE(review): the original named Exception('NonArxivPdf'); confirm
        # the parser's message if it should be asserted as well.
        self.assertRaises(
            Exception, arxiv_id_parser.arxiv_id_parser,
            FILE_SOURCE_TXT, FILE_NAME_TXT)
Пример #3
0
def arxiv_pyler(path):
    """Build metadata entries for every ``.pdf`` file found under *path*.

    The directory tree rooted at ``path`` is walked with ``os.walk``. Each
    file whose name ends in ``.pdf`` is passed through ``arxiv_id_parser``,
    ``arxiv_query`` and ``xml_parser``; the resulting entry gets a
    ``'file_source'`` key holding the file's full path and is collected.
    The collected list is then handed to ``html_generator``.

    A file whose processing raises is skipped after printing the
    exception's arguments, so one bad file does not stop the run.

    Args:
        path: Directory to scan. ``None`` is accepted and ignored.

    Returns:
        The list of entries, or ``None`` when ``path`` is ``None`` or does
        not exist.
    """
    # The None guard has to run before os.path.exists(), which is not meant
    # to be handed None.
    if path is None:
        return
    if not os.path.exists(path):
        print('path does not exist.')
        return

    entry_list = []
    print(path)
    for root, _dirs, files in os.walk(path):
        for pdf in files:
            # Literal suffix test: the regex '.pdf$' also matched names such
            # as 'xpdf' because its dot was unescaped.
            if not pdf.endswith('.pdf'):
                continue
            try:
                source = os.path.join(root, pdf)
                identifier = arxiv_id_parser(source, pdf)
                xml_content = arxiv_query(identifier)
                entry_content = xml_parser(xml_content, identifier)
                # Tag the entry before storing it, so a failure here cannot
                # leave a half-built entry behind in entry_list.
                entry_content.update({'file_source': source})
            except Exception as inst:
                # Best effort: report the failure and move on to the next file.
                for error in inst.args:
                    print(error)
            else:
                entry_list.append(entry_content)
    html_generator(entry_list)
    print('complete')
    return entry_list