Пример #1
0
class EconPapers:
    """Fetch and parse content from the EconPapers website.

    :param str journal: journal name, forwarded to the literature page parser
    :param str journal_web: journal home page URL
    """

    def __init__(self, journal=None, journal_web=None):
        self.journal_web = journal_web
        self.scraper = SiteScraper(journal_web)
        # Journal name
        self.journal = journal
        # Literature page URLs; stays None until to_literature_websites() runs
        self.literature_websites = None
        # Collected literature records, one entry per parsed page
        self.literature_info = []

    def to_literature_websites(self, condition=None, filter=None):
        """Build the list of literature page URLs.

        The result is stored in ``self.literature_websites``; each entry is
        ``journal_web`` concatenated with a page reported by the scraper.

        :param str condition: selection condition forwarded to the
            scraper's ``get_links``
        :param str filter: optional regular expression; only pages for which
            ``re.search(filter, page)`` finds a match are kept
        :return: None
        """
        self.scraper.get_links(page_url="", condition=condition)
        pages = self.scraper.pages

        if filter is not None:
            pages = [page for page in pages if re.search(filter, page) is not None]

        self.literature_websites = [''.join([self.journal_web, page]) for page in pages]

    def get_literature_info(self, websites=None):
        """Parse literature pages and append their records to ``literature_info``.

        :param str,list websites: a single page URL or a list of URLs;
            defaults to ``self.literature_websites`` when omitted
        :return: None
        """
        if websites is None:
            websites = self.literature_websites

        # Accept a single URL as a convenience.
        if isinstance(websites, str):
            websites = [websites]

        for index, web in enumerate(websites):
            # Progress output: zero-based index of the page being parsed.
            print(index)
            econ_parser = EconPapersLitPageParser(page=web, journal=self.journal)
            self.literature_info.append(econ_parser.literature_info)

    def export_literature_websites(self, file):
        """Write the literature URL list to a JSON file.

        :param str file: path of the output file (overwritten if it exists)
        :return: None
        """
        # The context manager guarantees the file is flushed and closed,
        # even if serialization raises.
        with open(file, 'w', encoding='utf-8') as fp:
            json.dump(self.literature_websites, fp)
Пример #2
0
 def __init__(self, journal=None, journal_web=None):
     """Store the journal settings and create the site scraper.

     :param str journal: journal name
     :param str journal_web: journal home page, also passed to ``SiteScraper``
     """
     self.journal_web = journal_web
     self.scraper = SiteScraper(journal_web)
     # Journal name
     self.journal = journal
     # Collection of literature page URLs (not populated yet)
     self.literature_websites = None
     # List of literature records
     self.literature_info = []