else:
            self.email = extract(email_rule, self.sec).replace('mailto:', '')

    def _generate_website(self):
        if "website" in self.parse_data.keys():
            if self.parse_data["website"]:
                regex = '"(.*?)"'
                res = re.search(regex, str(self.parse_data["website"]))
                self.website = res.group()
        else:
            self.website = "http://www.mccormick.northwestern.edu/mechanical/people/faculty/"

    def _generate_keywordKeys(self):
        self.keywordKeys = [i for i in range(1, len(self.keywords) + 1)]

    def _generate_cityKeys(self):
        self.cityKeys = [i for i in range(1, len(self.city) + 1)]

    def _generate_timeKeys(self):
        self.timeKeys = [i for i in range(1, len(self.timeKeys) + 1)]


if __name__ == '__main__':
    # Script entry point: configure a CommonTask with MccormickClass as the
    # custom parser and run it.
    task_options = {
        "website_name": MccormickClass.__name__,
        "custom_parser": MccormickClass,
        "base_url": base_url,
        "sample_url": sample_url,
        "data": data,
        "item_url_rule": item_url_rule,
    }
    MccormickTask = CommonTask(**task_options)
    MccormickTask.run()
    def _generate_website(self):
        if "website" in self.parse_data.keys():
            if self.parse_data["website"]:
                regex = '"(.*?)"'
                res = re.search(regex, str(self.parse_data["website"]))
                self.website = res.group()
        else:
            self.website = extract(website_rule, self.sec)

    def _generate_keywordKeys(self):
        self.keywordKeys = [i for i in range(1, len(self.keywords) + 1)]

    def _generate_cityKeys(self):
        self.cityKeys = [i for i in range(1, len(self.city) + 1)]

    def _generate_timeKeys(self):
        self.timeKeys = [i for i in range(1, len(self.timeKeys) + 1)]


if __name__ == '__main__':
    # Script entry point: configure a CommonTask with ECEUcsbClass as the
    # custom parser (URL joining disabled) and run it.
    task_options = {
        "website_name": ECEUcsbClass.__name__,
        "custom_parser": ECEUcsbClass,
        "base_url": base_url,
        "sample_url": sample_url,
        "data": data,
        "item_url_rule": item_url_rule,
        # NOTE(review): a disabled argument `="http://www.cs.ucsb.edu"` sat
        # here; presumably a default_url override -- confirm before restoring.
        "is_url_joint": False,
    }
    ECEUcsbTask = CommonTask(**task_options)
    ECEUcsbTask.run()
            if self.parse_data["bio"]:
                self.bio = self.parse_data["bio"]
        if len(bio_rule):
            self.bio = extract(bio_rule, self.sec).xpath('string(.)')
    def _generate_keywords(self):
        if "keywords" in self.parse_data.keys():
            if self.parse_data["keywords"]:
                self.keywords.append(self.parse_data["keywords"])
    def _generate_city(self):
        pass
    def _generate_time(self):
        pass
    def _generate_keywordKeys(self):
        self.keywordKeys = [i for i in range(1,len(self.keywords)+1)]
    def _generate_cityKeys(self):
        self.cityKeys = [i for i in range(1,len(self.city)+1)]
    def _generate_timeKeys(self):
        self.timeKeys = [i for i in range(1,len(self.timeKeys)+1)]

if __name__ == '__main__':
    # Script entry point: configure a CommonTask with CIVILNyuClass as the
    # custom parser, run it, then print the task's count attribute.
    task_options = {
        "website_name": CIVILNyuClass.__name__,
        "custom_parser": CIVILNyuClass,
        "base_url": base_url,
        "sample_url": sample_url,
        "data": data,
        "item_url_rule": item_url_rule,
        "default_url": "http://engineering.nyu.edu",
        "is_url_joint": True,
    }
    CIVILNyuTask = CommonTask(**task_options)
    CIVILNyuTask.run()
    print("count:", CIVILNyuTask.count)
示例#4
0
            if self.parse_data["keywords"]:
                self.keywords.append(self.parse_data["keywords"])

    def _generate_city(self):
        pass

    def _generate_time(self):
        pass

    def _generate_keywordKeys(self):
        self.keywordKeys = [i for i in range(1, len(self.keywords) + 1)]

    def _generate_cityKeys(self):
        self.cityKeys = [i for i in range(1, len(self.city) + 1)]

    def _generate_timeKeys(self):
        self.timeKeys = [i for i in range(1, len(self.timeKeys) + 1)]


if __name__ == '__main__':
    # Script entry point: configure a CommonTask with UcsdClass as the custom
    # parser, run it, then print the task's count attribute.
    task_options = {
        "website_name": UcsdClass.__name__,
        "custom_parser": UcsdClass,
        "base_url": base_url,
        "sample_url": sample_url,
        "data": data,
        "item_url_rule": item_url_rule,
        "default_url": "http://jacobsschool.ucsd.edu/faculty/faculty_bios/",
        "is_url_joint": True,
    }
    UcsdTask = CommonTask(**task_options)
    UcsdTask.run()
    print("count:", UcsdTask.count)
    def _generate_keywords(self):
        if "keywords" in self.parse_data.keys():
            if self.parse_data["keywords"]:
                self.keywords.append(self.parse_data["keywords"])
    def _generate_city(self):
        pass
    def _generate_time(self):
        pass
    def _generate_keywordKeys(self):
        self.keywordKeys = [i for i in range(1,len(self.keywords)+1)]
    def _generate_cityKeys(self):
        self.cityKeys = [i for i in range(1,len(self.city)+1)]
    def _generate_timeKeys(self):
        self.timeKeys = [i for i in range(1,len(self.timeKeys)+1)]

if __name__ == '__main__':
    # Script entry point: load the sample configuration, configure a
    # CommonTask with CSENdClass as the custom parser, run it, then print the
    # task's count attribute.
    from SampleData.cse_nd import base_url, sample_url, data, item_url_rule

    task_options = {
        "website_name": CSENdClass.__name__,
        "custom_parser": CSENdClass,
        "base_url": base_url,
        "sample_url": sample_url,
        "data": data,
        "item_url_rule": item_url_rule,
    }
    CSENdTask = CommonTask(**task_options)
    CSENdTask.run()
    print(CSENdTask.count)
    # Disabled ad-hoc snippet (kept for reference):
    # from SampleData.ame_nd import *
    # from utils.connection import extract, fetch
    # html = fetch("https://engineering.nd.edu/profiles/kchristensen")
    # a = extract(phone_rule, html).xpath('string(.)').strip().replace('Phone:','')
    # print(a)
示例#6
0
                self.email = self.parse_data["email"]

    def _generate_website(self):
        if "website" in self.parse_data.keys():
            if self.parse_data["website"]:
                regex = '"(.*?)"'
                res = re.search(regex, str(self.parse_data["website"]))
                self.website = res.group()
        else:
            self.website = extract(website_rule, self.sec)

    def _generate_keywordKeys(self):
        self.keywordKeys = [i for i in range(1, len(self.keywords) + 1)]

    def _generate_cityKeys(self):
        self.cityKeys = [i for i in range(1, len(self.city) + 1)]

    def _generate_timeKeys(self):
        self.timeKeys = [i for i in range(1, len(self.timeKeys) + 1)]


if __name__ == '__main__':
    # Script entry point: configure a CommonTask with CYBERUmdClass as the
    # custom parser and run it.
    task_options = {
        "website_name": CYBERUmdClass.__name__,
        "custom_parser": CYBERUmdClass,
        "base_url": base_url,
        "sample_url": sample_url,
        "data": data,
        "item_url_rule": item_url_rule,
        "default_url": "http://www.cyber.umd.edu",
        "is_url_joint": True,
    }
    CYBERUmdTask = CommonTask(**task_options)
    CYBERUmdTask.run()
示例#7
0
                self.bio = self.parse_data["bio"]
        if bio_rule:
            self.bio = extract(bio_rule, self.sec)
    def _generate_keywords(self):
        if "keywords" in self.parse_data.keys():
            if self.parse_data["keywords"]:
                self.keywords.append(self.parse_data["keywords"])
    def _generate_city(self):
        pass
    def _generate_time(self):
        pass
    def _generate_keywordKeys(self):
        self.keywordKeys = [i for i in range(1,len(self.keywords)+1)]
    def _generate_cityKeys(self):
        self.cityKeys = [i for i in range(1,len(self.city)+1)]
    def _generate_timeKeys(self):
        self.timeKeys = [i for i in range(1,len(self.timeKeys)+1)]

if __name__ == '__main__':
    # Script entry point: load the sample configuration, configure a
    # CommonTask with MEUdelClass as the custom parser, run it, then print the
    # task's count attribute.
    from SampleData.me_udel import base_url, sample_url, data, item_url_rule

    task_options = {
        "website_name": MEUdelClass.__name__,
        "custom_parser": MEUdelClass,
        "base_url": base_url,
        "sample_url": sample_url,
        "data": data,
        "item_url_rule": item_url_rule,
        "default_url": "http://www.me.udel.edu/people/",
        "is_url_joint": True,
    }
    MEUdelTask = CommonTask(**task_options)
    MEUdelTask.run()
    print("count:", MEUdelTask.count)
                self.bio = self.parse_data["bio"]
        if bio_rule:
            self.bio = extract(bio_rule, self.sec)
    def _generate_keywords(self):
        if "keywords" in self.parse_data.keys():
            if self.parse_data["keywords"]:
                self.keywords.append(self.parse_data["keywords"])
    def _generate_city(self):
        pass
    def _generate_time(self):
        pass
    def _generate_keywordKeys(self):
        self.keywordKeys = [i for i in range(1,len(self.keywords)+1)]
    def _generate_cityKeys(self):
        self.cityKeys = [i for i in range(1,len(self.city)+1)]
    def _generate_timeKeys(self):
        self.timeKeys = [i for i in range(1,len(self.timeKeys)+1)]

if __name__ == '__main__':
    # Script entry point: load the sample configuration, configure a
    # CommonTask with CBEUdelClass as the custom parser, run it, then print
    # the task's count attribute.
    from SampleData.cbe_udel import base_url, sample_url, data, item_url_rule

    task_options = {
        "website_name": CBEUdelClass.__name__,
        "custom_parser": CBEUdelClass,
        "base_url": base_url,
        "sample_url": sample_url,
        "data": data,
        "item_url_rule": item_url_rule,
        "default_url": "http://www.cbe.udel.edu/directory/",
        "is_url_joint": True,
    }
    CBETask = CommonTask(**task_options)
    CBETask.run()
    print(CBETask.count)