Python Inputs примеры использования

Язык программирования: Python

Пространство имен/Пакет: InputModule

Класс/Тип: Inputs

Примеров на hotexamples.com: 5

Python Inputs — найдено 5 примеров. Это лучшие примеры кода на Python для InputModule.Inputs, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

readKeywords(2)

contactPageRegular(1)

getCountries(1)

getLocals(1)

Пример #1

Показать файл

Файл: ContactInforFinder.py Проект: Elvis-Zhou/BusinessInfo-Crawler

    def findContactPageUrl(self,url):
        result=""
        if not url:
            return result
        if not url.startswith("http"):
            return result
        if FliterRegular.websiteFiltered(url):
            return
        print "Dealing the url to get the contact page:",url

        self.contactPageRegular=Inputs.contactPageRegular()
        shortUrllength=25

        htmlfile=self.getpage(url)
        try:
            soup=BeautifulSoup(htmlfile,'lxml')
        except BaseException:
            return ""

        for regular in self.contactPageRegular:
            contact=soup.find("a",{"href":re.compile(r".*?%s.*?" % regular,re.DOTALL|re.IGNORECASE)})
            if contact:
                if contact["href"].startswith("/"):
                    #print url+contact["href"]
                    return url+contact["href"]
                elif len(contact["href"])<shortUrllength:
                    #print url+"/"+contact["href"]
                    return url+"/"+contact["href"]
                else:
                    #print contact["href"]
                    return contact["href"]
        return ""

Пример #2

Показать файл

Файл: UrlFinder.py Проект: Elvis-Zhou/BusinessInfo-Crawler

    def main(self):

        max,threadLimit,local,sleeptime=self.showScreenInfor()

        print "Program Begin: "
        keys=Inputs.readKeywords()
        #开始对每个关键词进行处理

        for word in keys:
            print "Now ,the word is:",word,".\nIt is in progress."
            keyword=word.strip()
            self.mainGetUrls(keyword,max,sleeptime,local)

        print "All finish."

Пример #3

Показать файл

Файл: UrlFinder.py Проект: Elvis-Zhou/BusinessInfo-Crawler

 def mainGetUrls(self,word="led light bulbs",max=1000,sleeptime=0,local=0):
     countries=[]
     if local==1:
         countries=Inputs.getCountries()
     if (not max)or max=="0":
         max=1000
     else:
         max=int(max)*10
     if local==1:
         for country in countries:
             print "now dealing country:"+country
             self.max=max
             self.country=country
             self.word=word
             keyword={
                 "q":word,
                 "cr":"country"+country
             }
             for i in range(0,self.max,10):
                 self.page=i
                 print "page:",i/10,"item:",i
                 url=self.originurl % (urllib.urlencode(keyword),str(self.page))
                 htmlfile=self.getpage(url)
                 self.findTitleAndUrl(htmlfile)
                 self.saveList()
                 if (not sleeptime)or sleeptime=="0":
                     sleeptime=5
                 if sleeptime:
                     print "waiting for :"+str(sleeptime)+" second,then continue"
                     sleep(int(sleeptime))
     else:
         self.max=max
         self.country="UK"
         self.word=word
         keyword={
             "q":word
         }
         for i in range(0,self.max,10):
             self.page=i
             print "page:",i/10,"item:",i
             url=self.originurl % (urllib.urlencode(keyword),str(self.page))
             htmlfile=self.getpage(url)
             self.findTitleAndUrl(htmlfile)
             self.saveList()
             if (not sleeptime)or sleeptime=="0":
                 sleeptime=5
             if sleeptime:
                 print "waiting for :"+str(sleeptime)+" second,then continue"
                 sleep(int(sleeptime))

Пример #4

Показать файл

Файл: YellowPageSpider.py Проект: Elvis-Zhou/BusinessInfo-Crawler

    def main(self,titles=("looking_for","page","location"),allInformationInList="1"):

        max,threadlimit,local=self.showScreenInfor()

        print "Program Begin: "
        keys=Inputs.readKeywords()
        #开始对每个关键词进行处理

        #开启多线程
        threads=self.startThreadPool(threadlimit)

        for word in keys:
            print "Now ,the category and word are",word,",they are in progress."
            self.category=word.split(":")[0]
            keyword=word.split(":")[1]
            self.mainGetUrls(keyword,max,local,allInformationInList,titles)
        if allInformationInList!='1':
            self.mainMiningUrlDB(threadlimit)

        self.queue.join()

        print "All finish!!! \n END。"

Пример #5

Показать файл

Файл: YellowPageSpider.py Проект: Elvis-Zhou/BusinessInfo-Crawler

    def mainGetUrls(self,word="led light bulbs",max=0,local=0,allInformationInList="1",titles=("looking_for","page","location")):
        self.max=max
        self.word=word
        self.page=1
        self.goalurl=self.formUrl(titles[0],self.word,titles[1],self.page,"","","0")
        if  local==1:
            locals=Inputs.getLocals()
            if locals:
                for l in locals:
                    print "Finding location: "+l
                    self.printTotalResults(max,l,titles)

                    print " dealing every page."
                    self.page=1
                    for p in range(1,self.max+1):
                        self.page=p
                        self.goalurl=self.formUrl(titles[0],self.word,titles[1],self.page,titles[2],l,"1")
                        url=self.goalurl
                        print "Now dealing Location: ",l
                        print "Dealing page: ",p
                        if allInformationInList=="1":
                            #全部信息都在列表页中
                            self.queue.put((url,self.word,self.category,self.country))
                            print "page: ",str(p)," information has got."
                        else:
                        #全部信息不都在列表页中，需要进入获取
                            self.getPageUrls(url)


                    if allInformationInList!="1":
                        print "Succeed in getting all pages,ready to write to DB."
                        self.saveUrlList()
                        self.contacturls=[]
                    #print "休息一分钟后继续获取下一个地区"
                    #sleep(60)

                print "Success！"


        else:
            self.printTotalResults(max,titles=titles)

            print " dealing every page."
            self.page=1
            for p in range(1,self.max+1):
                self.page=p
                self.goalurl=self.formUrl(titles[0],self.word,titles[1],self.page,"","","0")
                url=self.goalurl
                print "Dealing page: ",p

                if allInformationInList=="1":
                    #全部信息都在列表页中
                    self.queue.put((url,self.word,self.category,self.country))

                else:
                    #全部信息不都在列表页中，需要进入获取
                    self.getPageUrls(url)

            if allInformationInList!="1":
                print "Succeed in getting all pages,ready to write to DB."
                self.saveUrlList()
                self.contacturls=[]

            print "Success！"