def download(self):
    """Download data for every code in ``self.allCode`` with worker threads.

    A copy of ``self.conf`` is extended with the ``handle``, ``date`` and
    ``queue`` entries and passed to ``self.threadNum`` daemon
    ``MultiThreadHandler`` workers. The call then blocks on the queue's
    ``join()`` until every queued code has been marked as done.

    Returns:
        True once the queue has been fully processed.

    Example:

    >>> app=MultiThreadDownloader(conf)
    >>> app.stock.allCode

    >>> app.download()
    True
    """
    logging.debug("Start downloading data...\nCrawl mode is mutil.")

    # Work on a copy so the worker-only keys never leak into self.conf.
    conf = dict(self.conf)
    conf['handle'] = self.handle
    conf['date'] = self.date

    oQueue = queue.Queue()
    # Loop-invariant: all workers receive the same conf and the same queue.
    conf["queue"] = oQueue
    for code in self.allCode:
        # Integer codes are converted through Util.getCode before queueing.
        if isinstance(code, int):
            code = Util.getCode(code)
        oQueue.put(code)

    for _ in range(self.threadNum):
        worker = MultiThreadHandler(conf=conf)
        # NOTE(review): assumes MultiThreadHandler subclasses threading.Thread;
        # the ``daemon`` attribute replaces the deprecated setDaemon().
        worker.daemon = True
        worker.start()

    oQueue.join()
    return True
예제 #2
0
def getUrl(num,date,sourceType="qq2",conf=None):
    """Build the detail-data download URL for one stock code and date.

    Args:
        num: Stock code; converted with ``int()``.
        date: Trade date; converted with ``str()`` and, for the ``"sina"``
            and ``"qq2"`` sources, parsed with the ``%Y%m%d`` format.
        sourceType: One of ``"qq"``, ``"sina"`` or ``"qq2"``.
        conf: Optional mapping. Its ``"DETAIL_SOURCE"`` entry, when present,
            overrides the module-level ``DETAIL_SOURCE`` URL templates.

    Returns:
        The formatted URL string, or ``None`` when ``sourceType`` is not one
        of the supported values.

    Raises:
        ValueError: If ``num`` is not an integer literal, or ``date`` does not
            match ``%Y%m%d`` for the ``"sina"``/``"qq2"`` sources.

    >>> getUrl(num="601919",date="20110817",conf = conf)
    'http://stock.gtimg.cn/data/index.php?appn=detail&action=download&c=sh601919&d=20110817'
    """
    num = int(num)
    date = str(date)
    if conf is None:
        conf = {}
    detailSource = conf.get("DETAIL_SOURCE", DETAIL_SOURCE)

    if sourceType not in ("qq", "sina", "qq2"):
        # Unknown sources have always produced None; keep that explicit.
        return None

    # Codes below 600000 get the "sz" prefix, all others "sh".
    symbol = ("sz" if num < 600000 else "sh") + Util.getCode(num)

    if sourceType == "qq":
        return detailSource.get("qq") % (symbol + ".js")

    if sourceType == "sina":
        formatDate = time.strftime("%Y-%m-%d", time.strptime(date, "%Y%m%d"))
        return detailSource.get("sina") % (formatDate, symbol)

    # sourceType == "qq2": the parse/format round trip validates the date.
    formatDate = time.strftime("%Y%m%d", time.strptime(date, "%Y%m%d"))
    return detailSource.get("qq2") % (symbol, formatDate)
예제 #3
0
    def download(self,date=None):
        """Download Detail data for one date, optionally resuming a run.

        When ``self.conf["restart"]`` is truthy, only the codes numerically
        greater than the last code reported by ``Util.getLastCode`` are
        downloaded, and the ``restart`` flag is reset to ``False``.
        Otherwise every code in ``self.stock.allCode`` is downloaded.

        In ``"multi"`` thread mode the codes are queued and processed by
        ``self.threadNum`` daemon ``MultiThreadDetail`` workers; in any other
        mode they are handled one by one through ``handleDetail``.

        Args:
            date: Date to download; falls back to ``self.date`` when falsy.

        Returns:
            True once all codes have been processed.

        >> detail=Detail(conf)
        >> detail.stock.allCode

        >> detail.download()
        True
        """
        date = date or self.date
        logging.debug("Start downloading Detail data...\nCrawl mode is %s." % self.threadMode)

        if self.conf.get("restart"):  # restart mode: resume after an interruption
            # Last stock code that was fetched successfully before the interruption.
            lastCode = Util.getLastCode(
                path=os.path.join(
                    self.conf.get("SYS_HOME"),
                    self.conf.get("DETAIL_DATA_PATH"),
                    date,
                )
            )
            # Keep only the codes that still need fetching. The original used
            # map(), which produced booleans instead of the remaining codes.
            lastNum = int(lastCode)
            codes = [code for code in self.stock.allCode if int(code) > lastNum]
            if codes:
                logging.info("Downloader is in restart mode.Restart begin at " + str(codes[0]))
            else:
                logging.info("Downloader is in restart mode.Nothing left to download.")
            self.conf['restart'] = False
        else:
            codes = self.stock.allCode

        if self.threadMode == "multi":  # multi-threaded mode
            oQueue = queue.Queue()
            for code in codes:
                # Integer codes are converted through Util.getCode before queueing.
                if isinstance(code, int):
                    code = Util.getCode(code)
                oQueue.put(code)
            # Loop-invariant; workers presumably read the queue from conf.
            self.conf["queue"] = oQueue
            for _ in range(self.threadNum):
                worker = MultiThreadDetail(date=date, parser=self.parser, conf=self.conf)
                # NOTE(review): assumes MultiThreadDetail subclasses
                # threading.Thread; ``daemon`` replaces deprecated setDaemon().
                worker.daemon = True
                worker.start()
            # Block until every queued code has been marked as done.
            oQueue.join()
        else:  # single-threaded mode
            for code in codes:
                handleDetail(code=code, date=date, parser=self.parser, conf=self.conf)
        return True