示例#1
0
文件: fund.py 项目: henry42/html2json
def loadconf():
    """Load ``conf/ef.ini`` and publish the database settings in ``SQL_CONN``.

    Reads the ``host``, ``db``, ``port``, ``user`` and ``passwd`` options of
    the ``[database]`` section and stores them as a dict in the module-level
    ``SQL_CONN``. ``port`` is read with ``getint`` and is therefore an int;
    the other values are strings.

    Errors from opening the file or from missing/invalid options are not
    handled here and propagate to the caller.
    """
    # NOTE(review): CONFIG is declared global but never assigned in this
    # function -- confirm whether it was meant to hold the parsed config.
    global CONFIG
    global SQL_CONN

    config = ConfigParser.ConfigParser()
    # Use a context manager so the file handle is closed as soon as the
    # parser has consumed it instead of being leaked until garbage collection.
    with codecs.open("conf/ef.ini", "r") as conf_file:
        config.readfp(conf_file)
    log.info("conf loaded")

    SQL_CONN = {
        "host": config.get("database", "host"),
        "db": config.get("database", "db"),
        "port": config.getint("database", "port"),
        "user": config.get("database", "user"),
        "passwd": config.get("database", "passwd"),
    }
示例#2
0
文件: fund.py 项目: henry42/html2json
def update_all_nav(fundcode=None,start=None,end=None):
    """Crawl historical NAV data per fund and upsert it into ``fund_data``.

    For every requested fund code the Sina history page is fetched through a
    ``crawler`` configured with ``crawler_allfund_of_sina.xml``; each parsed
    row (``date``, ``nav``, ``tnav``) is written with an
    ``insert ... on duplicate key update`` statement. The transaction is
    committed once per fund.

    Args:
        fundcode: iterable of fund codes to update. ``None`` (the default)
            means every code found in the ``fund_info`` table.
        start: optional datetime used as ``startdate1`` for all funds. When
            omitted, each fund's ``birthday`` from ``fund_info`` is used.
        end: datetime used as ``enddate1``. Defaults to ``datetime.now()``
            evaluated at call time (not at import time).
    """
    log.info('update_all_nav start')

    global SQL_CONN

    if end is None:
        end = datetime.now()

    req = 'http://biz.finance.sina.com.cn/fundinfo/open/lsjz.php?fund_code='
    endtime = end.strftime('%Y-%m-%d')

    sql4data = '''insert into fund_data (
                code , 
                date , 
                nav , 
                tnav) values (%s,%s,%s,%s) on duplicate key update 
                nav=%s,
                tnav=%s '''

    conn = pydb.connect(**SQL_CONN)
    try:
        cur = conn.cursor()

        # Default query window per fund: from its birthday up to `end`.
        fundstart = {}
        cur.execute("select code,birthday from fund_info")
        for row in cur.fetchall():
            fundstart[row[0]] = {'startdate1': row[1].strftime('%Y-%m-%d'),
                                 'enddate1': endtime}

        if fundcode is None:
            fundcode = list(fundstart.keys())

        count = len(fundcode)
        for ind, fc in enumerate(fundcode, 1):
            log.info("start %s %s/%s" % (fc,ind,count))

            if fc not in fundstart:
                # Without a fund_info row there is no date window to query;
                # skip this code instead of aborting the whole run.
                log.error("no information about %s" % fc)
                continue

            crawl = crawler()
            crawl.settranslator("xml")
            # Copy so an explicit `start` does not overwrite the cached window.
            postdata = dict(fundstart[fc])
            if start is not None:
                postdata['startdate1'] = start.strftime('%Y-%m-%d')
            # NOTE(review): the standard HTTP header is spelled "Referer";
            # "Refer" is kept as-is -- confirm what the site expects.
            header = {'Refer':req + fc}
            crawl.seturi(req + fc,postdata,header)
            crawl.setcfgfile("crawler_allfund_of_sina.xml")
            result = crawl.parse()
            log.debug("data %s %s" % (fc,result))

            if not result:
                log.error("no information about %s" % fc)
            else:
                for data in result:
                    # nav/tnav are passed twice: once for the insert values
                    # and once for the "on duplicate key update" clause.
                    cur.execute(sql4data,(fc,data['date'])+(data['nav'],data['tnav'])*2)

            warnings = cur.fetchwarnings()
            if warnings:
                # Format instead of concatenating: the warnings object is not
                # a plain string.
                log.warn("db:%s" % (warnings,))
            conn.commit()
            log.info("done %s %s/%s" % (fc,ind,count))
    finally:
        # Release the connection even when crawling or a statement fails.
        conn.close()

    log.info('update_all_nav done')
示例#3
0
 def seturi(self,uri,params=None,headers=None):
     """Fetch ``uri`` and keep the response body in ``self.content``.

     Args:
         uri: address to open.
         params: optional request data. A dict is url-encoded first; any
             other value is handed to the opener unchanged.
         headers: optional dict of extra request headers, added after the
             fixed ``User-Agent`` and ``Cache-Control`` headers.

     Up to ``self.MAX_URL_OPEN`` attempts are made, sleeping 5 seconds
     between failed attempts. Failures are logged, not raised; when every
     attempt fails ``self.content`` is left as ``None``.
     """
     log.info("loading %s %s" % (uri,params))
     self.uri = uri
     # Reset first so a failed fetch cannot leave the body of an earlier
     # request behind.
     self.content = None
     if isinstance(params, dict):
         params = urllib.urlencode(params)
     # Inclusive upper bound: attempts are numbered 1..MAX_URL_OPEN, which is
     # what the "i < MAX_URL_OPEN" check below relies on to skip the sleep
     # after the final attempt.
     for i in range(1,self.MAX_URL_OPEN + 1):
         try:
             opener=urllib.URLopener()
             opener.addheader("User-Agent","Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0;  Embedded Web Browser from: http://bsalsa.com/; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.2; .NET CLR 1.1.4322; Tablet PC 2.0)")
             opener.addheader("Cache-Control","no-cache")
             for k,v in (headers or {}).items():
                 opener.addheader(k,v)
             response = opener.open(uri,params)
             try:
                 self.content = response.read()
             finally:
                 response.close()
             break
         except Exception as e:
             # Exception rather than BaseException, so KeyboardInterrupt and
             # SystemExit are not swallowed by the retry loop.
             log.error("%s got error %s" % (i,e))
             if i < self.MAX_URL_OPEN:
                 time.sleep(5)
示例#4
0
    def parse(self):
        """Translate the loaded content according to the loaded config.

        ``self.result`` is cleared first. ``self.content``, ``self.cfg`` and
        ``self.tran`` are then checked in that order; the first one that is
        ``None`` is reported through ``log.error`` and ``None`` is returned.

        Otherwise the translator's root and ``self.cfg`` are passed to
        ``self._parse``. The outcome is serialized as indented JSON, handed
        to ``_writetologfile`` together with ``self.uuid``, and returned.
        """
        self.result = None

        # (attribute name, message logged when that attribute is None).
        # Attributes are looked up lazily, one at a time, in this order.
        required = (
            ("content", "no content for parsing"),
            ("cfg", "no config file"),
            ("tran", "no translator"),
        )
        for attr_name, complaint in required:
            if getattr(self, attr_name) is None:
                log.error(complaint)
                return None

        log.info("parsing")
        parsed = self._parse(self.tran.getroot(), self.cfg)
        log.info("done!")
        _writetologfile(
            self.uuid,
            json.dumps(parsed, ensure_ascii=False, indent=4),
        )
        return parsed
示例#5
0
文件: fund.py 项目: henry42/html2json
def update_all_nav2(fundcode=('050001',),start=None,end=None):
    """Crawl the Sina NAV history page per fund code and print the result.

    Unlike ``update_all_nav`` nothing is written to the database: the parsed
    result of each fund is only logged at debug level and printed.

    Args:
        fundcode: iterable of fund codes; defaults to the single code
            ``'050001'``.
        start: accepted for signature compatibility; not used here.
        end: accepted for signature compatibility; not used here.
    """
    log.info('update_all_nav2 start')

    req = 'http://biz.finance.sina.com.cn/fundinfo/open/lsjz.php?fund_code='

    for fc in fundcode:
        crawl = crawler()
        # NOTE(review): no settranslator() call is made on this crawler --
        # confirm that parse() works without one.
        # NOTE(review): the standard HTTP header is spelled "Referer";
        # "Refer" is kept as-is -- confirm what the site expects.
        header = {'Refer':req + fc}
        crawl.seturi(req + fc,None,header)
        crawl.setcfgfile("crawler_allfund_of_sina.xml")
        result = crawl.parse()
        log.debug("data %s %s" % (fc,result))
        print(result)
    # The closing message now names this function (it previously said
    # 'update_all_nav done').
    log.info('update_all_nav2 done')
        
    
    
示例#6
0
文件: fund.py 项目: henry42/html2json
def update_fund_info():
    """Crawl the fund list and upsert it into ``fund_info`` and ``fund_data``.

    A ``crawler`` configured with ``crawler_fund_of_163.xml`` produces a
    result whose ``'data'`` entry is iterated. For each entry one row is
    upserted into ``fund_info`` (code, name, manager, size, company,
    birthday, type, status) and one into ``fund_data`` (code, date, nav,
    tnav). The transaction is committed after every entry.

    If the crawler yields nothing, an error is logged and no rows are
    written.
    """
    log.info('update_fund_info start')

    global SQL_CONN

    crawl = crawler()
    crawl.settranslator("xml")
    crawl.setcfgfile("crawler_fund_of_163.xml")
    result = crawl.parse()
    sql4info = '''INSERT INTO fund_info (
        CODE , 
        NAME , 
        manager , 
        size , 
        company , 
        birthday , 
        `type` , 
        status) VALUES (%s,%s,%s,%s,%s,%s,%s,%s) ON DUPLICATE KEY UPDATE 
        NAME=%s,
        manager=%s,
        size=%s,
        company=%s,
        birthday=%s,
        `type`=%s,
        status=%s'''

    sql4data = '''insert into fund_data (
                code , 
                date , 
                nav , 
                tnav) values (%s,%s,%s,%s) on duplicate key update 
                nav=%s,
                tnav=%s '''

    # Guard against a missing parse result so a failed crawl is reported
    # instead of crashing on ``None.get``.
    if not result:
        log.error("no fund information parsed")
    array = (result or {}).get('data',[])

    conn = pydb.connect(**SQL_CONN)
    try:
        cur = conn.cursor()
        try:
            for data in array:
                log.info("update %s %s" % (data['code'],data['name']))
                # The non-key columns appear twice: once for the INSERT
                # values and once for the ON DUPLICATE KEY UPDATE clause.
                d1 = (data['code'],)+(data['name'],
                        data['manager'],
                        data['size'],
                        data['company'],
                        data['birthday'],
                        data['type'],
                        data['status'])*2
                log.debug("data %s" % list(d1))
                cur.execute(sql4info,d1)
                d2=(data['code'],data['date'])+(data['nav'],data['tnav'])*2
                log.debug("data %s" % list(d2))
                cur.execute(sql4data,d2)
                warnings = cur.fetchwarnings()
                if warnings:
                    # Format instead of concatenating: the warnings object
                    # is not a plain string.
                    log.warn("db:%s" % (warnings,))
                conn.commit()
        finally:
            cur.close()
    finally:
        # Release the connection even when a statement fails.
        conn.close()

    log.info('update_fund_info done')