Example #1
import random
import time

def getGeoLocation():
    # cfg (config reader) and geoinfo() (geocoder wrapper) are project-local
    # helpers; a sketch of geoinfo() follows this example.
    url = cfg.getProperty("baiduapi", "geo_url")
    fout = open("D:/shequloc.txt", "a")
    with open('D:/shequ.txt', 'r') as f:
        loop = 0
        for line in f:
            addr = line.rstrip()
            lng, lat = geoinfo(url, addr)
            print lng, lat
            fout.write(addr + ',' + lng + ',' + lat + '\n')
            loop += 1
            # Flush output and pause every 20 requests to stay polite to the API.
            if loop % 20 == 0:
                fout.flush()
                time.sleep(random.randint(2, 5))
    fout.close()
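
The geoinfo() helper the loop depends on is not shown on this page. A minimal sketch of what it might look like, in the same Python 2 style, assuming geo_url points at a Baidu geocoding endpoint that accepts an address query parameter and returns JSON with a result.location object (the parameter name and JSON layout are assumptions, not taken from this page):

import json
import urllib
import urllib2

def geoinfo(url, addr):
    # Hypothetical: append the address to the configured endpoint.
    query = url + '&' + urllib.urlencode({'address': addr})
    resp = urllib2.urlopen(query)
    result = json.loads(resp.read())
    resp.close()
    # Return strings, because the caller concatenates lng/lat with '+'.
    loc = result['result']['location']
    return str(loc['lng']), str(loc['lat'])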
Example #2
from pymongo import MongoClient

def get_db():
    # Debug output: show which host we are about to connect to.
    print cfg.getProperty("database", "DB_HOST")
    client = MongoClient(cfg.getProperty("database", "DB_HOST"),
                         int(cfg.getProperty("database", "DB_PORT")))
    db = client[cfg.getProperty("database", "DB_NAME")]
    return db
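
A minimal usage sketch for get_db(); the collection name shequ and the record fields are hypothetical, chosen to match the addr/lng/lat data written in Example #1:

db = get_db()
# Store one geocoded record and read it back ('shequ' is an assumed collection).
db.shequ.insert({'addr': u'浦东', 'lng': '121.54', 'lat': '31.22'})
print db.shequ.find_one({'addr': u'浦东'})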
Example #3

import xlwt as excel  # assumption: the Workbook/add_sheet/save API used here is xlwt's

def write_to_excel(sheet, datalist):
    # `sheet` is a UTF-8 byte string (Python 2) naming both the file and the worksheet.
    fn = 'D:/shequ_' + sheet + '.xls'
    wb = excel.Workbook()
    ws = wb.add_sheet(sheet.decode('utf-8'))  # renamed from `sheet` to avoid shadowing the argument
    size = len(datalist)
    for i in range(size):
        for j in range(3):  # three columns per row, presumably (addr, lng, lat)
            try:
                ws.write(i, j, datalist[i][j])
            except Exception:
                print i, j  # report the cell that failed to write
    wb.save(fn)
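
A hypothetical call, with made-up rows in the (addr, lng, lat) shape the loop above expects:

rows = [('address-1', '121.50', '31.23'),
        ('address-2', '121.54', '31.22')]
write_to_excel('浦东', rows)  # writes D:/shequ_浦东.xls with one worksheet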


def test():
    for i in range(3):
        print random.randint(1, 5)
    d = '浦东'  # a UTF-8 byte string when this source file is saved as UTF-8
    # Decoding UTF-8 bytes as GBK yields mojibake; these prints just probe the encodings.
    print d.decode('GBK').encode('utf-8')
    print d.decode('utf-8')


if __name__ == '__main__':
    url = cfg.getProperty("soufun", "url")
    crawlerjob(url)
#     crawler_pages(u'浦东', 'http://esf.sh.soufun.com/housing/25__0_0_0_0_1_0_0/')
#     test()
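
The cfg object used throughout these examples is never defined on this page. A minimal sketch of what it might be, assuming it wraps the Python 2 standard-library ConfigParser (the class name, the config file name, and the sample values shown are assumptions):

import ConfigParser

class Config(object):
    def __init__(self, path='config.ini'):  # hypothetical file name
        self._parser = ConfigParser.ConfigParser()
        self._parser.read(path)

    def getProperty(self, section, option):
        # Mirrors the cfg.getProperty(section, option) calls above.
        return self._parser.get(section, option)

cfg = Config()

# config.ini sections/keys inferred from the calls on this page:
# [baiduapi]
# geo_url = ...
# [database]
# DB_HOST = localhost   (hypothetical value)
# DB_PORT = 27017       (hypothetical value)
# DB_NAME = ...
# [soufun]
# url = ...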
Example #4
        # (This snippet starts mid-function; the request that produced
        # `response` and `data` is reconstructed in the sketch below.)
        response.close()
        gzipped = response.headers.get('Content-Encoding')
        if gzipped:
            # The server sent compressed bytes: decompress them in memory.
            data = StringIO.StringIO(data)
            gzipper = gzip.GzipFile(fileobj=data)
            html = gzipper.read()
            gzipper.close()
        else:
            html = data
        # The page bytes are GBK-encoded; convert them to UTF-8 for downstream use.
        html = html.decode('GBK').encode('utf-8')
        return html
    except Exception, e:
        print "No content for ", url, e
    return None
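
A self-contained reconstruction of the whole function, showing the missing opening lines; the function name fetch_html, the Request setup, and the Accept-Encoding header are assumptions layered on top of the fragment above:

import gzip
import StringIO
import urllib2

def fetch_html(url):
    try:
        # Hypothetical opening: request the page, allowing gzip responses.
        req = urllib2.Request(url)
        req.add_header('Accept-Encoding', 'gzip')
        response = urllib2.urlopen(req)
        data = response.read()
        response.close()
        # The gzip/GBK handling below matches the fragment above.
        if response.headers.get('Content-Encoding'):
            html = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()
        else:
            html = data
        return html.decode('GBK').encode('utf-8')
    except Exception, e:
        print "No content for ", url, e
    return None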
