def originalCode():
    """Run the legacy crawl sequence.

    Performs the region-level house and rent crawls, refreshes the
    community table, then crawls sell records for every community read
    back from the database.

    ``city`` and ``regionlist`` are not parameters; they are read from
    module scope and must be defined before this function is called.
    """
    # Region-level crawls.
    core.GetHouseByRegionlist(city, regionlist)
    core.GetRentByRegionlist(city, regionlist)

    # Scrape the community list and insert it into the database; per the
    # original note this may only need to run the first time.
    core.GetCommunityByRegionlist(city, regionlist)

    # Read the community list back from the database and crawl per community.
    communities = get_communitylist(city)
    core.GetSellByCommunitylist(city, communities)
def get_community_worker(queue, city):
    """Drain *queue*, crawling community data for each region it yields.

    Args:
        queue: Object exposing ``get_nowait()`` that returns the next region
            and raises ``queue.Empty`` once no items remain.
        city: City identifier forwarded to ``core.GetCommunityByRegionlist``.

    Returns:
        None, as soon as the queue reports that it is empty.

    Raises:
        Any exception other than ``queue.Empty`` raised while fetching or
        processing a region propagates to the caller instead of being
        silently swallowed.
    """
    # Imported locally: the ``queue`` parameter shadows the stdlib module
    # name inside this function, so only the exception class is bound here.
    try:
        from queue import Empty
    except ImportError:  # Python 2 fallback
        from Queue import Empty

    while True:
        try:
            reg = queue.get_nowait()
        except Empty:
            # Nothing left to process: this worker is done.
            return
        print(reg)
        # Lazy %-style arguments keep region and city visibly separated and
        # do not fail when a region is not a plain string.
        logging.info("Processing %s %s", reg, city)
        core.GetCommunityByRegionlist(city, [reg])
def testUpsertDB():
    """Manually exercise an upsert of one sample row into ``model.Community``.

    Inserts a hard-coded community record and, when a row with the same
    ``id`` already exists, resolves the conflict through ``on_conflict``
    with the listed columns in ``preserve`` and an empty ``update`` mapping.
    This is a manual check against the configured database and asserts
    nothing.
    """
    info_dict = {
        "id": 121,
        "title": u'京基御景华城',
        "link": 'https://sz.lianjia.com/xiaoqu/12221',
        'district': u'福田区',
        'bizcircle': u'赤尾1',
        'tagList': u'近地铁7号线赤尾站',
        'onsale': '1',
        'year': '',
    }

    community = model.Community
    # Columns handed to ``preserve`` for the conflict resolution.
    preserved_columns = [
        community.title,
        community.link,
        community.district,
        community.bizcircle,
        community.tagList,
        community.onsale,
        community.onrent,
        community.year,
        community.housetype,
        community.cost,
        community.service,
        community.company,
        community.building_num,
        community.house_num,
        community.price,
        community.city,
    ]

    community.insert(info_dict).on_conflict(
        conflict_target=[community.id],
        preserve=preserved_columns,
        update={},
    ).execute()


if __name__ == "__main__":
    regionlist = settings.REGIONLIST  # only pinyin is supported
    city = settings.CITY
    model.database_init()

    # Refresh the community table, then crawl per community read back from it.
    core.GetCommunityByRegionlist(city, regionlist)
    communitylist = get_communitylist(city)
    core.GetHouseByCommunitylist(city, communitylist)
    core.GetSellByCommunitylist(city, communitylist)
    core.GetRentByCommunitylist(city, communitylist)
import core
import model
import settings


def get_communitylist():
    """Return the titles of all ``Community`` rows whose ``onsale`` is > 0."""
    on_sale_query = model.Community.select().where(model.Community.onsale > 0)
    return [row.title for row in on_sale_query]


if __name__ == "__main__":
    regionlist = settings.REGIONLIST  # only pinyin is supported
    model.database_init()

    # Scrape the community list and insert it into the database; per the
    # original note this may only need to run the first time.
    core.GetCommunityByRegionlist(regionlist)

    # Read the community list back from the database.
    communitylist = get_communitylist()
    core.GetHouseByCommunitylist(communitylist)
    core.GetRentByCommunitylist(communitylist)
    core.GetSellByCommunitylist(communitylist)
# `model` is the database model.
model.database_init()

# `core` is the core crawler module.

# Crawl on-sale listings by administrative region, covering every region in
# `regionlist`. Because of a Lianjia limit only the first 100 pages can be
# crawled; GetHouseByCommunitylist can be used instead.
# core.GetHouseByRegionlist(city, regionlist)

# Crawl rental listings by administrative region.
# core.GetRentByRegionlist(city, regionlist)

# Fetch the communities of each administrative region and insert them into
# the database; running this only once may be enough.
core.GetCommunityByRegionlist(city, regionlist)

# Crawl sold (closed-deal) listings per community, covering every community
# in the list. Some data cannot be shown because it is only displayed in the
# Lianjia app.
communitylist = get_communitylist(city)
print(communitylist)

# NOTE(review): this name shadows the builtin `list`; it is kept as-is in
# case later code refers to it - consider renaming.
list = ['东荟城', '金色梦想']
# core.GetHouseByCommunitylist(city, list)
# core.GetSellByCommunitylist(city, list)  # sold listings
if __name__ == "__main__":
    # Abort early when MySQL is unavailable. ``mysql_status()`` is treated as
    # successful only when it returns 0.
    ret = mysql_status()
    if ret != 0:
        print('mysql start failed.')
        # Exit with a non-zero status so callers can detect the failure;
        # a bare ``sys.exit()`` would report success (status 0).
        sys.exit(1)

    regionlist = settings.REGIONLIST  # only pinyin is supported
    city = settings.CITY

    if args.initDatabase:
        # Creates the tables; running this once is enough.
        model.database_init()

    if args.updateCommunity:
        # Fetch the community list and write it to the `community` table;
        # per the original note this may only need to run the first time.
        core.GetCommunityByRegionlist(city, regionlist)

    # Read the community list back from the database.
    communitylist = get_communitylist(city)
    # for community in communitylist:
    #     logging.info("%s", community)

    if args.isDebug:
        # Debug mode: uncomment one of the calls below to exercise a single
        # step, then stop before anything else runs.
        # dump_db('ershoufang')
        # core.get_sell_percommunity(city, communitylist[0])
        # core.get_house_percommunity(city, communitylist[0])
        # core.get_community_perregion(city, 'chaoyang')
        # core.get_house_perregion(city, 'chaoyang')
        # core.get_rent_percommunity(city, 'chaoyang')
        sys.exit()
import core
import model
import settings


def get_communitylist():
    """Return the title of every row in the ``Community`` table."""
    return [row.title for row in model.Community.select()]


if __name__ == "__main__":
    regionlist = settings.REGIONLIST  # only pinyin is supported
    model.database_init()

    # Only needed on the first run.
    # The *ByRegionlist crawls cannot fetch all data because Lianjia only
    # displays 100 pages.
    # core.GetHouseByRegionlist(regionlist)
    # core.GetRentByRegionlist(regionlist)

    # Scrape the community list and insert it into the database; this may
    # only need to run the first time.
    core.GetCommunityByRegionlist(regionlist)

    # Read the community list back from the database.
    communitylist = get_communitylist()

    # Historical sales.
    core.GetSellByCommunitylist(communitylist)
    # Currently on sale.
    core.GetHouseByCommunitylist(communitylist)
    # Rentals.
    core.GetRentByCommunitylist(communitylist)