def __init__(self, type):
    """Load the crawler configuration and, if enabled, set up the database.

    Args:
        type: Stored on the instance as ``self.type``.

    The ``csv`` and ``db`` options of the ``data`` section are read from
    ``LianJiaConfig.cfg``. Only when the ``db`` option equals ``'ON'`` is a
    connection opened through ``prepare()`` and the schema created through
    ``dbinit()``.
    """
    self.type = type
    self.config = ConfigProvider('LianJiaConfig.cfg')
    self.is_csv = self.config.get('data', 'csv')
    self.is_db = self.config.get('data', 'db')

    # Nothing more to do unless database output is switched on.
    if self.is_db != 'ON':
        return

    connection, cursor = self.prepare()
    self.con = connection
    self.cursor = cursor
    self.dbinit()
def __init__(self, typeName, configName):
    """Remember the crawl type, load its configuration and pick the URL suffix.

    Args:
        typeName: ``'sale'`` or ``'deal'``; any other value leaves
            ``self.link`` empty and logs a warning.
        configName: Passed unchanged to ``ConfigProvider``.
    """
    self.type = typeName
    self.config = ConfigProvider(configName)

    if typeName == 'sale':
        self.link = '.lianjia.com/ershoufang/'
        return
    if typeName == 'deal':
        self.link = '.lianjia.com/chengjiao/'
        return

    # Unknown type: keep an empty link so callers can detect it.
    self.link = ''
    logging.warning('输入类型错误,需要为sale或者deal')
class ChatManager():
    """Registry of IRC/Skype channels and of the listen relations between them.

    The four dictionaries below are class attributes, so they are shared by
    every instance; ``__init__`` only loads the configuration while
    ``idsDic`` is still empty.
    """

    # channel id -> channel record (dict with 'type', 'channel', 'alias')
    idsDic = {}
    # lookup key -> channel record; Skype keys are '&' + name, IRC keys are
    # the bare channel value from the configuration
    aliasDic = {}
    config = {}
    # channel1 -> list of channel2 values registered through turnListenOn
    listeners = {}

    def addIRCChannelToDictionary(self, channel):
        """Register an IRC channel under its own name in both dictionaries.

        NOTE(review): the record's 'alias' is '#' + channel while the
        aliasDic key is the bare channel value -- confirm this is intended.
        """
        self.idsDic[channel] = {'type': 'irc', 'channel': channel, 'alias': '#' + channel}
        self.aliasDic[channel] = {'type': 'irc', 'channel': channel, 'alias': '#' + channel}

    def addSkypeChannelToDictionary(self, channel):
        """Register a Skype channel record (a dict with 'name' and 'channel').

        The passed dict is updated in place with 'type' and 'alias' and the
        same object is stored in both dictionaries.
        """
        channel['type'] = "skype"
        channel['alias'] = "&" + channel['name']
        self.idsDic[channel['channel']] = channel
        self.aliasDic['&' + channel['name']] = channel

    def __init__(self):
        """Load the 'irc' and 'skype' channels once, on first instantiation."""
        if not self.idsDic:
            self.configProvider = ConfigProvider()
            skypesection = self.configProvider.getConfigSection('skype')
            ircsection = self.configProvider.getConfigSection('irc')
            for channel in ircsection['channels']:
                self.addIRCChannelToDictionary(channel)
            for channel in skypesection['channels']:
                self.addSkypeChannelToDictionary(channel)

    def getChanelByAlias(self, alias):
        """Return the channel record stored under ``alias``, or None."""
        return self.aliasDic.get(alias, None)

    def getChanelById(self, Id):
        """Return the channel record stored under ``Id``, or None."""
        return self.idsDic.get(Id, None)

    def getAliases(self):
        """Return the aliasDic keys as a list."""
        # A real list on both Python 2 and 3, so callers may index or count.
        return list(self.aliasDic)

    def getIds(self):
        """Return the idsDic keys as a list."""
        return list(self.idsDic)

    def isListening(self, channel1, channel2):
        """Return True if ``channel2`` is registered under ``channel1``, else False."""
        return channel2 in self.listeners.get(channel1, ())

    def turnListenOn(self, channel1, channel2):
        """Register ``channel2`` in the listener list of ``channel1``.

        Returns:
            The listeners dictionary when the relation was added,
            "nonono" when both arguments are equal, and
            "already listening" otherwise.
        """
        if channel1 == channel2:
            return "nonono"
        if channel1 in self.aliasDic and channel2 in self.aliasDic:
            if self.isListening(channel1, channel2):
                # Previously this case fell through and returned None.
                return "already listening"
            self.listeners.setdefault(channel1, []).append(channel2)
            return self.listeners
        # NOTE(review): this message is also returned for unknown aliases;
        # kept as-is for compatibility with existing callers.
        return "already listening"

    def turnListenOff(self, channel1, channel2):
        """Remove ``channel2`` from the listener list of ``channel1`` if present.

        Returns:
            The listeners dictionary.
        """
        if self.isListening(channel1, channel2):
            self.listeners[channel1].remove(channel2)
        return self.listeners
class UrlsPro:
    """Build Lianjia listing URLs from the crawler configuration.

    The configuration section used for every lookup is the crawl type
    (``'sale'`` or ``'deal'``).
    """

    def __init__(self, typeName, configName):
        """Remember the crawl type, load its configuration and pick the URL suffix.

        Args:
            typeName: ``'sale'`` or ``'deal'``; any other value leaves
                ``self.link`` empty and logs a warning.
            configName: Passed unchanged to ``ConfigProvider``.
        """
        self.type = typeName
        self.config = ConfigProvider(configName)
        if self.type == 'sale':
            self.link = '.lianjia.com/ershoufang/'
        elif self.type == 'deal':
            self.link = '.lianjia.com/chengjiao/'
        else:
            self.link = ''
            logging.warning('输入类型错误,需要为sale或者deal')

    @staticmethod
    def _splitSetting(raw):
        """Split a '/'-separated setting into its non-empty, stripped parts.

        A missing (None) or empty setting yields an empty list, so callers
        never build a URL from an empty path segment.
        """
        if not raw:
            return []
        return [part.strip() for part in raw.split('/') if part.strip()]

    # Build the first request URLs; they are fetched to discover the page count.
    def getFirstUrls(self):
        """Return one URL per configured area of the configured city.

        Returns an empty list when the type is invalid or when the 'city'
        or 'area' setting is missing or empty.
        """
        city = self.config.get(self.type, 'city')
        areas = self._splitSetting(self.config.get(self.type, 'area'))
        if not self.link or not city or not areas:
            return []
        return ['https://' + city + self.link + area + '/' for area in areas]

    # When multi-city crawling is enabled, build the request URL of each city.
    def getMultiCityUrls(self):
        """Return one URL per city listed in 'multi_city', or [] if disabled."""
        if not self.link or not self.isMulti():
            return []
        cities = self._splitSetting(self.config.get(self.type, 'multi_city'))
        return ['https://' + city + self.link for city in cities]

    def getMinPage(self):
        """Return the 'min_page' setting as an int; 0 when missing or blank."""
        page = self.config.get(self.type, 'min_page')
        if page is None or (isinstance(page, str) and not page.strip()):
            return 0
        return int(page)

    def isMulti(self):
        """Return True only when 'enable_multi' is exactly the string 'True'."""
        return self.config.get(self.type, 'enable_multi') == 'True'

    def getAllUrls(self):
        """Return the single-city URLs followed by the multi-city URLs."""
        urls = self.getFirstUrls()
        urls.extend(self.getMultiCityUrls())
        return urls
def __init__(self):
    """Attach this instance to the shared state and build it on first use.

    Every instance uses the class-level ``__state`` dictionary as its
    ``__dict__``. The first instance populates it with the config provider,
    gates provider and chat manager; later instances reuse those objects.

    The previous version only adopted the shared dictionary when it was
    already non-empty but never filled it, so nothing was ever shared.
    """
    shared = self.__state
    self.__dict__ = shared
    if not shared:
        # First instance: these assignments land in the shared dictionary.
        self.configProvider = ConfigProvider()
        self.gatesProvider = GatesProvider()
        self.chatMgr = ChatManager()
def __init__(self):
    """Load the configured IRC and Skype channels unless already loaded.

    Nothing happens when ``idsDic`` already holds entries. Otherwise the
    'skype' and 'irc' configuration sections are read and every entry of
    their 'channels' value is registered, IRC channels first.
    """
    if self.idsDic:
        return

    self.configProvider = ConfigProvider()
    skype_settings = self.configProvider.getConfigSection('skype')
    irc_settings = self.configProvider.getConfigSection('irc')

    for irc_channel in irc_settings['channels']:
        self.addIRCChannelToDictionary(irc_channel)
    for skype_channel in skype_settings['channels']:
        self.addSkypeChannelToDictionary(skype_channel)
class InterChatDispatcher():
    """Route messages between the IRC thread and the Skype thread.

    Instances share one attribute dictionary (``__state``), so the
    providers, the chat manager and the threads created by ``launch`` are
    visible from every instance.
    """

    __state = {}

    def __init__(self):
        """Attach to the shared state and create the collaborators on first use.

        The previous version only adopted the shared dictionary when it was
        already non-empty but never filled it, so nothing was ever shared.
        """
        shared = self.__state
        self.__dict__ = shared
        if not shared:
            # First instance: these assignments land in the shared dictionary.
            self.configProvider = ConfigProvider()
            self.gatesProvider = GatesProvider()
            self.chatMgr = ChatManager()

    def launch(self):
        """Create and start the IRC thread, then the Skype thread."""
        # launch irc
        irc_config = self.configProvider.getConfigSection('irc')
        self.irc_thread = IRCThread(
            irc_config['host'],
            irc_config['port'],
            irc_config['nick'],
            irc_config['channels'],
            self)
        self.connexion = self.irc_thread.getConnexion()
        self.irc_thread.start()
        self.skype_thread = InterChatSkypeThread(self)
        self.skype_thread.start()

    def dispatchIRCCommand(self, nick, message, channel):
        """Print the received message and return the fixed reply "oui"."""
        # Single-argument print() behaves the same on Python 2 and 3.
        print("Message: " + message + " From " + channel)
        return "oui"

    def dispatchMessageToChannel(self, channel, message):
        """Write ``message`` to the channel registered under alias ``channel``.

        Unknown aliases are ignored instead of raising on a None lookup.
        """
        channelTo = self.chatMgr.getChanelByAlias(channel)
        if channelTo is None:
            return
        if channelTo['type'] == 'irc':
            self.irc_thread.writeToChannel(message, channelTo['channel'])
        elif channelTo['type'] == 'skype':
            self.skype_thread.writeToChannel(message, channelTo['channel'])

    def messageToChannel(self, channel, message, nick=""):
        """Forward a "!tele <alias> <text>" message to the target alias.

        The second whitespace-separated word of ``message`` is the target
        alias; nothing is sent when it is missing or unknown.
        """
        parts = message.split(" ")
        if len(parts) < 2:
            return
        channelTo = self.chatMgr.getChanelByAlias(parts[1])
        if not channelTo:
            return
        body = message.replace("!tele " + parts[1], "")
        rmessage = "tele from {0}@{1}: {2}".format(nick, channel, body)
        if channelTo['type'] == 'irc':
            self.irc_thread.writeToChannel(rmessage, channelTo['channel'])
        elif channelTo['type'] == 'skype':
            self.skype_thread.writeToChannel(rmessage, channelTo['channel'])

    def helpToChannel(self, channel, message):
        """Send every line of config/help.txt to ``channel``.

        ``message`` is accepted for interface compatibility and is not used.
        """
        with open('config/help.txt', 'r') as f:
            for line in f:
                self.dispatchMessageToChannel(channel, line)

    def getChannelList(self):
        """Return the aliases known to the chat manager."""
        return self.chatMgr.getAliases()

    def dispatchMessage(self, chatname, sendername, body):
        """Relay a message from chat ``chatname`` to every listening IRC channel.

        Messages from the senders 'pdobot' and 'ictelecom' are dropped, as
        are messages from a chat id the chat manager does not know.

        Returns:
            Always False.
        """
        if sendername in ['pdobot', 'ictelecom']:
            return False
        channel = self.chatMgr.getChanelById(chatname)
        if channel is None:
            # Unknown source chat: nothing can be listening to it.
            return False
        for channelTo in self.chatMgr.getAliases():
            if not self.chatMgr.isListening(channelTo, channel['alias']):
                continue
            print("BINGO")
            channelToFull = self.chatMgr.getChanelByAlias(channelTo)
            if channelToFull['type'] == 'irc':
                message = "{0}{1}: {2}".format(sendername, channel['alias'], body)
                self.irc_thread.writeToChannel(message, channelTo)
        return False

    def listenChatOn(self, channel1, channel2):
        """Delegate to ``ChatManager.turnListenOn`` and return its result."""
        return self.chatMgr.turnListenOn(channel1, channel2)

    def listenChatOff(self, channel1, channel2):
        """Delegate to ``ChatManager.turnListenOff`` and return its result."""
        return self.chatMgr.turnListenOff(channel1, channel2)
class DataTo:
    """Store crawled listings in MySQL and describe the CSV column layout.

    The ``data`` section of ``LianJiaConfig.cfg`` provides the ``csv`` and
    ``db`` switches; the ``DB`` section provides host, user, password and
    port. A connection is opened only when the ``db`` switch equals 'ON'.
    """

    # Upsert for on-sale listings. ON DUPLICATE KEY UPDATE de-duplicates on
    # house_id; every placeholder must be %s.
    _INSERT_SALE = """insert into sale(house_id,city,district,title,area,description,attention,putdate,totalprice,unitprice)
        VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
        on duplicate key update city=VALUES(city),district=VALUES(district),title=VALUES(title),
        area=VALUES(area),description=VALUES(description),attention=VALUES(attention),
        putdate=VALUES(putdate),totalprice=VALUES(totalprice),unitprice=VALUES(unitprice)"""

    # Upsert for completed deals.
    _INSERT_DEAL = """insert into deal(house_id,city,district,title,description,saletime,dealdate,saleprice,dealprice,unitprice)
        VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
        on duplicate key update city=VALUES(city),district=VALUES(district),title=VALUES(title),
        description=VALUES(description),saletime=VALUES(saletime),dealdate=VALUES(dealdate),
        saleprice=VALUES(saleprice),dealprice=VALUES(dealprice),unitprice=VALUES(unitprice)"""

    def __init__(self, type):
        """Load the configuration and, if enabled, prepare the database.

        Args:
            type: 'sale' or 'deal'; selects the insert statement and the
                CSV column names.
        """
        self.config = ConfigProvider('LianJiaConfig.cfg')
        self.is_csv = self.config.get('data', 'csv')
        self.is_db = self.config.get('data', 'db')
        self.type = type
        # Always defined, so insert()/close() are safe when the DB is off.
        self.con = None
        self.cursor = None
        if self.is_db == 'ON':
            self.con, self.cursor = self.prepare()
            self.dbinit()

    # Open the database connection and return it together with a cursor.
    def prepare(self):
        """Connect to MySQL using the ``DB`` config section.

        Returns:
            A ``(connection, cursor)`` tuple.
        """
        con = pymysql.connect(host=self.config.get('DB', 'host'),
                              user=self.config.get('DB', 'user'),
                              passwd=self.config.get('DB', 'password'),
                              port=int(self.config.get('DB', 'port')),
                              charset='utf8')
        cursor = con.cursor()
        return con, cursor

    def dbinit(self):
        """Create the HOUSECRAWL database and the sale/deal tables if missing.

        Failures are logged and rolled back rather than raised.
        """
        import logging

        createSale = """CREATE TABLE IF NOT EXISTS `sale`(
            `house_id` VARCHAR(20),
            `city` VARCHAR(20),
            `district` VARCHAR(30),
            `title` VARCHAR(200) NOT NULL,
            `area` VARCHAR(100) NOT NULL,
            `description` VARCHAR(200) NOT NULL,
            `attention` VARCHAR(50) NOT NULL,
            `putdate` VARCHAR(50) NOT NULL,
            `totalprice` FLOAT,
            `unitprice` FLOAT,
            PRIMARY KEY (`house_id`))ENGINE=InnoDB DEFAULT CHARSET=utf8"""
        createDeal = """CREATE TABLE IF NOT EXISTS `deal`(
            `house_id` VARCHAR(20),
            `city` VARCHAR(20),
            `district` VARCHAR(30),
            `title` VARCHAR(100) NOT NULL,
            `description` VARCHAR(150) NOT NULL,
            `saletime` VARCHAR(20) NOT NULL,
            `dealdate` VARCHAR(20) NOT NULL,
            `saleprice` FLOAT,
            `dealprice` FLOAT,
            `unitprice` FLOAT,
            PRIMARY KEY (`house_id`))ENGINE=InnoDB DEFAULT CHARSET=utf8"""
        try:
            # Create the database and its tables.
            self.cursor.execute(
                'CREATE DATABASE IF NOT EXISTS HOUSECRAWL DEFAULT CHARSET utf8 COLLATE utf8_general_ci'
            )
            self.cursor.execute('USE HOUSECRAWL')
            self.cursor.execute(createSale)
            self.cursor.execute(createDeal)
            self.con.commit()
        except Exception:
            # Still best-effort, but no longer silent.
            logging.exception('Database initialisation failed; rolling back')
            self.con.rollback()

    def insert(self, items):
        """Upsert ``items`` into the table matching ``self.type``.

        Args:
            items: Sequence of 10-value rows in the column order of the
                matching insert statement.

        Does nothing when no database connection was opened. Failures are
        logged and rolled back rather than raised.
        """
        import logging

        if self.cursor is None or self.con is None:
            return
        try:
            if self.type == 'sale':
                self.cursor.executemany(self._INSERT_SALE, items)
            elif self.type == 'deal':
                self.cursor.executemany(self._INSERT_DEAL, items)
            self.con.commit()
        except Exception:
            logging.exception('Insert into %s failed; rolling back', self.type)
            self.con.rollback()

    def close(self):
        """Close the cursor and the connection if they were opened."""
        if self.cursor is not None:
            self.cursor.close()
            self.cursor = None
        if self.con is not None:
            self.con.close()
            self.con = None

    # Return the CSV column names.
    def getName(self):
        """Return the CSV header for ``self.type``; None for any other type."""
        if self.type == 'sale':
            return ["标题", "区域", "描述", "关注信息", "总价(万)", "单价(元/平方米)"]
        if self.type == 'deal':
            return [
                "标题", "描述", "成交周期", "成交日期", "挂牌价(万)", "成交价(万)",
                "单价(元/平方米)"
            ]
        return None