示例#1
0
 def countRoomsForRecentDays(self, query, city, days):
     """
     Count the distinct rooms whose ``update_time`` is newer than a cutoff
     derived from ``days``.

     :param query: query string matched against ``query_str``; wins over ``city``
     :param city: city name matched against ``city``; only used when ``query`` is falsy
     :param days: look-back window, passed (negated) to getDeltaTimeStamp()
     :return: number of distinct ``room_id`` rows found; 0 when both filters are
              falsy or when the lookup fails
     """
     count = 0
     cursor = self._conn.cursor()
     try:
         # Guard first: without any filter there is nothing to count.
         if not query and not city:
             Log.w(StorageService._TAG, "query and city all empty!!")
             return 0
         if query:
             sql = "SELECT DISTINCT(room_id) FROM room_info WHERE update_time>? AND query_str=?"
             needle = query
         else:
             sql = "SELECT DISTINCT(room_id) FROM room_info WHERE update_time>? AND city=?"
             needle = city
         since = getDeltaTimeStamp(datetime.datetime.now(), -days)
         cursor.execute(sql, (since, needle))
         count = len(cursor.fetchall())
     except BaseException as e:
         Log.w(StorageService._TAG, "countRoomsForRecentDays() failed", e)
     finally:
         # Runs on the early return as well.
         cursor.close()
         self._conn.commit()
     return count
示例#2
0
def testServer():
    """
    Run a single query round using ``config_example.json``.

    Logs and the room database are placed under ``$HOME/airbnb`` (or
    ``./airbnb`` when ``$HOME`` is not set). The check-in date is one week from
    now and three check-out dates (1, 2 and 3 nights later) are tried.
    """
    home = os.environ.get('HOME')
    if not home:
        print('can not fetch $HOME')
        home = "."
    workDir = home + "/airbnb"
    if not os.path.exists(workDir):
        os.mkdir(workDir, mode=0o755)

    config = parseConfigFile("config_example.json")
    if not config:
        sys.exit(0)
    config.localStoragePath = workDir + "/rooms.db"

    # Configure the logging module
    Log.config(workDir + "/all_logs.log", echo=True)

    # Initialise the service
    service = SearchService(config)

    # Prepare the stay dates
    dateFormat = '{:%Y-%m-%d}'
    checkin = datetime.datetime.now() + datetime.timedelta(weeks=1)
    time_title = dateFormat.format(checkin)
    checkin_date = dateFormat.format(checkin)
    # 2020.03.22: try several stay lengths (1, 2 and 3 nights) in one go, because
    # some listings only show up when searching for more than one night
    checkout_dates = [
        dateFormat.format(checkin + datetime.timedelta(days=nights))
        for nights in (1, 2, 3)
    ]

    # Kick off the query
    service.doQuery(checkin_date, checkout_dates, time_title)
示例#3
0
def daemon_start(pid_file):
    """
    Detach the current process into the background and record its pid.

    The process forks once. The parent waits up to five seconds for a signal
    from the child (SIGTERM on success, SIGINT on failure) and then exits.
    The child writes its pid via write_pid_file(), starts a new session,
    ignores SIGHUP, closes stdin and returns to the caller.

    :param pid_file: path of the pid file handed to write_pid_file()
    :return: None; only the child process returns from this function
    """
    def handle_exit(signum, _):
        # SIGTERM means "clean exit"; any other handled signal is a failure.
        if signum == signal.SIGTERM:
            sys.exit(0)
        sys.exit(1)

    # Installed before forking, so both parent and child inherit the handlers.
    signal.signal(signal.SIGINT, handle_exit)
    signal.signal(signal.SIGTERM, handle_exit)

    # fork only once because we are sure parent will exit
    # (os.fork raises OSError on failure, so no return-value check is needed)
    pid = os.fork()

    if pid > 0:
        # parent waits for its child
        time.sleep(5)
        sys.exit(0)

    # child signals its parent to exit
    ppid = os.getppid()
    pid = os.getpid()
    if write_pid_file(pid_file, pid) != 0:
        Log.e(MAIN_TAG, "write_pid_file() failed, pid = " + str(pid))
        os.kill(ppid, signal.SIGINT)
        sys.exit(1)

    os.setsid()
    # signal.signal takes (signalnum, handler). The original call had the two
    # swapped and only worked because SIGHUP and SIG_IGN share the value 1.
    signal.signal(signal.SIGHUP, signal.SIG_IGN)

    Log.d(MAIN_TAG, 'started')
    os.kill(ppid, signal.SIGTERM)

    sys.stdin.close()
示例#4
0
    def _sendWithAttachment(self, receivers, title, content, attachment=None):
        """
        Send a plain-text mail, optionally carrying one file attachment.

        :param receivers: list of recipient addresses; the first entry is used
                          for the ``To`` header and the success log line
        :param title: subject line
        :param content: plain-text body, encoded as UTF-8
        :param attachment: path of the file to attach; when None the mail is
                           sent without an attachment
        :return: None; ``smtplib.SMTPException`` is logged instead of raised
        """
        # A multipart container holds both the text body and the attachment
        message = MIMEMultipart()
        message['From'] = self.sender
        message['To'] = receivers[0]
        message['Subject'] = title
        # Attach the text body first, then the file
        message.attach(MIMEText(content, 'plain', 'utf-8'))
        if attachment is not None:
            # Read inside a context manager so the file handle is always closed
            with open(attachment, 'rb') as attachmentFile:
                part = MIMEApplication(attachmentFile.read())
            part.add_header('Content-Disposition',
                            'attachment',
                            filename=attachment)
            message.attach(part)

        # Log in and send
        try:
            smtpObj = smtplib.SMTP_SSL(
            ) if self.sendType == "ssl" else smtplib.SMTP()
            smtpObj.connect(self.mailHost, self.mailPort)
            smtpObj.ehlo()
            if self.sendType == "tls":
                smtpObj.starttls()
            smtpObj.login(self.sender, self.passwd)
            smtpObj.sendmail(self.sender, receivers, message.as_string())
            Log.i(MailReporter.TAG,
                  'Mail successfully sent to ' + receivers[0])
            smtpObj.quit()
        except smtplib.SMTPException as e:
            Log.e(MailReporter.TAG, 'send() error', e)
示例#5
0
 def tryGetHomeInfo(self, checkin_date, checkout_date, homeInfoCollection, adults, city):
     """
     Fetch every result page for one city/date/guest combination and collect
     the rooms found.

     Pages are requested until the pagination metadata returned by
     retrieveHomeData() is empty or reports no next page. Failures are logged
     and end the paging; they are not raised to the caller.

     :param checkin_date: check-in date forwarded to the API
     :param checkout_date: check-out date forwarded to the API
     :param homeInfoCollection: dict filled in place by retrieveHomeData()
     :param adults: number of adults forwarded to the API
     :param city: city descriptor turned into a query string by getQueryStr()
     :return: None
     """
     api = Api(randomize=False)
     query = getQueryStr(city)
     hasNextPage = True
     startOffset = 0
     sectionOffset = 0
     try:
         while hasNextPage:
             homes = api.get_homes(query=query, checkin=checkin_date, checkout=checkout_date,
                                   offset=startOffset, sectionOffset=sectionOffset, items_per_grid=50, adults=adults)
             pagination = self.retrieveHomeData(query, homes, homeInfoCollection)
             if not pagination:
                 hasNextPage = False
             else:
                 print("paging for next...")
                 hasNextPage = pagination["hasNextPage"]
                 startOffset = pagination.get("itemsOffset")
                 sectionOffset = pagination.get("sectionOffset")
             time.sleep(self._queryIntervalInSec)  # do not query too fast
     except Exception as e:
         # Catch Exception, not BaseException: SystemExit / KeyboardInterrupt
         # raised while sleeping (e.g. by a signal handler) must propagate so
         # the process can actually be stopped.
         Log.w(SearchService.TAG, "get_homes() failed, ",
               str(e) + "\r\nexception detail:" + traceback.format_exc())
示例#6
0
 def _sendText(self, receivers, title, content):
     """
     Send a plain-text mail without attachments.

     :param receivers: list of recipient addresses; the first entry is used
                       for the ``To`` header and the success log line
     :param title: subject line
     :param content: plain-text body, encoded as UTF-8
     :return: None; ``smtplib.SMTPException`` is logged instead of raised
     """
     # Build the message: body first, then subject, sender and recipient headers
     mail = MIMEText(content, 'plain', 'utf-8')
     mail['Subject'] = title
     mail['From'] = self.sender
     mail['To'] = receivers[0]
     try:
         if self.sendType == "ssl":
             client = smtplib.SMTP_SSL()
         else:
             client = smtplib.SMTP()
         # Connect to the server
         client.connect(self.mailHost, self.mailPort)
         client.ehlo()
         if self.sendType == "tls":
             client.starttls()
         # Authenticate, send, then close the session
         client.login(self.sender, self.passwd)
         client.sendmail(self.sender, receivers, mail.as_string())
         client.quit()
         Log.i(MailReporter.TAG,
               'Mail successfully sent to ' + receivers[0])
     except smtplib.SMTPException as e:
         Log.e(MailReporter.TAG, 'send() error', e)
示例#7
0
def main():
    """
    Command-line entry point of the fetcher.

    Recognised options:
      -h, --help                 print help and exit
      -v, --version              print the version and exit
      -c, --config-file FILE     configuration file (required to run)
      -l, --log-file FILE        log file (default: <parent>/all_logs.log)
      -s, --storage-file FILE    database file (default: <parent>/rooms.db)
      -d start|stop              daemonize, or stop a running daemon

    ``<parent>`` is ``$HOME/airbnb``, or ``./airbnb`` when ``$HOME`` is unset.
    """
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
    configPath = None
    parentPath = os.environ.get('HOME')
    if not parentPath:
        print('can not fetch $HOME')
        parentPath = "."
    parentPath += "/airbnb"
    if not os.path.exists(parentPath):
        os.mkdir(parentPath, mode=0o755)

    pidFilePath = parentPath + "/" + PID_FILE_NAME
    logPath = parentPath + "/all_logs.log"
    storagePath = parentPath + "/rooms.db"
    # Short options. -h and -v are flags (no ':'); the original spec 'h:...'
    # made -h demand an argument, so a bare -h failed to parse.
    shortopts = 'hc:d:l:s:v'
    # Long options
    longopts = [
        'help', 'storage-file=', 'log-file=', 'version', 'config-file='
    ]
    # Requested action: "start" runs in the background, "stop" ends a running instance
    operation = None

    try:
        optlist, _ = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError as e:
        # Report bad usage instead of dying with a traceback
        print(e)
        printHelp()
        sys.exit(2)

    # Walk the parsed (option, value) pairs
    for key, value in optlist:
        if key in ('-v', '--version'):
            print("version: airbnb fetcher 0.1")
            sys.exit(0)
        if key in ('-h', '--help'):
            printHelp()
            sys.exit(0)
        if key in ('-c', '--config-file'):
            configPath = value
        elif key in ('-l', '--log-file'):
            logPath = value
        elif key in ('-s', '--storage-file'):
            storagePath = value
        elif key == '-d':
            operation = value

    # Configure the logging module
    Log.config(logPath, echo=False)
    if operation == "start":
        daemon_start(pidFilePath)
    elif operation == "stop":
        daemon_stop(pidFilePath)
        sys.exit(0)

    if not configPath:
        Log.e(MAIN_TAG, "config path not provided")
        sys.exit(0)
    config = parseConfigFile(configPath)
    if not config:
        sys.exit(0)
    config.localStoragePath = storagePath
    service = SearchService(config)
    service.start()
示例#8
0
 def saveReservationInfo(self, roomId):
     """
     Insert a ``reservation_info`` row for ``roomId`` dated with getDateStr().

     :param roomId: id of the room to record
     :return: True when the insert affected at least one row, False otherwise
              (including when the insert raised)
     """
     inserted = False
     cursor = self._conn.cursor()
     try:
         row = (roomId, getDateStr())
         cursor.execute(
             "INSERT INTO reservation_info (room_id, date) VALUES(?, ?)",
             row)
         inserted = cursor.rowcount > 0
     except BaseException as e:
         Log.w(StorageService._TAG, "saveReservationInfo() failed", e)
     finally:
         cursor.close()
         self._conn.commit()
     return inserted
示例#9
0
 def isAvailable(self, roomId, dateStr):
     """
     Tell whether ``reservation_info`` holds a row for this room on this date.

     :param roomId: id of the room to look up
     :param dateStr: date such as 2019-05-30; produce it with
                     '{:%Y-%m-%d}'.format(datetime)
     :return: True when at least one matching row exists, False otherwise
              (including when the lookup raised)
     """
     found = False
     cursor = self._conn.cursor()
     try:
         cursor.execute(
             "SELECT * FROM reservation_info WHERE room_id=? AND date=?",
             (roomId, dateStr))
         matches = cursor.fetchall()
         found = bool(matches)
     except BaseException as e:
         Log.w(StorageService._TAG, "hasReservation() failed", e)
     finally:
         cursor.close()
         self._conn.commit()
     return found
示例#10
0
 def prepareDB(self, path):
     """
     Open the SQLite database at ``path`` and create the ``room_info`` and
     ``reservation_info`` tables when they do not exist yet.

     :param path: database location handed to sqlite3.connect()
     :return: the open connection; a failure while creating tables is logged
              and the connection is still returned
     """
     conn = sqlite3.connect(path)
     cursor = conn.cursor()
     try:
         # table for room
         roomTableSql = \
             "CREATE TABLE IF NOT EXISTS room_info (_id INTEGER PRIMARY KEY AUTOINCREMENT,\
                room_id INTEGER UNIQUE, person_capacity INTEGER, city VARCHAR(20), beds INTEGER, \
                localized_neighborhood VARCHAR(50), price INTEGER, pic VARCHAR(200), update_time INTEGER,\
                 query_str VARCHAR(100))"
         # table for reservation
         reservationTableSql = \
             "CREATE TABLE IF NOT EXISTS reservation_info (_id INTEGER PRIMARY KEY AUTOINCREMENT,\
                        room_id INTEGER, date VARCHAR(20) )"
         # Run in order; a failure on the first statement skips the second.
         for statement in (roomTableSql, reservationTableSql):
             cursor.execute(statement)
     except BaseException as e:
         Log.e(StorageService._TAG, "prepareDB() failed", e)
     finally:
         cursor.close()
         conn.commit()
     return conn
示例#11
0
 def start(self):
     """
     Run the service loop forever.

     Every ten seconds the clock is compared with the configured start hour
     and minute. On a match, all date ranges from prepareDate() are queried,
     the collected analysis is reported, and the loop pauses 70 seconds so the
     same minute cannot trigger a second run.

     :return: never returns normally
     """
     print("airbnb service start")
     while True:
         now = datetime.datetime.now()
         if now.hour != self._config.startHour or now.minute != self._config.startMinute:
             time.sleep(10)  # in second
             continue

         dates = self.prepareDate()
         self.analyzeCollection.clear()
         self.forExcel.clear()
         try:
             for item in dates:
                 self.doQuery(item[0], item[1], item[2])
         except Exception as e:
             # Catch Exception, not BaseException, so SystemExit and
             # KeyboardInterrupt can still terminate the service.
             Log.e(SearchService.TAG, "doQuery() failed, ", e)
         if len(self.analyzeCollection) > 0:
             self.reportForAnalyzeResult(self.analyzeCollection, self.forExcel)
         else:
             Log.w(SearchService.TAG, "no analyze result generated")
         # Sleep past the trigger minute before polling again
         time.sleep(70)
示例#12
0
def write_pid_file(pid_file, pid):
    """
    Take an exclusive lock on ``pid_file`` and store ``pid`` in it.

    On success the descriptor is left open, so the lock stays in place.

    :param pid_file: path of the pid file; created if missing
    :param pid: process id to write
    :return: 0 on success, -1 when the file cannot be opened or is already
             locked
    """
    import fcntl
    import stat

    try:
        lockFd = os.open(pid_file, os.O_RDWR | os.O_CREAT,
                         stat.S_IRUSR | stat.S_IWUSR)
    except OSError as e:
        Log.w(MAIN_TAG, "exception for write_pid_file()", e)
        return -1

    # Mark the descriptor close-on-exec
    fdFlags = fcntl.fcntl(lockFd, fcntl.F_GETFD)
    assert fdFlags != -1
    fdFlags |= fcntl.FD_CLOEXEC
    rc = fcntl.fcntl(lockFd, fcntl.F_SETFD, fdFlags)
    assert rc != -1

    # There is no platform independent way to implement fcntl(fd, F_SETLK, &fl)
    # via fcntl.fcntl. So use lockf instead
    try:
        fcntl.lockf(lockFd, fcntl.LOCK_EX | fcntl.LOCK_NB, 0, 0, os.SEEK_SET)
    except IOError:
        # Lock is taken: report the pid recorded in the file, if any
        existing = os.read(lockFd, 32)
        if not existing:
            Log.e(MAIN_TAG, 'already started')
        else:
            Log.e(MAIN_TAG, 'already started at pid %s' % Utils.to_str(existing))
        os.close(lockFd)
        return -1

    os.ftruncate(lockFd, 0)
    os.write(lockFd, Utils.to_bytes(str(pid)))
    return 0
示例#13
0
 def doQuery(self, checkin_date, checkout_dates, time_title):
     """
     Query every configured city group, store the rooms found and analyze them.

     For each group, all of its cities are queried with every check-out date
     and with 1 and 2 adults. If nothing comes back the whole group is tried
     again, up to three attempts. A non-empty result is saved, analyzed and
     appended to ``self.analyzeCollection`` / ``self.forExcel``.

     :param checkin_date: the date when u are going to check in
     :param checkout_dates: several check-out dates, tried one after another
                            to collect as many rooms as possible
     :param time_title: title forwarded to performAnalyze()
     :return: None; ordinary failures are logged, not raised
     """
     Log.d(SearchService.TAG, "===> querying on " + getDateStr())
     storageService = StorageService(self._config.localStoragePath)
     maxAttempts = 3
     try:
         for (cityName, cityList) in self._config.cityList.items():
             for attempt in range(1, maxAttempts + 1):
                 homeInfoCollection = dict()
                 for city in cityList:
                     for checkout_date in checkout_dates:
                         for adults in [1, 2]:
                             self.tryGetHomeInfo(checkin_date, checkout_date, homeInfoCollection, adults, city)
                 if homeInfoCollection:
                     roomInfos = list(homeInfoCollection.values())
                     storageService.saveOrUpdateRoomBatch(roomInfos)
                     analyze, excelRet = self.performAnalyze(roomInfos, cityName, storageService, time_title)
                     self.analyzeCollection.append(analyze)
                     self.forExcel.append(excelRet)
                     break
                 Log.w(SearchService.TAG, "no data for query={0}, will retry for {1}th time".format(cityName, attempt))
                 time.sleep(self._queryIntervalForFailInSec)  # take your time to do it
     except Exception as e:
         # Catch Exception, not BaseException: SystemExit / KeyboardInterrupt
         # must propagate (after the finally below) so the process can stop.
         Log.w(SearchService.TAG, "doQuery() failed, ", str(e) + "\r\nexception detail:" + traceback.format_exc())
     finally:
         storageService.close()
示例#14
0
 def saveOrUpdateRoomBatch(self, roomInfos):
     """
     Insert or refresh the ``room_info`` row of every room in the batch.

     A room whose ``room_id`` is already stored is updated in place; any other
     room is inserted. ``update_time`` is set from getNowTimeStamp().

     :param roomInfos: a list of RoomInfo
     :return: True when every statement affected at least one row; False when
              any did not or when an exception interrupted the batch
     """
     allSaved = True
     cursor = self._conn.cursor()
     try:
         for room in roomInfos:
             cursor.execute("SELECT room_id FROM room_info WHERE room_id=?",
                            (room.roomId, ))
             alreadyStored = bool(cursor.fetchall())
             if not alreadyStored:
                 # new
                 cursor.execute(
                     "INSERT INTO room_info (room_id, person_capacity, city, beds, localized_neighborhood, \
                         price, pic, update_time, query_str) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
                     (room.roomId, room.personCapacity,
                      room.city, room.beds,
                      room.neighbourhood, room.price, room.pic,
                      getNowTimeStamp(), room.query))
             else:
                 # exist
                 cursor.execute(
                     "UPDATE room_info SET person_capacity=?, city=?, beds=?,\
                          localized_neighborhood=?, price=?, pic=?, update_time=?, query_str=? WHERE room_id=?",
                     (room.personCapacity, room.city, room.beds,
                      room.neighbourhood, int(room.price), room.pic,
                      getNowTimeStamp(), room.query, room.roomId))
             # One statement that touched no row marks the whole batch as failed
             if cursor.rowcount <= 0:
                 allSaved = False
     except BaseException as e:
         allSaved = False
         Log.w(StorageService._TAG, "saveOrUpdateRoomBatch() failed", e)
     finally:
         cursor.close()
         self._conn.commit()
     return allSaved
示例#15
0
 def getRoomById(self, roomId):
     """
     Load the stored record of a single room.

     :param roomId: id of the room to look up
     :return: a RoomInfo built from the first matching row, or None when no
              row matches or the lookup raised
     """
     room = None
     cursor = self._conn.cursor()
     try:
         cursor.execute(
             "SELECT room_id, person_capacity, city, beds, localized_neighborhood, \
                     price, pic, query_str FROM room_info WHERE room_id=?",
             (roomId, ))
         rows = cursor.fetchall()
         if rows:
             # The eight selected columns map positionally onto RoomInfo's arguments
             room = RoomInfo(*rows[0])
     except BaseException as e:
         Log.w(StorageService._TAG, "getRoomById() failed", e)
     finally:
         cursor.close()
         self._conn.commit()
     return room
示例#16
0
    def HttpGetRequest(host, url, decode='utf-8'):
        """
        Perform an HTTP or HTTPS GET and return the response body as text.

        :param host: host name, optionally prefixed with ``http://`` or
                     ``https://``; without a prefix plain HTTP is used
        :param url: request target passed to the connection's request()
        :param decode: codec used to decode the body; undecodable bytes are ignored
        :return: the decoded, stripped body; None when the status is not OK
                 or when any exception occurs (the exception is logged)
        """
        try:
            if host.startswith("https://"):
                factory, host = httplib.HTTPSConnection, host[8:]
            elif host.startswith("http://"):
                factory, host = httplib.HTTPConnection, host[7:]
            else:
                factory = httplib.HTTPConnection

            with closing(factory(host)) as conn:
                conn.request(method="GET", url=url)
                reply = conn.getresponse()
                if reply.status == httplib.OK:
                    return reply.read().decode(decode, 'ignore').strip()
                return None
        except Exception as e:
            Log.d(TAG, e)
示例#17
0
    def retrieveHomeData(self, query, originReturn, homeInfoCollection):
        '''
        Pull the rooms out of one API response and return its pagination data.

        Every item of each "DoraExploreV3ListingsSection" section is parsed with
        RoomInfo.parseFromDict(); successfully parsed rooms are stored in
        ``homeInfoCollection`` keyed by room id.

        The returned pagination metadata looks like
         "paginationMetadata":{
                        "__typename":"DoraExploreV3PaginationMetadata",
                        "hasNextPage":true,
                        "itemsOffset":40,
                        "sectionOffset":3,
                        "hasPreviousPage":true,
                        "previousPageSectionOffset":0,
                        "previousPageItemsOffset":0,
                        "searchSessionId":"d91045aa-0d95-49ba-ad44-0d7bcf8b2813",
                        "pageLimit":20,
                        "totalCount":"124"
                    }

        :param query: query string, used for logging and handed to the parser
        :param originReturn: decoded API response
        :param homeInfoCollection: dict updated in place
        :return: the response's ``paginationMetadata`` value
        '''
        Log.d(SearchService.TAG, "analyzing " + query)
        explore = originReturn["data"]["dora"]["exploreV3"]
        sections = explore["filters"]["sections"]
        pagination = explore["metadata"]["paginationMetadata"]

        if not sections:
            Log.w(SearchService.TAG, "explore_tabs is empty")
            return pagination

        for section in sections:
            # Only listing sections carry rooms; skip empty entries and other types
            if not section or section["__typename"] != "DoraExploreV3ListingsSection":
                continue
            listings = section["items"]
            Log.d(SearchService.TAG, str(len(listings)) + " rooms for " + query)
            for listing in listings:
                parsed = RoomInfo.parseFromDict(listing, query)
                if parsed:
                    homeInfoCollection[parsed.roomId] = parsed
        return pagination
示例#18
0
def parseConfigFile(path):
    '''
    Load the service configuration from a JSON file.

    Example of a valid config file:

    {
        "start_hour":14,
        "start_minute":0,
        "counting_days":30,
        "room_limit":0,
        "smtp_host":"smtp.exmail.qq.com",
        "smtp_port":465,
        "send_type":"ssl",
        "email_receiver":"*****@*****.**",
        "email_sender":"*****@*****.**",
        "sender_passwd":"***",
        "query":[
            "上海,徐汇区,中国",
            "上海,虹口区,中国"
        ]
    }

    Each ``query`` entry is a comma-separated string. Entries are grouped by
    their first field, and entries with fewer than two fields are skipped.

    :param path: path of the JSON config file
    :return: a Config instance, or None when the file cannot be read, is not
             valid JSON, or does not contain a JSON object
    '''
    try:
        # Context manager so the handle is closed even when reading fails
        with open(path, "r", encoding='utf-8') as f:
            jsonStr = f.read()
        configDict = json.loads(jsonStr.replace("\\n", ""))
    except (OSError, ValueError) as e:
        # OSError: missing or unreadable file.
        # ValueError: covers json.decoder.JSONDecodeError and decoding errors.
        Log.w(MAIN_TAG, "parseConfigFile() fail, ", e)
        return None
    if not isinstance(configDict, dict):
        Log.w(MAIN_TAG, "parseConfigFile() fail, ", "top-level JSON value is not an object")
        return None

    query = configDict.get("query")
    if not query:
        # A missing "query" key used to crash the loop below with a TypeError
        Log.w(MAIN_TAG, "parseConfigFile() no query configured")
        query = []

    # Group the split query entries by their first field
    cities = dict()
    for item in query:
        splited = item.split(",")
        if len(splited) < 2:
            continue
        cities.setdefault(splited[0], list()).append(splited)

    return Config(configDict.get("start_hour"),
                  configDict.get("start_minute"),
                  cities,
                  configDict.get("email_receiver"),
                  configDict.get("email_sender"),
                  configDict.get("sender_passwd"),
                  configDict.get("room_limit"),
                  countingDays=configDict.get("counting_days"),
                  smtpHost=configDict.get("smtp_host"),
                  smtpPort=configDict.get("smtp_port"),
                  sendType=configDict.get("send_type"))
示例#19
0
def daemon_stop(pid_file):
    """
    Stop the daemon whose pid is stored in ``pid_file`` and remove that file.

    Sends SIGTERM, then polls for up to ten seconds until the process is gone.
    Returns normally when the daemon was stopped or was not running. Calls
    sys.exit(1) when the pid file is unreadable or malformed, when the signal
    cannot be delivered, or when the process does not exit in time.

    :param pid_file: path of the pid file
    :return: None
    """
    import errno
    try:
        with open(pid_file) as f:
            buf = f.read()
    except IOError as e:
        Log.e(MAIN_TAG, "daemon_stop() fail", e)
        if e.errno == errno.ENOENT:
            # always exit 0 if we are sure daemon is not running
            Log.e(MAIN_TAG, 'not running')
            return
        sys.exit(1)

    pid_text = Utils.to_str(buf).strip()
    if not pid_text:
        # An empty pid file means nothing is running; stop here instead of
        # falling through to int('') and crashing with ValueError.
        Log.e(MAIN_TAG, 'not running')
        return
    try:
        pid = int(pid_text)
    except ValueError:
        Log.e(MAIN_TAG, 'invalid pid file content: ' + pid_text)
        sys.exit(1)
    if pid <= 0:
        # os.kill() with a non-positive pid addresses process groups, so it
        # must never be reached with such a value.
        Log.e(MAIN_TAG, 'pid is not positive: ' + str(pid))
        sys.exit(1)

    try:
        os.kill(pid, signal.SIGTERM)
    except OSError as e:
        if e.errno == errno.ESRCH:
            Log.e(MAIN_TAG, 'not running')
            # always exit 0 if we are sure daemon is not running
            return
        Log.e(MAIN_TAG, "daemon_stop() fail", e)
        sys.exit(1)

    # sleep for maximum 10s
    for _ in range(0, 200):
        try:
            # query for the pid
            os.kill(pid, 0)
        except OSError as e:
            if e.errno == errno.ESRCH:
                break
        time.sleep(0.05)
    else:
        Log.e(MAIN_TAG, 'timed out when stopping pid ' + str(pid))
        sys.exit(1)
    Log.d(MAIN_TAG, 'stopped')
    os.unlink(pid_file)
示例#20
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Smoke test for the logging helper class: emits one message per log method.

import sys
# Add the parent directory (relative to the working directory) to the import
# path before importing LogHelper — presumably that is where it lives.
sys.path.append("../")
from LogHelper import Log

# Tag passed as the first argument of every log call below
TAG = "Test"

# One call each to the d, e, i and w methods of Log
Log.d(TAG, "Hello")
Log.e(TAG, "World!")
Log.i(TAG, "Hehe")
Log.w(TAG, "XXXX")