示例#1
0
def crawler_req(visa_type, place):
    """Fetch the earliest available date for one visa type / place pair.

    A session id obtained from ``session_op`` is sent as the ``sid`` cookie to
    ``g.CANCEL_URI`` and the returned page is parsed with ``get_date``.  On
    success the date is stored via ``g.assign`` under
    ``status_<visa_type>_<place>``.  When the response suggests the session
    is no longer usable, a replacement is requested through
    ``session_op.replace_session``.

    Nothing is returned.  Every outcome is logged, and unexpected errors are
    logged with their traceback instead of being raised.
    """
    status_key = "status_%s_%s" % (visa_type, place)

    def log_failure(reason):
        logger.warning("%s, %s, FAILED, %s" % (visa_type, place, reason))

    try:
        # prepare session
        sess = session_op.get_session(visa_type, place)
        if not sess:
            log_failure("No Session")
            return
        cookies = copy.deepcopy(g.COOKIES)
        cookies["sid"] = sess

        # send request; the timeout keeps a stalled connection from blocking
        # this worker forever (requests waits indefinitely by default)
        r = requests.get(g.CANCEL_URI,
                         headers=g.HEADERS,
                         cookies=cookies,
                         proxies=g.value("proxies", None),
                         timeout=30)
        if r.status_code != 200:
            log_failure("Session Expired")
            session_op.replace_session(visa_type, place, sess)
            return

        # parse HTML
        page = r.text
        date = get_date(page)
        if not date:
            log_failure("Session Expired")
            session_op.replace_session(visa_type, place, sess)
            return
        if date == (0, 0, 0):
            log_failure("Date Not Found")
            last_status = g.value(status_key, (0, 0, 0))
            if last_status != (0, 0, 0):
                # A date was known before: ask for a fresh session.
                session_op.replace_session(visa_type, place, sess)
            elif not check_alive(page):
                log_failure("Session Expired")
                session_op.replace_session(visa_type, place, sess)
            return

        logger.info("%s, %s, SUCCESS, %s" % (visa_type, place, date))
        g.assign(status_key, date)
    except Exception:
        # Keep the worker alive, but let KeyboardInterrupt/SystemExit through.
        logger.error(traceback.format_exc())
示例#2
0
    def check_crawler_server_connection():
        """ Check the connection of all the crawler server.
            Update the current crawler server in use.

            The node list is read from the file named by the ``crawler_path``
            global, one URL per line.  The first node that answers with HTTP
            200 and differs from the node currently in use becomes the new
            ``current_crawler_node``.  The ``checking_crawler_connection``
            global guards against overlapping checks and is always cleared
            on exit, including when an exception escapes.
        """
        if G.value('checking_crawler_connection', False):
            return

        G.assign('checking_crawler_connection', True)
        try:
            crawler_path = G.value('crawler_path', None)
            previous_crawler_node = G.value('current_crawler_node', '')

            if crawler_path is None or not os.path.exists(crawler_path):
                LOGGER.warning(
                    'GlobalVar crawler file path is not found or path not valid.')
                return

            with open(crawler_path) as f:
                crawler_server_lst = [line.strip() for line in f]

            for crawler_node in crawler_server_lst:
                if not crawler_node:
                    # A blank line can never be a reachable node.
                    continue
                try:
                    res = requests.get(crawler_node, timeout=5)
                except Exception as err:
                    # An unreachable node is expected here; try the next one.
                    LOGGER.debug('Crawler node %s is unreachable: %s',
                                 crawler_node, err)
                    continue
                if res.status_code == 200 and previous_crawler_node != crawler_node:
                    G.assign('current_crawler_node', crawler_node)
                    LOGGER.warning('Choose crawler node: %s', crawler_node)
                    return

            LOGGER.error('All crawler servers fail!')
        finally:
            # Without this, a failure above would leave the flag set and
            # disable every later check.
            G.assign('checking_crawler_connection', False)
示例#3
0
def crawler_req(visa_type, place, start_time, requests):
    """Refresh one session through the crawler node and record the date.

    Args:
        visa_type: visa type used to look up the session and build the
            status key.
        place: place used to look up the session and build the status key.
        start_time: value prepended to every log line of this call.
        requests: object whose ``get`` method performs the HTTP request
            (it shadows the ``requests`` module inside this function).

    The ``/refresh/`` endpoint of the configured ``crawler_node`` is called
    with the session id.  A JSON ``code`` greater than 0 is treated as an
    expired session; otherwise ``msg`` is split on ``-`` into a tuple of
    integers and stored under ``status_<visa_type>_<place>``.  Nothing is
    returned and no exception is raised to the caller.
    """
    def log_failure(reason):
        logger.warning("%s, %s, %s, FAILED, %s" %
                       (start_time, visa_type, place, reason))

    try:
        # prepare session
        sess = session_op.get_session(visa_type, place)
        if not sess:
            log_failure("No Session")
            return

        refresh_endpoint = g.value(
            "crawler_node", "") + "/refresh/?session=" + sess
        try:
            r = requests.get(refresh_endpoint, timeout=7,
                             proxies=g.value("proxies", None))
        except Exception:
            # Any request failure is reported as a timeout and triggers a
            # re-check of the crawler nodes.
            log_failure("Endpoint Timeout")
            check_crawler_node()
            return
        if r.status_code != 200:
            log_failure("Endpoint Inaccessible")
            check_crawler_node()
            return

        result = r.json()
        if result["code"] > 0:
            log_failure("Session Expired")
            session_op.replace_session(visa_type, place, sess)
            return

        date = tuple(map(int, result["msg"].split("-")))
        logger.info("%s, %s, %s, SUCCESS, %s" %
                    (start_time, visa_type, place, date))
        g.assign("status_%s_%s" % (visa_type, place), date)
    except Exception:
        # Keep the worker alive, but let KeyboardInterrupt/SystemExit through.
        logger.error(traceback.format_exc())
示例#4
0
def crawler(visa_type, places):
    """Query every place concurrently, then persist and merge the results.

    One thread per place runs ``crawler_req``; after all threads finish, the
    status stored for each place is appended to a per-day history file
    (``<visa_type>/<place>/<Y>/<m>/<d>``) and the collected snapshot is
    passed to ``merge``.

    Args:
        visa_type: visa type being crawled.
        places: iterable of place identifiers.
    """
    # Take a single timestamp so the date key, the start time and the
    # snapshot time can never disagree (e.g. when running across midnight).
    localtime = time.localtime()
    s = {'time': time.strftime('%Y/%m/%d %H:%M:%S', localtime)}
    cur = time.strftime('%Y/%m/%d', localtime)
    cur_time = time.strftime('%H:%M:%S', localtime)
    time_hm = time.strftime('%H:%M', localtime)

    req = g.value(visa_type + "_req", requests.Session())
    pool = []
    for place in places:
        t = threading.Thread(
            target=crawler_req,
            args=(visa_type, place, cur_time, req)
        )
        t.start()
        pool.append(t)
    for t in pool:
        t.join()

    # write to file
    for place in places:
        n = place + '-' + cur
        n2 = place + '2-' + cur
        y, m, d = g.value("status_%s_%s" % (visa_type, place), (0, 0, 0))
        s[n] = s[n2] = '{}/{}/{}'.format(y, m, d) if y > 0 else "/"
        if s[n] != '/':
            path = visa_type + '/' + n.replace('-', '/')
            os.makedirs(path.rsplit('/', 1)[0], exist_ok=True)
            # Close the history file deterministically after appending.
            with open(path, 'a+') as history:
                history.write(time_hm + ' ' + s[n] + '\n')
    merge('../visa/visa.json' if visa_type == "F" else '../visa/visa-%s.json' %
          visa_type.lower(), s, cur, visa_type)
示例#5
0
def check_crawler_node():
    """Probe the configured crawler nodes and select the first healthy one.

    The node list is read from the file named by the ``crawler_path`` global,
    one URL per line.  The first node answering with HTTP 200 is stored as
    ``crawler_node`` (only when it differs from the current one).  The
    ``crawler_checking`` global prevents overlapping checks and is always
    cleared on exit, including when an exception escapes.
    """
    if g.value("crawler_checking", False):
        return
    g.assign("crawler_checking", True)
    try:
        crawler_filepath = g.value("crawler_path", None)
        last_node = g.value("crawler_node", "")
        if not crawler_filepath:
            logger.warning("Crawler file not found")
            return
        with open(crawler_filepath, "r") as f:
            nodes = [line.strip() for line in f]
        for node in nodes:
            try:
                r = requests.get(node, timeout=5)
            except Exception:
                # An unreachable node is expected here; try the next one.
                continue
            if r.status_code == 200:
                if last_node != node:
                    g.assign("crawler_node", node)
                    logger.warning("Choose Crawler Node: " + node)
                return
        logger.error("All Crawler Nodes Failed")
    finally:
        # Without this, an error above (e.g. a missing node file) would
        # leave the flag set and disable every later check.
        g.assign("crawler_checking", False)
示例#6
0
 def get_session(self, visa_type, place):
     """Return the next session for (visa_type, place), cycling the pool.

     The rotation index is kept in the ``idx_<visa_type>_<place>`` global and
     advanced by one on every successful call.  Returns None when no pool is
     registered for the pair or the pool is empty.
     """
     pools = g.value("session", {})
     if not (visa_type in pools and place in pools[visa_type]):
         return None
     cursor_key = "idx_%s_%s" % (visa_type, place)
     cursor = g.value(cursor_key, 0)
     candidates = pools[visa_type][place]
     if not len(candidates):
         return None
     chosen = candidates[cursor % len(candidates)]
     logger.debug("session: " + chosen)
     g.assign(cursor_key, cursor + 1)
     return chosen
示例#7
0
def merge(fn, s, cur, visa_type):
    """Merge a fresh snapshot into the JSON status file and notify.

    Args:
        fn: path of the JSON status file; its previous content is also
            written to the sibling ``*-last.json`` file.
        s: mapping of keys to values from the current crawl.  Keys
            containing ``'2-'`` are combined with the stored value through
            ``min_date``; all other keys overwrite the stored value.
        cur: date string of the current crawl, added to the front of
            ``index`` when missing.
        visa_type: visa type; selects the lock flag and is passed to
            ``notify.py``.

    ``index`` is capped at 50 entries; keys whose suffix after the last
    ``-`` is a dropped index entry are removed.  The call is skipped when
    the ``merge_lock<visa_type>`` flag is already set.  After the file is
    written, ``notify.py`` is run with the new and the previous content.
    """
    lock_key = "merge_lock" + visa_type
    if g.value(lock_key, 0) == 1:
        return
    g.assign(lock_key, 1)
    try:
        if os.path.exists(fn):
            with open(fn) as f:
                orig = json.load(f)
        else:
            orig = {}
        with open(fn.replace('.json', '-last.json'), 'w') as f:
            f.write(json.dumps(orig, ensure_ascii=False))
        last = copy.deepcopy(orig)

        for k, v in s.items():
            orig[k] = min_date(orig.get(k, '/'), v) if '2-' in k else v

        if cur not in orig.get('index', []):
            orig['index'] = [cur] + orig.get('index', [])
        orig['index'], o = orig['index'][:50], orig['index'][50:]
        rmkeys = [i for i in orig if i.split('-')[-1] in o]
        for r in rmkeys:
            orig.pop(r)

        with open(fn, 'w') as f:
            f.write(json.dumps(orig, ensure_ascii=False))
    finally:
        # Always release the flag; otherwise a single failure (e.g. a
        # corrupt JSON file) would make every later merge return early.
        g.assign(lock_key, 0)

    subprocess.check_call([
        'python3', 'notify.py', '--type', visa_type, '--js',
        json.dumps(orig, ensure_ascii=False), '--last_js',
        json.dumps(last, ensure_ascii=False)
    ])
示例#8
0
def set_fetching_interval(visa_type: str,
                          location: str,
                          sys: str,
                          interval_sec: int,
                          first_run: bool = True):
    """ Execute the fetching function every `interval` seconds
        https://stackoverflow.com/questions/2697039/python-equivalent-of-setinterval

        A ``threading.Timer`` is started whose callback first calls this
        function again (with ``first_run=False``) and then fetches the visa
        status.  The delay is 5 seconds instead of ``interval_sec`` when
        ``sys`` is ``'cgi'``, ``visa_type`` is ``"F"``, the current minute is
        47 or 48 and the embassy is ``DOMESTIC`` with a code other than
        ``hk``/``hkr``/``tp``.  With ``first_run`` true, one fetch is done
        immediately.  Returns the started timer.
    """
    def fetch_once():
        VisaFetcher.fetch_visa_status(
            visa_type, location,
            G.value(f'{visa_type}_requests_Session', requests.Session()))

    def reschedule_and_fetch():
        # Schedule the next run before doing this run's work.
        set_fetching_interval(visa_type,
                              location,
                              sys,
                              interval_sec,
                              first_run=False)
        fetch_once()

    emb = G.USEmbassy.get_embassy_by_loc(location)
    now_minute = datetime.now().minute
    use_short_delay = (sys == 'cgi' and visa_type == "F"
                       and 47 <= now_minute < 49
                       and emb.region == 'DOMESTIC'
                       and emb.code not in ['hk', 'hkr', 'tp'])
    interval = 5 if use_short_delay else interval_sec

    timer = threading.Timer(interval, reschedule_and_fetch)
    timer.start()

    if first_run:  # execute fetching without waiting for the first time.
        fetch_once()
    return timer
示例#9
0
def start_threads():
    """ Start the threads for fetching data from crawler server.

        Checks the crawler server connection, starts the session-update
        consumer thread, then sets up one fetching interval per
        (visa type, location) of the target system and joins the returned
        threads.  For the ``cgi`` system, locations ending in ``'u'`` are
        only set up for visa type ``F``.
    """
    LOGGER.info('Setting up crawler node...')
    VisaFetcher.check_crawler_server_connection()

    LOGGER.info('Starting threads...')
    LOGGER.info('Setting up session update consumer...')
    consumer = threading.Thread(
        target=VisaFetcher.consume_new_session_request)
    consumer.start()

    LOGGER.info('Setting interval for fetching visa status...')
    sys = G.value('target_system', None)
    timers = []
    for visa_type, interval_sec in G.FETCH_TIME_INTERVAL[sys].items():
        for location in G.SYS_LOCATION[sys]:
            excluded = (location[-1] == 'u' and sys == 'cgi'
                        and visa_type != 'F')
            if not excluded:
                timers.append(
                    set_fetching_interval(visa_type, location, sys,
                                          interval_sec))
    LOGGER.info('Fetching threads start, %s threads in total', len(timers))

    for timer in timers:
        timer.join()
示例#10
0
def crawler(visa_type, places):
    """Query every place concurrently, then persist, merge and notify.

    Writes ``1`` to ``<visa_type>_state`` at the start and ``0`` after the
    merge.  One thread per place runs ``crawler_req``; the status stored for
    each place is appended to a per-day history file
    (``<visa_type>/<place>/<Y>/<m>/<d>``), the snapshot is passed to
    ``merge`` and ``notify.py`` is launched in the background.

    Args:
        visa_type: visa type being crawled.
        places: iterable of place identifiers.
    """
    state_path = visa_type + '_state'
    with open(state_path, 'w') as state:
        state.write('1')

    # Take a single timestamp so the date key and the snapshot time can
    # never disagree (e.g. when running across midnight).
    localtime = time.localtime()
    s = {'time': time.strftime('%Y/%m/%d %H:%M:%S', localtime)}
    cur = time.strftime('%Y/%m/%d', localtime)
    time_hm = time.strftime('%H:%M', localtime)

    pool = []
    for place in places:
        t = threading.Thread(
            target=crawler_req,
            args=(visa_type, place)
        )
        t.start()
        pool.append(t)
    for t in pool:
        t.join()

    # write to file
    for place in places:
        n = place + '-' + cur
        n2 = place + '2-' + cur
        y, m, d = g.value("status_%s_%s" % (visa_type, place), (0, 0, 0))
        s[n] = s[n2] = '{}/{}/{}'.format(y, m, d) if y > 0 else "/"
        if s[n] != '/':
            path = visa_type + '/' + n.replace('-', '/')
            os.makedirs(path.rsplit('/', 1)[0], exist_ok=True)
            # Close the history file deterministically after appending.
            with open(path, 'a+') as history:
                history.write(time_hm + ' ' + s[n] + '\n')
    merge('../visa/visa.json' if visa_type == "F" else '../visa/visa-%s.json' % visa_type.lower(), s, cur)

    with open(state_path, 'w') as state:
        state.write('0')
    # NOTE(review): visa_type is interpolated into a shell command line;
    # this is only safe while callers pass trusted values.
    os.system('python3 notify.py --type ' + visa_type + ' &')
示例#11
0
 def get_session_count(self, visa_type, place):
     """Return how many sessions are pooled for (visa_type, place).

     Missing entries for the visa type or the place are created (empty) in
     the mapping returned by ``g.value("session", {})`` as a side effect.
     """
     pools = g.value("session", {})
     by_place = pools.setdefault(visa_type, {})
     return len(by_place.setdefault(place, []))
示例#12
0
def get_session_count(visa_type, place):
    """Return the number of sessions stored for (visa_type, place).

    Creates an empty entry for the visa type and/or the place in the
    ``session`` mapping when either is missing.
    """
    registry = g.value("session", {})
    # setdefault inserts the empty container only when the key is absent.
    sessions_here = registry.setdefault(visa_type, {}).setdefault(place, [])
    return len(sessions_here)
def add_session():
    """Worker loop that logs in and adds or replaces pooled sessions.

    Each item taken from ``replace_items`` is a ``(visa_type, place,
    replace)`` triple.  When ``replace`` is truthy it must still be present
    in the pool, otherwise the item is skipped.  A new session id is
    obtained with ``login`` and validated with ``visa_select``; it then
    replaces ``replace`` in the pool, or is appended when ``replace`` is
    falsy, and the whole session mapping is written to the session file.
    Errors are logged with their traceback and the loop continues.
    """
    while True:
        visa_type, place, replace = replace_items.get()
        # check if replaced
        if replace:
            pool = g.value("session", {}).setdefault(
                visa_type, {}).setdefault(place, [])
            if replace not in pool:
                continue
            logger.info("Update session " + replace)
        try:
            cracker = g.value("cracker", None)
            username, _passwd, sid = login(cracker, place)
            # The password is deliberately left out: credentials must not
            # end up in console output or captured logs.
            print(f'Login Info: Username: {username}, Session ID: {sid}')
            date = visa_select(visa_type, place, sid)
            if not date:
                print('date is None from `visa_select`')
                continue
            localtime = time.asctime(time.localtime(time.time()))
            print(
                f'[ {localtime} ] Earliest appointment for {visa_type} visa at {place}: {date}'
            )
            session_list = g.value("session", {})
            pool = session_list.setdefault(visa_type, {}).setdefault(place, [])
            if replace:
                pool[pool.index(replace)] = sid
            else:
                pool.append(sid)
            session_file = g.value("session_file", "session.json")
            with open(session_file, "w") as f:
                f.write(json.dumps(session_list, ensure_ascii=False))
        except Exception:
            # Keep the worker alive, but let KeyboardInterrupt/SystemExit
            # through.
            logger.error(traceback.format_exc())
示例#14
0
 def function_wrapper():
     """Call ``set_fetching_interval`` again, then fetch the visa status.

     Uses ``visa_type``, ``location``, ``sys`` and ``interval_sec`` from the
     enclosing scope.
     """
     # Set up the next run (without its immediate fetch) before doing this
     # run's work.
     set_fetching_interval(visa_type, location, sys, interval_sec,
                           first_run=False)
     http_session = G.value(f'{visa_type}_requests_Session',
                            requests.Session())
     VisaFetcher.fetch_visa_status(visa_type, location, http_session)
示例#15
0
def add_session():
    """Worker loop that registers sessions through the crawler node.

    Each item taken from ``replace_items`` is a ``(visa_type, place,
    replace)`` triple.  When ``replace`` is truthy it must still be present
    in the pool, otherwise the item is skipped.  The ``/register/`` endpoint
    of the configured ``crawler_node`` is called; the ``session`` field of
    its JSON answer replaces ``replace`` in the pool, or is appended when
    ``replace`` is falsy, and the whole session mapping is written to the
    session file.  Errors (including an unparsable ``msg``) are logged with
    their traceback and the loop continues.
    """
    while True:
        visa_type, place, replace = replace_items.get()
        # check if replaced
        if replace:
            pool = g.value("session", {}).setdefault(
                visa_type, {}).setdefault(place, [])
            if replace not in pool:
                continue
            logger.info("Update session " + replace)
        try:
            endpoint = g.value(
                "crawler_node",
                "") + "/register/?type=%s&place=%s" % (visa_type, place)
            r = requests.get(endpoint,
                             timeout=40,
                             proxies=g.value("proxies", None))
            result = r.json()
            date = tuple(map(int, result["msg"].split("-")))
            sid = result["session"]
            if not date:
                continue
            session_list = g.value("session", {})
            pool = session_list.setdefault(visa_type, {}).setdefault(place, [])
            if replace:
                pool[pool.index(replace)] = sid
            else:
                pool.append(sid)
            session_file = g.value("session_file", "session.json")
            with open(session_file, "w") as f:
                f.write(json.dumps(session_list, ensure_ascii=False))
        except Exception:
            # Keep the worker alive, but let KeyboardInterrupt/SystemExit
            # through.
            logger.error(traceback.format_exc())
示例#16
0
 def init_cache(self):
     """Load the cached session mapping from disk into the global store.

     Reads the file named by the ``session_file`` global (default
     ``session.json``) when it exists and stores the parsed JSON under the
     ``session`` global.  A missing or unreadable cache results in an empty
     mapping.
     """
     session_file = g.value("session_file", "session.json")
     session = {}
     if os.path.exists(session_file):
         with open(session_file, "r") as f:
             try:
                 session = json.load(f)
             except Exception:
                 # Best effort: a corrupt cache is ignored and the pool
                 # starts empty.  KeyboardInterrupt/SystemExit still
                 # propagate.
                 pass
     g.assign("session", session)
示例#17
0
def crawler_req_ais(visa_type, code, places, start_time, requests):
    """Refresh one AIS session through the crawler node and record dates.

    Args:
        visa_type: visa type used to look up the session and build status
            keys.
        code: key used to look up the session; also sent as ``code`` to the
            endpoint.
        places: container of places whose dates should be recorded.
        start_time: value prepended to every log line of this call.
        requests: object whose ``get`` method performs the HTTP request
            (it shadows the ``requests`` module inside this function).

    The ``/ais/refresh/`` endpoint of the configured ``crawler_node`` is
    called.  A JSON ``code`` greater than 0 is treated as an expired
    session.  Otherwise the stored session is swapped for the returned one
    and every ``(place, date)`` pair in ``msg`` whose place is in ``places``
    is stored under ``status_<visa_type>_<place>``.  Nothing is returned and
    no exception is raised to the caller.
    """
    def log_failure(reason):
        logger.warning("%s, %s, %s, FAILED, %s" %
                       (start_time, visa_type, code, reason))

    try:
        # prepare session
        session_info = session_op.get_session(visa_type, code)
        # Unpack only a truthy result: unpacking None would raise TypeError
        # and hide the "No Session" case behind a traceback.
        sess, schedule_id = session_info if session_info else (None, None)
        if not sess:
            log_failure("No Session")
            return

        refresh_endpoint = g.value(
            "crawler_node", "") + "/ais/refresh/?code=%s&id=%s&session=%s" % (
                code, schedule_id, sess)
        try:
            r = requests.get(refresh_endpoint,
                             timeout=7,
                             proxies=g.value("proxies", None))
        except Exception:
            # Any request failure is reported as a timeout and triggers a
            # re-check of the crawler nodes.
            log_failure("Endpoint Timeout")
            check_crawler_node()
            return
        if r.status_code != 200:
            log_failure("Endpoint Inaccessible")
            check_crawler_node()
            return

        result = r.json()
        if result["code"] > 0:
            log_failure("Session Expired")
            session_op.replace_session(visa_type, code, sess)
            return

        date_list = result["msg"]
        new_sess = result["session"]
        session_op.replace_session_immediate(visa_type, code, sess, new_sess)
        for place, date in date_list:
            if place not in places:
                continue
            logger.info("%s, %s, %s, %s, SUCCESS, %s" %
                        (start_time, visa_type, code, place, date))
            g.assign("status_%s_%s" % (visa_type, place), date)
    except Exception:
        # Keep the worker alive, but let KeyboardInterrupt/SystemExit through.
        logger.error(traceback.format_exc())
示例#18
0
    def replace_session_immediate(self, visa_type, place, sess, new_sess):
        """Swap ``sess`` for ``new_sess`` in the pool and save the pool.

        A place containing ``-`` is handled as an AIS entry: pool items are
        indexable and the session id is their first element, which is
        overwritten in place.  Otherwise pool items are the session ids
        themselves.  Does nothing when ``sess`` is not in the pool; missing
        pool entries are created empty.
        """
        is_ais = "-" in place
        session_list = g.value("session", {})
        pool = session_list.setdefault(visa_type, {}).setdefault(place, [])

        # Session ids in pool order, so positions line up with the pool.
        known_ids = [entry[0] for entry in pool] if is_ais else pool
        if sess not in known_ids:
            return

        position = known_ids.index(sess)
        if is_ais:
            pool[position][0] = new_sess
        else:
            pool[position] = new_sess

        session_file = g.value("session_file", "session.json")
        serialized = json.dumps(session_list, ensure_ascii=False)
        with open(session_file, "w") as f:
            f.write(serialized)
示例#19
0
def set_session_pool_size(visa_type, place, size):
    """Grow or shrink the session pool of (visa_type, place) to ``size``.

    A pool that is too small is padded with ``placeholder_`` entries
    followed by 15 random lowercase letters; a pool that is too large is
    cut down to its first ``size`` entries.
    """
    session_list = g.value("session", {})
    by_place = session_list.setdefault(visa_type, {})
    pool = by_place.setdefault(place, [])
    missing = size - len(pool)
    if missing > 0:
        base = ord('a')
        for _ in range(missing):
            letters = [chr(np.random.randint(26) + base) for _ in range(15)]
            pool.append("placeholder_" + "".join(letters))
    elif missing < 0:
        by_place[place] = pool[:size]
    def __init__(self) -> None:
        """Build the in-memory session pool, seeding it from the disk cache.

        Reads the ``target_system`` and ``session_file`` globals.  When the
        session file exists and contains a JSON object, every stored entry
        is turned into a ``Session`` instance.  The pool is then passed
        through ``inititae_session_cache`` and written back with ``save``.

        Raises:
            ValueError: if the ``target_system`` global is not set.
        """
        # visa type -> location -> list of sessions
        self.session = defaultdict(lambda: defaultdict(list))
        # visa type -> location -> index of the session currently in use
        self.session_idx = defaultdict(lambda: defaultdict(int))
        now = datetime.now()
        # visa type -> location -> datetime, defaulting to construction time
        # NOTE(review): presumably when a session becomes usable - confirm
        self.session_avail = defaultdict(lambda: defaultdict(lambda: now))
        self.logger = logging.getLogger(G.GlobalVar.var_dct['log_name'])

        # read cached session pool (if any)
        sys = G.value('target_system', None)
        session_file = G.value('session_file', 'session.json')
        if sys is None:
            self.logger.error('Not target system given')
            raise ValueError('The target system is not set!')

        if os.path.exists(session_file):
            with open(session_file) as f:
                try:
                    old_session = json.load(f)
                    # Raised here only to reach the TypeError handler below.
                    if not isinstance(old_session, dict):
                        raise TypeError()
                except json.decoder.JSONDecodeError:
                    self.logger.debug(
                        'session.json is empty or borken written')
                except TypeError:
                    self.logger.debug(
                        'session.json doesn\'t store a dictionary.')
                else:
                    # The cache parsed as a dict: rebuild Session objects.
                    for visa_type, loc_sess_lst in old_session.items():
                        for loc, sess_lst in loc_sess_lst.items():
                            self.session[visa_type][loc] = [
                                Session(**session) for session in sess_lst
                            ]
                            self.session_idx[visa_type][
                                loc] = 0  # set currently used index to 0
        # NOTE(review): inititae_session_cache is defined elsewhere; it
        # returns the (session, session_idx) pair that is kept from here on.
        self.session, self.session_idx = self.inititae_session_cache(
            sys, self.session, self.session_idx)
        self.save()
示例#21
0
def add_session():
    """Worker loop that logs in and adds or replaces pooled sessions.

    Each item taken from ``replace_items`` is a ``(visa_type, place,
    replace)`` triple.  When ``replace`` is truthy it must still be present
    in the pool, otherwise the item is skipped.  A new session id is
    obtained with ``login`` and validated with ``visa_select``; it then
    replaces ``replace`` in the pool, or is appended when ``replace`` is
    falsy, and the whole session mapping is written to the session file.
    Errors are logged with their traceback and the loop continues.
    """
    while True:
        visa_type, place, replace = replace_items.get()
        # check if replaced
        if replace:
            pool = g.value("session", {}).setdefault(
                visa_type, {}).setdefault(place, [])
            if replace not in pool:
                continue
            logger.info("Update session " + replace)
        try:
            cracker = g.value("cracker", None)
            _username, _passwd, sid = login(cracker, place)
            date = visa_select(visa_type, place, sid)
            if not date:
                continue
            session_list = g.value("session", {})
            pool = session_list.setdefault(visa_type, {}).setdefault(place, [])
            if replace:
                pool[pool.index(replace)] = sid
            else:
                pool.append(sid)
            session_file = g.value("session_file", "session.json")
            with open(session_file, "w") as f:
                f.write(json.dumps(session_list, ensure_ascii=False))
        except Exception:
            # Keep the worker alive, but let KeyboardInterrupt/SystemExit
            # through.
            logger.error(traceback.format_exc())
    def save(self):
        """ Write the current session into disk.

            Every session is converted with its ``to_json`` method and the
            nested visa type -> location -> list mapping is dumped to the
            file named by the ``session_file`` global, while holding
            ``G.LOCK``.
        """
        target = G.value('session_file', 'session.json')
        with G.LOCK:
            serializable = {}
            for visa_type, loc_sess_dct in self.session.items():
                for loc, sess_lst in loc_sess_dct.items():
                    # The visa type appears only once it has a location.
                    serializable.setdefault(visa_type, {})[loc] = [
                        sess.to_json() for sess in sess_lst
                    ]

            with open(target, 'w') as f:
                json.dump(serializable, f, indent=4, ensure_ascii=False)

            self.logger.debug('Write session cache into disk: %s', target)
示例#23
0
 def set_session_pool_size(self, visa_type, place, size, ais=False):
     """Grow or shrink the session pool of (visa_type, place) to ``size``.

     A pool that is too small is padded with placeholders named
     ``placeholder_`` plus 15 random lowercase letters; with ``ais`` true
     each placeholder is the pair ``[name, "114514"]``.  A pool that is too
     large is cut down to its first ``size`` entries.
     """
     session_list = g.value("session", {})
     by_place = session_list.setdefault(visa_type, {})
     pool = by_place.setdefault(place, [])
     missing = size - len(pool)
     if missing > 0:
         base = ord('a')
         for _ in range(missing):
             letters = [chr(np.random.randint(26) + base) for _ in range(15)]
             name = "placeholder_" + "".join(letters)
             pool.append([name, "114514"] if ais else name)
     elif missing < 0:
         by_place[place] = pool[:size]
示例#24
0
def add_session():
    """Worker loop that registers sessions (regular or AIS) via the node.

    Each item taken from ``replace_items`` is a ``(visa_type, place,
    replace)`` triple; a place containing ``-`` is handled as AIS.  When
    ``replace`` is truthy it must still be present in the pool, otherwise
    the item is skipped.  The matching register endpoint of the configured
    ``crawler_node`` is called.  AIS pool entries are ``[session, id]``
    pairs and are accepted when ``msg`` is non-empty; regular entries are
    the session id and require ``msg`` to parse as dash-separated integers.
    The new entry replaces ``replace`` in the pool, or is appended when
    ``replace`` is falsy, and the session mapping is written to the session
    file.  Errors are logged with their traceback and the loop continues.
    """
    # Local import: only this function needs it.
    from urllib.parse import urlencode

    while True:
        visa_type, place, replace = replace_items.get()
        ais = "-" in place
        # check if replaced
        if replace:
            pool = g.value("session", {}).setdefault(
                visa_type, {}).setdefault(place, [])
            known = [x[0] for x in pool] if ais else pool
            if replace not in known:
                continue
            logger.info("Update session " + replace)
        try:
            node = g.value("crawler_node", "")
            # Percent-encode the query values: e-mail addresses and
            # passwords may contain '&', '+', '#', ... which would otherwise
            # corrupt the query string.
            if ais:
                query = urlencode({
                    "code": place,
                    "email": g.value("ais_email_" + visa_type, None),
                    "pswd": g.value("ais_pswd_" + visa_type, None),
                })
                endpoint = node + "/ais/register/?" + query
            else:
                query = urlencode({"type": visa_type, "place": place})
                endpoint = node + "/register/?" + query
            r = requests.get(endpoint,
                             timeout=40,
                             proxies=g.value("proxies", None))
            result = r.json()
            if ais:
                schedule_id = result["id"]
                date = 1 if len(result["msg"]) > 0 else None
                entry = [result["session"], schedule_id]
            else:
                date = tuple(map(int, result["msg"].split("-")))
                entry = result["session"]
            if not date:
                continue
            session_list = g.value("session", {})
            pool = session_list.setdefault(visa_type, {}).setdefault(place, [])
            if replace:
                known = [x[0] for x in pool] if ais else pool
                pool[known.index(replace)] = entry
            else:
                pool.append(entry)
            session_file = g.value("session_file", "session.json")
            with open(session_file, "w") as f:
                f.write(json.dumps(session_list, ensure_ascii=False))
        except Exception:
            # Keep the worker alive, but let KeyboardInterrupt/SystemExit
            # through.
            logger.error(traceback.format_exc())
# Element ids of the Visualforce view-state inputs echoed back with every form
# post of the appointment wizard.
_VIEW_STATE_IDS = (
    "com.salesforce.visualforce.ViewState",
    "com.salesforce.visualforce.ViewStateVersion",
    "com.salesforce.visualforce.ViewStateMAC",
    "com.salesforce.visualforce.ViewStateCSRF",
)


def _view_state_fields(soup):
    """Collect the ``value`` of each view-state element, keyed by its id."""
    return {
        field_id: soup.find(id=field_id).get("value")
        for field_id in _VIEW_STATE_IDS
    }


def visa_select(visa_type, place, sid):
    """Drive the cgifederal appointment wizard and return the date it reports.

    The wizard is replayed step by step (visa kind, post, visa category,
    visa code) with the session cookie ``sid``; finally the ``updatedata``
    page is fetched and handed to ``get_date``.

    Args:
        visa_type: visa category key, one of the keys of ``category2id``
            below ("E" or "F").
        place: post name, one of "Melbourne", "Perth" or "Sydney".
        sid: value stored in the ``sid`` cookie for every request.

    Returns:
        Whatever ``get_date`` extracts from the final page, or ``None`` when
        any request answers with a non-200 status or when ``place`` /
        ``visa_type`` is not supported.

    Side effects:
        Logs the result and, when the date is truthy, stores it in ``g``
        under ``status_<visa_type>_<place>``.
    """
    proxies = g.value("proxies", None)
    cookies = copy.deepcopy(g.COOKIES)
    cookies["sid"] = sid

    # NOTE: place ids are positional, not unique identifiers: the suffix is
    # simply the position of the post in the list shown by the site.
    place2id = {
        "Melbourne": "j_id0:SiteTemplate:j_id112:j_id165:0",
        "Perth": "j_id0:SiteTemplate:j_id112:j_id165:1",
        "Sydney": "j_id0:SiteTemplate:j_id112:j_id165:2",
    }
    # Position of the visa category radio button, per post.
    category2id = {
        "E": {"Melbourne": 5, "Perth": 0, "Sydney": 3},
        "F": {"Melbourne": 1, "Perth": 0, "Sydney": 0},
    }
    # Index into the list of 'selectedVisaClass' values, per post.
    type2id = {
        "Sydney": {"F": 0, "E": -2},
        "Perth": {"F": 3, "E": -2},
        "Melbourne": {"F": 0, "E": 0},
    }

    # Validate before touching the network: previously an unknown place hit a
    # KeyError half-way through the wizard and the guard below was unreachable.
    if place not in place2id:
        print('visa_select 8: unsupported place', place)
        return None
    if visa_type not in category2id:
        print('visa_select 8: unsupported visa type', visa_type)
        return None

    # select immigrant/nonimmigrant visa
    select_visa_type_uri = "https://cgifederal.secure.force.com/selectvisatype"
    r = requests.get(select_visa_type_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        print('visa_select 1: bad status code', r.status_code)
        return None
    soup = bs(r.text, "html.parser")
    data = {
        "j_id0:SiteTemplate:theForm": "j_id0:SiteTemplate:theForm",
        "j_id0:SiteTemplate:theForm:ttip": "Nonimmigrant Visa",
        # Submit button label of the English site.
        "j_id0:SiteTemplate:theForm:j_id176": "Continue",
    }
    data.update(_view_state_fields(soup))
    r = requests.post(select_visa_type_uri,
                      data=data,
                      cookies=cookies,
                      proxies=proxies)
    if r.status_code != 200:
        print('visa_select 2: bad status code', r.status_code)
        return None

    # select place
    select_post_uri = "https://cgifederal.secure.force.com/selectpost"
    r = requests.get(select_post_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        print('visa_select 3: bad status code', r.status_code)
        return None
    soup = bs(r.text, "html.parser")
    state = _view_state_fields(soup)
    contact_id = soup.find(
        id="j_id0:SiteTemplate:j_id112:contactId").get("value")
    place_code = soup.find(id=place2id[place]).get("value")
    data = {
        "j_id0:SiteTemplate:j_id112": "j_id0:SiteTemplate:j_id112",
        "j_id0:SiteTemplate:j_id112:j_id165": place_code,
        "j_id0:SiteTemplate:j_id112:j_id169": "Continue",
        "j_id0:SiteTemplate:j_id112:contactId": contact_id,
    }
    data.update(state)
    r = requests.post(select_post_uri,
                      data=data,
                      cookies=cookies,
                      proxies=proxies)
    if r.status_code != 200:
        print('visa_select 4: bad status code', r.status_code)
        return None

    # select visa category
    select_visa_category_uri = "https://cgifederal.secure.force.com/selectvisacategory"
    r = requests.get(select_visa_category_uri,
                     cookies=cookies,
                     proxies=proxies)
    if r.status_code != 200:
        print('visa_select 5: bad status code', r.status_code)
        return None
    soup = bs(r.text, "html.parser")
    state = _view_state_fields(soup)
    contact_id = soup.find(
        id="j_id0:SiteTemplate:j_id109:contactId").get("value")
    prefix = "j_id0:SiteTemplate:j_id109:j_id162:"
    category_code = soup.find(
        id=prefix + str(category2id[visa_type][place])).get("value")
    data = {
        "j_id0:SiteTemplate:j_id109": "j_id0:SiteTemplate:j_id109",
        "j_id0:SiteTemplate:j_id109:j_id162": category_code,
        "j_id0:SiteTemplate:j_id109:j_id166": "Continue",
        "j_id0:SiteTemplate:j_id109:contactId": contact_id,
    }
    data.update(state)
    r = requests.post(select_visa_category_uri,
                      data=data,
                      cookies=cookies,
                      proxies=proxies)
    if r.status_code != 200:
        print('visa_select 6: bad status code', r.status_code)
        return None

    # select visa type
    select_visa_code_uri = "https://cgifederal.secure.force.com/selectvisacode"
    r = requests.get(select_visa_code_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        print('visa_select 7: bad status code', r.status_code)
        return None
    soup = bs(r.text, "html.parser")
    state = _view_state_fields(soup)
    type_codes = [
        x.get("value") for x in soup.find_all("input")
        if x.get("name") == "selectedVisaClass"
    ]
    type_code = type_codes[type2id[place][visa_type]]
    data = {
        "j_id0:SiteTemplate:theForm": "j_id0:SiteTemplate:theForm",
        "j_id0:SiteTemplate:theForm:j_id178": "Continue",
        "selectedVisaClass": type_code,
    }
    data.update(state)
    r = requests.post(select_visa_code_uri,
                      data=data,
                      cookies=cookies,
                      proxies=proxies)
    if r.status_code != 200:
        print('visa_select 9: bad status code', r.status_code)
        return None

    # update data
    update_data_uri = "https://cgifederal.secure.force.com/updatedata"
    r = requests.get(update_data_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        print('visa_select 10: bad status code', r.status_code)
        return None
    date = get_date(r.text)
    logger.info("%s, %s, SUCCESS_N, %s" % (visa_type, place, date))
    if date:
        g.assign("status_%s_%s" % (visa_type, place), date)
    return date
示例#26
0
def visa_select(visa_type, place, sid):
    """Drive the cgifederal appointment wizard and return the date it reports.

    The wizard is replayed step by step (visa kind, post, visa category,
    visa code) with the session cookie ``sid``; finally the ``updatedata``
    page is fetched and handed to ``get_date``.

    Args:
        visa_type: visa category key ("B", "F", "O", "H" or "L").
        place: post name as used by the lookup tables below; for "香港" the
            post-selection step is skipped.
        sid: value stored in the ``sid`` cookie for every request.

    Returns:
        Whatever ``get_date`` extracts from the final page, or ``None`` when
        any request answers with a non-200 status.

    Raises:
        KeyError: if ``visa_type`` / ``place`` is missing from the tables.

    Side effects:
        Logs the result and, when the date is truthy, stores it in ``g``
        under ``status_<visa_type>_<place>``.
    """
    proxies = g.value("proxies", None)
    cookies = copy.deepcopy(g.COOKIES)
    cookies["sid"] = sid

    # select immigrant/nonimmigrant visa
    select_visa_type_uri = "https://cgifederal.secure.force.com/selectvisatype"
    r = requests.get(select_visa_type_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        return None
    soup = bs(r.text, "html.parser")
    view_state = soup.find(id="com.salesforce.visualforce.ViewState").get("value")
    view_state_version = soup.find(id="com.salesforce.visualforce.ViewStateVersion").get("value")
    view_state_mac = soup.find(id="com.salesforce.visualforce.ViewStateMAC").get("value")
    view_state_csrf = soup.find(id="com.salesforce.visualforce.ViewStateCSRF").get("value")
    data = {
        "j_id0:SiteTemplate:theForm": "j_id0:SiteTemplate:theForm",
        "j_id0:SiteTemplate:theForm:ttip": "Nonimmigrant Visa",
        # Submit button label of the zh_CN site.
        "j_id0:SiteTemplate:theForm:j_id176": "继续",
        "com.salesforce.visualforce.ViewState": view_state,
        "com.salesforce.visualforce.ViewStateVersion": view_state_version,
        "com.salesforce.visualforce.ViewStateMAC": view_state_mac,
        "com.salesforce.visualforce.ViewStateCSRF": view_state_csrf
    }
    r = requests.post(select_visa_type_uri, data=data, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        return None

    # select place (this step is skipped for "香港")
    if place != "香港":
        select_post_uri = "https://cgifederal.secure.force.com/selectpost"
        r = requests.get(select_post_uri, cookies=cookies, proxies=proxies)
        if r.status_code != 200:
            return None
        soup = bs(r.text, "html.parser")
        view_state = soup.find(id="com.salesforce.visualforce.ViewState").get("value")
        view_state_version = soup.find(id="com.salesforce.visualforce.ViewStateVersion").get("value")
        view_state_mac = soup.find(id="com.salesforce.visualforce.ViewStateMAC").get("value")
        view_state_csrf = soup.find(id="com.salesforce.visualforce.ViewStateCSRF").get("value")
        contact_id = soup.find(id="j_id0:SiteTemplate:j_id112:contactId").get("value")
        # Element id of each post's radio button (the suffix is positional).
        place2id = {
            "北京": "j_id0:SiteTemplate:j_id112:j_id165:0",
            "成都": "j_id0:SiteTemplate:j_id112:j_id165:1",
            "广州": "j_id0:SiteTemplate:j_id112:j_id165:2",
            "上海": "j_id0:SiteTemplate:j_id112:j_id165:3",
            "沈阳": "j_id0:SiteTemplate:j_id112:j_id165:4"
        }
        place_code = soup.find(id=place2id[place]).get("value")
        data = {
            "j_id0:SiteTemplate:j_id112": "j_id0:SiteTemplate:j_id112",
            "j_id0:SiteTemplate:j_id112:j_id165": place_code,
            "j_id0:SiteTemplate:j_id112:j_id169": "继续",
            "j_id0:SiteTemplate:j_id112:contactId": contact_id,
            "com.salesforce.visualforce.ViewState": view_state,
            "com.salesforce.visualforce.ViewStateVersion": view_state_version,
            "com.salesforce.visualforce.ViewStateMAC": view_state_mac,
            "com.salesforce.visualforce.ViewStateCSRF": view_state_csrf
        }
        r = requests.post(select_post_uri, data=data, cookies=cookies, proxies=proxies)
        if r.status_code != 200:
            return None

    # select visa category
    select_visa_category_uri = "https://cgifederal.secure.force.com/selectvisacategory"
    r = requests.get(select_visa_category_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        return None
    soup = bs(r.text, "html.parser")
    view_state = soup.find(id="com.salesforce.visualforce.ViewState").get("value")
    view_state_version = soup.find(id="com.salesforce.visualforce.ViewStateVersion").get("value")
    view_state_mac = soup.find(id="com.salesforce.visualforce.ViewStateMAC").get("value")
    view_state_csrf = soup.find(id="com.salesforce.visualforce.ViewStateCSRF").get("value")
    contact_id = soup.find(id="j_id0:SiteTemplate:j_id109:contactId").get("value")
    prefix = "j_id0:SiteTemplate:j_id109:j_id162:"
    # Position of the visa category radio button, per visa type and post.
    category2id = {
        "B": {"北京": 0, "成都": 0, "广州": 0, "上海": 0, "沈阳": 0, "香港": 0},
        "F": {"北京": 1, "成都": 1, "广州": 1, "上海": 1, "沈阳": 1, "香港": 1},
        "O": {"北京": 4, "成都": 2, "广州": 3, "上海": 4, "沈阳": 2, "香港": 3},
        "H": {"北京": 2, "广州": 3, "上海": 2, "香港": 3},
        "L": {"北京": 3, "广州": 2, "上海": 3, "香港": 3}
    }
    category_code = soup.find(id=prefix + str(category2id[visa_type][place])).get("value")
    data = {
        "j_id0:SiteTemplate:j_id109": "j_id0:SiteTemplate:j_id109",
        "j_id0:SiteTemplate:j_id109:j_id162": category_code,
        "j_id0:SiteTemplate:j_id109:j_id166": "继续",
        "j_id0:SiteTemplate:j_id109:contactId": contact_id,
        "com.salesforce.visualforce.ViewState": view_state,
        "com.salesforce.visualforce.ViewStateVersion": view_state_version,
        "com.salesforce.visualforce.ViewStateMAC": view_state_mac,
        "com.salesforce.visualforce.ViewStateCSRF": view_state_csrf
    }
    r = requests.post(select_visa_category_uri, data=data, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        return None

    # select visa type
    select_visa_code_uri = "https://cgifederal.secure.force.com/selectvisacode"
    r = requests.get(select_visa_code_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        return None
    soup = bs(r.text, "html.parser")
    view_state = soup.find(id="com.salesforce.visualforce.ViewState").get("value")
    view_state_version = soup.find(id="com.salesforce.visualforce.ViewStateVersion").get("value")
    view_state_mac = soup.find(id="com.salesforce.visualforce.ViewStateMAC").get("value")
    view_state_csrf = soup.find(id="com.salesforce.visualforce.ViewStateCSRF").get("value")
    # Index into the list of 'selectedVisaClass' values, per visa type.
    type2id = {
        "F": 0,
        "B": 2,
        "H": 0,
        "O": 11 if place == "香港" else (7 if place == "广州" else 0),
        "L": 8 if place == "香港" else 2
    }
    inputs = soup.find_all("input")
    type_codes = [x.get("value") for x in inputs if x.get("name") == "selectedVisaClass"]
    type_code = type_codes[type2id[visa_type]]
    data = {
        "j_id0:SiteTemplate:theForm": "j_id0:SiteTemplate:theForm",
        "j_id0:SiteTemplate:theForm:j_id178": "继续",
        "selectedVisaClass": type_code,
        "com.salesforce.visualforce.ViewState": view_state,
        "com.salesforce.visualforce.ViewStateVersion": view_state_version,
        "com.salesforce.visualforce.ViewStateMAC": view_state_mac,
        "com.salesforce.visualforce.ViewStateCSRF": view_state_csrf
    }
    r = requests.post(select_visa_code_uri, data=data, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        return None

    # update data
    update_data_uri = "https://cgifederal.secure.force.com/updatedata"
    # Fetch the update-data page itself; the previous code re-requested
    # select_visa_code_uri here and left update_data_uri unused.
    r = requests.get(update_data_uri, cookies=cookies, proxies=proxies)
    if r.status_code != 200:
        return None
    date = get_date(r.text)
    logger.info("%s, %s, SUCCESS_N, %s" % (visa_type, place, date))
    if date:
        g.assign("status_%s_%s" % (visa_type, place), date)
    return date
示例#27
0
def login(cracker, place):
    """Register a fresh account on the cgifederal site and return its session.

    The registration page is fetched, the captcha image embedded in it is
    handed to ``cracker`` and the form is submitted with random credentials.
    The captcha step is repeated until the response contains an ``https``
    front-door URI, which is then opened to obtain the ``sid`` cookie.

    Args:
        cracker: captcha solver exposing ``solve(image_bytes)``. If it also
            has a ``wrong()`` method, that is called whenever the site
            reports that the captcha could not be verified.
        place: post name; "香港" selects the Hong Kong registration URLs.

    Returns:
        ``(username, passwd, sid)`` on success, or ``None`` when a request
        answers with a non-200 status or the solver returns an empty string.

    Side effects:
        Writes ``try.gif`` and ``gifname`` in the working directory, writes
        ``state`` when the solver fails, and archives each unsuccessful
        captcha image under ``log/`` or ``fail/``.
    """
    # Function-scope import: shutil is only needed to archive captcha images.
    import shutil

    proxies = g.value("proxies", None)

    # get register page
    REG_URI = "https://cgifederal.secure.force.com/SiteRegister?country=China&language=zh_CN"
    REG_HK_URI = "https://cgifederal.secure.force.com/SiteRegister?country=Hong%20Kong&language=zh_CN"
    r = requests.get(REG_HK_URI if place == "香港" else REG_URI, proxies=proxies)
    if r.status_code != 200:
        return None

    # In case of failure
    while True:
        soup = bs(r.text, "html.parser")
        view_state = soup.find(id="com.salesforce.visualforce.ViewState").get("value")
        view_state_version = soup.find(id="com.salesforce.visualforce.ViewStateVersion").get("value")
        view_state_mac = soup.find(id="com.salesforce.visualforce.ViewStateMAC").get("value")
        cookies = r.cookies

        # get recaptcha
        REG_CAPTCHA_URI = "https://cgifederal.secure.force.com/SiteRegister?refURL=https%3A%2F%2Fcgifederal.secure.force.com%2F%3Flanguage%3DChinese%2520%28Simplified%29%26country%3DChina"
        REG_CAPTCHA_HK_URI = "https://cgifederal.secure.force.com/SiteRegister?refURL=https%3A%2F%2Fcgifederal.secure.force.com%2F%3Flanguage%3DChinese%2520%28Simplified%29%26country%3DHong%20Kong"
        captcha_uri = REG_CAPTCHA_HK_URI if place == "香港" else REG_CAPTCHA_URI
        data = {
            "AJAXREQUEST": "_viewRoot",
            "Registration:SiteTemplate:theForm": "Registration:SiteTemplate:theForm",
            "Registration:SiteTemplate:theForm:username": "",
            "Registration:SiteTemplate:theForm:firstname": "",
            "Registration:SiteTemplate:theForm:lastname": "",
            "Registration:SiteTemplate:theForm:password": "",
            "Registration:SiteTemplate:theForm:confirmPassword": "",
            "Registration:SiteTemplate:theForm:response": "",
            "Registration:SiteTemplate:theForm:recaptcha_response_field": "",
            "com.salesforce.visualforce.ViewState": view_state,
            "com.salesforce.visualforce.ViewStateVersion": view_state_version,
            "com.salesforce.visualforce.ViewStateMAC": view_state_mac,
            "Registration:SiteTemplate:theForm:j_id177": "Registration:SiteTemplate:theForm:j_id177"
        }
        r = requests.post(captcha_uri, data=data, cookies=cookies, proxies=proxies)
        if r.status_code != 200:
            return None

        soup = bs(r.text, "html.parser")
        view_state = soup.find(id="com.salesforce.visualforce.ViewState").get("value")
        view_state_version = soup.find(id="com.salesforce.visualforce.ViewStateVersion").get("value")
        view_state_mac = soup.find(id="com.salesforce.visualforce.ViewStateMAC").get("value")
        cookies = r.cookies

        # The captcha arrives inline as a base64 data URI.
        raw = soup.find_all(id='Registration:SiteTemplate:theForm:theId')
        raw = raw[0].attrs['src'].replace('data:image;base64,', '')
        img = base64.b64decode(raw)
        gifname = 'try.gif'
        # Context managers make sure the files are flushed and closed before
        # the image is moved below.
        with open(gifname, 'wb') as image_file:
            image_file.write(img)
        with open('gifname', 'w') as name_file:
            name_file.write(gifname)
        captcha = cracker.solve(img).replace('1', 'l').lower()
        if len(captcha) == 0:
            with open('state', 'w') as state_file:
                state_file.write(
                    '自动识别服务挂掉了,请到<a href="https://github.com/Trinkle23897/'
                    'us-visa">GitHub</a>上提issue')
            return None

        # click and register
        username = ''.join([chr(np.random.randint(26) + ord('a')) for _ in range(15)]) + "@gmail.com"
        passwd = ''.join(np.random.permutation(list('12345qwert')))
        data = {
            "Registration:SiteTemplate:theForm": "Registration:SiteTemplate:theForm",
            "Registration:SiteTemplate:theForm:username": username,
            "Registration:SiteTemplate:theForm:firstname": "Langpu",
            "Registration:SiteTemplate:theForm:lastname": "Te",
            "Registration:SiteTemplate:theForm:password": passwd,
            "Registration:SiteTemplate:theForm:confirmPassword": passwd,
            "Registration:SiteTemplate:theForm:j_id169": "on",
            "Registration:SiteTemplate:theForm:response": captcha,
            "Registration:SiteTemplate:theForm:recaptcha_response_field": "",
            "Registration:SiteTemplate:theForm:submit": "提交",
            "com.salesforce.visualforce.ViewState": view_state,
            "com.salesforce.visualforce.ViewStateVersion": view_state_version,
            "com.salesforce.visualforce.ViewStateMAC": view_state_mac
        }
        r = requests.post(captcha_uri, data=data, cookies=cookies, proxies=proxies)
        if r.status_code != 200:
            return None
        front_door_uri = r.text.split("'")[-2]
        if front_door_uri.startswith("https"):
            break

        # Registration did not go through: archive the image under the
        # solver's answer. 'fail' holds captchas the site explicitly
        # rejected, 'log' everything else.
        rejected = '无法核实验证码' in r.text
        archive_dir = 'fail' if rejected else 'log'
        try:
            os.makedirs(archive_dir, exist_ok=True)
            # shutil.move instead of a shell `mv`: the solver's answer is
            # never interpreted by a shell.
            shutil.move(gifname, '%s/%s.gif' % (archive_dir, captcha))
        except OSError:
            # Archiving is diagnostic only and stays best-effort, like the
            # former `mv` call whose exit status was ignored.
            pass
        if rejected and hasattr(cracker, 'wrong'):
            cracker.wrong()

    # open front door
    r = requests.get(front_door_uri, cookies=cookies, proxies=proxies)
    cookies = r.cookies
    return username, passwd, cookies["sid"]
示例#28
0
def crawler(visa_type, places):
    """Poll the home page once per place and persist the resulting dates.

    For every place a stored session is used to fetch ``g.HOME_URI``; the
    date parsed by ``get_date`` is stored in ``g`` under
    ``status_<visa_type>_<place>``. Sessions that look expired are handed to
    ``session_op.replace_session``. Afterwards the known status of each place
    is appended to ``<visa_type>/<place>/<YYYY>/<MM>/<DD>``, merged into the
    JSON summary via ``merge`` and ``notify.py`` is launched in the
    background.

    Args:
        visa_type: visa category key; also used as directory and file prefix.
        places: iterable of place names to poll.

    Side effects:
        Writes '1' to ``<visa_type>_state`` at the start and '0' at the end.
    """
    state_path = visa_type + '_state'
    with open(state_path, 'w') as state_file:
        state_file.write('1')
    # One clock snapshot for both stamps, so the day used for file names can
    # never disagree with s['time'] around midnight.
    localtime = time.localtime()
    s = {'time': time.strftime('%Y/%m/%d %H:%M', localtime)}
    cur = time.strftime('%Y/%m/%d', localtime)
    for place in places:
        try:
            # prepare session
            sess = session_op.get_session(visa_type, place)
            if not sess:
                logger.warning("%s, %s, FAILED, %s" %
                               (visa_type, place, "No Session"))
                continue
            cookies = copy.deepcopy(g.COOKIES)
            cookies["sid"] = sess
            # send request
            r = requests.get(g.HOME_URI,
                             headers=g.HEADERS,
                             cookies=cookies,
                             proxies=g.value("proxies", None))
            if r.status_code != 200:
                logger.warning("%s, %s, FAILED, %s" %
                               (visa_type, place, "Session Expired"))
                session_op.replace_session(visa_type, place, sess)
                continue
            # parse HTML
            page = r.text
            date = get_date(page)
            if not date:
                logger.warning("%s, %s, FAILED, %s" %
                               (visa_type, place, "Session Expired"))
                session_op.replace_session(visa_type, place, sess)
                continue
            elif date == (0, 0, 0):
                logger.warning("%s, %s, FAILED, %s" %
                               (visa_type, place, "Date Not Found"))
                last_status = g.value("status_%s_%s" % (visa_type, place),
                                      (0, 0, 0))
                # Replace the session if a date was known before; otherwise
                # only replace it on a random 5% of the polls.
                if last_status != (0, 0, 0):
                    session_op.replace_session(visa_type, place, sess)
                elif random.random() < 0.05:
                    session_op.replace_session(visa_type, place, sess)
                continue
            logger.info("%s, %s, SUCCESS, %s" % (visa_type, place, date))
            g.assign("status_%s_%s" % (visa_type, place), date)
        except Exception:
            # One failing place must not abort the others; KeyboardInterrupt
            # and SystemExit are deliberately no longer swallowed.
            logger.error(traceback.format_exc())

    # write to file
    for place in places:
        n = place + '-' + cur
        n2 = place + '2-' + cur
        y, m, d = g.value("status_%s_%s" % (visa_type, place), (0, 0, 0))
        s[n] = s[n2] = '{}/{}/{}'.format(y, m, d) if y > 0 else "/"
        if s[n] != '/':
            path = visa_type + '/' + n.replace('-', '/')
            os.makedirs(path.rsplit('/', 1)[0], exist_ok=True)
            with open(path, 'a+') as history_file:
                history_file.write(s['time'].split(' ')[-1] + ' ' + s[n] + '\n')
    merge(
        '../visa/visa.json' if visa_type == "F" else '../visa/visa-%s.json' %
        visa_type.lower(), s, cur)
    with open(state_path, 'w') as state_file:
        state_file.write('0')
    # NOTE(review): visa_type is interpolated into a shell command; it is
    # assumed to be an internal constant, never user input.
    os.system('python3 notify.py --type ' + visa_type + ' &')
示例#29
0
    def fetch_visa_status(cls, visa_type: str, location: str,
                          req: requests.Session):
        """ Ask the crawler server for the current status of one visa type/location.

            A cached session is looked up and the matching 'refresh' endpoint
            is requested through ``req``. On success the returned dates are
            stored via ``cls.save_fetched_data``; for 'ais' sessions the
            cached session is also replaced with the one returned by the
            server. Failures are logged; depending on the failure a
            placeholder is saved, the crawler server connection is re-checked
            or a new session is requested. Always returns ``None``; any
            unexpected exception is logged and swallowed.
        """
        timestamp = datetime.now().strftime('%H:%M:%S')
        try:
            cached = SESSION_CACHE.get_session(visa_type, location)
            if cached is None:
                LOGGER.warning('%s, %s, %s, FAILED - No Session', timestamp,
                               visa_type, location)
                return

            # Build the refresh endpoint for the system the session belongs to.
            if cached.sys == 'ais':
                endpoint = G.CRAWLER_API['refresh']['ais'].format(
                    location, cached.schedule_id, cached.session)
            elif cached.sys == 'cgi':
                endpoint = G.CRAWLER_API['refresh']['cgi'].format(
                    cached.session)

            url = f"{G.value('current_crawler_node', '')}{endpoint}"
            try:
                response = req.get(url,
                                   timeout=G.WAIT_TIME['refresh'],
                                   proxies=G.value('proxies', None))
            except requests.exceptions.Timeout:
                LOGGER.warning('%s, %s, %s, FAILED - Endpoint Timeout.',
                               timestamp, visa_type, location)
                cls.save_placeholder_at_exception(visa_type, location)
                cls.check_crawler_server_connection()
                return
            except requests.exceptions.ConnectionError:
                LOGGER.warning(
                    '%s, %s, %s, FAILED - Endpoint Connection Aborted.',
                    timestamp, visa_type, location)
                cls.check_crawler_server_connection()
                return

            if response.status_code != 200:
                LOGGER.warning('%s, %s, %s, FAILED - %d', timestamp,
                               visa_type, location, response.status_code)
                cls.check_crawler_server_connection()
                return

            payload = response.json()
            LOGGER.debug(
                'fetch_visa_status - Endpoint: %s | Response json: %s',
                endpoint, json.dumps(payload))

            # code == 0 stands for success in the crawler api
            if payload['code'] != 0:
                LOGGER.warning('%s, %s, %s, FAILED - Session Expired',
                               timestamp, visa_type, location)
                # Per the original note: an expired session triggers a
                # database update using the last successful fetch result.
                cls.save_placeholder_at_exception(visa_type, location)
                SESSION_CACHE.produce_new_session_request(
                    visa_type, location, cached)
                return

            if cached.sys == 'cgi':
                # 'msg' is a dash-separated date string.
                date_parts = list(map(int, payload['msg'].split('-')))
                cls.save_fetched_data(visa_type, location, date_parts)
                LOGGER.info('%s, %s, %s, SUCCESS - %d/%d/%d', timestamp,
                            visa_type, location, *date_parts)
            elif cached.sys == 'ais':
                # 'msg' is iterated as (city, date segments) pairs.
                for city, date_parts in payload['msg']:
                    if city not in G.AIS_MONITORING_CITY:
                        continue
                    cls.save_fetched_data(visa_type, city, date_parts)
                    LOGGER.info('%s, %s, %s, %s, SUCCESS - %d/%d/%d',
                                timestamp, visa_type, location, city,
                                *date_parts)

                # Swap the cached session for the one the server returned.
                refreshed = Session(session=(payload['session'],
                                             cached.schedule_id),
                                    sys=cached.sys)
                SESSION_CACHE.replace_session(visa_type, location, cached,
                                              refreshed)
        except Exception:
            LOGGER.error(traceback.format_exc())
示例#30
0
    def consume_new_session_request(cls,
                                    task_queue: Queue = G.SESSION_UPDATE_QUEUE
                                    ):
        """ Consume session update events from the task queue and request new
            sessions from the crawler server.

            Runs forever: each ``(visa_type, location, session)`` item taken
            from ``task_queue`` is turned into a 'register' request against
            the current crawler node. On success the old session is replaced
            in ``SESSION_CACHE``; failures are logged and, depending on the
            response, the location is marked unavailable or the crawler
            server connection is re-checked.

            Args:
                task_queue: queue yielding ``(visa_type, location, session)``
                    tuples; defaults to ``G.SESSION_UPDATE_QUEUE``.
        """
        LOGGER.info('Listening to session update request task queue...')
        while True:
            visa_type, location, session = task_queue.get()
            LOGGER.debug(
                'Receive new session update request: %s-%s | Current queue size: %d',
                visa_type, location, task_queue.qsize())

            if session is None:
                LOGGER.error('A session object from %s-%s is NoneType',
                             visa_type, location)  # just in case
                # Nothing can be refreshed without a session object; the code
                # below would only fail on ``session.sys``.
                continue

            if not SESSION_CACHE.contain_session(visa_type, location, session):
                LOGGER.debug(
                    'Session %s is no longer in the %s-%s session list.',
                    session, visa_type, location)
                continue

            try:
                if session.sys == 'ais':
                    email = G.value(f'ais_email_{visa_type}', None)
                    password = G.value(f'ais_pswd_{visa_type}', None)

                    # The password is masked so credentials never reach the log.
                    LOGGER.debug('Fetching new session for AIS: %s, %s, %s',
                                 location, email,
                                 None if password is None else '***')
                    # Check the credentials before building an endpoint
                    # that would embed them.
                    if email is None or password is None:
                        continue
                    endpoint = G.CRAWLER_API['register']['ais'].format(
                        location, email, password)
                elif session.sys == 'cgi':
                    endpoint = G.CRAWLER_API['register']['cgi'].format(
                        visa_type, location)

                url = '{}{}'.format(G.value('current_crawler_node', ''),
                                    endpoint)
                res = requests.get(url,
                                   timeout=G.WAIT_TIME['register'],
                                   proxies=G.value('proxies', None))
                try:
                    result = res.json()
                except ValueError:
                    # Non-JSON body: a 500 page marks the location as
                    # unavailable, anything else is printed for inspection.
                    content = res.content.decode()
                    if 'Server Error (500)' in content:
                        SESSION_CACHE.mark_unavailable(visa_type, location)
                    else:
                        print(time.asctime(), visa_type, location, content)
                    continue
                LOGGER.debug(
                    'consume_new_session_request - Endpoint: %s | Response json: %s',
                    endpoint, json.dumps(result))

                if result['code'] != 0:
                    LOGGER.warning('%s, %s, %s, FAILED - %s',
                                   datetime.now().strftime('%H:%M:%S'),
                                   visa_type, location, result['msg'])
                    if result['msg'] == "Network Error":
                        SESSION_CACHE.mark_unavailable(visa_type, location)
                    else:
                        cls.check_crawler_server_connection()
                    continue

                # Generate new session object and update cache
                if session.sys == 'ais':
                    new_session = Session((result['session'], result['id']),
                                          sys='ais')
                    date_available = bool(len(result['msg']))
                elif session.sys == 'cgi':
                    new_session = Session(result['session'], sys='cgi')
                    # Always True: str.split() never returns an empty list.
                    date_available = bool(result['msg'].split('-'))

                if date_available:  # why this flag is needed?
                    LOGGER.info(
                        'consume_new_session_request - %s, %s, %s, SUCCESS - %s',
                        datetime.now().strftime('%H:%M:%S'), visa_type,
                        location, result['msg'])
                    SESSION_CACHE.replace_session(visa_type, location, session,
                                                  new_session)
            except requests.exceptions.ReadTimeout:
                LOGGER.debug(
                    'consume_new_session_request - request time out for endpoint: %s | %s-%s',
                    endpoint, visa_type, location)
                cls.check_crawler_server_connection()
            except Exception:
                # The traceback needs its own %s placeholder; without one the
                # logging module fails to format the record and the traceback
                # never reaches the log.
                LOGGER.error('an unexpected error occured: %s',
                             traceback.format_exc())