예제 #1
0
 def validate(self, apartment):
     """Run the full validation pass for one apartment, logging any failure."""
     city = apartment.get('city')
     logger.info('[{}] [Validator] START'.format(city))
     try:
         self.examine_single_apartment(apartment)
     except Exception as err:
         # Log and swallow so one bad record cannot stop the whole run.
         logger.error('[{}] [Validator] [validate] err'.format(city))
         logger.exception(err)
예제 #2
0
파일: devops.py 프로젝트: xx1820017/lixiang
def task_clean(taskid):
    """Delete all data belonging to a task: DB rows, celery periodic tasks and MongoDB records.

    :param taskid: primary key of the task to purge
    :return: True on success, False when taskid is empty or an error occurs
    """
    try:
        if not taskid: return False

        # Delete the task record itself.
        db.exec('delete from task where id=:id', {'id': taskid})

        # Remove executions belonging to the task.
        db.exec("delete from task_execute where task_id=:id", {'id': taskid})

        # Build one name placeholder per id.  The original used
        # len([taskIds]) which is always 1 no matter how many ids there are.
        taskIds = [taskid]
        sqlIn = "','".join(['spider_execute_%s'] * len(taskIds))
        db.exec(
            "delete from djcelery_periodictask where name in ('%s')" % sqlIn,
            taskIds)

        # Crawled URLs for the task.
        db.exec("delete from spider_url where task_id=:id", {'id': taskid})

        # Data-processing (piping) results for the task.
        db.exec("delete from task_piping where task_id=:id", {'id': taskid})
        db.exec("delete from task_piping_result where task_id=:id",
                {'id': taskid})

        # MongoDB documents for the task.
        mgdb.remove_taskid(taskid)

        # @todo: also clean up files / snapshots belonging to the task.
        # Explicit success marker: the original fell off the end returning
        # None, which is falsy just like the error path's False.
        return True
    except Exception as e:
        logger.exception(e)
        return False
예제 #3
0
 def start_one_url(self, task):
     """Crawl one apartment detail URL and persist the scraped info.

     Opens task['url'], extracts the detail data and writes it to the
     staging collection.  Each known failure mode gets its own handler;
     the webdriver is always shut down in ``finally``.
     """
     try:
         logger.info('[{}] [DetailCrawler] Start crawl new link'.format(
             task.get('city')))
         self.get(task.get('url'))
         logger.info('[{}] [DetailCrawler] Url opened'.format(
             task.get('city')))
         info = get_info_of_single_url(self.driver, task.get('url'))
         logger.info('[{}] [DetailCrawler] Data get'.format(
             task.get('city')))
         mongo.insert_into_staing(task, info)
     except ApartmentExpiredException:
         # Listing no longer exists: mark the task instead of retrying.
         logger.info('[{}] [DetailCrawler] Url expired'.format(
             task.get('city')))
         mongo.task_expired(task)
     except NoSuchElementException:
         # probably proxy blocked
         logger.info('[{}] [DetailCrawler] Elm not found'.format(
             task.get('city')))
         self.renew_driver()
     except (TimeoutException, WebDriverException,
             InvalidSessionIdException):
         # Browser/session-level failures: start a fresh driver.
         logger.info('[{}] [DetailCrawler] Session timeout'.format(
             task.get('city')))
         self.renew_driver()
     except (TooManyTimesException):
         # Retry budget exhausted elsewhere; drop the task silently.
         pass
     except Exception as e:
         # Unknown failure: keep the page source for later diagnosis.
         logger.exception(e)
         mongo.update_failure_task(task, e, self.driver.page_source)
     finally:
         self.quit()
예제 #4
0
    async def on_message(self, message: discord.message):
        """Run the moderation filters over every human guild message.

        Bot messages, DMs and messages from the relay guild are ignored.
        A message failing the text/content filters is deleted outright;
        otherwise it is appended to the author's recent-message chain so
        that content split across several messages can still be caught
        once the chain is joined together.
        """
        if not message.author.bot and message.guild and \
                message.guild.id != RELAY_ID:
            # moderation system
            if text_filter(message.content, message.author, message.guild) or \
                    content_filter(message):
                await message.delete()

            else:
                # Per-guild, per-author rolling buffer of recent messages.
                message_chain: list = self.bucket[
                    message.guild.id][message.author.id]
                message_chain.append(message)

                joined = " ".join([m.content for m in message_chain])

                # Re-run the text filter over the whole concatenated chain.
                if text_filter(joined, message.author, message.guild):
                    for m in message_chain:
                        try:
                            await m.delete()
                        except Exception:
                            # Message may already be gone or undeletable.
                            logger.exception(
                                f"Cannot delete message {m.content}")
                    message_chain.clear()

                elif len(message_chain) > int(
                        cfg["Performance"]["moderation-buffer-limit"]):
                    # Keep the buffer bounded by dropping the oldest entry.
                    message_chain.pop(0)
예제 #5
0
def load_post_list_file(archive_file_path: str):
    """
    Saves all posts and their comments into the database from the list of post URLs in the specified file.

    :param archive_file_path: path to a text file with one post URL per line
    """

    # Context manager guarantees the handle is closed (the original leaked it).
    with open(archive_file_path, "r") as archive_file:
        post_list = [s.strip() for s in archive_file.readlines()]

    for post in post_list:
        try:
            global reddit, subreddit
            # Reconnect for every post: kept inside the loop so a stale
            # session is refreshed after an error on the previous post.
            reddit = praw.Reddit(**config_loader.REDDIT["auth"])
            subreddit = reddit.subreddit(config_loader.REDDIT["subreddit"])
            reddit_submission = reddit.submission(url=post)
            if reddit_submission.subreddit_name_prefixed != subreddit.display_name_prefixed:
                logger.info(
                    f"Post {post} is not on {subreddit.display_name_prefixed}, skipping..."
                )
                continue

            save_post_and_comments(reddit_submission)
        except Exception:
            logger.exception(
                f"Unable to save {post}, continuing in 30 seconds...")
            time.sleep(30)
예제 #6
0
def task_start(taskid):
    """Create a task_execute row from the task record and return its new id.

    :param taskid: primary key of the task to start
    :return: the inserted task_execute id, or False when the task does not
        exist or an error occurs
    """
    try:
        task = db.fetchone('select * from task where id=:id', {'id': taskid})
        if not task:
            return False
        start_urls = json.loads(task['start_urls'])
        # Columns copied verbatim from the task row.
        copied_fields = (
            'site_id', 'app_id', 'start_urls', 'exec_level', 'limit_depth',
            'limit_total', 'limit_time', 'limit_subdomain', 'limit_image',
            'limit_js', 'limit_jsevent', 'exclude_urls', 'url_unique_mode',
            'notify_url', 'source_ip', 'proxies',
        )
        executedata = {field: task[field] for field in copied_fields}
        # Fields that are renamed or derived.
        executedata['task_id'] = task['id']
        executedata['task_type'] = task['type']
        executedata['domain'] = getDomainNoPort(start_urls[0])
        executedata['status'] = 0
        return db.insert('task_execute', executedata)
    except Exception as err:
        logger.exception(err)
        return False
예제 #7
0
 def doInit(self, model_filename=Config.Model.MARS_DIR, batch_size=1):
     """Create the pedestrian box encoder.

     An out-of-memory failure is logged and swallowed so the worker can
     still start (self.encoder is then left unset).

     :param model_filename: path to the MARS model file
     :param batch_size: encoder batch size
     """
     try:
         self.encoder = pedestrian_extractor.create_box_encoder(
             model_filename, batch_size=batch_size)
     except Exception:
         # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt propagate.
         logger.exception("CUDA out off memory", exc_info=True)
     print(self.name, '=' * 10)
예제 #8
0
파일: discord.py 프로젝트: r-anime/modbot
def send_webhook_message(channel_webhook_url, json_content, retries=3):
    """
    Send a message to the specified channel via a webhook.

    :param channel_webhook_url: full URL for the receiving webhook
    :param json_content: dictionary containing data to send (usually "content" or "embed" keys)
    :param retries: number of times to attempt to send message again if it fails
    :return: True if message was successfully sent, False otherwise
    """

    # Messaging disabled in config: report success without doing anything.
    if not config_loader.DISCORD["enabled"]:
        return True

    # One initial attempt plus `retries` repeats.
    for _attempt in range(retries + 1):
        try:
            response = requests.post(channel_webhook_url, json=json_content)

            if response.status_code in (200, 204):
                return True

            logger.warning(
                f"Webhook response {response.status_code}: {response.text}")
        except Exception:
            logger.exception(
                "Unexpected error while attempting to send webhook message.")

        # Brief pause before the next attempt.
        time.sleep(5)

    logger.error(f"Unable to send webhook message, content: {json_content}")
    return False
예제 #9
0
def scrape_image(image_url: str, slug: str) -> Path:
    """Download an image and store it under IMG_DIR as ``<slug>.<ext>``.

    ``image_url`` may be a plain URL string, a list of URLs (the first is
    used), or a dict carrying the URL under a "url" key.

    :return: the saved file's path, or None when the request fails or the
        server does not answer 200.
    """
    if isinstance(image_url, list):  # list of candidate URLs: take the first
        image_url = image_url[0]

    if isinstance(image_url, dict):  # {"url": ...}-style object
        # Replaces the original key-scanning loop; falls back to the dict
        # itself when "url" is absent, matching the old behavior.
        image_url = image_url.get("url", image_url)

    # Keep the remote file's extension.
    filename = slug + "." + image_url.split(".")[-1]
    filename = IMG_DIR.joinpath(filename)

    try:
        r = requests.get(image_url, stream=True)
    except Exception:
        # Narrowed from a bare `except:`; still best-effort — callers expect
        # None on any download failure.
        logger.exception("Fatal Image Request Exception")
        return None

    if r.status_code == 200:
        # Stream the raw payload straight to disk.
        r.raw.decode_content = True

        with open(filename, "wb") as f:
            shutil.copyfileobj(r.raw, f)

        return filename

    return None
예제 #10
0
async def on_error(event, *args, **kwargs):
    """Global error hook: hard-restart on Discord HTTP errors, log the rest.

    NOTE(review): `raise event` assumes an exception object is passed in,
    not discord.py's usual event-name string — confirm at the call site.
    """
    try:
        raise event
    except discord.HTTPException:
        os.system("kill 1")  # hard restart on 429
    except Exception:
        logger.exception(event)
예제 #11
0
 def doInit(self):
     """Build the Facenet embedding extractor on its own TF graph, then the preprocessor.

     An out-of-memory failure is logged and swallowed so the worker can
     still start (self.extractor is then left unset).
     """
     face_graph = tf_graph.FaceGraph()
     try:
         self.extractor = face_extractor.FacenetExtractor(
             face_graph, model_path=Config.Model.COEFF_DIR)
     except Exception:
         # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt propagate.
         logger.exception("CUDA out off memory", exc_info=True)
     self.preprocessor = preprocess.Preprocessor()
     print(self.name, '=' * 10)
예제 #12
0
def migrate_flairs():
    """
    Fetch all users with flairs in the subreddit and convert them to the emoji format.
    """

    global reddit, subreddit

    users_to_update = []
    total_users = 0
    users_with_flair = 0

    while True:
        try:
            logger.info("Connecting to Reddit...")
            reddit = praw.Reddit(**config_loader.REDDIT["auth"])
            subreddit = reddit.subreddit(config_loader.REDDIT["subreddit"])
            logger.info("Loading flairs...")
            # Generator will load in batches of 1000 from Reddit, this covers the entire sub.
            for user_flair in subreddit.flair(limit=None):
                new_flair_text, template_id = _parse_flair(user_flair)
                # A "v-"-prefixed css class marks a verified user; remember them.
                if user_flair["flair_css_class"] is not None and user_flair[
                        "flair_css_class"].startswith("v-"):
                    verified_users.append(user_flair["user"].name)

                if new_flair_text:
                    users_with_flair += 1

                users_to_update.append({
                    "user": user_flair["user"],
                    "flair_text": new_flair_text,
                    "flair_template_id": template_id
                })
                # Update users in a batch
                # NOTE(review): the flair.update calls are commented out below,
                # so this currently runs as a dry run that only counts users.
                if len(users_to_update) >= 100:
                    # subreddit.flair.update(flair_list=users_to_update)
                    total_users += len(users_to_update)
                    logger.info(
                        f"Updated {len(users_to_update)} users, {total_users} total"
                    )
                    users_to_update = []
            # Flush the final partial batch.
            if users_to_update:
                # subreddit.flair.update(flair_list=users_to_update)
                total_users += len(users_to_update)
                logger.info(
                    f"Updated {len(users_to_update)} users, {total_users} total"
                )
            logger.info(
                f"{users_with_flair} / {total_users} users successfully migrated!"
            )
            break
        except Exception:
            # On any error, wait and restart the whole pass from scratch.
            delay_time = 30
            logger.exception(
                f"Encountered an unexpected error, restarting in {delay_time} seconds..."
            )
            time.sleep(delay_time)
예제 #13
0
    def doInit(self):
        """Build the ArcFace embedding extractor.

        An out-of-memory failure is logged and swallowed so the worker can
        still start (self.extractor is then left unset).
        """
        # NOTE(review): face_graph is never passed to ArcFaceExtractor —
        # confirm FaceGraph() has no required side effects before removing.
        face_graph = tf_graph.FaceGraph()
        try:
            self.extractor = face_extractor.ArcFaceExtractor(
                model_path=Config.Model.ARCFACE_DIR)

        except Exception:
            # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt propagate.
            logger.exception("CUDA out off memory", exc_info=True)

        print(self.name, '=' * 10)
예제 #14
0
 def doInit(self):
     """Create the MTCNN face detector and initialise per-run counters.

     A GPU out-of-memory failure is logged and swallowed so the worker can
     still start (self.face_detector is then left unset).
     """
     face_graph = tf_graph.FaceGraph()
     try:
         self.face_detector = face_detector.MTCNNDetector(
             face_graph, scale_factor=Config.MTCNN.SCALE_FACTOR)
     except Exception:
         # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt propagate.
         logger.exception("CUDA device out of memory")
     super(FaceDetectWorker, self).__init__()
     self.face_count = 0
     self.detected_frame_count = 0
     print(self.name, '=' * 10)
예제 #15
0
 def connect(self):
     """Open the TCP connection to the configured host/port; no-op when already connected."""
     if self.connected:
         return
     try:
         self._sock.connect((self.domain, self.port))
     except socket.error as err:
         logger.exception(err)
     else:
         # Only mark connected when connect() raised nothing.
         self.connected = 1
         logger.debug('TCPClient connect to {0}:{1} success.'.format(
             self.domain, self.port))
예제 #16
0
 def doInit(self, use_coeff_filter=True):
     """Build the ArcFace embedding extractor and, optionally, the Facenet
     coefficient extractor used for quality filtering.

     Any load failure (e.g. GPU out of memory) is logged and swallowed so
     the worker can still start.

     :param use_coeff_filter: when True also load the coeff (Facenet) model
     """
     try:
         self.embs_extractor = face_extractor.ArcFaceExtractor(
             model_path=Config.Model.ARCFACE_DIR)
         self.use_coeff_filter = use_coeff_filter
         if use_coeff_filter:
             coeff_graph = tf_graph.FaceGraph()
             self.coeff_extractor = face_extractor.FacenetExtractor(
                 coeff_graph, model_path=Config.Model.COEFF_DIR)
     except Exception:
         # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt propagate.
         logger.exception("CUDA out off memory", exc_info=True)
     print(self.name, '=' * 10)
예제 #17
0
def encrypt(string, salt='', encrypt_way='MD5'):
    """Hash ``string`` + ``salt`` with MD5 or SHA1 and return the hex digest.

    :param string: text to hash
    :param salt: appended to ``string`` before hashing
    :param encrypt_way: 'MD5' or 'SHA1' (case-insensitive)
    :return: hex digest string, or False (after logging) for an
        unsupported ``encrypt_way``
    """
    salted = (string + salt).encode()
    algorithm = encrypt_way.upper()
    if algorithm == 'MD5':
        hasher = hashlib.md5()
    elif algorithm == 'SHA1':
        hasher = hashlib.sha1()
    else:
        logger.exception(EncryptError('请输入正确的加密方式,目前仅支持 MD5 或 SHA1'))
        return False

    hasher.update(salted)
    return hasher.hexdigest()
예제 #18
0
 def doInit(self):
     """Create the YOLO pedestrian detector, the ROI geometry and counters.

     A GPU out-of-memory failure is logged and swallowed so the worker can
     still start (self.pedestrian_detector is then left unset).
     """
     try:
         self.pedestrian_detector = pedestrian_detector.YOLODetector()
     except Exception:
         # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt propagate.
         logger.exception("CUDA device out of memory")
     super(PedestrianDetectWorker, self).__init__()
     # Region of interest: polygon coordinates, their bounding rect, and the
     # polygon re-expressed relative to that rect's origin.
     roi_cordinate = Config.ROI.ROI_CORDINATE[Config.ROI.USE]
     self.roi_cordinate_np = self.ConvertCordinates(roi_cordinate)
     self.rect = cv2.boundingRect(self.roi_cordinate_np)
     self.roi_cordinate_np_scale = self.roi_cordinate_np - np.array([self.rect[0],self.rect[1]])
     self.centroids = (int(self.rect[2]/2), int(self.rect[3]/2))
     self.pedestrian_count = 0
     self.detected_frame_count = 0
     self.background_subtraction = background_subtraction.BGSProcess()
     print(self.name, '=' * 10)
예제 #19
0
 def examine_single_apartment(self, apartment):
     """Validate one apartment record and persist the outcome.

     On success the record is stored via on_pass_validation and observers
     are notified with the inserted id.  Invalid values are reported to a
     dedicated collection; any other error is logged and reported as
     unexpected.
     """
     try:
         examine_apartment(apartment)
         inserted_id = mongo.on_pass_validation(apartment)
         self.notify(inserted_id)
         logger.info('[{}] [Validator] pass validation'.format(apartment.get('city',None)))
     except ValidatorInvalidValueException as e1:
         # args[1] carries the offending values collected by the validator.
         logger.info('[{} ]Found invalid value'.format(apartment.get('city')))
         invalid_values = e1.args[1]
         mongo.report_invalid_value(apartment, invalid_values)
     except Exception as e:
         logger.error('[{}] [Validator] [examine_single_apartment] err'.format(apartment.get('city')))
         logger.exception(e)
         mongo.report_unexpected_error('data_validator', e, apartment.get(
             'house_url') if apartment else None)
예제 #20
0
파일: file.py 프로젝트: xx1820017/lixiang
def ydfs_upload(key, localfile):
    """Upload ``localfile`` to DFS under ``key`` via the Go dfs_client binary.

    :param key: remote file key
    :param localfile: local path to upload
    :return: the client's combined stdout/stderr (decoded) on success, False on error
    """
    try:
        # SECURITY: key/localfile are interpolated into a shell command; make
        # sure they never carry untrusted input, or switch to a list argv
        # with shell=False.
        cmd = '%s/dfs_client -action upload -config %s/client.json --filekey %s --file=%s' % (
            PATH_GO, PATH_GO, key, localfile)
        child = Popen(cmd,
                      shell=True,
                      close_fds=True,
                      bufsize=-1,
                      stdout=PIPE,
                      stderr=STDOUT)
        # communicate() reads all output AND reaps the child; the original
        # stdout.read() left a zombie process behind.
        output = child.communicate()[0].decode()
        return output
    except Exception as e:
        logger.exception(e)
        return False
예제 #21
0
def execute_register(patient: dict) -> None:
    """Drive the hospital registration site for one patient via Selenium.

    Searches by doctor name, jumps to the last schedule page, clicks the row
    matching patient['target_date'] and fills in the patient id.

    Expects keys in ``patient``: 'doc_name', 'target_date', 'id'.
    Any failure is logged and swallowed; the function always returns None.
    """
    try:
        driver = set_chrome_driver()
        driver.get(SEARCH_URL)
        search_bar = driver.find_element(
            By.CSS_SELECTOR, 'input[name="ctl00$ContentPlaceHolder1$tbxDt"]')
        search_bar.send_keys(patient['doc_name'])
        search_button = driver.find_element(
            By.CSS_SELECTOR,
            'input[name="ctl00$ContentPlaceHolder1$btnDtQuery"]')
        search_button.click()
        time.sleep(1)  # crude wait for the results to render
        # get date table
        schedule = driver.find_elements(
            By.CSS_SELECTOR,
            'table[id="ctl00_ContentPlaceHolder1_gvDtQuery"] > tbody > tr')

        # get page list --> start at last page
        found_date = False
        page_list = schedule[-1].find_elements(By.CSS_SELECTOR, 'a')

        page_list[-1].click()
        time.sleep(1)
        rows = driver.find_elements(
            By.CSS_SELECTOR,
            'table[id="ctl00_ContentPlaceHolder1_gvDtQuery"] > tbody > tr')
        # Skip header rows and the pager row; column index 1 holds the date.
        for j in range(2, len(rows) - 1):
            values = rows[j].find_elements(By.CSS_SELECTOR, 'td')
            date = values[1].text
            if date == patient['target_date']:
                found_date = True
                values[0].click()
                break

        if not found_date:
            # NOTE(review): execution still falls through to the id field
            # below even when the date was not found — confirm intended.
            print('unable to find the target date, 指定日期不存在')
            logger.error('unable to find the target date, 指定日期不存在')

        id_bar = driver.find_element(By.CSS_SELECTOR, 'input[name="txtMRNo"]')
        id_bar.send_keys(patient['id'])
        # Keeps the browser open for 5 minutes — presumably so a human can
        # finish/confirm the registration manually; verify before shortening.
        time.sleep(300)
        return
    except Exception as e:
        print(f'execute {patient} failed, {e}')
        logger.exception(f'execute {patient} failed, {e}')
        return
예제 #22
0
 def on_open_station(self, station_info):
     '''
     Read the apartment count for a station and store it as its crawl priority.
     '''
     try:
         priority = get_num_of_apartment(self.driver)
         mongo.update_priority_of_station(station_info.get('_id'), priority)
     except NoSuchElementException:
         logger.info(
             '[{}] [UrlCrawler] Unable to get apartment count'.format(
                 self.city))
         # Re-raise so the caller decides how to handle the missing element.
         raise
     except Exception as e:
         # Any other failure is logged and reported but not re-raised.
         logger.error('[{}] [UrlCrawler] [on_open_station] err'.format(
             self.city))
         logger.exception(e)
         mongo.report_unexpected_error_url_crawler(e)
예제 #23
0
파일: mod_log.py 프로젝트: r-anime/modbot
def monitor_stream():
    """
    Monitor the subreddit for new actions and parse them when they come in. Will restart upon encountering an error.
    """

    def _run_stream():
        # One connect-and-stream cycle; raises on any failure so the outer
        # loop can restart it.
        global reddit, subreddit
        logger.info("Connecting to Reddit...")
        reddit = praw.Reddit(**config_loader.REDDIT["auth"])
        subreddit = reddit.subreddit(config_loader.REDDIT["subreddit"])
        _get_moderators()
        logger.info("Starting mod log stream...")
        for mod_action in subreddit.mod.stream.log():
            parse_mod_action(mod_action)

    while True:
        try:
            _run_stream()
        except Exception:
            delay_time = 30
            logger.exception(f"Encountered an unexpected error, restarting in {delay_time} seconds...")
            time.sleep(delay_time)
예제 #24
0
def monitor_stream():
    """
    Monitor the subreddit for new comments and parse them when they come in. Will restart upon encountering an error.
    """

    global reddit, subreddit
    retry_delay = 30
    while True:
        try:
            logger.info("Connecting to Reddit...")
            reddit = praw.Reddit(**config_loader.REDDIT["auth"])
            subreddit = reddit.subreddit(config_loader.REDDIT["subreddit"])
            logger.info("Starting comment stream...")
            # skip_existing=False also replays comments already present at startup.
            for new_comment in subreddit.stream.comments(skip_existing=False):
                process_comment(new_comment)
        except Exception:
            # Any failure (network, auth, praw) restarts the stream after a pause.
            logger.exception(
                f"Encountered an unexpected error, restarting in {retry_delay} seconds..."
            )
            time.sleep(retry_delay)
예제 #25
0
 def start_fill_missing(self, apartment):
     '''
     Re-crawl an apartment's detail page to fill in missing info.

     Each known failure mode has its own handler; the webdriver is always
     shut down in ``finally``.
     '''
     try:
         logger.info(
             '[{}] [DetailCrawler] Start fill in missing info'.format(
                 apartment.get('city')))
         self.get(apartment.get('house_url'))
         logger.info('[{}] [DetailCrawler] Url opened'.format(
             apartment.get('city')))
         info = get_info_of_single_url(self.driver,
                                       apartment.get('house_url'))
         logger.info('[{}] [DetailCrawler] Data get'.format(
             apartment.get('city')))
         mongo.update_missing_info(apartment, info)
         sleep(2)
     except ApartmentExpiredException:
         # Listing no longer exists: record that instead of retrying.
         logger.info('[{}] [DetailCrawler] Url expired'.format(
             apartment.get('city')))
         mongo.update_missing_info(apartment, {
             'expired': True,
         })
     except NoSuchElementException:
         # Element missing on the page; leave the record untouched.
         logger.info('[{}] [DetailCrawler] Elm not found'.format(
             apartment.get('city')))
     except (TimeoutException, WebDriverException,
             InvalidSessionIdException):
         # Browser/session-level failures: start a fresh driver.
         logger.info('[{}] [DetailCrawler] Session timeout'.format(
             apartment.get('city')))
         self.renew_driver()
     except (TooManyTimesException):
         # Retry budget exhausted elsewhere; drop silently.
         pass
     except Exception as e:
         logger.error(
             '[{}] [DetailCrawler] [start_fill_missing] err'.format(
                 apartment.get('city')))
         logger.exception(e)
     finally:
         self.quit()
예제 #26
0
def main():
    """Migrate all posts, then hourly snapshots from 2020-05-12 04:00 until now."""
    # Phase 1: posts, advanced by however many each batch processed.
    offset = 0
    while True:
        batch = migrate_posts(offset)
        offset += batch
        if batch < 1000:
            break
        if offset % 1000 == 0:
            logger.info(f"Migrated {offset} posts total")

    # Phase 2: one snapshot per hour from the fixed start date up to now.
    cursor = datetime.fromisoformat("2020-05-12 04:00:00.000")
    end = datetime.utcnow()
    while cursor <= end:
        try:
            migrate_snapshots(cursor.date(), cursor.hour)
        except Exception:
            # A single bad hour should not stop the whole migration.
            logger.exception(
                f"Failed to migrate {cursor.date()} - {cursor.hour}"
            )
        cursor += timedelta(hours=1)
        if cursor.hour == 0:
            logger.info(f"Finished migrating {cursor.date()}")
예제 #27
0
    def send(self, data, dtype='str', suffix=''):
        """Send ``data`` to the server and return the reply, or None on error.

        :param data: payload; serialized with json.dumps when dtype == 'json',
            otherwise treated as a string
        :param dtype: 'json' or 'str'
        :param suffix: terminator appended to the payload and stripped from
            the reply
        :return: the decoded reply (suffix removed), or None when not
            connected or a socket error occurs
        """
        if dtype == 'json':
            send_string = json.dumps(data) + suffix
        else:
            send_string = data + suffix
        self.connect()
        if self.connected:
            try:
                self._sock.send(send_string.encode())
                logger.debug('TCPClient Send {0}'.format(send_string))
            except socket.error as e:
                # NOTE(review): a failed send still falls through to the recv
                # below — confirm that is intended.
                logger.exception(e)

            try:
                rec = self._sock.recv(self.max_receive).decode()
                if suffix:
                    # Drop the protocol terminator from the reply.
                    rec = rec[:-len(suffix)]
                logger.debug('TCPClient received {0}'.format(rec))
                return rec
            except socket.error as e:
                logger.exception(e)
예제 #28
0
def save_post_and_comments(reddit_submission: Submission):
    """
    Saves a single reddit post and its comments to the database.
    """

    post_name = reddit_submission.permalink

    # Ensure post is in the database first.
    post_service.add_post(reddit_submission)
    logger.info(
        f"Loading {reddit_submission.num_comments} comments on {post_name}")

    # Load all comments
    # replace_more(limit=None) resolves every "load more comments" stub; it
    # can fail transiently, so allow up to 3 attempts with a short pause.
    retry_count = 0
    while True:
        try:
            retry_count += 1
            if retry_count > 3:
                logger.info(f"Unable to load all comments for {post_name}")
                return
            reddit_submission.comments.replace_more(limit=None)
            break
        except Exception:
            logger.exception("Handling replace_more exception")
            time.sleep(5)

    logger.info(f"Processing comments on {post_name}")
    # index starts at -1 so the final "total" log reads 0 for empty threads.
    index = -1
    for index, reddit_comment in enumerate(reddit_submission.comments.list()):
        # Since all comments will reference a parent if it exists, add all parent comments first.
        logger.debug(f"Saving parent comments of {reddit_comment.id}")
        comment_service.add_comment_parent_tree(reddit, reddit_comment)
        logger.debug(f"Saving comment {reddit_comment.id}")
        comment_service.add_comment(reddit_comment)
        if (index + 1) % 500 == 0:
            logger.info(f"Completed {index + 1} comments on {post_name}")

    logger.info(f"Finished processing {post_name}, total {index + 1} comments")
예제 #29
0
async def on_command_error(ctx: commands.Context, exception):
    """Report a failed command back to its channel and log the full traceback."""
    # Reply to the invoking message so the author sees the error in context.
    await ctx.send("Error: {}".format(exception), reference=ctx.message)
    logger.exception("Command Error", exc_info=exception)
예제 #30
0
 async def announcement_error(self):
     """Error hook for the announcement loop: log the active exception's traceback.

     NOTE(review): logger.exception only has a traceback to attach when
     invoked from an error callback / except context — presumably this is
     wired as one; confirm at the registration site.
     """
     logger.exception("Announcement Error")