def start_writer(self):
        '''
        Create and start a thread that opens the downstream
        connection to the ground station, and return it.
        '''
        writerThread = Writer('127.0.0.1', 9000, self.broadcastQueue)
        writerThread.name = 'ISRU Writer'
        writerThread.start()

        return writerThread
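The Writer class itself is not part of this excerpt. A minimal sketch of what a compatible class could look like, assuming it is a threading.Thread subclass that connects to the given host and port and forwards items drained from the broadcast queue; the constructor arguments mirror the call above, everything else is illustrative:

import socket
import threading


class Writer(threading.Thread):
    """Illustrative writer thread: forwards queued items over a TCP connection."""

    def __init__(self, host, port, broadcast_queue):
        super().__init__(daemon=True)
        self.host = host
        self.port = port
        self.broadcast_queue = broadcast_queue

    def run(self):
        # Open the downstream connection and stream queue items until a
        # None sentinel signals shutdown.
        with socket.create_connection((self.host, self.port)) as sock:
            while True:
                item = self.broadcast_queue.get()
                if item is None:
                    break
                sock.sendall(item if isinstance(item, bytes) else str(item).encode())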
Example #2
import configparser
import logging
import os
import sys
import threading
import time
from logging.handlers import TimedRotatingFileHandler
from pathlib import Path

import psutil

# The project-specific classes used below (Writer, Throttle, Requester,
# Scheduler, Crawler, DownloadWorker) come from the crawler's own modules,
# which are not part of this excerpt.


def main():
    if len(sys.argv) > 1:
        config_path = sys.argv[1]
    else:
        config_path = './configs/config_default.txt'

    if not Path(config_path).is_file():
        logging.error("Could not find config file!")
        sys.exit(1)  # exiting with error code

    # load config
    config = configparser.ConfigParser()
    config.read(config_path)

    log_dir = config['PATHS']['log_dir']
    log_file_name = config['PATHS']['log_file_name']

    # check that the logging directory is present
    if not Path(log_dir).is_dir():
        logging.error("Logging directory is not present!")
        sys.exit(1)  # exiting with error code

    file_handler = TimedRotatingFileHandler(
        os.path.join(os.path.dirname(__file__), log_dir, log_file_name),
        when='midnight',
        interval=1)
    console_handler = logging.StreamHandler()
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[file_handler, console_handler])

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    logging.getLogger("requests").setLevel(logging.WARNING)
    logging.getLogger("urllib3").setLevel(logging.WARNING)
    logging.getLogger("apscheduler.scheduler").setLevel(logging.WARNING)
    logging.getLogger("apscheduler.executors.default").setLevel(
        logging.WARNING)
    logging.getLogger("chardet.charsetprober").setLevel(logging.WARNING)

    logger.info("=======Starting=Crawler=========")

    # store config preferences in variables
    article_download_pattern = [
        (int(config['ARTICLE_DOWNLOAD_PATTERN']['number']),
         int(config['ARTICLE_DOWNLOAD_PATTERN']['delay'])),
    ]  # [(number of articles, delay in seconds), ...]
    number_download_worker = int(config['CRAWLING']['number_download_worker'])
    website_request_timeout = int(
        config['REQUESTS']['website_request_timeout'])
    rss_feed_crawl_period = int(config['CRAWLING']['rss_feed_crawl_period'])
    rss_feed_request_timeout = int(
        config['REQUESTS']['rss_feed_request_timeout'])
    warmup_iterations = int(config['CRAWLING']['warmup_iterations'])
    throttle_velocity = float(config['CRAWLING']['throttle_velocity'])
    max_offset = int(config['CRAWLING']['max_offset'])
    downloads_path = config['PATHS']['downloads']
    crawled_rss_articles_path = config['PATHS']['rss_articles']
    feed_path = config['PATHS']['feeds_list']
    requests_path = config['PATHS']['requests']

    # partly validating the config
    if not Path(feed_path).is_file():
        logging.error("Could not find RSS feeds list file!")
        sys.exit(1)  # exiting with error code

    parent_dir = os.path.dirname(requests_path)
    if not Path(parent_dir).is_dir():
        logging.error("Could not find requests directory!")
        sys.exit(1)  # exiting with error code

    writer = Writer()
    writer.start()

    throttle = Throttle(request_velocity=throttle_velocity)

    rss_requester = Requester(tag="RSS Requester",
                              path=requests_path,
                              throttle=throttle)
    website_requester = Requester(tag="Website Requester",
                                  path=requests_path,
                                  throttle=throttle)

    scheduler = Scheduler(patterns=article_download_pattern)

    crawler = Crawler(requester=rss_requester,
                      scheduler=scheduler,
                      feed_path=feed_path,
                      crawled_rss_articles_path=crawled_rss_articles_path,
                      rss_feed_crawl_period=rss_feed_crawl_period,
                      rss_feed_request_timeout=rss_feed_request_timeout,
                      warmup_iterations=warmup_iterations,
                      max_offset=max_offset)
    crawler.start()

    for i in range(number_download_worker):
        logger.info("Starting download worker #%d", i)
        DownloadWorker(requester=website_requester,
                       timeout=website_request_timeout,
                       path=downloads_path).start()

    while True:
        time.sleep(60)
        logger.debug("Number of threads running: %d", threading.active_count())
        process = psutil.Process(os.getpid())
        ram_usage = process.memory_full_info()
        # memory_percent() reports the process RSS relative to total system memory
        logger.info("RAM usage: %s%%, %s", process.memory_percent(),
                    ram_usage)
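For reference, the config file read above has to provide at least the sections and keys accessed in main(). A minimal config_default.txt sketch, with placeholder paths and values chosen purely for illustration:

[PATHS]
log_dir = ./logs
log_file_name = crawler.log
downloads = ./downloads
rss_articles = ./data/rss_articles
feeds_list = ./configs/feeds.txt
requests = ./data/requests

[ARTICLE_DOWNLOAD_PATTERN]
number = 10
delay = 60

[CRAWLING]
number_download_worker = 4
rss_feed_crawl_period = 300
warmup_iterations = 5
throttle_velocity = 1.0
max_offset = 30

[REQUESTS]
website_request_timeout = 10
rss_feed_request_timeout = 10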
Example #3
grp.add_argument('--maxConnections',
                 type=int,
                 default=10,
                 metavar='count',
                 help='Maximum number of simultaneous connections')
args = parser.parse_args()

logger = MyLogger.mkLogger(args)
logger.info('args=%s', args)

try:
    fwd = Forwarder(args, logger)  # Create a packet forwarder
    fwd.start()  # Start the forwarder

    writer = Writer(args, logger)  # Create the db writer thread
    writer.start()  # Start the writer thread

    queues = [fwd.q, writer.q]

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        logger.debug('Opened socket')
        s.bind(('', args.port))
        logger.debug('Bound to port %s', args.port)
        s.listen()
        logger.debug('Listening to socket')
        while writer.is_alive():
            (conn, addr) = s.accept()  # Wait for a connection
            logger.info('Connection from %s', addr)
            thrd = Reader(conn, addr, logger, queues)  # Create a new reader thread
            thrd.start()  # Start the new reader thread
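As with the other examples, the Reader class is not shown here. A minimal sketch of a thread matching the Reader(conn, addr, logger, queues) call above, assuming it receives data from the accepted connection and fans each chunk out to the forwarder and writer queues; this is an illustration, not the project's actual implementation:

import threading


class Reader(threading.Thread):
    """Illustrative reader thread: fans incoming data out to downstream queues."""

    def __init__(self, conn, addr, logger, queues):
        super().__init__(daemon=True)
        self.conn = conn
        self.addr = addr
        self.logger = logger
        self.queues = queues

    def run(self):
        try:
            while True:
                data = self.conn.recv(4096)
                if not data:  # peer closed the connection
                    break
                for q in self.queues:  # e.g. the Forwarder and Writer queues
                    q.put(data)
        finally:
            self.conn.close()
            self.logger.info('Connection from %s closed', self.addr)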