Пример #1
0
def main():
    """Command-line entry point that extracts job offers to a CSV file.

    Command-line options:
        -f / --filename         Base name of the output file
                                (defaults to ``DEFAULT_FILENAME``).
        -wt / --wait_time       Seconds to wait between requests (default 5).
        -nd / --number_of_days  Restrict the query to the last N days.
        -td / --today           Restrict the query to today's offers.
        --testing               Flag forwarded to ``export_data_to_file``.

    When both ``--today`` and ``--number_of_days`` are supplied, ``--today``
    takes precedence and ``--number_of_days`` is ignored.

    The output path is ``../data/partial_datasets/<filename>_<date>.csv``,
    except in the ``--number_of_days`` mode where the suffix is
    ``_last_<N>_days.csv``. The path is relative to the current working
    directory.
    """
    logging.basicConfig(format='%(asctime)s | %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p',
                        level=logging.INFO)

    parser = argparse.ArgumentParser(description='Extract job offers.')
    parser.add_argument('-f', '--filename', type=str,
                        help='Filename where data will be saved. '
                             'The date will be added at the end of it.\n'
                             'This argument is optional, the default value '
                             'will be saved under the data directory with the '
                             'name: offers_XXXX-XX-XX.csv. Where XXXX-XX-XX '
                             'is the actual date.',
                        default=DEFAULT_FILENAME)
    parser.add_argument('-wt', '--wait_time', type=int,
                        help='Time to wait between requests, used to prevent '
                             'the overload of the server.\n'
                             'The default wait time is 5 seconds.',
                        default=5)
    parser.add_argument('-nd', '--number_of_days', type=int,
                        help="Set the number of days to collect data.",
                        default=None)
    parser.add_argument('-td', '--today', action="store_true",
                        help="Collect the data from today.")
    parser.add_argument('--testing', action="store_true",
                        help="Test with a limited number of pages.")
    args = parser.parse_args()

    # Get ready the different constants to be used.
    today_date = datetime.date.today()
    output_dir = '../data/partial_datasets'
    # Default name carries today's date; only the custom-days mode overrides it.
    filename = f'{output_dir}/{args.filename}_{today_date}.csv'
    wait_time = args.wait_time

    requestor = RequestHandler()

    if args.today:
        logging.info(f"Only todays data: {today_date}")
        requestor.conf_query_todays_spanish_offers()

    elif args.number_of_days is not None:
        # The f-prefix was missing here, so the placeholder was logged
        # literally instead of the requested number of days.
        logging.info(f"Getting data from the last {args.number_of_days} days")
        requestor.conf_query_custom_days(args.number_of_days)
        filename = f'{output_dir}/{args.filename}_last_{args.number_of_days}_days.csv'

    else:
        logging.info("All the data")
        requestor.conf_query_all_spanish_offers()

    logging.info(f'Data will be saved in: {filename}')
    export_data_to_file(filename, wait_time,
                        requestor, args.testing)
    logging.info('Data extracted correctly.')