Example #1
import argparse
import os


def parse_arguments():
    parser = argparse.ArgumentParser()

    parser.add_argument("-c",
                        "--cfg",
                        dest="cfgfile",
                        help="Configuration file",
                        required=True)
    parser.add_argument("-s",
                        "--searchstringst",
                        dest="string",
                        help="String to search for",
                        required=True)
    parser.add_argument('-d',
                        '--delete',
                        action='store_true',
                        help="Flag to delete records")
    parser.add_argument('-a',
                        '--always_commit',
                        action='store_true',
                        help="Flag to commit directly")

    args = parser.parse_args()

    if args.cfgfile is None or args.string is None:
        parser.print_help()
        parser.exit()

    return args


def process_arguments():
	parser = argparse.ArgumentParser(description='Retrieve revision information for Wikipedia article(s).')
	parser.add_argument('-c', '--category', metavar='category_title', dest='category',
						help='The name of a Wikipedia category (e.g. Category:2009_earthquakes).')
	parser.add_argument('-a', '--article', metavar='article_title', dest='article',
						help='The name of a Wikipedia article (e.g. 2009_Bhutan_earthquake).')
	parser.add_argument('-i', '--input', metavar='input_filename', dest='infilename',
						help='Name of input file containing a list of articles and categories, one per line.')
	parser.add_argument('-d', '--depth', metavar='depth', dest='depth', type=int, default=0,
						help='The crawling depth for the given category, integer >= 0. Default is 0.')
	parser.add_argument('-xc', metavar='excluded_categories', dest='excluded_categories',
						help='A list of categories to exclude from the results, separated by commas (e.g. Category:a,Category:b).')
	parser.add_argument('-xa', metavar='excluded_articles', dest='excluded_articles', 
						help='A list of articles to exclude from the results, separated by commas (e.g. article1,article2).')
	parser.add_argument('-xf', metavar='exclusions_filename', dest='exclusions_filename',
						help='Name of file containing list of articles and/or categories, one per line, to exclude from the results.')
	parser.add_argument('-o', '--output', metavar='output_filename', dest='outfilename', required=True,
						help='Name of output CSV file. *REQUIRED*')
	args = parser.parse_args()	
	if not (args.infilename or args.article or args.category):
		parser.exit(status=-1, message='At least one form of input (article, category, or infile) is needed!\n')
	articles = []
	categories = []
	excluded_articles = []
	excluded_categories = []
	if args.excluded_articles:
		excluded_articles = args.excluded_articles.split(',')
	if args.excluded_categories:
		excluded_categories = args.excluded_categories.split(',')
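	# In both the exclusions file and the input file, lines starting with 'Category:'
	# are treated as categories and all other lines as article titles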
	if args.exclusions_filename:
		with open(args.exclusions_filename, 'r') as exclusions_file:
			titles = exclusions_file.readlines()
		for title in titles:
			if title.startswith('Category:'):
				excluded_categories.append(title.rstrip())
			else:
				excluded_articles.append(title.rstrip())
	if args.article:
		articles.append(args.article)
	if args.category:
		categories.append(args.category)
	if args.infilename:
		titles = []
		with open(args.infilename, 'r') as infile:
			titles = infile.readlines()
		for title in titles:
			if title.startswith('Category:'):
				categories.append(title.rstrip())
			else:
				articles.append(title.rstrip())
	articles = list(set(articles))
	categories = list(set(categories))
	return (articles, categories, excluded_articles, excluded_categories, args.depth, args.outfilename)


def check_path(path, parser):
    """Checks the file path existence, type and permissions"""
    if not os.path.exists(path):
        print("File does not exist:\n%s", path)
        parser.exit(1)
    if not os.path.isfile(path):
        print("File is a directory:\n%s", path)
        parser.exit(1)
    if not os.access(path, os.R_OK):
        print("File does not have read permissions:\n%s", path)
        parser.exit(1)
Example #4
def main():
    colorama.init()
    parser = argparse.ArgumentParser()
    parser.add_argument('-v',
                        '--version',
                        action='version',
                        version='%s v%s' % (parser.prog, __version__))

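    # add_subparsers() only accepts the 'required' keyword from Python 3.7
    # onwards, so older interpreters register the subparsers without it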
    if sys.version_info[:2] >= (3, 7):
        subparsers = parser.add_subparsers(dest='command',
                                           required=True)
    else:
        subparsers = parser.add_subparsers(dest='command')

    parser_latest = subparsers.add_parser(CMD_LATEST,
                                          help="get the latest price of an asset",
                                          description="Get the latest [asset] price (in GBP). "
                                                      "If no data source [-ds] is given, "
                                                      "the same data source(s) as "
                                                      "'bittytax' are used.")
    parser_latest.add_argument('asset',
                               type=str,
                               nargs=1,
                               help="symbol of cryptoasset or fiat currency "
                                    "(i.e. BTC/LTC/ETH or EUR/USD)")
    parser_latest.add_argument('quantity',
                               type=validate_quantity,
                               nargs='?',
                               help="quantity to price (optional)")
    parser_latest.add_argument('targetasset',
                               type=str.upper,
                               nargs='?',
                               default=config.CCY,
                               help="convert into specificed target asset (optional, default=%s)" % config.CCY)
    parser_latest.add_argument('-ds',
                               choices=datasource_choices(upper=True) + ['ALL'],
                               metavar='{' + ', '.join(datasource_choices()) + '} or ALL',
                               dest='datasource',
                               type=str.upper,
                               help="specify the data source to use, or all")
    parser_latest.add_argument('-d',
                               '--debug',
                               action='store_true',
                               help="enable debug logging")

    parser_history = subparsers.add_parser(CMD_HISTORY,
                                           help="get the historical price of an asset",
                                           description="Get the historic [asset] price (in GBP) "
                                                       "for the [date] specified. "
                                                       "If no data source [-ds] is given, "
                                                       "the same data source(s) as "
                                                       "'bittytax' are used.")
    parser_history.add_argument('asset',
                                type=str.upper,
                                nargs=1,
                                help="symbol of cryptoasset or fiat currency "
                                     "(i.e. BTC/LTC/ETH or EUR/USD)")
    parser_history.add_argument('date',
                                type=validate_date,
                                nargs=1,
                                help="date (YYYY-MM-DD or DD/MM/YYYY)")
    parser_history.add_argument('quantity',
                                type=validate_quantity,
                                nargs='?',
                                help="quantity to price (optional)")
    parser_history.add_argument('targetasset',
                                type=str.upper,
                                nargs='?',
                                default=config.CCY,
                                help="convert into specificed target asset (optional, default=%s)" % config.CCY)
    parser_history.add_argument('-ds',
                                choices=datasource_choices(upper=True) + ['ALL'],
                                metavar='{' + ', '.join(datasource_choices()) + '} or ALL',
                                dest='datasource',
                                type=str.upper,
                                help="specify the data source to use, or all")
    parser_history.add_argument('-nc',
                                '--nocache',
                                action='store_true',
                                help="bypass data cache")
    parser_history.add_argument('-d',
                                '--debug',
                                action='store_true',
                                help="enable debug logging")

    parser_list = subparsers.add_parser(CMD_LIST,
                                        help="list all assets",
                                        description='List all assets, or filter by [asset].')
    parser_list.add_argument('asset',
                             type=str,
                             nargs='?',
                             help="symbol of cryptoasset or fiat currency "
                                  "(i.e. BTC/LTC/ETH or EUR/USD)")
    parser_list.add_argument('-s',
                             type=str,
                             nargs='+',
                             metavar='STRING',
                             dest='search',
                             help="search assets using STRING")
    parser_list.add_argument('-ds',
                             choices=datasource_choices(upper=True) + ['ALL'],
                             metavar='{' + ', '.join(datasource_choices()) + '} or ALL',
                             dest='datasource',
                             type=str.upper,
                             help="specify the data source to use, or all")
    parser_list.add_argument('-d',
                             '--debug',
                             action='store_true',
                             help="enable debug logging")

    config.args = parser.parse_args()

    if config.args.debug:
        print("%s%s v%s" % (Fore.YELLOW, parser.prog, __version__))
        print("%spython: v%s" % (Fore.GREEN, platform.python_version()))
        print("%ssystem: %s, release: %s" % (Fore.GREEN, platform.system(), platform.release()))
        config.output_config()

    if config.args.command in (CMD_LATEST, CMD_HISTORY):
        symbol = config.args.asset[0]
        target_symbol = config.args.targetasset
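        # Track whether the asset was recognised and whether a price could be obtained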
        asset = price = False

        try:
            if config.args.datasource:
                if config.args.command == CMD_HISTORY:
                    assets = AssetData().get_historic_price_ds(symbol,
                                                               config.args.date[0],
                                                               config.args.datasource)
                else:
                    assets = AssetData().get_latest_price_ds(symbol,
                                                             config.args.datasource)
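                # Fetch the BTC price at most once and reuse it for every asset quoted in BTC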
                btc = None
                for asset in assets:
                    if not asset['price']:
                        continue

                    output_ds_price(asset)
                    if asset['quote'] == 'BTC':
                        if btc is None:
                            if config.args.command == CMD_HISTORY:
                                btc = get_historic_btc_price(config.args.date[0])
                            else:
                                btc = get_latest_btc_price()

                        if btc['price'] is not None:
                            price_ccy = btc['price'] * asset['price']
                            output_ds_price(btc)
                            price = True
                    else:
                        price_ccy = asset['price']
                        price = True

                    output_price(symbol, price_ccy)

                if not assets:
                    asset = False
            else:
                value_asset = ValueAsset(price_tool=True)
                if config.args.command == CMD_HISTORY:
                    price_ccy, name, _ = value_asset.get_historical_price(symbol,
                                                                          config.args.date[0],
                                                                          target_symbol)
                else:
                    price_ccy, name, _ = value_asset.get_latest_price(symbol, target_symbol)

                if price_ccy is not None:
                    output_price(symbol, price_ccy, target_symbol)
                    price = True

                if name is not None:
                    asset = True

        except DataSourceError as e:
            parser.exit("%sERROR%s %s" % (Back.RED+Fore.BLACK, Back.RESET+Fore.RED, e))

        if not asset:
            parser.exit("%sWARNING%s Prices for %s are not supported" % (
                Back.YELLOW+Fore.BLACK, Back.RESET+Fore.YELLOW, symbol))

        if not price:
            if config.args.command == CMD_HISTORY:
                parser.exit("%sWARNING%s Price for %s on %s is not available" % (
                    Back.YELLOW+Fore.BLACK, Back.RESET+Fore.YELLOW,
                    symbol, config.args.date[0].strftime('%Y-%m-%d')))
            else:
                parser.exit("%sWARNING%s Current price for %s is not available" % (
                    Back.YELLOW+Fore.BLACK, Back.RESET+Fore.YELLOW, symbol))
    elif config.args.command == CMD_LIST:
        symbol = config.args.asset
        try:
            assets = AssetData().get_assets(symbol, config.args.datasource, config.args.search)
        except DataSourceError as e:
            parser.exit("%sERROR%s %s" % (Back.RED+Fore.BLACK, Back.RESET+Fore.RED, e))

        if symbol and not assets:
            parser.exit("%sWARNING%s Asset %s not found" % (
                Back.YELLOW+Fore.BLACK, Back.RESET+Fore.YELLOW, symbol))

        if config.args.search and not assets:
            parser.exit("No results found")

        output_assets(assets)
Example #5
def usage() -> argparse.Namespace:
    """Parse the options provided on the command line.

    Returns:
        argparse.Namespace: The parameters provided on the command line.
    """
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('-h',
                        '--help',
                        action='store_true',
                        help='show this help message and exit')
    group = parser.add_argument_group("General", "Simulation general settings")
    group.add_argument("--first-date",
                       help="The first date to be processed. "
                       "Default to the current date",
                       type=datetime_type,
                       default=np.datetime64("now"))
    group.add_argument("--last-date",
                       help="The last date to be processed. "
                       "Default to the last date allowing to cover an entire "
                       "cycle.",
                       type=datetime_type)
    group = parser.add_argument_group("Execution",
                                      "Runtime parameters options ")
    group.add_argument("--debug",
                       action="store_true",
                       help="Put swot simulator in debug mode")
    group.add_argument("--log",
                       metavar='PATH',
                       help="Path to the logbook to use",
                       type=argparse.FileType("w"))
    group.add_argument("--scheduler-file",
                       help="Path to a file with scheduler information to "
                       "launch swot simulator on a cluster. By "
                       "default, use a local cluster.",
                       metavar='PATH',
                       type=argparse.FileType("r"))
    group = parser.add_argument_group("LocalCluster",
                                      "Dask local cluster option")
    group.add_argument("--n-workers",
                       help="Number of workers to start (Default to 1)",
                       type=int,
                       metavar='N',
                       default=1)
    group.add_argument("--processes",
                       help="Whether to use processes (True) or threads "
                       "(False).  Defaults to False",
                       action="store_true")
    group.add_argument("--threads-per-worker",
                       help="Number of threads per each worker. "
                       "(Default to 1)",
                       type=int,
                       metavar='N',
                       default=1)
    group = parser.add_argument_group("Configuration")
    group.add_argument("--template",
                       help="Writes the default configuration of the "
                       "simulator into the file and ends the program.",
                       metavar="PATH",
                       type=argparse.FileType("w"))
    namespace = argparse.Namespace()
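    # Parse with the parser's private _parse_known_args() so that defaults are
    # not filled in: only options explicitly passed on the command line end up
    # in the namespace, which the membership tests below rely on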
    namespace, _ = parser._parse_known_args(sys.argv[1:], namespace)

    def add_settings(parser):
        """Added the argument defining the settings of the simulator."""
        parser.add_argument("settings",
                            type=argparse.FileType('r'),
                            help="Path to the parameters file")

    # Displays help and ends the program.
    if "help" in namespace:
        add_settings(parser)
        parser.print_help()
        parser.exit(0)

    # Checking exclusive options.
    if "scheduler_file" in namespace:
        for item in ["n_workers", "processes", "threads_per_worker"]:
            if item in namespace:
                item = item.replace("_", "-")
                raise RuntimeError(
                    f"--{item}: not allowed with argument --scheduler-file")

    # Writes the template configuration file and ends the program.
    if "template" in namespace:
        namespace.template.write(settings.template())
        sys.stdout.write(f"""
The template has been written in the file: {namespace.template.name!r}.
""")
        parser.exit(0)

    # The partial parsing of the command-line arguments is finished; the last
    # argument is added and the command line is parsed one final time.
    add_settings(parser)

    return parser.parse_args()


def main():
    """Main entry point for the script"""
    parser = argparse.ArgumentParser(description='Determines the top 10 most common source IP addresses, and their hit'
                                                 ' rates, for a fleet of 1000 web servers within the last hour')
    parser.add_argument('server_list', help='Path to the text file containing list of servers')
    parser.add_argument('-N', default=10, type=int,
                        help='Outputs the N most common IP addresses (Default: 10)')
    parser.add_argument('-L', '--logpath', default='/var/log/httpd-access-log', type=str,
                        help='Change the log file location on server, common for all '
                             '(Default: /var/log/httpd-access-log)')
    parser.add_argument('-T', '--timediff', default=1, type=int,
                        help='Change the time difference when looking for common IPs, in hours (Default: 1 hour)')
    args = parser.parse_args()

    # Makes sure the server list file is valid
    check_path(args.server_list, parser)

    # Dictionaries for IP Address and Hit counts
    ip_dict = collections.defaultdict(int)
    hit_success = collections.defaultdict(int)

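    # The server list is read in binary mode, so each whitespace-separated
    # field is a bytes object and is decoded from UTF-8 before use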
    with open(args.server_list, "rb") as servers:
        for server in servers:
            # The program expects a valid format for listing servers
            hostname, user, passwd = server.split()
            # Generate RSA key for host key verification (Skipped)
            #key = paramiko.RSAKey(data=base64.decodestring('AAA...'))  # needs host key
            # Starts the SSH Client
            client = paramiko.SSHClient()
            # Add the host to known hosts by adding the RSA key (Skipped)
            #client.get_host_keys().add('ssh.example.com', 'ssh-rsa', key)
            # Ignores the warnings for RSA Keys
            client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            # Connects to the server
            client.connect(hostname.decode('UTF-8'),
                           username=user.decode('UTF-8'), password=passwd.decode('UTF-8'))
            # exec_command() returns (stdin, stdout, stderr); keep stdout to read the log
            _, data, _ = client.exec_command("cat {}".format(args.logpath))
            log_data = []
            # Stores the log data in a list
            for line in data:
                log_data.append(line.strip("\n"))

            # Parses each log line and updates the IP address and hit counts
            for log in log_data:
                ip_address, date_time, status_code = parse_log(log)
                if check_time(date_time, args.timediff):
                    ip_dict[ip_address] += 1
                    if status_code == "200":
                        hit_success[ip_address] += 1

    # An ascending list of IP address occurrences
    ip_list = sorted(list(ip_dict.items()), key=operator.itemgetter(1))

    if ip_list:
        print("IP Address      Hit Rate")
        for _ in range(args.N):
            # Gets the last element that has the highest occurrence
            try:
                top_ip, total_hits = ip_list.pop()
            except IndexError:
                break
            # Hit Rate = # of successful connections/total connection attempts
            hit_rate = (hit_success[top_ip]/total_hits)*100

            print("{0} ---- {1:.2f}%".format(top_ip, hit_rate))
    else:
        print("No results found.")

    parser.exit(0)
Example #7
def main():
    import textwrap

    # Parse command-line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent("""\
Scrapes subreddits and puts their content in a plain text file.
Use with --posts to download posts, --subs to download
subreddits, and --config to make custom Pushshift API calls. 
"""),
    )

    mode_group = parser.add_mutually_exclusive_group(required=True)

    mode_group.add_argument(
        "--posts",
        dest="posts_file",
        type=str,
        default="",
        help="A file containing the list of posts to download, one per line.",
    )

    mode_group.add_argument(
        "--subs",
        dest="subs_file",
        type=str,
        # required=False,
        default="",
        help=
        "A file containing the list of subreddits to download, one per line.",
    )

    mode_group.add_argument(
        "--config",
        dest="config_file",
        type=str,
        # required=False,
        default="",
        help=
        "A file containing the arguments for the Pushshift APIs. See config.default.txt for a sample config file.",
    )

    parser.add_argument(
        "--start",
        dest="start_date",
        type=str,
        # required=True,
        help="The date to start parsing from, in YYYY-MM-DD format",
    )
    parser.add_argument(
        "--end",
        dest="end_date",
        type=str,
        # required=True,
        help="The final date of the parsing, in YYYY-MM-DD format",
    )

    parser.add_argument(
        "--output",
        dest="output_folder",
        type=str,
        required=True,
        help="The output folder",
    )

    parser.add_argument(
        "--blacklist",
        dest="blacklist_file",
        type=str,
        required=False,
        default="",
        help="A file containing the lines to skip.",
    )
    parser.add_argument(
        "--workers",
        dest="num_workers",
        type=int,
        required=False,
        default=1,
        help="Number of parallel workers",
    )

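    # Show the full help text and exit when the script is run without any arguments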
    if len(sys.argv[1:]) == 0:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    if args.config_file or args.subs_file:
        if not (args.start_date and args.end_date):
            parser.error(
                "Start date and end date are required in --config or --subs mode."
            )

        pattern = re.compile("^[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]$")
        if not (pattern.match(args.start_date)
                and pattern.match(args.end_date)):
            parser.error("Invalid date format.")

    check_output_directory(args.output_folder)

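    # do_reddit_login() presumably reads its credentials from reddit_config.py;
    # a missing config module surfaces here as an ImportError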
    try:
        reddit = do_reddit_login()
    except ImportError:
        parser.error(
            "Failed to load configuration. Did you create reddit_config.py?")

    if args.subs_file:

        subs = load_list_from_file(args.subs_file)

        blacklist = load_blacklist(
            args.blacklist_file) if args.blacklist_file else []

        if args.num_workers > 1:
            with Pool(args.num_workers) as p:
                p.map(
                    process_subs,
                    [(
                        reddit,
                        sub,
                        args.start_date,
                        args.end_date,
                        args.output_folder,
                        blacklist,
                    ) for sub in subs],
                )

        else:
            for sub in subs:
                process_subs((
                    reddit,
                    sub,
                    args.start_date,
                    args.end_date,
                    args.output_folder,
                    blacklist,
                ))

    elif args.posts_file:

        posts = load_list_from_file(args.posts_file)

        blacklist = load_blacklist(
            args.blacklist_file) if args.blacklist_file else []

        if args.num_workers > 1:
            with Pool(args.num_workers) as p:
                p.map(
                    process_posts,
                    [(reddit, post, blacklist, args.output_folder)
                     for post in posts],
                )

        else:
            for post in posts:
                process_posts((reddit, post, blacklist, args.output_folder))

    else:
        blacklist = load_blacklist(
            args.blacklist_file) if args.blacklist_file else []
        config = load_config(args.config_file) if args.config_file else {}

        if args.num_workers > 1:
            with Pool(args.num_workers) as p:
                p.map(
                    process_all,
                    [(
                        reddit,
                        start_split,
                        end_split,
                        args.output_folder,
                        config,
                        blacklist,
                    ) for (start_split, end_split) in make_splits(
                        args.start_date, args.end_date, args.num_workers)],
                )

        else:

            start_ts, end_ts = make_splits(args.start_date, args.end_date,
                                           1)[0]
            process_all((reddit, start_ts, end_ts, args.output_folder, config,
                         blacklist))

    print("Done!")
    sys.exit(0)