Пример #1
0
def parseargs() -> argparse.ArgumentParser:
    """Build the argument parser for the act feed worker.

    Starts from the parser returned by ``worker.parseargs`` and registers the
    feed specific options on top of it.

    Returns:
        The configured parser. No arguments are parsed here; that is left to
        the caller.
    """
    feed_parser = worker.parseargs("Get act feeds from act sharing directories")

    feed_parser.add_argument("--feed-uri", help="URI to retrieve feed from")

    feed_parser.add_argument(
        "--dump-dir",
        help="Dump manifest/bundles to directory instead of sendings facts to uploader",
        type=Path,
    )

    # The default cache location is looked up when the parser is built.
    feed_parser.add_argument(
        "--feed-cache",
        help="The directory to store information about last run",
        default=caep.get_cache_dir("act_feed_cache"),
        type=Path,
    )

    feed_parser.add_argument(
        "--no-exit-on-error",
        help="Log errors and continue on platform upload errors",
        action="store_true",
    )

    return feed_parser
Пример #2
0
def get_args() -> argparse.Namespace:
    """Define the feed downloader options and resolve them through caep.

    Returns:
        The namespace returned by ``caep.config.handle_args``, which is called
        with this parser plus ``"scio/etc"``, ``"scio.ini"`` and ``"feeds"``
        (presumably config directory, config file and section - confirm
        against the caep documentation).
    """
    feed_parser = argparse.ArgumentParser(description="pull feeds into html files")

    feed_parser.add_argument(
        "-l",
        "--log",
        help="Which file to log to (default: stdout)",
        type=str,
    )

    # NOTE(review): the two defaults below are plain strings, while nargs="+"
    # produces a list when the option is given on the command line. Confirm
    # that the consumers of these values cope with both shapes.
    feed_parser.add_argument(
        "--file-format",
        default="pdf doc xls csv xml",
        nargs="+",
    )
    feed_parser.add_argument(
        "--exclude-filenames",
        default="sitemap.xml robots.txt rss.xml atom.xml",
        nargs="+",
    )

    feed_parser.add_argument(
        "--store-path",
        help=f"Location for stored files. Default {XDG_CACHE}/scio-feeds",
    )
    feed_parser.add_argument(
        "--proxy-string",
        help="Proxy to use for external queries",
    )
    feed_parser.add_argument(
        "--ignore",
        help="file with ignore patterns",
        type=str,
    )
    feed_parser.add_argument(
        "--feeds",
        default=caep.get_config_dir("scio/etc/feeds.txt"),
        help=f"feed urls (one pr. line). Default: {XDG_CONFIG}/scio/etc/feeds.txt",
        type=str,
    )
    feed_parser.add_argument(
        "--cache",
        default=caep.get_cache_dir("scio-feeds/cache.db"),
        help=f"sqlite db containing cached hashes. Default = {XDG_CACHE}/scio-feeds/cache.db",
    )
    feed_parser.add_argument(
        "--tlp",
        default="WHITE",
        help="Set TLP (RED, AMBER, GREEN, WHITE) on document upload. Default=WHITE",
    )
    feed_parser.add_argument(
        "--scio",
        default="http://localhost:3000/submit",
        help=(
            "Upload to scio engine API url. "
            "Set to empty value to not upload files."
        ),
    )
    feed_parser.add_argument(
        "--stoplist",
        default=caep.get_config_dir("scio/etc/secstoplist.txt"),
        help="Provided own stoplist for text extraction",
    )

    # Logging options; neither has a help text.
    feed_parser.add_argument("--logfile")
    feed_parser.add_argument("--loglevel", default="info")

    parsed: argparse.Namespace = caep.config.handle_args(
        feed_parser, "scio/etc", "scio.ini", "feeds"
    )
    return parsed
Пример #3
0
def update_last_update(last_update: int) -> None:
    """Write the last update value to disk (~/.cache/<worker_name>/last_update).

    The value is stored as text in a file named ``last_update`` inside the
    directory returned by ``caep.get_cache_dir`` for ``worker.worker_name()``
    (requested with ``create=True``). The file is opened in ``"w"`` mode, so
    any previous content is replaced.
    """
    cache_dir = caep.get_cache_dir(worker.worker_name(), create=True)
    cache_filename: Text = os.path.join(cache_dir, "last_update")

    with open(cache_filename, "w") as handle:
        handle.write(str(last_update))
Пример #4
0
def parseargs() -> argparse.ArgumentParser:
    """Build the argument parser for the MISP feed worker.

    Extends the parser returned by ``worker.parseargs`` with the
    ``--manifest-dir`` option and returns it without parsing anything.
    """
    misp_parser = worker.parseargs('Get MISP feeds from MISP sharing directories')

    # Default location is looked up when the parser is built.
    misp_parser.add_argument(
        '--manifest-dir',
        help='The directory to store latest manifests',
        default=caep.get_cache_dir('misp_manifest'),
    )

    return misp_parser
Пример #5
0
    def __init__(
            self,
            cache_prefix: Text = CACHE_PREFIX,
            requests_common_kwargs: Optional[Dict] = None) -> None:
        """Store the common request options and open the cache database.

        Args:
            cache_prefix: Name passed to ``caep.get_cache_dir`` (with
                ``create=True``) to obtain the cache directory.
            requests_common_kwargs: Kept on the instance as given; any falsy
                value (``None`` or an empty dict) is replaced by a new
                empty dict.
        """
        # Falsy input falls back to a fresh empty dict
        self.requests_common_kwargs = requests_common_kwargs or {}

        self.cache: sqlite3.Connection = get_db_cache(
            caep.get_cache_dir(cache_prefix, create=True))
Пример #6
0
def parseargs() -> argparse.ArgumentParser:
    """Return the MISP feed worker's argument parser.

    The base parser comes from ``worker.parseargs``; this function only adds
    ``--manifest-dir``, whose default is ``caep.get_cache_dir("misp_manifest")``.
    No arguments are parsed here.
    """
    misp_parser = worker.parseargs("Get MISP feeds from MISP sharing directories")

    default_manifest_dir = caep.get_cache_dir("misp_manifest")
    misp_parser.add_argument(
        "--manifest-dir",
        help="The directory to store latest manifests",
        default=default_manifest_dir,
    )

    return misp_parser
Пример #7
0
def verify_manifest_dir(manifest_dir: Text) -> None:
    """Make sure the manifest directory and its feed file are present.

    When ``manifest_dir`` equals the default location
    (``caep.get_cache_dir('misp_manifest')``), that location is requested
    again with ``create=True``. If ``manifest_dir`` is still not a directory
    afterwards, a message is printed and the process exits with status 1.
    A missing ``misp_feeds.txt`` is created containing one default feed URL.
    """
    # Only the default location is created on demand; a directory configured
    # elsewhere (e.g. in the .ini file) must already exist.
    default_dir = caep.get_cache_dir('misp_manifest')
    if manifest_dir == default_dir:
        caep.get_cache_dir('misp_manifest', create=True)

    # Fail hard when the directory is not there.
    if not os.path.isdir(manifest_dir):
        print("Could not open manifest directory:", manifest_dir)
        sys.exit(1)

    feed_file = os.path.join(manifest_dir, 'misp_feeds.txt')
    if os.path.isfile(feed_file):
        return

    # No feed file yet: write one so there is always some default config.
    with open(feed_file, 'w') as feed_h:
        feed_h.write("https://www.circl.lu/doc/misp/feed-osint/")
Пример #8
0
def get_last_update() -> int:
    """Get the last update value from disk (~/.cache/<worker_name>/last_update).

    Returns:
        The integer stored in the ``last_update`` cache file when that file
        exists. Otherwise the current time minus one week, expressed in
        milliseconds (the stored value is presumably in milliseconds too -
        verify against the writer).
    """
    cache_dir = caep.get_cache_dir(WORKER_NAME, create=True)
    cache_filename: Text = os.path.join(cache_dir, "last_update")

    if not os.path.isfile(cache_filename):
        # Nothing recorded yet: start one week back from now
        one_week_seconds = 3600 * 24 * 7
        last_update = int((time.time() - one_week_seconds) * 1000)
        info("last update not specified, autoconfigured as {}".format(last_update))
        return last_update

    # Resume from the value recorded for the last successfully received event
    with open(cache_filename) as handle:
        last_update = int(handle.read().strip())
        debug("last update starting at {}".format(last_update))

    return last_update
Пример #9
0
def parse_args() -> argparse.Namespace:
    """Set up and resolve the configuration for the Scio API.

    API specific options are added to the parser returned by
    ``act.scio.config.parse_args`` and the result is resolved through
    ``caep.config.handle_args``. The document directory is created when it
    does not exist, and a beanstalk client and an elasticsearch client are
    attached to the returned namespace.

    Returns:
        The resolved arguments, extended with ``beanstalk_client`` and
        ``elasticsearch_client`` attributes.
    """
    api_parser = act.scio.config.parse_args("Scio API")

    api_parser.add_argument(
        "--port",
        default=3000,
        help="API port to listen on (default=3000)",
        type=int,
    )
    api_parser.add_argument(
        "--max-jobs",
        default=10,
        help=(
            "Max jobs in queue before submit responds "
            "with backpressure (429)"
        ),
        type=int,
    )
    api_parser.add_argument(
        "--reload",
        help="Reload web server on file change (dev mode)",
        action="store_true",
    )
    api_parser.add_argument(
        "--document-path",
        help=f"Storage path for documents = {XDG_CACHE}/scio/documents",
        default=caep.get_cache_dir("scio/documents"),
    )
    api_parser.add_argument(
        "--host",
        default="127.0.0.1",
        help="Host interface (default=127.0.0.1)",
        dest="host",
    )

    args = caep.config.handle_args(api_parser, "scio/etc", "scio.ini", "api")

    # Make sure the document storage directory is available
    document_path = args.document_path
    if not os.path.isdir(document_path):
        os.makedirs(document_path)
        logging.info("Created directory: %s", document_path)

    # Clients are attached to the namespace itself
    args.beanstalk_client = act.scio.config.beanstalk_client(
        args, use="scio_doc"
    )
    args.elasticsearch_client = act.scio.config.elasticsearch_client(args)

    return args  # type: ignore
Пример #10
0
import time
import traceback
from ipaddress import AddressValueError, IPv4Address
from logging import debug, error, info, warning
from typing import Dict, Generator, List, Text, Tuple, Union

import act.api
import caep
import RashlyOutlaid.api as shadowserver
from act.api.helpers import handle_fact
from act.api.libs import cli
from RashlyOutlaid.libwhois import QueryError

from act.workers.libs import worker

# Cache directory for this worker; requested with create=True at import time
CACHE_DIR = caep.get_cache_dir("shadowserver-asn-worker", create=True)

# URL of the all.json file in the ISO-3166-Countries-with-Regional-Codes repo
ISO_3166_FILE = (
    "https://raw.githubusercontent.com/lukes/"
    "ISO-3166-Countries-with-Regional-Codes/master/all/all.json"
)

# Blacklists for IP record values.
# A value that matches a blacklist should not be used.
BLACKLIST = {
    "ip": [  # Blacklist IP addresses. Values is IP
        lambda ip: not ip.strip(),  # Empty values
        lambda ip: ip.strip()
        .lstrip("0")
        .startswith("."),  # IP addreses starting with "0."
        lambda ip: ip == "255.255.255.255",  # broadcast
        lambda ip: IPv4Address(ip).is_multicast,
Пример #11
0
def get_last_run_filename(feed_cache: Path, feed_uri: Text) -> Path:
    """Return the cache file path used for ``feed_uri`` inside ``feed_cache``.

    The file name is the SHA-256 hex digest of the feed URI, so several
    processes handling different feeds each get their own cache file. When
    ``feed_cache`` is the default location
    (``caep.get_cache_dir("act_feed_cache")``), that location is requested
    again with ``create=True``.
    """
    default_cache = caep.get_cache_dir("act_feed_cache")
    if str(feed_cache) == default_cache:
        caep.get_cache_dir("act_feed_cache", create=True)

    digest = hashlib.sha256(feed_uri.encode()).hexdigest()
    return feed_cache / Path(digest)