Exemplo n.º 1
0
    def __init__(self, config, cache):
        """
        Create CIPRES gateway data source object.

        Initialises the base Gateway, reads the database parameters and the
        resource list for the ``gateway.cipres`` config section, and loads
        the tool table from ``data/tool_ids.pkl``. Problems reading the
        config are reported through ``amass.abort``.

        :param config: AmassConfig object containing configuration information
        :param cache: handed unchanged to the base Gateway constructor
        """
        amass.features.gateway.Gateway.__init__(self, config, cache)
        self.name = "gateway.cipres"
        self.db_conn = None
        try:
            self.logger.info('Reading db params for CIPRES database')
            self.db = config.get_db_vars(self.name)
        except Exception as e:
            amass.abort(
                'Problem reading db params from "%s" section in config file: %s'
                % (self.name, str(e)))
        try:
            # The same resource list is kept in two forms: the second
            # argument ``True`` selects the variant stored as
            # ``resources_as_string``.
            self.resources_as_string = config.get_resources(self.name, True)
            self.resources = config.get_resources(self.name)
        except Exception as e:
            # The section name and the cause are now interpolated; the
            # original message left its "%s" placeholder unfilled.
            amass.abort(
                'Problem finding "resources" in "%s" section in config file: %s'
                % (self.name, str(e)))
        # TODO get rid of pickle file
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # data/tool_ids.pkl must come from a trusted source.
        with open("data/tool_ids.pkl", "rb") as tools_file:
            self.tools = pickle.load(tools_file)
        self.logger.debug("Read %d tools from file" % len(self.tools))
Exemplo n.º 2
0
    def __init__(self, config):
        """
        Constructor for FeatureCache.

        Sets up the empty in-memory cache and the logger, reads the db and
        resource parameters of the "cache" config section (reporting any
        failure through ``amass.abort``), and finally calls ``load()``.

        :param config: AmassConfig object containing config info for local
        SQL db cache.

        :return: A new FeatureCache object.
        """
        self.name = "cache"
        self.cache_prefix = "amass"
        self.logger = logging.getLogger(self.__module__)
        self.db_conn = None
        self.config = config
        self.cache = {}

        # Database parameters for the cache section.
        try:
            self.logger.debug('Reading db params for cache')
            db_vars = config.get_db_vars(self.name)
            self.db = db_vars
        except Exception as err:
            amass.abort(
                'Problem reading "cache" section in config file: %s' % str(err))

        # Resource parameters for the cache section.
        try:
            self.logger.debug('Reading resource params for cache')
            cache_resources = config.get_resources(self.name)
            self.resources = cache_resources
        except Exception as err:
            amass.abort(
                'Problem reading "cache" section in config file: %s' % str(err))

        self.load()
Exemplo n.º 3
0
    def generate_features(self, features_name, gateway_name, refresh=False):
        """
        Generate the features for every gateway job and store them in the
        cache.

        If features are already cached they are dropped when ``refresh`` is
        true; otherwise ``amass.abort`` is called. One cache row is inserted
        per job returned by the gateway source's ``query_jobs()``.

        :param features_name: name of the feature definition to generate
        :param gateway_name: name of the gateway the features belong to
        :param refresh: drop and re-generate already cached features

        :return: True
        """
        table_name = self.cache.features_to_table_name(features_name,
                                                       gateway_name)
        gateway = self.sources["gateway"]

        already_cached = self.cache.has_features(features_name, gateway_name)
        if already_cached and refresh:
            self.cache.drop_features(features_name, gateway_name)
        elif already_cached:
            amass.abort(
                "Cached features already exists; "
                "re-run with refresh if you want to re-generate them")

        self.cache.create_features(features_name, gateway_name, self.columns,
                                   self.primary_keys)
        jobs = gateway.query_jobs()
        self.logger.info("Found %i gateway jobs" % jobs.rowcount)

        for job_row in jobs:
            # Features are computed first, then the job id is looked up, so
            # the call order matches the stored row layout below.
            feature_values = self.generate_feature(features_name, job_row)
            job_id = gateway.get_job_field(job_row, "JOB_ID")
            self.cache.insert(cache_name=table_name, entry={
                "GATEWAY_JOB_ID": job_id,
                "FEATURES": json.dumps(feature_values)
            }) if False else self.cache.insert(table_name, {
                "GATEWAY_JOB_ID": job_id,
                "FEATURES": json.dumps(feature_values)
            })

        return True
Exemplo n.º 4
0
    def run(self, config, args):
        """
        Generate features for a gateway, or print them for a single job.

        Loads (or fetches) the source data needed by the requested feature
        definition, then either generates and caches features for all jobs
        (empty ``jobid`` argument) or prints the features of the one job
        identified by ``jobid``.

        :param config: configuration object passed to the Features
        constructor and to ``get_django_models``
        :param args: raw command arguments, parsed by ``parse_args``
        """
        arg_vals = self.parse_args(args)
        server_model_names = ["SourceConfig", "Resource", "SourceResource"]
        server_models = self.get_django_models(config, *server_model_names)
        # Map each requested model name to the model returned for it.
        server_config = dict(zip(server_model_names, server_models))

        features = amass.features.Features(config, server_config)
        features.cache_init()
        refresh_features = self.is_arg_true(arg_vals["refresh-features"])
        refresh_sources = []
        if arg_vals["refresh-sources"] != "":
            # Comma-separated list; whitespace around the commas is ignored.
            refresh_sources = re.split(r"\s*,\s*", arg_vals["refresh-sources"])
        if not features.load_or_fetch_sources(
                arg_vals["features"], arg_vals["gateway"],
                arg_vals["startdate"], arg_vals["enddate"], refresh_sources):
            amass.abort("Unable to fetch source data for features %s" %
                        arg_vals["features"])
        if arg_vals["jobid"] == "":
            features.generate_features(arg_vals["features"],
                                       arg_vals["gateway"], refresh_features)
        else:
            gateway = features.get_gateway(arg_vals["gateway"])
            job = gateway.query_job(arg_vals["jobid"])
            print(features.generate_feature(arg_vals["features"], job))
Exemplo n.º 5
0
	def run(self, config, args):
		"""
		Add a new gateway type row.

		:param config: configuration object passed to ``get_django_models``
		:param args: raw command arguments; ``parse_args`` must yield a
		"type" entry
		"""
		arg_vals = self.parse_args(args)
		[source_gateway_type] = self.get_django_models(config, "SourceGatewayType")
		try:
			source_gateway_type.objects.create(type=arg_vals["type"])
			print("Gateway type '%s' sucessfully added" % arg_vals["type"])
		except Exception as e:
			# Report the underlying cause, as the sibling commands do.
			amass.abort("Problem creating gateway type '%s': %s" % (arg_vals["type"], str(e)))
Exemplo n.º 6
0
	def run(self, config, args):
		"""
		Add a configuration parameter for a source.

		:param config: configuration object passed to ``get_django_models``
		:param args: raw command arguments; ``parse_args`` must yield
		"source", "name" and "value" entries
		"""
		arg_vals = self.parse_args(args)
		models = self.get_django_models(config, "SourceConfig")
		[source_config] = models

		try:
			add_row = source_config.objects.create
			add_row(
				source=arg_vals["source"],
				name=arg_vals["name"],
				value=arg_vals["value"])
			print("Config parameter '%s' sucessfully added for source '%s'" % (
				arg_vals["name"], arg_vals["source"]))
		except Exception as err:
			details = (arg_vals["name"], arg_vals["source"], str(err))
			amass.abort("Problem adding config '%s' for source '%s': %s" % details)
Exemplo n.º 7
0
    def run(self, config, args):
        """
        Add a new resource row.

        :param config: configuration object passed to ``get_django_models``
        :param args: raw command arguments; ``parse_args`` must yield a
        "resource" entry
        """
        arg_vals = self.parse_args(args)
        models = self.get_django_models(config, "Resource")
        [resource] = models

        try:
            add_row = resource.objects.create
            add_row(name=arg_vals["resource"])
            print("Resource '%s' sucessfully added" % arg_vals["resource"])
        except Exception as err:
            details = (arg_vals["resource"], str(err))
            amass.abort("Problem adding resource '%s': %s" % details)
Exemplo n.º 8
0
	def run(self, config, args):
		"""
		Delete a configuration parameter of a source.

		:param config: configuration object passed to ``get_django_models``
		:param args: raw command arguments; ``parse_args`` must yield
		"source" and "name" entries
		"""
		arg_vals = self.parse_args(args)
		models = self.get_django_models(config, "SourceConfig")
		[source_config] = models

		try:
			# Look the row up and remove it in one chained expression.
			source_config.objects.get(
				source=arg_vals["source"],
				name=arg_vals["name"]).delete()
			print("Configuration parameter '%s' for source '%s' sucessfully deleted" % (
				arg_vals["name"], arg_vals["source"]))
		except Exception as err:
			details = (arg_vals["name"], arg_vals["source"], str(err))
			amass.abort("Problem deleting configuration parameter '%s' for source '%s': %s" % details)
Exemplo n.º 9
0
	def run(self, config, args):
		"""
		Add an error type (name plus regex) for an existing gateway type.

		:param config: configuration object passed to ``get_django_models``
		:param args: raw command arguments; ``parse_args`` must yield
		"gateway type", "error" and "regex" entries
		"""
		arg_vals = self.parse_args(args)
		[source_gateway_type, source_gateway_error] = self.get_django_models(config, "SourceGatewayType", "SourceGatewayError")
		# Bound up front so the name exists even if the lookup below fails.
		gw_type = None
		try:
			gw_type = source_gateway_type.objects.get(type=arg_vals["gateway type"])
		except Exception:
			amass.abort("'%s' is not a known gateway type" % arg_vals["gateway type"])

		try:
			source_gateway_error.objects.create(gateway_type=gw_type, error=arg_vals["error"], regex=arg_vals["regex"])
			print("Error type '%s' sucessfully added for gateway '%s'" % (arg_vals["error"], arg_vals["gateway type"]))
		except Exception as e:
			amass.abort("Problem adding gateway error '%s' for gateway '%s': %s" % (arg_vals["error"], arg_vals["gateway type"], str(e)))
Exemplo n.º 10
0
    def run(self, config, args):
        """
        Delete an existing gateway.

        :param config: configuration object passed to ``get_django_models``
        :param args: raw command arguments; ``parse_args`` must yield a
        "gateway" entry
        """
        arg_vals = self.parse_args(args)
        [source_gateway] = self.get_django_models(config, "SourceGateway")
        gw = None
        try:
            gw = source_gateway.objects.get(name=arg_vals["gateway"])
        except Exception:
            # Only ordinary errors are treated as "not found"; interpreter
            # exits and keyboard interrupts are allowed to propagate.
            amass.abort("Gateway '%s' does not exist" % arg_vals["gateway"])

        try:
            gw.delete()
            print("Gateway '%s' sucessfully deleted" % arg_vals["gateway"])
        except Exception as e:
            amass.abort("Problem deleting gateway '%s': %s" %
                        (arg_vals["gateway"], str(e)))
Exemplo n.º 11
0
    def load_or_fetch_sources(self,
                              features_def,
                              gw_name,
                              start_date,
                              end_date,
                              refreshes=None):
        """
        Load or fetch data from available sources to satisfy features in
        specified feature definition.

        Each feature is named ``<source>.<feature>``; the source part selects
        (and, if needed, creates) a feature source, to which the feature is
        added. Every registered source is then asked to cache its history
        for the given date range, after dropping its cached table when it is
        listed in ``refreshes``.

        :param features_def: A string containing the name of a feature def
        :param gw_name: A string containing the name of the gateway
        :param start_date: The start date to fetch data if needed; passed
        unchanged to each source's ``cache_history``
        :param end_date: The end date to fetch data if needed; passed
        unchanged to each source's ``cache_history``
        :param refreshes: A list of names of sources to refresh if already
        cached; defaults to no refreshes

        :return: True if sources were successfully loaded; False if the
        feature definition is unknown
        """
        if refreshes is None:
            refreshes = []
        if features_def not in self.features:
            self.logger.error("Unable to find feature set %s" % features_def)
            # Without this return the lookup below raises KeyError.
            return False
        self.logger.info("Adding feature sources")
        for feature in self.features[features_def]:
            feature_source, feature_name = feature.split(".")
            if feature_source not in self.sources:
                self.sources[feature_source] = self.get_feature_source(
                    feature_source, gw_name)
            self.sources[feature_source].add_feature(feature_name)

        for source_name, source in self.sources.items():
            is_cached = self.cache.has_source(source_name)
            refresh_requested = source_name in refreshes
            self.logger.debug("Looking at source %s in '%s'" %
                              (source_name, ", ".join(refreshes)))
            self.logger.debug("%s" % is_cached)
            self.logger.debug("%s" % refresh_requested)
            if is_cached and refresh_requested:
                self.logger.info("Refreshing feature source %s" % source_name)
                if self.cache.drop_source(source_name):
                    self.logger.info("Dropped feature source table %s" %
                                     source_name)
                else:
                    amass.abort("Unable to drop feature source %s" %
                                source_name)
            source.cache_history(start_date, end_date)

        return True
Exemplo n.º 12
0
    def run(self, config, args):
        """
        Add a new gateway of an existing gateway type.

        :param config: configuration object passed to ``get_django_models``
        :param args: raw command arguments; ``parse_args`` must yield
        "gateway" and "type" entries
        """
        arg_vals = self.parse_args(args)
        [source_gateway,
         source_gateway_type] = self.get_django_models(config, "SourceGateway",
                                                       "SourceGatewayType")
        gw_type = None
        try:
            gw_type = source_gateway_type.objects.get(type=arg_vals["type"])
        except Exception:
            # Only ordinary errors are treated as "unknown type"; interpreter
            # exits and keyboard interrupts are allowed to propagate.
            amass.abort("'%s' is not a known gateway type" % arg_vals["type"])

        try:
            source_gateway.objects.create(name=arg_vals["gateway"],
                                          type=gw_type)
            print("Gateway '%s' sucessfully added" % arg_vals["gateway"])
        except Exception as e:
            amass.abort("Problem adding gateway '%s': %s" %
                        (arg_vals["gateway"], str(e)))
Exemplo n.º 13
0
    def run(self, config, args):
        """
        Train a prediction on the leading share of the cached features.

        Fetches the features and results for the requested feature
        definition and gateway, keeps the first ``split`` fraction of them
        as the training set, and prints what ``Prediction.train`` returns.

        :param config: configuration object passed to the Features
        constructor
        :param args: raw command arguments; ``parse_args`` must yield
        "features", "gateway", "filtererrors" and "split" entries
        """
        arg_vals = self.parse_args(args)
        self.logger.info("Running prediction")

        feature_source = amass.features.Features(config)

        filter_errors = None
        if arg_vals["filtererrors"] != "":
            # Comma-separated list; whitespace around the commas is ignored.
            filter_errors = re.split(r"\s*,\s*", arg_vals["filtererrors"])

        features, results = feature_source.get_features_results(
            arg_vals["features"], arg_vals["gateway"], filter_errors)
        if len(results) < 1:
            amass.abort("Unable to find matching features to run prediction")
        split_percentage = float(arg_vals["split"])
        split_index = int(split_percentage * len(features))
        # Rows from split_index onward are left out of training; this method
        # does not evaluate on them.
        train_features = features[:split_index]
        train_results = results[:split_index]
        predict = amass.predict.Prediction()
        print(predict.train(train_features, train_results))
Exemplo n.º 14
0
    def run(self, config, args):
        """
        Map a source-specific resource name onto an existing AMASS resource.

        :param config: configuration object passed to ``get_django_models``
        :param args: raw command arguments; ``parse_args`` must yield
        "resource", "source" and "source resource" entries
        """
        arg_vals = self.parse_args(args)
        [resource,
         source_resource] = self.get_django_models(config, "Resource",
                                                   "SourceResource")
        amass_resource = None
        try:
            amass_resource = resource.objects.get(name=arg_vals["resource"])
        except Exception:
            amass.abort("'%s' is not a known AMASS resource" %
                        arg_vals["resource"])

        try:
            source_resource.objects.create(
                resource=amass_resource,
                source=arg_vals["source"],
                source_name=arg_vals["source resource"])
            print("Successfully added source resource '%s' for source '%s'" % (
                arg_vals["source resource"], arg_vals["source"]))
        except Exception as e:
            # Name the source resource being added, matching the success
            # message above, rather than the AMASS resource it maps to.
            amass.abort("Problem adding source resource '%s': %s" %
                        (arg_vals["source resource"], str(e)))
Exemplo n.º 15
0
    def run(self, config, args):
        """
        Delete an error type of an existing gateway type.

        :param config: configuration object passed to ``get_django_models``
        :param args: raw command arguments; ``parse_args`` must yield
        "gateway type" and "error" entries
        """
        arg_vals = self.parse_args(args)
        [source_gateway_type, source_gateway_error
         ] = self.get_django_models(config, "SourceGatewayType",
                                    "SourceGatewayError")
        gw_type = None
        try:
            gw_type = source_gateway_type.objects.get(
                type=arg_vals["gateway type"])
        except Exception:
            amass.abort("Gateway type '%s' does not exist" %
                        arg_vals["gateway type"])

        try:
            # Distinct name so the row is not confused with the exception
            # bound in the handler below.
            gw_error = source_gateway_error.objects.get(
                gateway_type=gw_type, error=arg_vals["error"])
            gw_error.delete()
            print("Gateway error type '%s' for gateway '%s' sucessfully deleted" % (
                arg_vals["error"], arg_vals["gateway type"]))
        except Exception as e:
            amass.abort(
                "Problem deleting gateway error type '%s' for gateway '%s': %s"
                % (arg_vals["error"], arg_vals["gateway type"], str(e)))