Пример #1
0
def valgendp(cdmrectype, cdmrecval):
    """Produce the hashed value of one DARPA CDM/Cadets E2 record.

    Only type information contributes to the value: socket, pipe and
    memory records map to fixed object-type labels, host records use
    their 'hostType' field, and any other record uses its 'type' field.

    Arguments:
    cdmrectype - CDM record type
    cdmrecval - CDM record

    Return:
    the hashgen() result for a one-element list holding the chosen label
    """
    if cdmrectype in (CDM_TYPE_SOCK, CD2_TYPE_SOCK):
        label = 'NET_FLOW_OBJECT'
    elif cdmrectype in (CDM_TYPE_PIPE, CD2_TYPE_PIPE):
        label = 'UNNAMED_PIPE_OBJECT'
    elif cdmrectype in (CDM_TYPE_MEMORY, CD2_TYPE_MEMORY):
        label = 'MEMORY_OBJECT'
    elif cdmrectype in (CDM_TYPE_HOST, CD2_TYPE_HOST):
        label = cdmrecval['hostType']
    else:
        # Every remaining record kind carries its own 'type' field.
        label = cdmrecval['type']

    return hashgen([label])
Пример #2
0
def valgencf(cfrecval):
    """Produce the hashed value of one CamFlow record.

    Only the record's "prov:type" field contributes to the value.

    Arguments:
    cfrecval - CamFlow record

    Return:
    the hashgen() result for a one-element list holding the record type
    """
    return hashgen([cfrecval["prov:type"]])
Пример #3
0
def gencd(parser, i, dbs, out):
    """Generate CADETS2/FiveDirections outputs using a list of databases.

    Only records whose first 'datum' key is CD2_TYPE_EVENT produce output;
    all other records are skipped. For each event with a type, a timestamp
    and two endpoints that can be resolved through getfromdb(), one line is
    written to `out` in the form

        <hash(src)>\\t<hash(dst)>\\t<srcVal>:<dstVal>:<edgetype>:<timestamp>\\t\\n

    and, when processevent() reports the event as bidirectional, a second
    line with source and destination swapped. Events that cannot be turned
    into an edge are logged to 'error.log' and skipped.

    Arguments:
    parser - ijson parser that feeds JSON objects
    i - the start index of the database list
    dbs - a list of database
    out - output file object
    """
    logging.basicConfig(filename='error.log', level=logging.DEBUG)

    description = '\x1b[6;30;43m[i]\x1b[0m Progress of Generating Output from File \x1b[6;30;42m{}\x1b[0m'.format(
        i)
    pb = tqdm.tqdm(desc=description, mininterval=5.0, unit="recs", position=i)
    edge_line = '{}\t{}\t{}:{}:{}:{}\t\n'

    try:
        for cdmrec in parser:
            pb.update()
            datum = cdmrec['datum']
            # list(...)[0] rather than keys()[0]: dict views cannot be
            # indexed on Python 3, while this form works on 2 and 3 alike.
            cdmrectype = list(datum)[0]
            if cdmrectype != CD2_TYPE_EVENT:
                continue
            cdmrecval = datum[cdmrectype]

            if 'type' not in cdmrecval:
                logging.debug('CD2_TYPE_EVENT: type is missing. Event UUID: %r',
                              cdmrecval['uuid'])
                continue
            edgetype = valgendp(cdmrectype, cdmrecval)

            if 'timestampNanos' not in cdmrecval:
                logging.debug(
                    'CD2_TYPE_EVENT: timestamp is missing. Event UUID: %r',
                    cdmrecval['uuid'])
                continue
            timestamp = cdmrecval['timestampNanos']

            srcUUID, dstUUID, bidirection = processevent(cdmrecval, 'cadets2')
            if srcUUID is None or dstUUID is None:
                continue

            srcVal = getfromdb(dbs, i, srcUUID)
            if srcVal is None:
                logging.error('An unmatched srcUUID from edge (%r) of type: %s',
                              cdmrecval['uuid'], cdmrecval['type'])
                continue

            dstVal = getfromdb(dbs, i, dstUUID)
            if dstVal is None:
                logging.error('An unmatched dstUUID from edge (%r) of type: %s',
                              cdmrecval['uuid'], cdmrecval['type'])
                continue

            # Hash each endpoint once and reuse it for the reverse edge.
            # NOTE(review): assumes hashgen() is deterministic for equal input.
            src_id = hashgen([srcUUID.encode('utf-8')])
            dst_id = hashgen([dstUUID.encode('utf-8')])

            out.write(edge_line.format(src_id, dst_id, srcVal, dstVal,
                                       edgetype, timestamp))
            if bidirection:
                out.write(edge_line.format(dst_id, src_id, dstVal, srcVal,
                                           edgetype, timestamp))
    finally:
        # Close the progress bar even if a record raises mid-stream.
        pb.close()
Пример #4
0
def gencf(parser, i, dbs, out):
    """Generate CamFlow outputs using a list of databases.

    For every record fed by `parser`, the relations "used",
    "wasGeneratedBy", "wasInformedBy" and "wasDerivedFrom" (when present)
    are turned into edges, in that order. Each well-formed edge whose two
    endpoints are found in dbs[i] is written to `out` as

        <hash(src)>\\t<hash(dst)>\\t<srcVal>:<dstVal>:<edgetype>:<timestamp>\\t\\n

    Edges lacking a type, a "cf:id", an endpoint, or a database match are
    logged to 'error.log' at debug level and skipped.

    Arguments:
    parser - ijson parser that feeds JSON objects
    i - the start index of the database list
    dbs - a list of database
    out - output file object
    """
    logging.basicConfig(filename='error.log', level=logging.DEBUG)

    # for camflow, each file is independent
    db = dbs[i]

    description = '\x1b[6;30;43m[i]\x1b[0m Progress of Generating Output from File \x1b[6;30;42m{}\x1b[0m'.format(
        i)
    pb = tqdm.tqdm(desc=description, mininterval=5.0, unit="recs", position=i)

    try:
        for cfrec in parser:
            pb.update()
            for relation, src_key, dst_key in _GENCF_RELATIONS:
                if relation in cfrec:
                    _gencf_write_edges(relation, cfrec[relation], src_key,
                                       dst_key, db, out)
    finally:
        # Close the progress bar even if a record raises mid-stream.
        pb.close()


# (relation name, key of the source endpoint, key of the destination
# endpoint) for every CamFlow relation gencf converts into edges.
_GENCF_RELATIONS = (
    ("used", "prov:entity", "prov:activity"),
    ("wasGeneratedBy", "prov:activity", "prov:entity"),
    ("wasInformedBy", "prov:informant", "prov:informed"),
    ("wasDerivedFrom", "prov:usedEntity", "prov:generatedEntity"),
)


def _gencf_write_edges(relation, edges, src_key, dst_key, db, out):
    """Write one output line per usable edge of a single CamFlow relation.

    Arguments:
    relation - relation name, used only in log messages
    edges - mapping from edge UUID to the edge record
    src_key - record key holding the source node UUID
    dst_key - record key holding the destination node UUID
    db - database used to look up the value of each endpoint
    out - output file object
    """
    for uid in edges:
        edge = edges[uid]

        if "prov:type" not in edge:
            logging.debug("Edge (%s) record without type. UUID: %s", relation,
                          uid)
            continue
        edgetype = valgencf(edge)

        if "cf:id" not in edge:  # Can be used as timestamp
            logging.debug("Edge (%s) record without timestamp. UUID: %s",
                          relation, uid)
            continue
        timestamp = edge["cf:id"]

        # The src/dst wording in the messages follows the actual role of
        # each key for this relation.
        if src_key not in edge:
            logging.debug("Edge (%s/%s) record without srcUUID. UUID: %s",
                          relation, edge["prov:type"], uid)
            continue
        if dst_key not in edge:
            logging.debug("Edge (%s/%s) record without dstUUID. UUID: %s",
                          relation, edge["prov:type"], uid)
            continue

        srcUUID = edge[src_key]
        dstUUID = edge[dst_key]

        srcVal = db.get(srcUUID)
        if srcVal is None:
            logging.debug(
                "Edge (%s/%s) record with an unmatched srcUUID. UUID: %s",
                relation, edge["prov:type"], uid)
            continue

        dstVal = db.get(dstUUID)
        if dstVal is None:
            logging.debug(
                "Edge (%s/%s) record with an unmatched dstUUID. UUID: %s",
                relation, edge["prov:type"], uid)
            continue

        out.write('{}\t{}\t{}:{}:{}:{}\t\n'.format(
            hashgen([srcUUID]), hashgen([dstUUID]), srcVal, dstVal, edgetype,
            timestamp))
Пример #5
0
def cgencd(parser, db, out):
    """Generate Cadets E2/FiveDirections outputs from compressed/single file.

    Note: Only one subset of FiveDirections dataset uses this function. Other subsets use regular DARPA functions.

    Only records whose first 'datum' key is CD2_TYPE_EVENT produce output;
    all other records are skipped. For each event with a type, a timestamp
    and two endpoints found in `db` (looked up by UTF-8 encoded UUID), one
    line is written to `out` in the form

        <hash(src)>\\t<hash(dst)>\\t<srcVal>:<dstVal>:<edgetype>:<timestamp>\\t\\n

    and, when processevent() reports the event as bidirectional, a second
    line with source and destination swapped. Events that cannot be turned
    into an edge are logged to 'error.log' and skipped.

    Arguments:
    parser - ijson parser that feeds JSON objects
    db - database
    out - output file object
    """
    logging.basicConfig(filename='error.log', level=logging.DEBUG)

    description = '\x1b[6;30;43m[i]\x1b[0m Progress of Generating Output'
    pb = tqdm.tqdm(desc=description, mininterval=5.0, unit="recs")
    edge_line = '{}\t{}\t{}:{}:{}:{}\t\n'

    try:
        for cdmrec in parser:
            pb.update()
            datum = cdmrec['datum']
            # list(...)[0] rather than keys()[0]: dict views cannot be
            # indexed on Python 3, while this form works on 2 and 3 alike.
            cdmrectype = list(datum)[0]
            if cdmrectype != CD2_TYPE_EVENT:
                continue
            cdmrecval = datum[cdmrectype]

            if 'type' not in cdmrecval:
                logging.debug('CD2_TYPE_EVENT: type is missing. Event UUID: %r',
                              cdmrecval['uuid'])
                continue
            edgetype = valgendp(cdmrectype, cdmrecval)

            if 'timestampNanos' not in cdmrecval:
                logging.debug(
                    'CD2_TYPE_EVENT: timestamp is missing. Event UUID: %r',
                    cdmrecval['uuid'])
                continue
            timestamp = cdmrecval['timestampNanos']

            srcUUID, dstUUID, bidirection = processevent(cdmrecval, 'cadets2')
            if srcUUID is None or dstUUID is None:
                continue

            # Encode each UUID once; the encoded form is both the database
            # key and the input to hashgen().
            src_key = srcUUID.encode('utf-8')
            dst_key = dstUUID.encode('utf-8')

            srcVal = db.get(src_key)
            if srcVal is None:
                logging.error('An unmatched srcUUID from edge (%r) of type: %s',
                              cdmrecval['uuid'], cdmrecval['type'])
                continue

            dstVal = db.get(dst_key)
            if dstVal is None:
                logging.error('An unmatched dstUUID from edge (%r) of type: %s',
                              cdmrecval['uuid'], cdmrecval['type'])
                continue

            # Hash each endpoint once and reuse it for the reverse edge.
            # NOTE(review): assumes hashgen() is deterministic for equal input.
            src_id = hashgen([src_key])
            dst_id = hashgen([dst_key])

            out.write(edge_line.format(src_id, dst_id, srcVal, dstVal,
                                       edgetype, timestamp))
            if bidirection:
                out.write(edge_line.format(dst_id, src_id, dstVal, srcVal,
                                           edgetype, timestamp))
    finally:
        # The original never closed the progress bar; close it on every
        # exit path, matching the sibling generator functions.
        pb.close()
Пример #6
0
def cgencf(parser, db, out):
    """Generate CamFlow outputs from compressed/single file.

    For every record fed by `parser`, the relations "used",
    "wasGeneratedBy", "wasInformedBy" and "wasDerivedFrom" (when present)
    are turned into edges, in that order. Each well-formed edge whose two
    endpoints are found in `db` is written to `out` as

        <hash(src)>\\t<hash(dst)>\\t<srcVal>:<dstVal>:<edgetype>:<timestamp>\\t\\n

    Edges lacking a type, a "cf:id", an endpoint, or a database match are
    logged to 'error.log' at debug level and skipped.

    Arguments:
    parser - parser that feeds CamFlow JSON objects
    db - database used to look up the value of each endpoint
    out - output file object
    """
    logging.basicConfig(filename='error.log', level=logging.DEBUG)

    description = '\x1b[6;30;43m[i]\x1b[0m Progress of Generating Output'
    pb = tqdm.tqdm(desc=description, mininterval=5.0, unit="recs")

    try:
        for cfrec in parser:
            pb.update()
            for relation, src_key, dst_key in _CGENCF_RELATIONS:
                if relation in cfrec:
                    _cgencf_write_edges(relation, cfrec[relation], src_key,
                                        dst_key, db, out)
    finally:
        # Close the progress bar even if a record raises mid-stream.
        pb.close()


# (relation name, key of the source endpoint, key of the destination
# endpoint) for every CamFlow relation cgencf converts into edges.
_CGENCF_RELATIONS = (
    ("used", "prov:entity", "prov:activity"),
    ("wasGeneratedBy", "prov:activity", "prov:entity"),
    ("wasInformedBy", "prov:informant", "prov:informed"),
    ("wasDerivedFrom", "prov:usedEntity", "prov:generatedEntity"),
)


def _cgencf_write_edges(relation, edges, src_key, dst_key, db, out):
    """Write one output line per usable edge of a single CamFlow relation.

    Arguments:
    relation - relation name, used only in log messages
    edges - mapping from edge UUID to the edge record
    src_key - record key holding the source node UUID
    dst_key - record key holding the destination node UUID
    db - database used to look up the value of each endpoint
    out - output file object
    """
    for uid in edges:
        edge = edges[uid]

        if "prov:type" not in edge:
            logging.debug("Edge (%s) record without type. UUID: %s", relation,
                          uid)
            continue
        edgetype = valgencf(edge)

        if "cf:id" not in edge:  # Can be used as timestamp
            logging.debug("Edge (%s) record without timestamp. UUID: %s",
                          relation, uid)
            continue
        timestamp = edge["cf:id"]

        # The src/dst wording in the messages follows the actual role of
        # each key for this relation.
        if src_key not in edge:
            logging.debug("Edge (%s/%s) record without srcUUID. UUID: %s",
                          relation, edge["prov:type"], uid)
            continue
        if dst_key not in edge:
            logging.debug("Edge (%s/%s) record without dstUUID. UUID: %s",
                          relation, edge["prov:type"], uid)
            continue

        srcUUID = edge[src_key]
        dstUUID = edge[dst_key]

        srcVal = db.get(srcUUID)
        if srcVal is None:
            logging.debug(
                "Edge (%s/%s) record with an unmatched srcUUID. UUID: %s",
                relation, edge["prov:type"], uid)
            continue

        dstVal = db.get(dstUUID)
        if dstVal is None:
            logging.debug(
                "Edge (%s/%s) record with an unmatched dstUUID. UUID: %s",
                relation, edge["prov:type"], uid)
            continue

        out.write('{}\t{}\t{}:{}:{}:{}\t\n'.format(
            hashgen([srcUUID]), hashgen([dstUUID]), srcVal, dstVal, edgetype,
            timestamp))