Example #1
def _vector_table(info, entry_detail):
    vector_version = info.get('vector_version', '')
    vector_info = info.get('vector_info', {})
    exists_str = _exists_str(info, entry_detail, data.input_mat_exists)
    if entry_detail == 'short':
        return _sub_table([('Vector Version', 'header',
                            vector_version + exists_str)])
    elif entry_detail == 'long':
        bcp, vector_id = config.split(vector_version)
        table_data = [('Vector', 'header', exists_str),
                      ('Version', 'std', vector_version)]
        if bcp == 'B':
            table_data += [('TF', 'std', vector_info.get('tf')),
                           ('IDF', 'std', vector_info.get('idf'))]
        elif bcp == 'C':
            vector_cbow_type = vector_info.get('type')
            table_data += [('Type', 'std', vector_cbow_type)]
            if vector_cbow_type == 'TfIdf':
                table_data += [('TF', 'std', vector_info.get('tf')),
                               ('IDF', 'std', vector_info.get('idf'))]
        elif bcp == 'P':
            embedding_info = info.get('embedding_info', {})
            table_data += [('Name', 'std', embedding_info.get('name')),
                           ('Model', 'std', embedding_info.get('model'))]
        return _sub_table(table_data)
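For orientation, this is the table_data list the 'long' branch would assemble for a hypothetical cBoW entry whose vector_info is {'type': 'TfIdf', 'tf': 'raw', 'idf': 'smooth'} (the version id and all values are invented for illustration):

# Hypothetical inputs; real values come from config.vectorizer['C'][id].
table_data = [('Vector', 'header', ''),        # exists_str assumed empty here
              ('Version', 'std', 'C0'),        # hypothetical vector_version
              ('Type', 'std', 'TfIdf'),
              ('TF', 'std', 'raw'),
              ('IDF', 'std', 'smooth')]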
Example #2
 def update_info(vector_version):
     bcp, id = config.split(vector_version)
     info['vector_version'] = vector_version
     if bcp == 'B' or bcp == 'C':
         info['vector_info'] = config.vectorizer[bcp][id]
     elif bcp == 'P':
         info['embedding_info'] = config.embeddings['P'][id]
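All of these examples branch on the pair returned by config.split. Its implementation is not shown on this page; the call sites suggest it separates a version string into a one-letter 'B'/'C'/'P' marker (BoW, cBoW, or phrase embedding, per Example #11 below) and an id used to index the config dictionaries. A minimal sketch of such a helper, purely as an assumption about the string format:

def split(version):
    # Hypothetical reimplementation for illustration only; the real
    # config.split may differ. Separates the leading B/C/P marker from
    # the id that follows it.
    bcp, version_id = version[0], version[1:]
    if bcp not in ('B', 'C', 'P'):
        raise ValueError('Unknown version prefix: {}'.format(version))
    return bcp, version_id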
Example #3
def data_token_vocab_vector_base(info):
    bcp = config.split(info['vector_version'])[0]
    if bcp == 'B' or bcp == 'C':
        return '{}_Tok{}_Voc{}_Vec{}'.format(info['data_name'],
                                             info['token_version'],
                                             info['vocab_version'],
                                             info['vector_version'])
    elif bcp == 'P':
        return '{}_Vec{}'.format(info['data_name'], info['vector_version'])
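As a concrete illustration of the two branches (all version ids below are invented):

# Hypothetical info dicts, for illustration only.
info_bow = {'data_name': 'news', 'token_version': 'B0',
            'vocab_version': 'B0', 'vector_version': 'B1'}
data_token_vocab_vector_base(info_bow)     # -> 'news_TokB0_VocB0_VecB1'

info_phrase = {'data_name': 'news', 'vector_version': 'P0'}
data_token_vocab_vector_base(info_phrase)  # -> 'news_VecP0'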
Example #4
def get_tokenizer_func(token_version):
    bcp, id = config.split(token_version)
    info = {'token_version': token_version}
    if bcp == 'B':
        info['token_info'] = config.tokenizer['B'][id]
    elif bcp == 'C':
        info['embedding_name'] = id
        info['embedding_info'] = config.embeddings['C'][id]
        info['token_info'] = config.embeddings['C'][id]['token_info']
    return get_tokenizer(info)
Example #5
 def update_info(token_version):
     bcp, id = config.split(token_version)
     info['token_version'] = token_version
     if bcp == 'B':
         info['token_info'] = config.tokenizer['B'][id]
         info.pop('embedding_name', None)
         info.pop('embedding_info', None)
     elif bcp == 'C':
         info['token_info'] = config.embeddings['C'][id]['token_info']
         info['embedding_name'] = id
         info['embedding_info'] = config.embeddings['C'][id]
Example #6
    def _load_train_results_from_folder(self, root):

        f_config = glob.glob(
            os.path.join(root, '*' + BackboneTrainResult.config_ext))
        f_model = glob.glob(
            os.path.join(root, '*' + BackboneTrainResult.model_ext))
        f_log = glob.glob(os.path.join(root,
                                       '*' + BackboneTrainResult.log_ext))

        f_config.sort()
        f_model.sort()
        f_log.sort()

        ret = []

        for config, model, log in zip(f_config, f_model, f_log):
            assert len(
                set((config.split('.')[0], model.split('.')[0],
                     log.split('.')[0]))) == 1
            base = config.split('.')[0]
            ret.append(BackboneTrainResult(base))

        return ret
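Note that config.split('.')[0] here splits a full path on its first dot, so a dot anywhere in a parent directory name (or a multi-part extension) would shift the base. A slightly more defensive variant, shown only as a sketch rather than the original author's code, strips the known extension instead:

def _base_without_ext(path, ext):
    # Hypothetical helper: drop the known suffix rather than splitting on
    # the first '.', so dots elsewhere in the path are left untouched.
    assert path.endswith(ext)
    return path[:-len(ext)]

# e.g. inside the loop above:
#     base = _base_without_ext(config, BackboneTrainResult.config_ext)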
Example #7
 def token_changed():
     bcp, id = config.split(token_selector.value)
     if bcp == 'B' and not vocab_selector_c.disabled:
         vocab_selector_b.disabled = False
         vocab_selector_c.disabled = True
         val = vocab_selector_b.value
         vocab_selector_b.value = None
         vocab_selector_b.value = val
     elif bcp == 'C' and not vocab_selector_b.disabled:
         vocab_selector_b.disabled = True
         vocab_selector_c.disabled = False
         val = vocab_selector_c.value
         vocab_selector_c.value = None
         vocab_selector_c.value = val
     callback()
Example #8
def load_ground_truth_classes(info):
    if config.split(info['vector_version'])[0] == 'P':
        with data.document_reader(info) as documents:
            return [document['class_id'] for document in documents]

    mat_ids = data.load_mat_ids(info)
    ground_truth_classes = []
    with data.document_reader(info) as documents:
        for document in documents:
            if mat_ids[0] == document['id']:
                ground_truth_classes.append(document['class_id'])
                del mat_ids[0]
            if len(mat_ids) == 0:
                break
    return ground_truth_classes
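This version consumes mat_ids front-to-back while streaming the documents, so it relies on the matrix ids appearing in the same order as the documents. If that ordering ever cannot be assumed, a dictionary lookup is one alternative (a sketch only, not the original logic, and it holds all class ids in memory):

def load_ground_truth_classes_by_lookup(info):
    # Hypothetical order-independent variant using the same data API.
    mat_ids = data.load_mat_ids(info)
    with data.document_reader(info) as documents:
        class_by_id = {doc['id']: doc['class_id'] for doc in documents}
    return [class_by_id[mat_id] for mat_id in mat_ids]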
Example #9
 def vector_changed():
     vector_bcp, vector_id = config.split(info['vector_version'])
     if vector_bcp == 'B':
         token_vocab_selector.layout.visibility = 'visible'
         token_selector.layout.visibility = 'hidden'
         vocab_selector.layout.visibility = 'hidden'
     elif vector_bcp == 'C':
         token_vocab_selector.layout.visibility = 'hidden'
         token_selector.layout.visibility = 'visible'
         vocab_selector.layout.visibility = 'visible'
     elif vector_bcp == 'P':
         token_vocab_selector.layout.visibility = 'hidden'
         token_selector.layout.visibility = 'hidden'
         vocab_selector.layout.visibility = 'hidden'
     update_output(False)
Example #10
def load_ground_truth_classes(info):
    if config.split(info['vector_version'])[0] == 'P':
        with data.document_reader(info) as documents:
            _labels_true = [document['class_id'] for document in documents]
    else:
        mat_ids = data.load_mat_ids(info)
        _labels_true = [0] * len(mat_ids)
        idx = 0
        with data.document_reader(info) as documents:
            for document in documents:
                if mat_ids[idx] == document['id']:
                    _labels_true[idx] = document['class_id']
                    idx = idx + 1
                if idx >= len(_labels_true):
                    break
    return _labels_true
Example #11
def run_vectorizer(info=None):
    nbprint('Vectorizer').push()
    global runvars

    if info is None:
        if config.vectorizer['run_B']:
            nbprint('BoW').push()
            runvars = {}
            iterate(['data', 'token:BC', 'vocab', 'vector:B'],
                    [count_mat, bow])
            nbprint.pop()

        if config.vectorizer['run_C']:
            nbprint('cBoW').push()
            runvars = {}
            iterate(['data', 'token:C', 'vocab', 'vector:C'],
                    [count_mat, cbow])
            nbprint.pop()

        if config.vectorizer['run_P']:
            nbprint('Phrase').push()
            runvars = {}
            iterate(['data', 'vector:P'], [phrase])
            nbprint.pop()
    else:
        runvars = {}
        vector_bcp, vector_id = config.split(info['vector_version'])
        if vector_bcp == 'B' or vector_bcp == 'C':
            count_mat(info)
            if vector_bcp == 'B':
                bow(info)
            else:
                cbow(info)
        else:
            phrase(info)

    runvars = None
    nbprint.pop()
Example #12
 def update_info(vocab_version):
     vocab_type, vocab_idx = config.split(vocab_version)
     info['vocab_version'] = vocab_version
     info['vocab_info'] = config.vocab[vocab_type][vocab_idx]
Example #13
 def _output_of(self, info):
     if config.split(info['vector_version'])[0] == 'B':
         return ['W','H']
Example #14
def load_input_mat(info):
    bcp = config.split(info["vector_version"])[0]
    if bcp == 'B':
        return sparse.load_npz(input_mat_filename(info))
    elif bcp == 'C' or bcp == 'P':
        return first_array_in(input_mat_filename(info))
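The 'B' branch reads the bag-of-words matrix back with SciPy's sparse .npz loader, so it is presumably written with sparse.save_npz elsewhere in the pipeline. A self-contained round-trip sketch (file name hypothetical):

import numpy as np
from scipy import sparse

mat = sparse.csr_matrix(np.array([[1, 0, 2],
                                  [0, 3, 0]]))
sparse.save_npz('input_mat_B0.npz', mat)       # hypothetical file name
loaded = sparse.load_npz('input_mat_B0.npz')   # same call as the 'B' branch
assert (loaded != mat).nnz == 0                # round trip preserves the data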
Example #15
def iterate(what, callbacks, info={}, depth=1, print_iterates=True):
    global _required_model_outputs
    if not isinstance(what, list):
        what = [
            what,
        ]
    if not isinstance(callbacks, list):
        callbacks = [
            callbacks,
        ]
    callbacks = [None] * (len(what) - len(callbacks)) + callbacks

    # Categories handled below:
    # data
    # token[:BC]
    # vocab
    # vector[:BCP]
    # models[:W,H]
    # modelinputs
    # num_topics
    # distiller
    # distillerinputs
    try:
        category, detail = what[0].split(':')
    except ValueError:
        category, detail = what[0], None

    if category == "data":
        for data_name, data_info in config.datasets.items():
            if data_info["run"]:
                new_data = {"data_name": data_name, "data_info": data_info}
                call_next(what, callbacks, data_info["name"], new_data, info,
                          depth, print_iterates)
    elif category == 'token':
        for token_version in config.token_version_list(detail or 'BCP'):
            bcp, id = config.split(token_version)
            if bcp == 'B':
                token_info = config.tokenizer['B'][id]
                if token_info["run"]:
                    new_data = {
                        'token_version': token_version,
                        'token_info': token_info
                    }
                    call_next(what, callbacks,
                              "Token {}".format(token_version), new_data, info,
                              depth, print_iterates)
            elif bcp == 'C':
                embedding_info = config.embeddings['C'][id]
                if embedding_info["run"]:
                    new_data = {
                        'token_version': token_version,
                        'token_info': embedding_info['token_info'],
                        'embedding_name': id,
                        'embedding_info': embedding_info
                    }
                    call_next(what, callbacks,
                              "Token {}".format(token_version), new_data, info,
                              depth, print_iterates)
    elif category == "vocab":
        if "token_version" not in info:
            print(
                "{}WARNING: Cannot iterate 'vocab' without knowing token version"
                .format("  " * depth))
            return
        bcp = config.split(info["token_version"])[0]
        for vocab_version in config.vocab_version_list(bcp):
            bcp, id = config.split(vocab_version)
            vocab_info = config.vocab[bcp][id]
            if vocab_info["run"]:
                new_data = {
                    "vocab_version": vocab_version,
                    "vocab_info": vocab_info
                }
                call_next(what, callbacks,
                          "Vocab {}".format(new_data["vocab_version"]),
                          new_data, info, depth, print_iterates)
    elif category == 'vector':
        for vector_version in config.vector_version_list(detail or 'BCP'):
            bcp, id = config.split(vector_version)
            if bcp == 'B' or bcp == 'C':
                vector_info = config.vectorizer[bcp][id]
                if vector_info["run"]:
                    new_data = {
                        "vector_version": vector_version,
                        "vector_info": vector_info
                    }
                    call_next(what, callbacks,
                              "Vector {}".format(new_data["vector_version"]),
                              new_data, info, depth, print_iterates)
            elif bcp == 'P':
                embedding_info = config.embeddings['P'][id]
                if embedding_info["run"]:
                    new_data = {
                        "vector_version": vector_version,
                        "embedding_info": embedding_info
                    }
                    call_next(what, callbacks,
                              "Vector {}".format(new_data["vector_version"]),
                              new_data, info, depth, print_iterates)
    elif category == "models":
        if detail is not None:
            detail = detail.split(',')
        _required_model_outputs = detail
        for model_name, model_info in config.models['list'].items():
            if model_info["run"]:
                model = import_cls('models', model_info['mod'],
                                   model_info['cls'])(model_info)
                new_data = {
                    'model_name': model_name,
                    'model_info': model_info,
                    'model': model
                }
                call_next(what, callbacks,
                          "Model {}".format(model_info["name"]), new_data,
                          info, depth, print_iterates)
    elif category == 'modelinputs':
        vector_bcps = info['model_info'].get('vector', 'BCP')
        original_callback = callbacks[0]
        callbacks[0] = lambda i: check_model_output(i, original_callback)
        if 'B' in vector_bcps:
            token_bcps = info['model_info'].get('token', 'BC')
            what_b = what.copy()
            what_b[1:1] = [
                'data', 'token:{}'.format(token_bcps), 'vocab', 'vector:B'
            ]
            callbacks_b = [None] * (len(what_b) - len(callbacks)) + callbacks
            call_next(what_b, callbacks_b, 'Model Input BoW', {}, info, depth,
                      print_iterates)
        if 'C' in vector_bcps:
            what_c = what.copy()
            what_c[1:1] = ['data', 'token:C', 'vocab', 'vector:C']
            callbacks_c = [None] * (len(what_c) - len(callbacks)) + callbacks
            call_next(what_c, callbacks_c, 'Model Input cBoW', {}, info, depth,
                      print_iterates)
        if 'P' in vector_bcps:
            what_p = what.copy()
            what_p[1:1] = ['data', 'vector:P']
            callbacks_p = [None] * (len(what_p) - len(callbacks)) + callbacks
            call_next(what_p, callbacks_p, 'Model Input Phrase', {}, info,
                      depth, print_iterates)
    elif category == "num_topics":
        if "data_info" in info:
            for num_topics in info["data_info"]["num_topics"]:
                num_topics = convert_num_topics(info, num_topics)
                new_data = {"num_topics": num_topics}
                call_next(what, callbacks, "Topics {}".format(num_topics),
                          new_data, info, depth, print_iterates)
        else:
            raise UtilException(
                'Cannot iterate "num_topics" without knowing data')
    elif category == "distiller":
        for distiller_name, distiller_info in config.distiller['list'].items():
            if distiller_info["run"]:
                distiller = import_cls('distiller', distiller_info['mod'],
                                       distiller_info['cls'])(distiller_info)
                new_data = {
                    'distiller_name': distiller_name,
                    'distiller_info': distiller_info,
                    'distiller': distiller
                }
                call_next(what, callbacks,
                          "Distiller {}".format(distiller_info["name"]),
                          new_data, info, depth, print_iterates)
    elif category == "distillerinputs":
        model_out = info['distiller_info']['model_out']
        original_callback = callbacks[0]
        for model_out_entry in model_out:
            what_version = what.copy()
            what_version[1:1] = [
                'models:{}'.format(model_out_entry), 'modelinputs',
                'num_topics'
            ]
            callbacks_version = [None] * (len(what_version) -
                                          len(callbacks)) + callbacks
            call_next(what_version, callbacks_version, 'Model Input BoW', {},
                      info, depth, print_iterates)
    else:
        print("{}WARNING: Cannot iterate '{}'".format("  " * depth, what[0]))
Example #16
    def import_from(self, infile, maxsize=10000000):
        errors = []
        results = []

        filetype = infile.content_type
        filename = infile.name
        raw = infile.read()

        # filelen = len(raw)
        # if filelen > maxsize:
        #     errors.append(_('Import too large, must be smaller than %i bytes.' % maxsize ))

        format = os.path.splitext(filename)[1]
        if format and format.startswith("."):
            format = format[1:]
        if not format:
            errors.append(_("Could not parse format from filename: %s") % filename)

        if format == "zip":
            zf = zipfile.ZipFile(StringIO(raw), "r")
            files = zf.namelist()
            image_dir = config_value("PRODUCT", "IMAGE_DIR")
            other_image_dir = None
            export_file = None
            if "VARS" in files:
                config = zf.read("VARS")
                lines = [line.split("=") for line in config.split("\n")]
                for key, val in lines:
                    if key == "PRODUCT.IMAGE_DIR":
                        other_image_dir = val
                    elif key == "EXPORT_FILE":
                        export_file = val

                if other_image_dir is None or export_file is None:
                    errors.append(_("Bad VARS file in import zipfile."))

                else:
                    # save out all the files which start with other_image_dir
                    rename = image_dir == other_image_dir
                    for f in files:
                        if f.startswith(other_image_dir):
                            buf = zf.read(f)
                            if rename:
                                f = f[len(other_image_dir) :]
                                if f[0] in ("/", "\\"):
                                    f = f[1:]
                                f = os.path.join(settings.MEDIA_ROOT, image_dir, f)
                            outf = open(f, "w")
                            outf.write(buf)
                            outf.close()
                            results.append("Imported image: %s" % f)

                    infile = zf.read(export_file)
                    zf.close()

                    format = os.path.splitext(export_file)[1]
                    if format and format.startswith("."):
                        format = format[1:]
                    if not format:
                        errors.append(_("Could not parse format from filename: %s") % filename)
                    else:
                        raw = infile

            else:
                errors.append(_("Missing VARS in import zipfile."))

        else:
            raw = StringIO(str(raw))

        if format not in serializers.get_serializer_formats():
            errors.append(_("Unknown file format: %s") % format)

        if not errors:

            from django.db import connection, transaction

            transaction.commit_unless_managed()
            transaction.enter_transaction_management()
            transaction.managed(True)

            try:

                ct = 0
                models = set()

                for obj in serializers.deserialize(format, raw):
                    obj.save()
                    models.add(obj.object.__class__)
                    ct += 1
                if ct > 0:
                    style = no_style()
                    sequence_sql = connection.ops.sequence_reset_sql(style, models)
                    if sequence_sql:
                        cursor = connection.cursor()
                        for line in sequence_sql:
                            cursor.execute(line)

                results.append(_("Added %(count)i objects from %(filename)s") % {"count": ct, "filename": filename})
                transaction.commit()
                # label_found = True
            except Exception, e:
                # fixture.close()
                errors.append(
                    _("Problem installing fixture '%(filename)s': %(error_msg)s\n")
                    % {"filename": filename, "error_msg": str(e)}
                )
                errors.append("Raw: %s" % raw)
                transaction.rollback()
                transaction.leave_transaction_management()
Example #17
 def _output_of(self, info):
     if config.split(info['vector_version'])[0] in ['C','P']:
         return ['H']
     return None
Example #18
    def import_from(self, infile, maxsize=10000000):
        errors = []
        results = []

        filename = infile.name
        raw = infile.read()

        format = os.path.splitext(filename)[1]
        if format and format.startswith('.'):
            format = format[1:]
        if not format:
            errors.append(
                _('Could not parse format from filename: %s') % filename)

        if format == 'zip':
            zf = zipfile.ZipFile(StringIO(raw), 'r')
            files = zf.namelist()
            image_dir = config_value('PRODUCT', 'IMAGE_DIR')
            other_image_dir = None
            export_file = None
            if 'VARS' in files:
                config = zf.read('VARS')
                lines = [line.split('=') for line in config.split('\n')]
                for key, val in lines:
                    if key == 'PRODUCT.IMAGE_DIR':
                        other_image_dir = val
                    elif key == 'EXPORT_FILE':
                        export_file = val

                if other_image_dir is None or export_file is None:
                    errors.append(_('Bad VARS file in import zipfile.'))

                else:
                    # save out all the files which start with other_image_dir
                    rename = image_dir == other_image_dir
                    for f in files:
                        if f.startswith(other_image_dir):
                            buf = zf.read(f)
                            if rename:
                                f = f[len(other_image_dir):]
                                if f[0] in ('/', '\\'):
                                    f = f[1:]
                                f = os.path.join(settings.MEDIA_ROOT,
                                                 image_dir, f)
                            outf = open(f, 'w')
                            outf.write(buf)
                            outf.close()
                            results.append('Imported image: %s' % f)

                    infile = zf.read(export_file)
                    zf.close()

                    format = os.path.splitext(export_file)[1]
                    if format and format.startswith('.'):
                        format = format[1:]
                    if not format:
                        errors.append(
                            _('Could not parse format from filename: %s') %
                            filename)
                    else:
                        raw = infile

            else:
                errors.append(_('Missing VARS in import zipfile.'))

        else:
            raw = StringIO(str(raw))

        if format not in serializers.get_serializer_formats():
            errors.append(_('Unknown file format: %s') % format)

        if not errors:

            with transaction.atomic():
                try:
                    ct = 0
                    models = set()

                    for obj in serializers.deserialize(format, raw):
                        obj.save()
                        models.add(obj.object.__class__)
                        ct += 1
                    if ct > 0:
                        style = no_style()
                        sequence_sql = connection.ops.sequence_reset_sql(
                            style, models)
                        if sequence_sql:
                            cursor = connection.cursor()
                            for line in sequence_sql:
                                cursor.execute(line)

                    results.append(
                        _('Added %(count)i objects from %(filename)s') % {
                            'count': ct,
                            'filename': filename
                        })

                except Exception, e:
                    errors.append(
                        _("Problem installing fixture '%(filename)s': %(error_msg)s\n"
                          ) % {
                              'filename': filename,
                              'error_msg': str(e)
                          })
                    errors.append("Raw: %s" % raw)
Example #19
def load_mat_ids(info):
    if config.split(info['vector_version'])[0] == 'P':
        meta = data.load_document_meta(info)
        return range(meta['num_documents'])
    else:
        return data.load_mat_ids(info)
Example #20
    # Initialize data
    rt_data = open('{0}/releventTech.cfg'.format(config.CONFIG_FILE_PATH),
                   'r').read().split()
    releventTech = []
    for i in rt_data:
        releventTech.append(i.split(','))

    configurations = []
    f = open('{0}/system_configs.cfg'.format(config.CONFIG_FILE_PATH), 'r')
    while True:
        config = f.readline().strip()
        if config == '':
            break
        else:
            configurations.append(config.split(","))
    f.close()

    attackers = attacker.getAttackers()

    cve_list = []
    if start < 2002 or end > 2017:
        showHelp()
        sys.exit()

    print 'Parsing NVD data and making CVE list . . .'
    cve_list = cve.getCVEList(start, end, releventTech)
    print 'DONE !'

    # For each attack, obtain defender and attacker rewards tables
    #----------------------------------------------------------------
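Two things stand out in this Python 2 snippet: neither file handle is closed explicitly, and the loop variable config shadows the config module used just above it. An equivalent sketch of the two reads using context managers (file names from the original, everything else illustrative; the original stops at the first blank line, this version simply skips blank lines):

# Illustrative rewrite of the two reads above, not the original code.
with open('{0}/releventTech.cfg'.format(config.CONFIG_FILE_PATH)) as f:
    releventTech = [entry.split(',') for entry in f.read().split()]

with open('{0}/system_configs.cfg'.format(config.CONFIG_FILE_PATH)) as f:
    configurations = [line.strip().split(',') for line in f if line.strip()]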
Example #21
    def import_from(self, infile, maxsize=10000000):
        errors = []
        results = []

        filename = infile.name
        raw = infile.read()

        format = os.path.splitext(filename)[1]
        if format and format.startswith('.'):
            format = format[1:]
        if not format:
            errors.append(_('Could not parse format from filename: %s') % filename)

        if format == 'zip':
            zf = zipfile.ZipFile(StringIO(raw), 'r')
            files = zf.namelist()
            image_dir = config_value('PRODUCT', 'IMAGE_DIR')
            other_image_dir = None
            export_file = None
            if 'VARS' in files:
                config = zf.read('VARS')
                lines = [line.split('=') for line in config.split('\n')]
                for key, val in lines:
                    if key == 'PRODUCT.IMAGE_DIR':
                        other_image_dir = val
                    elif key == 'EXPORT_FILE':
                        export_file = val

                if other_image_dir is None or export_file is None:
                    errors.append(_('Bad VARS file in import zipfile.'))

                else:
                    # save out all the files which start with other_image_dir
                    rename = image_dir == other_image_dir
                    for f in files:
                        if f.startswith(other_image_dir):
                            buf = zf.read(f)
                            if rename:
                                f = f[len(other_image_dir):]
                                if f[0] in ('/', '\\'):
                                    f = f[1:]
                                f = os.path.join(settings.MEDIA_ROOT, image_dir, f)
                            outf = open(f, 'w')
                            outf.write(buf)
                            outf.close()
                            results.append('Imported image: %s' % f)

                    infile = zf.read(export_file)
                    zf.close()

                    format = os.path.splitext(export_file)[1]
                    if format and format.startswith('.'):
                        format = format[1:]
                    if not format:
                        errors.append(_('Could not parse format from filename: %s') % filename)
                    else:
                        raw = infile

            else:
                errors.append(_('Missing VARS in import zipfile.'))

        else:
            raw = StringIO(str(raw))

        if format not in serializers.get_serializer_formats():
            errors.append(_('Unknown file format: %s') % format)

        if not errors:

            with transaction.atomic():
                try:
                    ct = 0
                    models = set()

                    for obj in serializers.deserialize(format, raw):
                        obj.save()
                        models.add(obj.object.__class__)
                        ct += 1
                    if ct > 0:
                        style = no_style()
                        sequence_sql = connection.ops.sequence_reset_sql(style, models)
                        if sequence_sql:
                            cursor = connection.cursor()
                            for line in sequence_sql:
                                cursor.execute(line)

                    results.append(_('Added %(count)i objects from %(filename)s') % {'count': ct, 'filename': filename})

                except Exception, e:
                    errors.append(_("Problem installing fixture '%(filename)s': %(error_msg)s\n") % {'filename': filename, 'error_msg': str(e)})
                    errors.append("Raw: %s" % raw)