def _vector_table(info, entry_detail):
    """Build the vectorizer sub-table for an info entry.

    'short' yields a single header row with the version; 'long' adds
    per-type rows (TF/IDF for BoW, type for cBoW, name/model for phrase).
    """
    version = info.get('vector_version', '')
    vec_info = info.get('vector_info', {})
    exists_str = _exists_str(info, entry_detail, data.input_mat_exists)

    if entry_detail == 'short':
        return _sub_table([('Vector Version', 'header', version + exists_str)])

    if entry_detail != 'long':
        # Mirrors the original fall-through: unknown detail yields None.
        return None

    bcp, _ = config.split(version)
    rows = [
        ('Vector', 'header', exists_str),
        ('Version', 'std', version),
    ]
    if bcp == 'B':
        rows.append(('TF', 'std', vec_info.get('tf')))
        rows.append(('IDF', 'std', vec_info.get('idf')))
    elif bcp == 'C':
        cbow_type = vec_info.get('type')
        rows.append(('Type', 'std', cbow_type))
        if cbow_type == 'TfIdf':
            rows.append(('TF', 'std', vec_info.get('tf')))
            rows.append(('IDF', 'std', vec_info.get('idf')))
    elif bcp == 'P':
        emb_info = info.get('embedding_info', {})
        rows.append(('Name', 'std', emb_info.get('name')))
        rows.append(('Model', 'std', emb_info.get('model')))
    return _sub_table(rows)
def update_info(vector_version):
    """Store the chosen vector version and its config entry on ``info``.

    B/C versions populate 'vector_info'; P versions populate
    'embedding_info'.  (Existing keys from a previous selection are
    intentionally left untouched, matching the original behavior.)
    """
    kind, index = config.split(vector_version)
    info['vector_version'] = vector_version
    if kind in ('B', 'C'):
        info['vector_info'] = config.vectorizer[kind][index]
    elif kind == 'P':
        info['embedding_info'] = config.embeddings['P'][index]
def data_token_vocab_vector_base(info):
    """Build the base filename for a data/token/vocab/vector combination.

    Phrase ('P') vectors skip the token/vocab parts; any other unexpected
    prefix falls through to None, as in the original.
    """
    prefix = config.split(info['vector_version'])[0]
    if prefix == 'P':
        return '{}_Vec{}'.format(info['data_name'], info['vector_version'])
    if prefix in ('B', 'C'):
        return '{}_Tok{}_Voc{}_Vec{}'.format(
            info['data_name'],
            info['token_version'],
            info['vocab_version'],
            info['vector_version'])
def get_tokenizer_func(token_version):
    """Resolve a token version string into a tokenizer instance.

    'B' versions read from config.tokenizer; 'C' versions derive the token
    info from the matching embedding entry.
    """
    kind, index = config.split(token_version)
    tok_info = {'token_version': token_version}
    if kind == 'B':
        tok_info['token_info'] = config.tokenizer['B'][index]
    elif kind == 'C':
        embedding = config.embeddings['C'][index]
        tok_info['embedding_name'] = index
        tok_info['embedding_info'] = embedding
        tok_info['token_info'] = embedding['token_info']
    return get_tokenizer(tok_info)
def update_info(token_version):
    """Store the chosen token version and its config entry on ``info``.

    Selecting a 'B' tokenizer removes embedding keys left over from a
    previous 'C' selection; selecting 'C' (re)populates them.
    """
    kind, index = config.split(token_version)
    info['token_version'] = token_version
    if kind == 'B':
        info['token_info'] = config.tokenizer['B'][index]
        # Drop stale embedding keys from an earlier 'C' selection.
        for stale_key in ('embedding_name', 'embedding_info'):
            info.pop(stale_key, None)
    elif kind == 'C':
        embedding = config.embeddings['C'][index]
        info['token_info'] = embedding['token_info']
        info['embedding_name'] = index
        info['embedding_info'] = embedding
def _load_train_results_from_folder(self, root):
    """Load BackboneTrainResult objects from a result folder.

    Globs the config/model/log files by extension, pairs them up in
    sorted order, and asserts each triple shares the same base path
    (everything before the first '.').
    """
    def _sorted_glob(ext):
        return sorted(glob.glob(os.path.join(root, '*' + ext)))

    cfg_files = _sorted_glob(BackboneTrainResult.config_ext)
    model_files = _sorted_glob(BackboneTrainResult.model_ext)
    log_files = _sorted_glob(BackboneTrainResult.log_ext)

    results = []
    for cfg_path, model_path, log_path in zip(cfg_files, model_files, log_files):
        base = cfg_path.split('.')[0]
        # Each triple must stem from the same base filename.
        assert base == model_path.split('.')[0] == log_path.split('.')[0]
        results.append(BackboneTrainResult(base))
    return results
def token_changed():
    """React to a token selector change by toggling the vocab selectors.

    Enables the vocab selector matching the token kind ('B' or 'C'),
    disables the other, and re-assigns the value (via None) so the widget
    refreshes, then invokes the shared callback.
    """
    kind = config.split(token_selector.value)[0]
    if kind == 'B' and not vocab_selector_c.disabled:
        vocab_selector_b.disabled = False
        vocab_selector_c.disabled = True
        # Bounce the value through None to force a widget refresh.
        previous = vocab_selector_b.value
        vocab_selector_b.value = None
        vocab_selector_b.value = previous
    elif kind == 'C' and not vocab_selector_b.disabled:
        vocab_selector_b.disabled = True
        vocab_selector_c.disabled = False
        previous = vocab_selector_c.value
        vocab_selector_c.value = None
        vocab_selector_c.value = previous
    callback()
def load_ground_truth_classes(info):
    """Return the ground-truth class id of every document backing the matrix.

    For phrase ('P') vectors all documents contribute, so class ids are read
    straight from the document reader.  Otherwise only documents whose id
    matches the next entry of the stored mat ids (in order) are kept.

    Fixes over the previous version: index-based matching replaces the
    repeated ``del mat_ids[0]`` (O(n) per removal, O(n^2) overall), and an
    empty ``mat_ids`` no longer raises IndexError on the first document.
    This also makes the implementation consistent with the sibling
    index-based ``load_ground_truth_classes``.
    """
    if config.split(info['vector_version'])[0] == 'P':
        with data.document_reader(info) as documents:
            return [document['class_id'] for document in documents]
    mat_ids = data.load_mat_ids(info)
    ground_truth_classes = []
    idx = 0
    with data.document_reader(info) as documents:
        for document in documents:
            if idx >= len(mat_ids):
                break  # every requested id has been matched
            if mat_ids[idx] == document['id']:
                ground_truth_classes.append(document['class_id'])
                idx += 1
    return ground_truth_classes
def vector_changed():
    """Show/hide the selector widgets to match the current vector kind.

    'B' shows the combined token+vocab selector; 'C' shows the separate
    token and vocab selectors; 'P' hides all three.  Finishes by refreshing
    the output without forcing a reload.
    """
    kind = config.split(info['vector_version'])[0]
    # (token_vocab, token, vocab) visibility per vector kind.
    layout_map = {
        'B': ('visible', 'hidden', 'hidden'),
        'C': ('hidden', 'visible', 'visible'),
        'P': ('hidden', 'hidden', 'hidden'),
    }
    if kind in layout_map:
        tv_vis, tok_vis, voc_vis = layout_map[kind]
        token_vocab_selector.layout.visibility = tv_vis
        token_selector.layout.visibility = tok_vis
        vocab_selector.layout.visibility = voc_vis
    update_output(False)
def load_ground_truth_classes(info):
    """Return the true class label of every document backing the input matrix.

    Phrase ('P') vectors use every document; otherwise labels are collected
    for documents whose id matches the stored mat ids, in order.
    """
    if config.split(info['vector_version'])[0] == 'P':
        with data.document_reader(info) as documents:
            return [document['class_id'] for document in documents]

    mat_ids = data.load_mat_ids(info)
    labels = [0] * len(mat_ids)
    position = 0
    with data.document_reader(info) as documents:
        for document in documents:
            if mat_ids[position] == document['id']:
                labels[position] = document['class_id']
                position += 1
                if position >= len(labels):
                    break  # all slots filled
    return labels
def run_vectorizer(info=None):
    """Run the vectorizer stages.

    With ``info is None``, iterates every enabled BoW/cBoW/Phrase
    combination from the config; otherwise runs only the pipeline for the
    single version in ``info``.  ``runvars`` is reset per stage and cleared
    at the end.
    """
    nbprint('Vectorizer').push()
    global runvars
    if info is None:
        # (config flag, banner, iteration spec, callbacks) per stage.
        stages = (
            ('run_B', 'BoW', ['data', 'token:BC', 'vocab', 'vector:B'], [count_mat, bow]),
            ('run_C', 'cBoW', ['data', 'token:C', 'vocab', 'vector:C'], [count_mat, cbow]),
            ('run_P', 'Phrase', ['data', 'vector:P'], [phrase]),
        )
        for flag, banner, spec, stage_callbacks in stages:
            if config.vectorizer[flag]:
                nbprint(banner).push()
                runvars = {}
                iterate(spec, stage_callbacks)
                nbprint.pop()
    else:
        runvars = {}
        kind = config.split(info['vector_version'])[0]
        if kind in ('B', 'C'):
            count_mat(info)
            if kind == 'B':
                bow(info)
            else:
                cbow(info)
        else:
            # Anything else (i.e. 'P') runs the phrase vectorizer.
            phrase(info)
    runvars = None
    nbprint.pop()
def update_info(vocab_version):
    """Record the selected vocabulary version and its config entry on ``info``."""
    kind, index = config.split(vocab_version)
    info['vocab_version'] = vocab_version
    info['vocab_info'] = config.vocab[kind][index]
def _output_of(self, info):
    """Return the model outputs for BoW ('B') vectors, or None otherwise.

    Fix: the original fell through to an implicit None for non-'B'
    versions; the return is now explicit, consistent with the sibling
    ``_output_of`` that returns None explicitly.
    """
    if config.split(info['vector_version'])[0] == 'B':
        return ['W', 'H']
    return None
def load_input_mat(info):
    """Load the input matrix for the given info entry.

    'B' matrices are stored as scipy sparse npz files; 'C' and 'P'
    matrices are dense arrays loaded via first_array_in.  Any other
    prefix falls through to None, as in the original.
    """
    kind = config.split(info['vector_version'])[0]
    path = input_mat_filename(info)
    if kind == 'B':
        return sparse.load_npz(path)
    if kind in ('C', 'P'):
        return first_array_in(path)
def iterate(what, callbacks, info={}, depth=1, print_iterates=True):
    """Recursively iterate all enabled combinations of the categories in ``what``.

    ``what`` is a (list of) category name(s), each optionally carrying a
    detail suffix as ``'category:detail'``.  ``callbacks`` is right-aligned
    with ``what`` (padded with None on the left); call_next presumably
    invokes the matching callback and recurses with the accumulated info.

    NOTE(review): ``info={}`` is a mutable default argument — safe only if
    no caller or callee ever mutates it in place; confirm.
    """
    global _required_model_outputs
    # Normalize scalar arguments to lists.
    if not isinstance(what, list):
        what = [ what, ]
    if not isinstance(callbacks, list):
        callbacks = [ callbacks, ]
    # Right-align callbacks with `what` by left-padding with None.
    callbacks = [None] * (len(what) - len(callbacks)) + callbacks
    # Supported categories:
    # data
    # token[:BC]
    # vocab
    # vector[:BCP]
    # models[:W,H]
    # modelinputs
    # num_topics
    try:
        # 'category:detail' splits into two parts; a plain 'category'
        # raises ValueError and gets detail = None.
        category, detail = what[0].split(':')
    except ValueError:
        category, detail = what[0], None
    if category == "data":
        # One recursion per enabled dataset.
        for data_name, data_info in config.datasets.items():
            if data_info["run"]:
                new_data = {"data_name": data_name, "data_info": data_info}
                call_next(what, callbacks, data_info["name"], new_data, info,
                          depth, print_iterates)
    elif category == 'token':
        # Token versions restricted by the detail suffix (default: all BCP).
        for token_version in config.token_version_list(detail or 'BCP'):
            bcp, id = config.split(token_version)
            if bcp == 'B':
                token_info = config.tokenizer['B'][id]
                if token_info["run"]:
                    new_data = {
                        'token_version': token_version,
                        'token_info': token_info
                    }
                    call_next(what, callbacks,
                              "Token {}".format(token_version), new_data,
                              info, depth, print_iterates)
            elif bcp == 'C':
                # 'C' tokens derive their token_info from the embedding entry.
                embedding_info = config.embeddings['C'][id]
                if embedding_info["run"]:
                    new_data = {
                        'token_version': token_version,
                        'token_info': embedding_info['token_info'],
                        'embedding_name': id,
                        'embedding_info': embedding_info
                    }
                    call_next(what, callbacks,
                              "Token {}".format(token_version), new_data,
                              info, depth, print_iterates)
    elif category == "vocab":
        # Vocab iteration needs the token version chosen by an outer level.
        if "token_version" not in info:
            print(
                "{}WARNING: Cannot iterate 'vocab' without knowing token version"
                .format(" " * depth))
            return
        bcp = config.split(info["token_version"])[0]
        for vocab_version in config.vocab_version_list(bcp):
            bcp, id = config.split(vocab_version)
            vocab_info = config.vocab[bcp][id]
            if vocab_info["run"]:
                new_data = {
                    "vocab_version": vocab_version,
                    "vocab_info": vocab_info
                }
                call_next(what, callbacks,
                          "Vocab {}".format(new_data["vocab_version"]),
                          new_data, info, depth, print_iterates)
    elif category == 'vector':
        for vector_version in config.vector_version_list(detail or 'BCP'):
            bcp, id = config.split(vector_version)
            if bcp == 'B' or bcp == 'C':
                vector_info = config.vectorizer[bcp][id]
                if vector_info["run"]:
                    new_data = {
                        "vector_version": vector_version,
                        "vector_info": vector_info
                    }
                    call_next(what, callbacks,
                              "Vector {}".format(new_data["vector_version"]),
                              new_data, info, depth, print_iterates)
            elif bcp == 'P':
                # Phrase vectors are configured via the embeddings table.
                embedding_info = config.embeddings['P'][id]
                if embedding_info["run"]:
                    new_data = {
                        "vector_version": vector_version,
                        "embedding_info": embedding_info
                    }
                    call_next(what, callbacks,
                              "Vector {}".format(new_data["vector_version"]),
                              new_data, info, depth, print_iterates)
    elif category == "models":
        if detail is not None:
            detail = detail.split(',')
        # NOTE(review): placement reconstructed from flattened source —
        # records which model outputs (e.g. ['W','H']) are required
        # downstream; None when no detail suffix was given.  Confirm.
        _required_model_outputs = detail
        for model_name, model_info in config.models['list'].items():
            if model_info["run"]:
                # Instantiate the model class named in the config entry.
                model = import_cls('models', model_info['mod'],
                                   model_info['cls'])(model_info)
                new_data = {
                    'model_name': model_name,
                    'model_info': model_info,
                    'model': model
                }
                call_next(what, callbacks,
                          "Model {}".format(model_info["name"]), new_data,
                          info, depth, print_iterates)
    elif category == 'modelinputs':
        # Expand the model's accepted vector kinds into full sub-iterations
        # spliced in after the current level.
        vector_bcps = info['model_info'].get('vector', 'BCP')
        # Wrap the innermost callback so model outputs are verified first.
        original_callback = callbacks[0]
        callbacks[0] = lambda i: check_model_output(i, original_callback)
        if 'B' in vector_bcps:
            token_bcps = info['model_info'].get('token', 'BC')
            what_b = what.copy()
            what_b[1:1] = [
                'data', 'token:{}'.format(token_bcps), 'vocab', 'vector:B'
            ]
            callbacks_b = [None] * (len(what_b) - len(callbacks)) + callbacks
            call_next(what_b, callbacks_b, 'Model Input BoW', {}, info,
                      depth, print_iterates)
        if 'C' in vector_bcps:
            what_c = what.copy()
            what_c[1:1] = ['data', 'token:C', 'vocab', 'vector:C']
            callbacks_c = [None] * (len(what_c) - len(callbacks)) + callbacks
            call_next(what_c, callbacks_c, 'Model Input cBoW', {}, info,
                      depth, print_iterates)
        if 'P' in vector_bcps:
            what_p = what.copy()
            what_p[1:1] = ['data', 'vector:P']
            callbacks_p = [None] * (len(what_p) - len(callbacks)) + callbacks
            call_next(what_p, callbacks_p, 'Model Input Phrase', {}, info,
                      depth, print_iterates)
    elif category == "num_topics":
        if "data_info" in info:
            for num_topics in info["data_info"]["num_topics"]:
                # May translate symbolic settings into a concrete count.
                num_topics = convert_num_topics(info, num_topics)
                new_data = {"num_topics": num_topics}
                call_next(what, callbacks, "Topics {}".format(num_topics),
                          new_data, info, depth, print_iterates)
        else:
            raise UtilException(
                'Cannot iterate "num_topics" without knowing data')
    elif category == "distiller":
        for distiller_name, distiller_info in config.distiller['list'].items():
            if distiller_info["run"]:
                distiller = import_cls('distiller', distiller_info['mod'],
                                       distiller_info['cls'])(distiller_info)
                new_data = {
                    'distiller_name': distiller_name,
                    'distiller_info': distiller_info,
                    'distiller': distiller
                }
                call_next(what, callbacks,
                          "Distiller {}".format(distiller_info["name"]),
                          new_data, info, depth, print_iterates)
    elif category == "distillerinputs":
        model_out = info['distiller_info']['model_out']
        # NOTE(review): original_callback is assigned but never used in this
        # branch (unlike 'modelinputs'); possibly vestigial — confirm.
        original_callback = callbacks[0]
        for model_out_entry in model_out:
            what_version = what.copy()
            what_version[1:1] = [
                'models:{}'.format(model_out_entry), 'modelinputs',
                'num_topics'
            ]
            callbacks_version = [None] * (len(what_version) -
                                          len(callbacks)) + callbacks
            # NOTE(review): label 'Model Input BoW' looks copy-pasted from
            # the 'modelinputs' branch — confirm intended banner text.
            call_next(what_version, callbacks_version, 'Model Input BoW', {},
                      info, depth, print_iterates)
    else:
        print("{}WARNING: Cannot iterate '{}'".format(" " * depth, what[0]))
def import_from(self, infile, maxsize=10000000):
    """Import serialized fixture data, optionally packaged in a zip archive.

    Python 2 / legacy Django code (``except Exception, e`` syntax and the
    pre-1.6 manual transaction API).  Zip archives must contain a VARS
    manifest naming the original image dir and the export file; images are
    copied under MEDIA_ROOT and the export file is deserialized inside a
    manually managed transaction.

    NOTE(review): ``maxsize`` and ``filetype`` are currently unused (the
    size check is commented out below).  Accumulated ``errors``/``results``
    lists are built but not returned within this span — confirm how the
    caller retrieves them.
    """
    errors = []
    results = []
    filetype = infile.content_type
    filename = infile.name
    raw = infile.read()
    # filelen = len(raw)
    # if filelen > maxsize:
    #     errors.append(_('Import too large, must be smaller than %i bytes.' % maxsize ))
    # Derive the serializer format from the file extension.
    format = os.path.splitext(filename)[1]
    if format and format.startswith("."):
        format = format[1:]
    if not format:
        errors.append(_("Could not parse format from filename: %s") % filename)
    if format == "zip":
        zf = zipfile.ZipFile(StringIO(raw), "r")
        files = zf.namelist()
        image_dir = config_value("PRODUCT", "IMAGE_DIR")
        other_image_dir = None
        export_file = None
        if "VARS" in files:
            # VARS is a simple key=value manifest written at export time.
            config = zf.read("VARS")
            lines = [line.split("=") for line in config.split("\n")]
            for key, val in lines:
                if key == "PRODUCT.IMAGE_DIR":
                    other_image_dir = val
                elif key == "EXPORT_FILE":
                    export_file = val
            if other_image_dir is None or export_file is None:
                errors.append(_("Bad VARS file in import zipfile."))
            else:
                # save out all the files which start with other_image_dr
                # Only strip the exporter's prefix when it matches ours.
                rename = image_dir == other_image_dir
                for f in files:
                    if f.startswith(other_image_dir):
                        buf = zf.read(f)
                        if rename:
                            f = f[len(other_image_dir):]
                            if f[0] in ("/", "\\"):
                                f = f[1:]
                        f = os.path.join(settings.MEDIA_ROOT, image_dir, f)
                        outf = open(f, "w")
                        outf.write(buf)
                        outf.close()
                        results.append("Imported image: %s" % f)
                # Replace the upload with the embedded export file and
                # re-derive the serializer format from its extension.
                infile = zf.read(export_file)
                zf.close()
                format = os.path.splitext(export_file)[1]
                if format and format.startswith("."):
                    format = format[1:]
                if not format:
                    errors.append(_("Could not parse format from filename: %s") % filename)
                else:
                    raw = infile
        else:
            errors.append(_("Missing VARS in import zipfile."))
    else:
        raw = StringIO(str(raw))
    if not format in serializers.get_serializer_formats():
        errors.append(_("Unknown file format: %s") % format)
    if not errors:
        # Legacy (pre-Django-1.6) manual transaction management.
        from django.db import connection, transaction
        transaction.commit_unless_managed()
        transaction.enter_transaction_management()
        transaction.managed(True)
        try:
            ct = 0
            models = set()
            for obj in serializers.deserialize(format, raw):
                obj.save()
                models.add(obj.object.__class__)
                ct += 1
            if ct > 0:
                # Reset DB sequences so new inserts don't collide with
                # the imported primary keys.
                style = no_style()
                sequence_sql = connection.ops.sequence_reset_sql(style, models)
                if sequence_sql:
                    cursor = connection.cursor()
                    for line in sequence_sql:
                        cursor.execute(line)
                results.append(_("Added %(count)i objects from %(filename)s") %
                               {"count": ct, "filename": filename})
            transaction.commit()
            # label_found = True
        except Exception, e:
            # fixture.close()
            errors.append(
                _("Problem installing fixture '%(filename)s': %(error_msg)s\n")
                % {"filename": filename, "error_msg": str(e)}
            )
            errors.append("Raw: %s" % raw)
            transaction.rollback()
        # NOTE(review): placement reconstructed from flattened source —
        # assumed to run on both success and failure paths; confirm.
        transaction.leave_transaction_management()
def _output_of(self, info):
    """Return the model outputs for cBoW/phrase vectors, else None."""
    kind = config.split(info['vector_version'])[0]
    return ['H'] if kind in ['C', 'P'] else None
def import_from(self, infile, maxsize=10000000):
    """Import serialized fixture data, optionally packaged in a zip archive.

    Python 2 code (``except Exception, e``) using ``transaction.atomic``.
    Zip archives must contain a VARS manifest naming the original image dir
    and the export file; images are copied under MEDIA_ROOT and the export
    file is deserialized inside the transaction.

    NOTE(review): ``maxsize`` is accepted but never checked here.  The
    ``errors``/``results`` lists are built but not returned within this
    span — confirm how the caller retrieves them.
    """
    errors = []
    results = []
    filename = infile.name
    raw = infile.read()
    # Derive the serializer format from the file extension.
    format = os.path.splitext(filename)[1]
    if format and format.startswith('.'):
        format = format[1:]
    if not format:
        errors.append(
            _('Could not parse format from filename: %s') % filename)
    if format == 'zip':
        zf = zipfile.ZipFile(StringIO(raw), 'r')
        files = zf.namelist()
        image_dir = config_value('PRODUCT', 'IMAGE_DIR')
        other_image_dir = None
        export_file = None
        if 'VARS' in files:
            # VARS is a simple key=value manifest written at export time.
            config = zf.read('VARS')
            lines = [line.split('=') for line in config.split('\n')]
            for key, val in lines:
                if key == 'PRODUCT.IMAGE_DIR':
                    other_image_dir = val
                elif key == 'EXPORT_FILE':
                    export_file = val
            if other_image_dir is None or export_file is None:
                errors.append(_('Bad VARS file in import zipfile.'))
            else:
                # save out all the files which start with other_image_dr
                # Only strip the exporter's prefix when it matches ours.
                rename = image_dir == other_image_dir
                for f in files:
                    if f.startswith(other_image_dir):
                        buf = zf.read(f)
                        if rename:
                            f = f[len(other_image_dir):]
                            if f[0] in ('/', '\\'):
                                f = f[1:]
                        f = os.path.join(settings.MEDIA_ROOT, image_dir, f)
                        outf = open(f, 'w')
                        outf.write(buf)
                        outf.close()
                        results.append('Imported image: %s' % f)
                # Replace the upload with the embedded export file and
                # re-derive the serializer format from its extension.
                infile = zf.read(export_file)
                zf.close()
                format = os.path.splitext(export_file)[1]
                if format and format.startswith('.'):
                    format = format[1:]
                if not format:
                    errors.append(
                        _('Could not parse format from filename: %s') % filename)
                else:
                    raw = infile
        else:
            errors.append(_('Missing VARS in import zipfile.'))
    else:
        raw = StringIO(str(raw))
    if format not in serializers.get_serializer_formats():
        errors.append(_('Unknown file format: %s') % format)
    if not errors:
        with transaction.atomic():
            # NOTE(review): swallowing the exception inside atomic()
            # (instead of letting it propagate) leaves the atomic block
            # marked for rollback — confirm this matches Django's
            # expectations for this version.
            try:
                ct = 0
                models = set()
                for obj in serializers.deserialize(format, raw):
                    obj.save()
                    models.add(obj.object.__class__)
                    ct += 1
                if ct > 0:
                    # Reset DB sequences so new inserts don't collide
                    # with the imported primary keys.
                    style = no_style()
                    sequence_sql = connection.ops.sequence_reset_sql(
                        style, models)
                    if sequence_sql:
                        cursor = connection.cursor()
                        for line in sequence_sql:
                            cursor.execute(line)
                    results.append(
                        _('Added %(count)i objects from %(filename)s') % {
                            'count': ct,
                            'filename': filename
                        })
            except Exception, e:
                errors.append(
                    _("Problem installing fixture '%(filename)s': %(error_msg)s\n"
                      ) % {
                          'filename': filename,
                          'error_msg': str(e)
                      })
                errors.append("Raw: %s" % raw)
def load_mat_ids(info):
    """Return the document ids backing the input matrix.

    Phrase ('P') vectors have one row per document, so a simple range over
    the document count suffices; other kinds defer to the stored mat ids.
    """
    kind = config.split(info['vector_version'])[0]
    if kind != 'P':
        return data.load_mat_ids(info)
    meta = data.load_document_meta(info)
    return range(meta['num_documents'])
# Initialize data rt_data = open('{0}/releventTech.cfg'.format(config.CONFIG_FILE_PATH), 'r').read().split() releventTech = [] for i in rt_data: releventTech.append(i.split(',')) configurations = [] f = open('{0}/system_configs.cfg'.format(config.CONFIG_FILE_PATH), 'r') while True: config = f.readline().strip() if config == '': break else: configurations.append(config.split(",")) f.close() attackers = attacker.getAttackers() cve_list = [] if start < 2002 or end > 2017: showHelp() sys.exit() print 'Parsing NVD data and making CVE list . . .' cve_list = cve.getCVEList(start, end, releventTech) print 'DONE !' # For each attack, obtain defender and attacker rewards tables #----------------------------------------------------------------
def import_from(self, infile, maxsize=10000000):
    """Import serialized fixture data, optionally packaged in a zip archive.

    Python 2 code (``except Exception, e``) using ``transaction.atomic``.
    Near-duplicate of the other atomic ``import_from`` in this file.  Zip
    archives must contain a VARS manifest naming the original image dir and
    the export file; images are copied under MEDIA_ROOT and the export file
    is deserialized inside the transaction.

    NOTE(review): ``maxsize`` is accepted but never checked here.  The
    ``errors``/``results`` lists are built but not returned within this
    span — confirm how the caller retrieves them.
    """
    errors = []
    results = []
    filename = infile.name
    raw = infile.read()
    # Derive the serializer format from the file extension.
    format = os.path.splitext(filename)[1]
    if format and format.startswith('.'):
        format = format[1:]
    if not format:
        errors.append(_('Could not parse format from filename: %s') % filename)
    if format == 'zip':
        zf = zipfile.ZipFile(StringIO(raw), 'r')
        files = zf.namelist()
        image_dir = config_value('PRODUCT', 'IMAGE_DIR')
        other_image_dir = None
        export_file = None
        if 'VARS' in files:
            # VARS is a simple key=value manifest written at export time.
            config = zf.read('VARS')
            lines = [line.split('=') for line in config.split('\n')]
            for key, val in lines:
                if key == 'PRODUCT.IMAGE_DIR':
                    other_image_dir = val
                elif key == 'EXPORT_FILE':
                    export_file = val
            if other_image_dir is None or export_file is None:
                errors.append(_('Bad VARS file in import zipfile.'))
            else:
                # save out all the files which start with other_image_dr
                # Only strip the exporter's prefix when it matches ours.
                rename = image_dir == other_image_dir
                for f in files:
                    if f.startswith(other_image_dir):
                        buf = zf.read(f)
                        if rename:
                            f = f[len(other_image_dir):]
                            if f[0] in ('/', '\\'):
                                f = f[1:]
                        f = os.path.join(settings.MEDIA_ROOT, image_dir, f)
                        outf = open(f, 'w')
                        outf.write(buf)
                        outf.close()
                        results.append('Imported image: %s' % f)
                # Replace the upload with the embedded export file and
                # re-derive the serializer format from its extension.
                infile = zf.read(export_file)
                zf.close()
                format = os.path.splitext(export_file)[1]
                if format and format.startswith('.'):
                    format = format[1:]
                if not format:
                    errors.append(_('Could not parse format from filename: %s') % filename)
                else:
                    raw = infile
        else:
            errors.append(_('Missing VARS in import zipfile.'))
    else:
        raw = StringIO(str(raw))
    if format not in serializers.get_serializer_formats():
        errors.append(_('Unknown file format: %s') % format)
    if not errors:
        with transaction.atomic():
            # NOTE(review): swallowing the exception inside atomic()
            # (instead of letting it propagate) leaves the atomic block
            # marked for rollback — confirm this matches Django's
            # expectations for this version.
            try:
                ct = 0
                models = set()
                for obj in serializers.deserialize(format, raw):
                    obj.save()
                    models.add(obj.object.__class__)
                    ct += 1
                if ct > 0:
                    # Reset DB sequences so new inserts don't collide
                    # with the imported primary keys.
                    style = no_style()
                    sequence_sql = connection.ops.sequence_reset_sql(style, models)
                    if sequence_sql:
                        cursor = connection.cursor()
                        for line in sequence_sql:
                            cursor.execute(line)
                    results.append(_('Added %(count)i objects from %(filename)s') % {'count': ct, 'filename': filename})
            except Exception, e:
                errors.append(_("Problem installing fixture '%(filename)s': %(error_msg)s\n") % {'filename': filename, 'error_msg': str(e)})
                errors.append("Raw: %s" % raw)