Example #1
def featurize(generate):
    # File paths from project level
    # fp_train_A = 'tweet_irony_detection/train/SemEval2018-T3-train-taskA.txt'
    fp_train_A = 'train/SemEval2018-T3-train-taskA.txt'
    fp_train_B = 'train/SemEval2018-T3-train-taskB.txt'
    fp_test_A = 'test_TaskA/SemEval2018-T3_input_test_taskA.txt'
    fp_test_B = 'test_TaskB/SemEval2018-T3_input_test_taskB.txt'
    fp_labels_A = 'goldtest_TaskA/SemEval2018-T3_gold_test_taskA_emoji.txt'
    fp_labels_B = 'goldtest_TaskB/SemEval2018-T3_gold_test_taskB_emoji.txt'

    # Training data for tasks A and B, test data & gold labels for both tasks
    pre_process_url = True  # Set to remove URLs
    pre_process_usr = True  # Set to remove user mentions
    train_A = read_non_emoji_tweets(fp_train_A, "train", pre_process_url,
                                    pre_process_usr)
    train_B = read_non_emoji_tweets(fp_train_B, "train", pre_process_url,
                                    pre_process_usr)
    tr_labels_A = [t.tweet_label for t in train_A]
    tr_labels_B = [t.tweet_label for t in train_B]

    test_A = read_non_emoji_tweets(fp_test_A, "test", pre_process_url,
                                   pre_process_usr)
    test_B = read_non_emoji_tweets(fp_test_B, "test", pre_process_url,
                                   pre_process_usr)
    gold_A = get_label(fp_labels_A)
    tst_labels_A = [v for k, v in gold_A.items()]
    gold_B = get_label(fp_labels_B)
    tst_labels_B = [v for k, v in gold_B.items()]

    # Print class stats

    print_class_stats(train_A, train_B, gold_A, gold_B)

    # Read features from files
    # if not generate:
    #     feats_tr_A = read_features("feats_tr_A.csv")
    #     feats_tst_A= read_features("feats_tst_A.csv")
    #     feats_tr_B= read_features("feats_tr_B.csv")
    #     feats_tst_B=read_features("feats_tst_B.csv")
    #     return feats_tr_A, feats_tst_A, feats_tr_B, feats_tst_B, tr_labels_A, tr_labels_B, tst_labels_A, tst_labels_B

    # Generate features
    feats_tr_A = get_features(train_A, generate, 'train_A')
    feats_tst_A = get_features(test_A, generate, 'test_A')
    feats_tr_B = get_features(train_B, generate,
                              'train_B')  # Same as A's features
    feats_tst_B = get_features(test_B, generate,
                               'test_B')  # Same as A's features

    # save_features(feats_tr_A,"feats_tr_A.csv")
    # save_features(feats_tst_A,"feats_tst_A.csv")
    # save_features(feats_tr_B,"feats_tr_B.csv")
    # save_features(feats_tst_B,"feats_tst_B.csv")
    return feats_tr_A, feats_tst_A, feats_tr_B, feats_tst_B, tr_labels_A, tr_labels_B, tst_labels_A, tst_labels_B
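A plausible call site for the function above (hypothetical driver code; only the return order is taken from the snippet):

# Hypothetical usage: unpack the 8-tuple returned by featurize().
(feats_tr_A, feats_tst_A, feats_tr_B, feats_tst_B,
 tr_labels_A, tr_labels_B, tst_labels_A, tst_labels_B) = featurize(generate=True)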
Example #2
File: Batch.py Project: jkjan/NLP
    def get_batch(self):
        input = torch.Tensor()
        output = torch.Tensor()

        for i in range(0, batch_size):
            if self.word_in_sentence == len(tokenized[self.sentence]):
                self.sentence += 1
                self.word_in_sentence = 0

            index = vocabulary[tokenized[self.sentence][self.word_in_sentence]]
            adding = index_to_one_hot(index)
            input = torch.cat([input, adding], 0)

            get_label(tokenized[self.sentence], self.word_in_sentence)  # NOTE: return value is discarded here
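This snippet relies on module-level globals (batch_size, tokenized, vocabulary) and an index_to_one_hot helper defined elsewhere in the project. A minimal sketch of such a helper, assuming a plain index-to-row-vector mapping (the signature is an assumption):

import torch

def index_to_one_hot(index, vocab_size):
    # Hypothetical helper: build a 1 x vocab_size one-hot row,
    # suitable for the torch.cat([input, adding], 0) call above.
    vec = torch.zeros(1, vocab_size)
    vec[0, index] = 1.0
    return vec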
Example #3
    def __init__(self,
                 args,
                 graph_out,
                 train_dataset=None,
                 dev_dataset=None,
                 test_dataset=None):
        self.args = args
        self.train_dataset = train_dataset
        self.dev_dataset = dev_dataset
        self.test_dataset = test_dataset
        self.graph_out = graph_out
        # self.early_stopping = EarlyStopping(patience=10, verbose=True)

        self.label_lst = get_label(args)
        self.num_labels = len(self.label_lst)
        self.hidden_states_list = None
        self.config_class, _, _ = MODEL_CLASSES[args.model_type]

        self.config = self.config_class.from_pretrained(
            args.model_name_or_path,
            num_labels=self.num_labels,
            finetuning_task=args.task,
            output_hidden_states=True,
            output_attentions=True)
        self.model = NumericHGN(self.args, config=self.config)

        # GPU or CPU
        self.device = "cuda" if torch.cuda.is_available(
        ) and not args.no_cuda else "cpu"
        self.model.to(self.device)
        print(
            "*****************Config & Pretrained Model load complete**********************"
        )
Example #4
    def on_root_parse(self, root):
        """On root parse event handler.

        """
        def _get_change(i):
            return collections.OrderedDict(
                version=i[0],
                date=i[1],
                author=i[2],
                note=i[3]
                )

        obj = collections.OrderedDict()
        obj['id'] = root.id
        obj['label'] = get_label(root.name)
        obj['description'] = root.description
        obj['contact'] = root.contact
        obj['authors'] = [i.strip() for i in root.authors.split(',')]
        obj['contributors'] = [i.strip() for i in root.contributors.split(',')]
        obj['project'] = 'cmip6'
        obj['changeHistory'] = [_get_change(i) for i in root.change_history]
        obj['shortTables'] = []
        obj['subTopics'] = []

        self._maps[root] = obj
Example #5
    def __init__(self,
                 args,
                 train_dataset=None,
                 dev_dataset=None,
                 test_dataset=None):
        self.args = args
        self.train_dataset = train_dataset
        self.dev_dataset = dev_dataset
        self.test_dataset = test_dataset

        self.label_lst = get_label(args)
        self.num_labels = len(self.label_lst)

        self.config_class = AutoConfig
        self.model_class = BertForSequenceClassification

        self.config = self.config_class.from_pretrained(
            args.model_name_or_path,
            num_labels=self.num_labels,
            finetuning_task=args.task,
            id2label={str(i): label
                      for i, label in enumerate(self.label_lst)},
            label2id={label: i
                      for i, label in enumerate(self.label_lst)})
        self.model = self.model_class.from_pretrained(args.model_name_or_path,
                                                      config=self.config)

        # GPU or CPU
        self.device = "cuda" if torch.cuda.is_available(
        ) and not args.no_cuda else "cpu"
        self.model.to(self.device)
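In these trainer examples, get_label(args) supplies the task's label list. A minimal sketch under the assumption that labels live one per line in a text file (the "label.txt" name and args.data_dir field are hypothetical):

import os

def get_label(args):
    # Assumed layout: one label per line in <data_dir>/label.txt.
    with open(os.path.join(args.data_dir, "label.txt"), "r", encoding="utf-8") as f:
        return [line.strip() for line in f if line.strip()]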
Example #6
    def __init__(self,
                 args,
                 train_dataset=None,
                 dev_dataset=None,
                 test_dataset=None):
        self.args = args
        self.train_dataset = ConcatDataset([train_dataset, test_dataset])
        self.dev_dataset = dev_dataset
        self.test_dataset = test_dataset
        print("Train Dataset:", len(self.train_dataset))

        self.label_lst = get_label(args)
        self.num_labels = len(self.label_lst)

        self.config_class, self.model_class, _ = MODEL_CLASSES[args.model_type]

        self.config = self.config_class.from_pretrained(
            args.model_name_or_path,
            num_labels=self.num_labels,
            finetuning_task=args.task,
            id2label={str(i): label
                      for i, label in enumerate(self.label_lst)},
            label2id={label: i
                      for i, label in enumerate(self.label_lst)})
        self.model = self.model_class.from_pretrained(args.model_name_or_path,
                                                      config=self.config)

        # GPU or CPU
        self.device = "cuda" if torch.cuda.is_available(
        ) and not args.no_cuda else "cpu"
        self.model.to(self.device)
Example #7
def get_slices(slice_len=512):
    """Slice audiofiles into slices of size 'slice_len'

    In addition to slicing, each slice is normalized between -1 and 1.

    :param slice_len: number of samples of one slice

    :return: returns a tuple containing:
                - a 2-D array of shape (num_slices, slice_len), where
                  num_slices is determined by slice_len and the number of
                  files specified in the global variables section
                - a 1-D array of length num_slices containing the label of
                  each slice
    """

    features = None
    labels = None

    num_files = len(filenames)
    progress = 0

    printProgressBar(progress, num_files, prefix='Progress',
                     suffix='Complete', length=50)

    for f in filenames:

        # read file
        _, data = wav.read(f)
        data = data[:, 0]

        num_slices = len(data) // slice_len
        assert num_slices > 0, 'slice_len is too big'
        num_samples = num_slices * slice_len

        new_features = np.array(
            np.split(data[:num_samples], num_slices), dtype=np.float16)

        if features is None:
            features = new_features
        else:
            features = np.vstack((features, new_features))

        label = get_label(f)
        num_new_slices = np.shape(new_features)[0]  # slices produced from this file
        new_labels = np.repeat(label, num_new_slices)

        if labels is None:
            labels = new_labels
        else:
            labels = np.append(labels, new_labels)

        progress += 1
        printProgressBar(progress, num_files, prefix='Progress',
                         suffix='Complete', length=50)

    for feature in features:
        feature_max = np.max(np.abs(feature))
        if feature_max != 0:
            feature /= feature_max

    return features, labels
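A hedged usage sketch; `filenames` and `printProgressBar` are module-level globals assumed to be defined elsewhere in the script:

# Hypothetical usage of get_slices(); shapes follow the implementation above.
features, labels = get_slices(slice_len=512)
print(features.shape)   # (num_slices, 512), float16, each slice scaled to [-1, 1]
print(labels.shape)     # (num_slices,), one label per slice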
Example #8
def create_dataframe_domain():
    """Creates a dataframe with the specified features 

    The specified features are extracted from the specified files and scaled using the StandardScaler.

    :return: pandas dataframe with the corresponding label as the last column
    """

    features = None
    labels = None

    num_files = len(filenames)
    progress = 0

    printProgressBar(progress, num_files, prefix='Progress',
                     suffix='Complete', length=50)

    for f in filenames:

        new_features = extract_features_file(f)
        if features is None:
            features = new_features
        else:
            features = np.vstack((features, new_features))

        label = get_label(f)
        num_new_rows = np.shape(new_features)[0]  # feature rows produced from this file
        new_labels = np.repeat(label, num_new_rows)

        if labels is None:
            labels = new_labels
        else:
            labels = np.append(labels, new_labels)

        progress += 1
        printProgressBar(progress, num_files, prefix='Progress',
                         suffix='Complete', length=50)

    # scale data
    features_scaled = StandardScaler().fit_transform(features)

    df_features = pd.DataFrame(features_scaled)
    df_features.columns = feature_names

    # df['Label'] = labels  # NOTE: old version used plain string labels instead of one-hot

    labels_enc = label_encoder.transform(labels)
    labels_one_hot = to_categorical(labels_enc)

    df_labels = pd.DataFrame(labels_one_hot)
    df_labels.columns = instruments

    assert len(df_features.index) == len(df_labels.index)

    df = pd.concat([df_features, df_labels], axis=1, join='inner')

    print(df.head())

    return df
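Here get_label(f) maps an audio file to its instrument class. A plausible sketch, assuming a hypothetical "instrument_take.wav" naming scheme:

import os

def get_label(path):
    # e.g. 'data/guitar_003.wav' -> 'guitar' (the naming scheme is an assumption)
    return os.path.basename(path).split('_')[0]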
Example #9
    def __init__(
        self,
        args,
    ):
        self.args = args
        self.biased_model = None
             
        self.label_lst = get_label(args)
        self.num_labels = len(self.label_lst)

        self.config = BertConfig.from_pretrained(
            args.model_name_or_path,
            num_labels=self.num_labels,
            finetuning_task=args.task,
            id2label={str(i): label for i, label in enumerate(self.label_lst)},
            label2id={label: i for i, label in enumerate(self.label_lst)},
        )
        self.model = RBERT.from_pretrained(
            args.model_name_or_path, config=self.config, args=args
        )

        # GPU or CPU
        self.device = (
            "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"
        )
        self.model.to(self.device)
Example #10
    def __init__(self,
                 args,
                 train_dataset=None,
                 dev_dataset=None,
                 test_dataset=None):
        self.args = args
        self.train_dataset = train_dataset
        self.dev_dataset = dev_dataset
        self.test_dataset = test_dataset

        self.label_lst = get_label(args)
        self.num_labels = len(self.label_lst)

        self.config = BertConfig.from_pretrained(
            args.model_name_or_path,
            num_labels=self.num_labels,
            finetuning_task=args.task,
            id2label={str(i): label
                      for i, label in enumerate(self.label_lst)},
            label2id={label: i
                      for i, label in enumerate(self.label_lst)},
        )
        self.model = RBERT.from_pretrained(args.model_name_or_path,
                                           config=self.config,
                                           args=args)

        # GPU or CPU
        self.device = "cuda" if torch.cuda.is_available(
        ) and not args.no_cuda else "cpu"
        self.model.to(self.device)

        for name, parameters in self.model.named_parameters():
            print(name, ':', parameters.size())
Example #11
    def __init__(self,
                 args,
                 train_dataset=None,
                 dev_dataset=None,
                 test_dataset=None):
        self.args = args
        self.train_dataset = train_dataset
        self.dev_dataset = dev_dataset
        self.test_dataset = test_dataset

        self.label_lst = get_label(args)
        self.num_labels = len(self.label_lst)

        self.config_class, self.model_class, _ = MODEL_CLASSES[args.model_type]

        self.bert_config = self.config_class.from_pretrained(
            args.model_name_or_path,
            num_labels=self.num_labels,
            finetuning_task=args.task)
        self.model = self.model_class(self.bert_config, args)

        # GPU or CPU
        self.device = "cuda" if torch.cuda.is_available(
        ) and not args.no_cuda else "cpu"
        self.model.to(self.device)
Example #12
    def __init__(self,
                 args,
                 train_dataset=None,
                 dev_dataset=None,
                 test_dataset=None):
        '''
        Initialize the trainer.
        :param args, train_dataset, dev_dataset, test_dataset
        '''
        self.args = args
        self.train_dataset = train_dataset
        self.dev_dataset = dev_dataset
        self.test_dataset = test_dataset
        self.label_lst = get_label(args)
        self.num_labels = len(self.label_lst)
        self.config_class, self.model_class, _ = MODEL_CLASSES[args.model_type]
        self.bert_config = self.config_class.from_pretrained(
            args.model_name_or_path,
            num_labels=self.num_labels,
            finetuning_task=args.task)
        self.model = self.model_class(self.bert_config, args)

        self.graph = load_graph(args.graph_file)
        self.edge_feature = load_edge_feature(args.edge_feature_file)
        self.entity_feature = load_entity_feature(args.entity_feature_file)
        # self.entity2id = load_entity2id(args.entity2id_file)

        # GPU or CPU
        self.device = "cuda" if torch.cuda.is_available(
        ) and not args.no_cuda else "cpu"
        self.model.to(self.device)
Example #13
def search():
    t = request.args.get('type', 'all')
    s = request.args.get('term', '').lower()
    c = request.args.get('custom', '') == 'true'
    #l = request.args.get('label', '%(uri)s')

    if s == '':
        return "{}"

    ontologies = ['']
    ontologies.extend(os.listdir(ONTOLOGY_DIR))
    results = []
    if t == 'all' or t == 'subjects':
        for subj in maingraph.subjects():
            subj = str(subj)
            try:
                subjlabel = str(get_label(subj))
            except:
                subjlabel = subj
            try:
                subjqname = str(get_qname(subj))
            except:
                subjqname = subj
            if s in subj.lower() or s in subjlabel.lower() or s in subjqname.lower():
                item = {'uri':subj, 'qname': subjqname, 'label': subjlabel}
                if item not in results:
                    results.append(item)
    if t == 'predicates':
        for pred,_,_ in maingraph.triples((None, RDF.type, RDF.Property)):
            pred = str(pred)
            try:
                predlabel = str(get_label(pred))
            except:
                predlabel = pred
            try:
                predqname = str(get_qname(pred))
            except:
                predqname = pred
            if s in pred.lower() or s in predlabel.lower() or s in predqname.lower():
                item = {'uri':pred, 'qname': predqname, 'label': predlabel}
                if item not in results:
                    results.append(item)
    if c:
        results.append({'uri':s})
    return json.dumps(results)
Example #14
def get_data(input_file):
    print("###############get data#################")
    processed_data = np.load(input_file)
    validation_imgs = processed_data[2]
    validation_labels = processed_data[3]
    print("###############finish get data!#################")
    num_f = 0
    labels, num_f = utils.get_label(validation_labels, train.THRESHOLD, num_f)
    return validation_imgs, labels, num_f
Example #15
def _draw_match(fname, hists):
    for pair in hists:
        myc = None
        for obj in pair:
            if fnmatchcase(obj.GetName(), options.dump):
                xaxis, yaxis = obj.GetXaxis(), obj.GetYaxis()
                xaxis.SetTitle(get_label(xaxis.GetTitle()))
                yaxis.SetTitle(get_label(yaxis.GetTitle()))
                yaxis.SetTitleOffset(1.25)
                if not myc:
                    myc = ROOT.TCanvas('myc', '', 800, 500)
                    myc.cd()
                    obj.Draw()
                else:
                    obj.Draw('same')
        if myc:
            myc.Print(fname)
        del myc
Example #16
def prepare_nquads_for_template(nquads):
    changes = {}
    for q in nquads:
        ctx = changes.get(q['context'], {'prefix': get_prefix_from_uri(q['context'])})
        sub = ctx.get(q['subject'], {'qname': get_qname(q['subject']), 
                                     'label': get_label(q['subject'])})
        pred = sub.get(q['predicate'], {'qname': get_qname(q['predicate']), 
                                        'label': get_label(q['predicate']),
                                        'values': []})
        vals = pred.get('values')
        v = rdfstring2dict(q['object'])
        v['class'] = q['type']
        v['id'] = q['id']
        vals.append(v)
        sub[q['predicate']] = pred
        ctx[q['subject']] = sub
        changes[q['context']] = ctx
    return changes
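A hypothetical input record for prepare_nquads_for_template, showing the keys the function consumes (the URIs and values are made up):

nquads = [{
    'context': 'http://example.org/graph',
    'subject': 'http://example.org/Thing',
    'predicate': 'http://www.w3.org/2000/01/rdf-schema#label',
    'object': '"Thing"',
    'type': 'addition',
    'id': 1,
}]
changes = prepare_nquads_for_template(nquads)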
Example #17
    def on_enum_parse(self, enum):
        """On enum parse event handler.

        """
        obj = collections.OrderedDict()
        obj['label'] = get_label(enum.name)
        obj['description'] = enum.description
        obj['isOpen'] = enum.is_open
        obj['choices'] = []

        self._maps[enum] = obj
Example #18
    def on_enum_parse(self, enum):
        """On enum parse event handler.

        """
        obj = collections.OrderedDict()
        obj['label'] = get_label(enum.name)
        obj['description'] = enum.description
        obj['isOpen'] = enum.is_open
        obj['choices'] = []

        self._maps[enum] = obj
Example #19
def kernel_svm(**args):
    global result
    print(args)
    dataGetter = massageData.massageData(folder=FLAGS.data_folder, samples=2000)  # train = 0.5, test = 0.2
    X_train, Y_train = dataGetter.getTrain()
    X_dev, Y_dev = dataGetter.getDev()
    
    scaling = MinMaxScaler(feature_range=(-1,1)).fit(X_train)
    X_train = scaling.transform(X_train)
    X_dev = scaling.transform(X_dev)

    print("Starting " + args['kernel'] + " SVM training ...")
    start_time_secs = time.time()
    clf = svm.SVC(**args)
    clf.fit(X_train, Y_train)
    end_time_secs = time.time()
    print("Trained")

    training_duration_secs = end_time_secs - start_time_secs
    
    disp = [0, 0]
    class_names = utils.get_label(Y_dev)
    file_name = "cm_svm_50_" + args['kernel']
    f = open('imgs/cm_imgs/' + file_name + '.txt', 'w')
    titles_options = [(file_name + "\nNormalized confusion matrix", 'true'),
                      (file_name + "\nConfusion matrix, without normalization", None)]
    for i, (title, normalize) in enumerate(titles_options):
        disp[i] = plot_confusion_matrix(clf, X_dev, Y_dev,
                                     display_labels=class_names,
                                     cmap=plt.cm.Blues,
                                     xticks_rotation='vertical',
                                     include_values=False,
                                     normalize=normalize)
        disp[i].ax_.set_title(title)

        print(title,file=f)
        print(disp[i].confusion_matrix,file=f)
        
    # Y_dev_prediction = clf.predict(X_dev)
    # Mean of the normalized confusion matrix diagonal, i.e. balanced accuracy
    # (mean per-class recall), not plain accuracy.
    accuracy = np.average(np.diagonal(disp[0].confusion_matrix))
    # accuracy = clf.score(X_dev, Y_dev)
    experiment_result_string = "-------------------\n"
    # experiment_result_string += "\nPrediction: {}".format(Y_dev_prediction)
    # experiment_result_string += "\nActual Label: {}".format(Y_dev)
    experiment_result_string += "\nAccuracy: {}".format(accuracy)
    experiment_result_string += "\nTraining time(secs): {}".format(training_duration_secs)
    print(experiment_result_string, file=f)
    f.close()
    result += experiment_result_string + '\n'

    confusion = disp[1].confusion_matrix
    #print ("Confusion matrix: ", confusion)
    pickle.dump(class_names, open("class_names_svm_l", 'wb'))
    pickle.dump(confusion, open("confusion_matrix_nclass_svm_l", 'wb'))
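Judging by its use as display_labels here (and as target_names in Example #32 below), utils.get_label appears to return the distinct class names found in the dev labels. A minimal sketch under that assumption:

def get_label(labels):
    # Assumed behaviour: distinct class names, in a stable sorted order.
    return sorted(set(labels))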
Example #20
	def read(self, uri, progress = None, recursive = None):
		pos = 0; files = []; dirs = [];
		""" La primera vez que se llama """
		try:
			path = gnomevfs.get_local_path_from_uri(uri)
		except:
			path = uri

		if not recursive:
			self.progress_bar = progress
			self.clear_progress_bar()
			self._cancel_import_disk = False
			self.count_files = 1;	self.position = 0;	self.root = path

			""" Creamos un nuevo disco """
			self.disk = fstypes.Disk(utils.get_label(path), path, None, None, 0, utils.get_drive_icon(str(path)))

			""" Contamos los archivos primero, para luego poder poner el progreso """
			self.count_files = self.dir_count(uri)
# 			self.count_files = sum((len(f) for _, _, f in os.walk(path)))	# doesn't count correctly
			gobject.timeout_add(500, self.update_progress_bar)


		for info in gnomevfs.DirectoryHandle(uri):
			if(self._cancel_import_disk):
				return None

			""" Insertamos el archivo, si no es un archivo oculto """
			if info.name == '.' or info.name == '..' or info.name[0] == ".":
				continue

			self.position += 1

			pathfile = uri + "/" +  urllib.pathname2url(info.name)
			self.path = pathfile
			file = fstypes.File(pathfile, self.disk)
			if info.type and info.type == 2:
				file.add_files(self.read(pathfile, None, True))	# Directory

			files.append(file)

			if not recursive:
				self.disk.add(file)


		if(self._cancel_import_disk):
			return None

		if not recursive:
			self.progress_bar = None
			return self.disk
		else:
			return files
Example #21
    def read(self, uri, progress=None, recursive=None):
        pos = 0
        files = []
        dirs = []
        """ La primera vez que se llama """
        try:
            path = gnomevfs.get_local_path_from_uri(uri)
        except:
            path = uri

        if not recursive:
            self.progress_bar = progress
            self.clear_progress_bar()
            self._cancel_import_disk = False
            self.count_files = 1
            self.position = 0
            self.root = path
            """ Creamos un nuevo disco """
            self.disk = fstypes.Disk(utils.get_label(path), path, None, None,
                                     0, utils.get_drive_icon(str(path)))
            """ Contamos los archivos primero, para luego poder poner el progreso """
            self.count_files = self.dir_count(uri)
            #             self.count_files = sum((len(f) for _, _, f in os.walk(path)))    # doesn't count correctly
            gobject.timeout_add(500, self.update_progress_bar)

        for info in gnomevfs.DirectoryHandle(uri):
            if (self._cancel_import_disk):
                return None
            """ Insertamos el archivo, si no es un archivo oculto """
            if info.name == '.' or info.name == '..' or info.name[0] == ".":
                continue

            self.position += 1

            pathfile = uri + "/" + urllib.pathname2url(info.name)
            self.path = pathfile
            file = fstypes.File(pathfile, self.disk)
            if info.type and info.type == 2:
                file.add_files(self.read(pathfile, None, True))  # Directory

            files.append(file)

            if not recursive:
                self.disk.add(file)

        if (self._cancel_import_disk):
            return None

        if not recursive:
            self.progress_bar = None
            return self.disk
        else:
            return files
Example #22
    def on_property_set_parse(self, prop_set):
        """On property set parse event handler.

        """
        obj = collections.OrderedDict()
        obj['label'] = get_label(prop_set.name)
        obj['description'] = prop_set.description
        obj['id'] = prop_set.id
        obj['properties'] = []
        obj['propertySets'] = []

        self._maps[prop_set] = obj
Example #23
    def on_topic_parse(self, topic):
        """On topic parse event handler.

        """
        obj = collections.OrderedDict()
        obj['id'] = topic.id
        obj['label'] = get_label(topic.name)
        obj['description'] = topic.description
        obj['contact'] = topic.contact
        obj['properties'] = []

        self._maps[topic] = obj
Example #24
    def on_property_set_parse(self, prop_set):
        """On property set parse event handler.

        """
        obj = collections.OrderedDict()
        obj['label'] = get_label(prop_set.name)
        obj['description'] = prop_set.description
        obj['id'] = prop_set.id
        obj['properties'] = []
        obj['propertySets'] = []

        self._maps[prop_set] = obj
Example #25
    def on_topic_parse(self, topic):
        """On topic parse event handler.

        """
        obj = collections.OrderedDict()
        obj['id'] = topic.id
        obj['label'] = get_label(topic.name)
        obj['description'] = topic.description
        obj['contact'] = topic.contact
        obj['properties'] = []

        self._maps[topic] = obj
Example #26
    def on_property_parse(self, prop):
        """On property parse event handler.

        """
        obj = collections.OrderedDict()
        obj['label'] = get_label(prop.name)
        obj['description'] = prop.description
        obj['id'] = prop.id
        obj['cardinality'] = prop.cardinality
        obj['type'] = "enum" if prop.enum else prop.typeof
        obj['is_cim_property'] = prop.was_injected

        self._maps[prop] = obj
Example #27
    def convert_index_to_data(self, list_of_files):
        """
        Convert paths to data in the form (feature paths, labels).
        """
        random.shuffle(list_of_files)
        paths = [self.data_dir + file for file in list_of_files]
        labels = [utils.get_label(file) for file in list_of_files]
        tokens = utils.convert_labels_to_tokens(
            labels)  # Possible to set num_class
        if self.num_class == 2:
            return paths, np.array(tokens)
        y_categorical = tf.keras.utils.to_categorical(tokens, self.num_class)
        return paths, y_categorical
Example #28
    def on_property_parse(self, prop):
        """On property parse event handler.

        """
        obj = collections.OrderedDict()
        obj['label'] = get_label(prop.name)
        obj['description'] = prop.description
        obj['id'] = prop.id
        obj['cardinality'] = prop.cardinality
        obj['type'] = "enum" if prop.enum else prop.typeof
        obj['is_cim_property'] = prop.was_injected

        self._maps[prop] = obj
Example #29
    def _map_topic(self, topic):
        """Maps a specialization topic to a dictionary.

        """
        obj = collections.OrderedDict()
        obj['label'] = get_label(topic.name)
        obj['description'] = topic.description
        obj['id'] = topic.id
        obj['contact'] = topic.contact
        obj['properties'] = []
        obj['propertySets'] = []
        self._maps[topic] = obj

        return obj
Example #30
    def _map_topic(self, topic):
        """Maps a specialization topic to a dictionary.

        """
        obj = collections.OrderedDict()
        obj['label'] = get_label(topic.name)
        obj['description'] = topic.description
        obj['id'] = topic.id
        obj['contact'] = topic.contact
        obj['properties'] = []
        obj['propertySets'] = []
        self._maps[topic] = obj

        return obj
Example #31
    def scribe(self, line, program_ast):
        """The internal method called for basic printing of
        identifier, type, and value
        """
        variable_id, variable_type = utils.get_id_and_type(line, program_ast)
        label = utils.get_label(line, program_ast)
        output = (variable_id + " is the ' + " + variable_type +
                  " + ' ' + str(" + variable_id + ")")
        if len(self.filtered_labels) > 0:
            if not label or label not in self.filtered_labels:
                return ""
        if label:
            output += (" + ' (" + label + ")'")
        return output
Example #32
def run():
    print("folder = ", FLAGS.data_folder)

    dataGetter = massageData.massageData(folder=FLAGS.data_folder)

    X_train, Y_train = dataGetter.getTrain()  #TODO: feature extractions
    X_dev, Y_dev = dataGetter.getDev()
    start_time_secs = time.time()
    print("Starting Logistic Regression training ...")
    clf = LogisticRegression(random_state=0,
                             solver='lbfgs',
                             multi_class='multinomial',
                             verbose=10,
                             n_jobs=-1,
                             max_iter=FLAGS.max_iter).fit(X_train, Y_train)
    print("Training done.")
    end_time_secs = time.time()
    training_duration_secs = end_time_secs - start_time_secs
    Y_dev_prediction = clf.predict(X_dev)

    accuracy = clf.score(X_dev, Y_dev)

    experiment_result_string = "-------------------\n"
    experiment_result_string += "\nPrediction: {}".format(Y_dev_prediction)
    experiment_result_string += "\nActual Label: {}".format(Y_dev)
    experiment_result_string += "\nAcurracy: {}".format(accuracy)
    experiment_result_string += "\nTraining time(secs): {}".format(
        training_duration_secs)
    experiment_result_string += "\nMax training iterations: {}".format(
        FLAGS.max_iter)
    experiment_result_string += "\nTraining time / Max training iterations: {}".format(
        1.0 * training_duration_secs / FLAGS.max_iter)

    class_names = utils.get_label(Y_dev)
    classification_report_string = classification_report(
        Y_dev, Y_dev_prediction, target_names=class_names)
    experiment_result_string += "\nClassification report: {}".format(
        classification_report_string)

    print(experiment_result_string)

    # Save report to file
    utils.write_contents_to_file(get_experiment_report_filename(),
                                 experiment_result_string)
    confusion = confusion_matrix(Y_dev, Y_dev_prediction, labels=class_names)

    print("Confusion matrix: ", confusion)
    pickle.dump(class_names, open(get_class_filename(), 'wb'))
    pickle.dump(confusion, open(get_confusion_matrix_filename(), 'wb'))
Example #33
    def __init__(self, args, train_dataset=None, dev_dataset=None, test_dataset=None):
        self.args = args
        self.train_dataset = train_dataset
        self.dev_dataset = dev_dataset
        self.test_dataset = test_dataset

        self.label_lst = get_label(args)
        self.num_labels = len(self.label_lst)

        self.bert_config = BertConfig.from_pretrained(args.pretrained_model_name, num_labels=self.num_labels, finetuning_task=args.task)
        self.model = RBERT(self.bert_config, args)

        # GPU or CPU
        self.device = "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"
        self.model.to(self.device)
Example #34
def poly_c_v_f1(results, sp=True):
	# poly: C vs f1
	if sp: plt.subplot(rows, cols, get_gn())
	k_poly = [x for x in results if str(x["param"]["ID"]).startswith("PvC") and x["param"]["degree"] == 3]
	k_poly = sorted(k_poly, key=lambda i: i["param"]['C'])
	log_C = [np.log10(x["param"]["C"]) for x in k_poly]

	for ci, c in enumerate(get_classes()):
		f1s = [x["test"]["f1_pc"][ci] for x in k_poly]
		plot(log_C, f1s, ci, label="{}".format(get_label(ci)))

	plt.title('Polynomial Kernel: F1 Score Against C Value')
	plt.ylabel('F1')
	plt.xlabel('log(C)')
	plt.legend(loc="lower right")
Example #35
    def on_property_parse(self, prop):
        """On property parse event handler.

        """
        obj = collections.OrderedDict()
        obj['id'] = prop.id
        obj['label'] = " > ".join([get_label(i) for i in prop.id.split('.')[3:]])
        obj['description'] = prop.description
        obj['cardinality'] = prop.cardinality
        obj['type'] = "enum" if prop.enum else prop.typeof
        obj['is_cim_property'] = prop.was_injected
        if prop.enum:
            obj['enum'] = self._get_enum(prop.enum)

        properties = self._maps[prop.root_topic]['properties']
        properties.append(obj)
Example #36
def get_data(input_file):
	print("###############get data#################")
	processed_data = np.load(input_file)
	training_imgs = processed_data[0]
	training_labels = processed_data[1]
	validation_imgs = processed_data[2]
	validation_labels = processed_data[3]
	testing_imgs = processed_data[4]
	testing_labels = processed_data[5]
	print("%d train, %d validation, %d test" % (len(training_imgs),
	                                            len(validation_imgs),
	                                            len(testing_imgs)))
	print("###############finish get data!#################")
	num_f = 0
	labels, num_f = utils.get_label(training_labels, THRESHOLD, num_f)
	return training_imgs, labels, num_f
Example #37
    def _get_enum(self, enum):
        """Returns enumeration encoded as a dictionary.

        """
        obj = collections.OrderedDict()
        obj['id'] = enum.description
        obj['label'] = get_label(enum.name)
        obj['description'] = enum.description
        obj['is_open'] = enum.is_open
        obj['choices'] = []
        for choice in enum:
            obj['choices'].append({
                'description': choice.description,
                'value': choice.value
            })

        return obj
Example #38
    def _get_enum(self, enum):
        """Returns enumeration encoded as a dictionary.

        """
        obj = collections.OrderedDict()
        obj['id'] = enum.description
        obj['label'] = get_label(enum.name)
        obj['description'] = enum.description
        obj['is_open'] = enum.is_open
        obj['choices'] = []
        for choice in enum:
            obj['choices'].append({
                'description': choice.description,
                'value': choice.value
                })

        return obj
Example #39
    def on_property_parse(self, prop):
        """On property parse event handler.

        """
        obj = collections.OrderedDict()
        obj['id'] = prop.id
        obj['label'] = " > ".join(
            [get_label(i) for i in prop.id.split('.')[3:]])
        obj['description'] = prop.description
        obj['cardinality'] = prop.cardinality
        obj['type'] = "enum" if prop.enum else prop.typeof
        obj['is_cim_property'] = prop.was_injected
        if prop.enum:
            obj['enum'] = self._get_enum(prop.enum)

        properties = self._maps[prop.root_topic]['properties']
        properties.append(obj)
Example #40
def predict(pred_config):
    # load model and args
    args = get_args(pred_config)
    device = get_device(pred_config)
    model = load_model(pred_config, args, device)
    logger.info(args)

    # Convert input file to TensorDataset
    dataset = convert_input_file_to_tensor_dataset(pred_config, args)

    # Predict
    sampler = SequentialSampler(dataset)
    data_loader = DataLoader(dataset,
                             sampler=sampler,
                             batch_size=pred_config.batch_size)

    preds = None

    for batch in tqdm(data_loader, desc="Predicting"):
        batch = tuple(t.to(device) for t in batch)
        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
                "labels": None,
                "e1_mask": batch[3],
                "e2_mask": batch[4]
            }
            outputs = model(**inputs)
            logits = outputs[0]

            if preds is None:
                preds = logits.detach().cpu().numpy()
            else:
                preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)

    preds = np.argmax(preds, axis=1)

    # Write to output file
    label_lst = get_label(args)
    with open(pred_config.output_file, "w", encoding="utf-8") as f:
        for pred in preds:
            f.write("{}\n".format(label_lst[pred]))

    logger.info("Prediction Done!")
Example #41
    def scribe(self, line, program_ast):
        """The internal method called for basic printing of
        identifier, type, and value
        """
        variable_id, variable_type = utils.get_id_and_type(line, program_ast)
        label = utils.get_label(line, program_ast)
        output = (variable_id +
                  " is the ' + " +
                  variable_type +
                  " + ' ' + str(" +
                  variable_id +
                  ")")
        if len(self.filtered_labels) > 0:
            if not label or label not in self.filtered_labels:
                return ""
        if label:
            output += (" + ' (" + label + ")'")
        return output
Example #42
    def set_id(self, owner, identifier=None):
        """Appends an identifier to managed collection.

        """
        try:
            identifier = identifier or owner.id
        except AttributeError:
            pass
        finally:
            if not identifier:
                print "Invalid identifier: ", type(owner), owner.name, identifier
                return

        # Derive label.
        label = " > ".join([get_label(i) for i in identifier.split(".")[1:]])

        # Append to managed collection.
        self._ids.append((identifier, label, owner.type_key, identifier))
Example #43
def predict():
    project_id = 'stable-hybrid-249623'
    model_id = 'ICN4772510494057073039'
    message = request.get_json(force=True)
    encoded = message['image']
    decoded = base64.b64decode(encoded)
    prediction = get_prediction(decoded, project_id, model_id)
    pred_label = prediction.payload[0].display_name
    lbl = get_label(pred_label)
    score = prediction.payload[0].classification.score

    response = {
        'prediction': {
            'label': lbl,
            'score': score
        }
    }
    return jsonify(response)
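A hypothetical client for the Flask endpoint above (the /predict route, port, and sample file name are assumptions):

import base64
import requests

with open("sample.jpg", "rb") as f:
    payload = {"image": base64.b64encode(f.read()).decode("utf-8")}
resp = requests.post("http://localhost:5000/predict", json=payload)
print(resp.json())  # {'prediction': {'label': ..., 'score': ...}}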
Example #44
def ontology(ontology_):
    ont = query_db('select * from ontologies where prefix = ?', [ontology_], one=True)
    # get the graph for the relevant ontology
    graph = get_graph(ontology_)
    if graph is None:
        # TODO: 404
        return redirect(url_for('index'))
    resource_ = request.args.get('resource_', None)
    errors = []
    # for recreating a page if there are errors
    changes = {'addition':[], 'removal':[]}

    # if ?resource_=xxx is present and xxx is not None then go into "resource viewing mode"
    if resource_ is not None:
        properties = {}
        # get the label of the resource
        res_name = get_label(resource_)

        # force the resource to a URIRef
        uri2res = lambda uri: uri if isinstance(uri, rdflib.URIRef) else rdflib.URIRef(uri)
        r = uri2res(resource_)

        # build list of (type,predicate,object)s, using an empty string for the type of original triples
        tpos = [('', p, o) for p, o in graph.predicate_objects(subject=r)]
        # include additions/removals from uncommitted proposal
        if session.get('logged_in', False):
            tpos.extend([(s['type'], uri2res(s['predicate']), parse_string_rdfobject(s['object'])) for s in get_uncommited_quads(g.userid, resource_)])

        # TODO: these 2 lines may be redundant now as most form validation is done in the UI, remove them completely when this is confirmed
        # include additions from changes (only present if errors in form submission)
        tpos.extend([('addition', uri2res(stmt['pred']), parse_string_rdfobject(stmt['val'])) for stmt in changes['addition']])
        # include removals from changes (only present if errors in form submission)
        tpos.extend([('removal', uri2res(stmt['pred']), parse_string_rdfobject(stmt['val'])) for stmt in changes['removal']])
        # TODO: add "modified" type (maybe)
        for t,p,o in tpos:
            # get existing values for this predicate
            item = properties.get(p,
                                  {'value': [],
                                   'qname': get_qname(p), 
                                   'label': get_label(p)})
            # convert rdf object to a dict
            v = rdfobject2dict(o)
            # add 'deleted' or 'added' 'class' value (used by templates)
            if t == 'removal':
                try:
                    # if it's a removal, it should already exist in the values list
                    # find it and add the class to the existing entry
                    idx = item['value'].index(v)
                    v['class'] = 'deleted'
                    item['value'][idx] = v
                except ValueError:
                    pass # caused when .index fails
            else:
                if t == 'addition':
                    v['class'] = 'added'
                item['value'].append(v)

            # update the changes
            properties[p] = item

            # TODO: this may be redundant with the get_label call above
            # simply sets the resource name variable to the value of the RDFS.label predicate
            if res_name == '' and p == RDFS.label:
                res_name = v['value']

        # if there were no predicates, consider this a "new resource" and present the "create resource" view
        # with the URI already filled in
        # TODO: a lot of this is duplicate code from the create_resource function
        is_new = False
        if len(properties) == 0:
            # create new resource
            properties = {}
            properties[RDF.type] = {'value': [{'type':"URI", 'value':"", 'class':'added'}],
                                    'qname': 'rdf:type',
                                    'label': 'type'}
            properties[RDFS.label] = {'value': [{'type':"Literal", 'value':"", 'class':'added'}],
                                      'qname': 'rdfs:label', 
                                      'label': 'label'}
            res_name = 'Create New Resource'
            is_new = True

        # TODO: proposal/history stuff
        proposals = []
        history = []
        return render_template('resource.html',
                               ontology_=ont,
                               uri=resource_,
                               name=res_name,
                               properties_=properties,
                               proposals=proposals,
                               history=history,
                               is_new=is_new,
                               auto_save=False)

    # if no resource is requested, go to the ontology view, retrieving a list of all the subjects in the ontology
    resources = [{'uri':s[0], 'qname':get_qname(s[0]), 'label':get_label(s[0])} for s in graph.triples((None, RDF.type, None))]
    proposals = None #[s for s,_ in groupby(pgraph.subjects()) if isinstance(s, rdflib.URIRef)] # TODO and not s.startswith(changeset_base_uri)
    return render_template('ontology.html', ontology_=ont, resources=resources, proposals=proposals)
Example #45
	basedir, lxpath = probe_wsl(True)

	if basedir:

		names = glob.glob(os.path.join(basedir, 'rootfs*'))
		not_trusty = True
		has_trusty = False

		if len(names) > 0:

			print('\nThe following distributions are currently installed:\n')

			for name in names:
				active = os.path.basename(name) == 'rootfs'
				name   = get_label(name).split('_', 1)

				if len(name) != 2:
					continue

				if name[0] == 'ubuntu' and name[1] == 'trusty':
					has_trusty = True

					if active:
						not_trusty = False

				print('  - %s%s%s:%s%s%s%s' % (Fore.YELLOW, name[0], Fore.RESET, Fore.YELLOW, name[1], Fore.RESET, ('%s*%s' % (Fore.GREEN, Fore.RESET) if active else '')))

		if not_trusty:
			print()
Example #46
	def read(self, uri, progress = None, recursive = None):
		pos = 0; files = []; dirs = [];
		finfo = fileinfo.Info()
		""" La primera vez que se llama """
		try:
			path = gnomevfs.get_local_path_from_uri(uri)
		except:
			path = uri

		if not recursive:
			self.progress_bar = progress
			self.clear_progress_bar()
			self._cancel_import_disk = False
			self.count_files = 1;	self.position = 0;	self.root = path

			""" Creamos un nuevo disco """
			utils.get_drive_icon(str(path)).save('/tmp/gnomecatalog_icon_disk.png', 'png')
			data = open('/tmp/gnomecatalog_icon_disk.png', 'rb').read()
			self.disk = Disks({'name' : utils.get_label(path), 'volname' : utils.get_label(path), 'root' : path, 'icon' : buffer(data) })
			self.disk.save()

			""" Contamos los archivos primero, para luego poder poner el progreso """
			self.count_files = self.dir_count(uri)
			self.path = path
			self.timeout_handler_id = gobject.timeout_add(150, self.update_progress_bar)

		for info in gnomevfs.DirectoryHandle(uri):
			if(self._cancel_import_disk):
				return None

			""" Insertamos el archivo, si no es un archivo oculto """
			if info.name == '.' or info.name == '..' or info.name[0] == ".":
				continue

			self.position += 1

			pathfile = uri + "/" +  urllib.pathname2url(info.name)
			self.path = pathfile
			#file = fstypes.File(pathfile, self.disk)

			path, name, size, type, mime, meta, date = finfo.get(pathfile)
			file = Files({'name' : name, 'size' : str(size), 'type' : type, 'mime' : str(mime),  'date' : str(date), 'idparent' : '0' })
			file.save()

			while gtk.events_pending():
				gtk.main_iteration()

			if not recursive:
				self.disk.add(file)

			if info.type and info.type == 2:
				file.add(self.read(pathfile, None, True))	# Directory

			files.append(file)

		if(self._cancel_import_disk):
			return None

		if not recursive:
			self.progress_bar = None
			self.disk.commit()
			return self.disk
		else:
			return files
Example #47
        for i, idx in enumerate(zip(*triu)):
            hist = [
                sig_hists[transform+'_corr'][i*2],
                bkg_hists[transform+'_corr'][i*2]
            ]

            if idx[0] == idx[1]:    # set bin label using diagonal
                for j in xrange(len(hist)):    # avoid clobbering the outer loop's `i`
                    name = hist[j].GetName()
                    if j == 0:
                        name = name[5:name.find('_Signal')]
                    else:
                        name = name[5:name.find('_Background')]
                    varnames = name.split('_vs_', 1)
                    corrn[j].GetXaxis().SetBinLabel(idx[0]+1, get_label(varnames[1]))
                    corrn[j].GetYaxis().SetBinLabel(idx[1]+1, get_label(varnames[0]))

            for j in xrange(len(corrn)):
                corrn[j].SetBinContent(idx[0]+1, idx[1]+1, 100*hist[j].GetCorrelationFactor())

        matrices[transform] = corrn

    # colour palette
    # red = np.array([0.0, 0.0, 1.0])
    # green = np.array([0.0, 1.0, 0.0])
    # blue = np.array([1.0, 0.0, 0.0])
    # stops = np.array([0.00, 0.5, 1.0])
    # ROOT.TColor.CreateGradientColorTable(len(stops), stops, red, green, blue, 50)

    # correlation after various transforms