Example #1
def parse(path, crawl = False):
    if crawl == True:
        raise StandardError()
    pos_filename = os.path.join(path, "pos.lst")
    neg_filename = os.path.join(path, "neg.lst")
    pos_dir = os.path.join(path, "pos")
    neg_dir = os.path.join(path, "neg")
    if not os.path.isfile(pos_filename):
        print "%s is not a file."%(pos_filename,)
        return None
    if not os.path.isfile(neg_filename):
        print "%s is not a file."%(neg_filename,)
        return None
    if not os.path.isdir(pos_dir):
        print "%s is not a directory."%(pos_dir,)
        return None
    if not os.path.isdir(neg_dir):
        print "%s is not a directory."%(neg_dir,)
        return None

    ret = DataSet()
    pos = open(pos_filename, "r")
    pos_names = [line[line.rfind("/")+1:] for line in pos.read().split()]
    pos.close()
    for name in pos_names:
        filename = os.path.join(pos_dir, name)
        ret.add_obj(name, WholeImage(name))

    neg = open(neg_filename, "r")
    neg_names = [line[line.rfind("/")+1:] for line in neg.read().split()]
    neg.close()
    for name in neg_names:
        ret.add_empty_image(name)
        
    return ret
Example #2
	def __init__(self, data, interval_type=ClassIntervalType.ROOT):
		
		f = []
		for d in data:
			f.append(float(d))      
		data = f
		
		DataSet.__init__(self, data)
		self.interval_type = interval_type
		
		if self.interval_type != ClassIntervalType.THREESIGMA:    
			self.class_interval = self.calc_class_interval(interval_type, self.min, self.max, self.n)
			self.construct_bins(self.min, self.max, self.class_interval, False)
		else:
			sigma_span = 6
			min = self.mean - self.stdev * (sigma_span / 2)
			max = self.mean + self.stdev * (sigma_span / 2)
			self.class_interval = self.calc_class_interval(ClassIntervalType.THREESIGMA, min, max, sigma_span)
			self.construct_bins(min, max, self.class_interval, True)
			
		self.fill_bins()
		self.sort_bins()

		total = 0
		for bin in self.bins:
			total = total + bin.count()
		self.bin_contents_count = total
Example #3
def eval_classifier(classifierToUse, featuresToUse, testOrTrain="train"):

    print("Chosen feature: {0}".format(featuresToUse) )
    print("Chosen classifier: {0}".format(classifierToUse))

    fe = FeatureExtractor(featuresToUse)
    dataset = DataSet(fe)
    classifier = Classifier()
    evaluate = Evaluation()

    print "test or Train %s" % testOrTrain
    for feature_class, files in getTestData(testOrTrain).items():
        print "%s" % testOrTrain
        for f in files:
            dataset.addFile(feature_class, f)

    print "Dataset initialized"
    print_class_stats(dataset.classes)

    print "Test set created."
    a_train, a_test, c_train, c_test = train_test_split(dataset.featureVector, dataset.classes, test_size=0.9)
    
    c_pred = classifier.classification(a_train,a_test,c_train,c_test,classifierToUse)
    
    evaluate.evaluate(c_pred,c_test,featuresToUse,classifierToUse)
Example #4
 def __vectorize(self, data):
     """\
     Train vectorization and subsequently vectorize. Accepts a DataSet
     or a list of dictionaries to be vectorized.
     """
     # no vectorization performed, only converted to matrix
     if self.vectorizer is None:
         if not isinstance(data, DataSet):
             data_set = DataSet()
             data_set.load_from_dict(data)
             data = data_set
         data.match_headers(self.data_headers, add_values=True)
         # TODO pre-filtering here?
         return data.as_bunch(target=self.class_attr,
                              select_attrib=self.select_attr).data
     # vectorization needed: converted to dictionary
     # and passed to the vectorizer
     if isinstance(data, DataSet):
         data = data.as_dict(select_attrib=self.select_attr,
                             mask_attrib=self.class_attr)
     else:
         data = [{key: val for key, val in inst.items()
                  if key != self.class_attr and key in self.select_attr}
                 for inst in data]
     # pre-filter attributes if filter_attr is set
     if self.filter_attr:
         data = [{key: val for key, val in inst.items()
                  if self.filter_attr(key, val)} for inst in data]
     if not self.vectorizer_trained:
         self.vectorizer.fit(data)
         self.vectorizer_trained = True
     return self.vectorizer.transform(data).tocsr()
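A minimal sketch of the vectorizer branch above, assuming self.vectorizer is a scikit-learn DictVectorizer (which matches the fit/transform/tocsr calls); the feature dictionaries are made up for illustration.

from sklearn.feature_extraction import DictVectorizer

insts = [{'len': 3, 'pos': 'NN'}, {'len': 5, 'pos': 'VB'}]   # hypothetical feature dicts, class attribute already masked out
vec = DictVectorizer()
vec.fit(insts)                     # plays the role of the untrained-vectorizer branch
X = vec.transform(insts).tocsr()   # sparse CSR matrix, like the value returned by __vectorize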
Example #5
def extract_data(raw_data_file, format_data_path, n_vectors, n_components, shift=-1, n_datums=-1, test_percentage=0):
    """
    Extracts the raw data from raw_data_file and, from the formatting parameters provided, builds two preformatted data files, train and test.
    A data item (datum) is a matrix of shape (n_vectors, n_components*20), where n_vectors is the number of vectors that make up one data item and n_components is the number of components kept for each vector.

    :param file raw_data_file: 		File containing the raw data
    :param str format_data_path:	Path to the directory where the preformatted data is written
    :param int n_vectors:		Number of vectors considered to form one data item
    :param int n_components:		Number of components kept for each vector. If 1, a vector only contains the MFCCs; if 2, it contains the MFCCs and their first derivatives; if 3, the second derivatives are included as well.
    :param int shift: 			Shift/overlap: number of vectors shared between the last extracted data item and the next one. Warning! Leave at -1 to disable overlapping. Introducing an overlap gives overestimated training results (see the internship report).
    :param int n_datums: 		Number of data items to read from the raw data file before the script stops. -1 to extract data from the whole file.
    :param float test_percentage: 	Expected ratio of the number of items placed in the test (generalisation) set to the number placed in the training set (\*100 = percentage of data used for test).
    :return: 				The train and test databases (as DataSet instances)

    """
    train = DataSet(format_data_path, "train")
    test = DataSet(format_data_path, "test") 
    data = []
    datum = []
    feature_list = []
    line_count = 0
    total_line_count = 0
    for feature in raw_data_file:
        line_count += 1
        if feature[0] == ' ':
            # New data vector
            feature_list = feature.split()
            if feature_list[-1] == ']': feature_list.pop() # remove ending "]" for the last vector of the signal
            datum.append([ float(x) for x in feature_list[:(20*n_components)] ])
            if len(datum) >= n_vectors:
                # Append the datum
                data.append(datum)
                # Shift the datum
                datum = datum[shift:] if shift > 0 else []
                if len(data)%20000 == 0: print "extract data >> ", len(data), " datums extracted for", line_count, "lines read"
        else:
            # New signal
            new_str_label = feature.split('#')[0]
            if new_str_label != DataSet.str_label:
                if data:
                    # There is data to split in train/test
                    DataSet.split_train_test(data, test_percentage, train, test)
                    # Append to files
                    train.flush_buffer()
                    test.flush_buffer()
                    data = []
                    print "SPLIT : ", "train =", len(train), " - test =", len(test)
                    print "Line count for this label : ", line_count
                print "TOTAL : ", len(train)+len(test), " datums extracted for", total_line_count + line_count, "lines read"
                if n_datums > 0 and len(train) + len(test) >= n_datums: break
                # Update current label
                DataSet.update_label(new_str_label)
                print "New LABEL : ", DataSet.str_label, "int : ", DataSet.int_label
                total_line_count += line_count
                line_count = 0
            datum = []
    print "extract data >> GRAND TOTAL : ", (len(train) + len(test)), " datums extracted for", total_line_count + line_count, "lines read"
    return train, test
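A hedged usage sketch for extract_data; the file name and parameter values below are hypothetical and simply mirror the docstring (MFCCs plus first derivatives, no overlap, 10% of the items held out for test).

with open('mfcc_raw_dump.txt', 'r') as raw_data_file:      # hypothetical raw text dump of feature vectors
    train, test = extract_data(raw_data_file,
                               format_data_path='formatted/',
                               n_vectors=50,        # 50 vectors per data item
                               n_components=2,      # MFCCs + first derivatives
                               shift=-1,            # no overlap between consecutive items
                               n_datums=-1,         # read the whole file
                               test_percentage=0.1)
print('%d training datums, %d test datums' % (len(train), len(test)))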
Example #6
 def setField(self, label, arr, **kwargs):
     """Set the given array `arr` as the new array of the field specfied by
     `label`."""
     DataSet.setField(self, label, arr, **kwargs)
     # refresh dimensions, in case any of these fields were modified
     if label == 'input':
         self.indim = self.getDimension('input')
     elif label == 'target':
         self.outdim = self.getDimension('target')
Example #7
 def load_training_set(self, filename, encoding='UTF-8'):
     """\
     Load the given training data set into memory and strip it if
     configured to do so via the train_part parameter.
     """
     log_info('Loading training data set from ' + str(filename) + '...')
     train = DataSet()
     train.load_from_arff(filename, encoding)
     if self.train_part < 1:
         train = train.subset(0, int(round(self.train_part * len(train))),
                              copy=False)
     return train
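For illustration only: a hypothetical call assuming the surrounding object (here called ranker) was configured with train_part = 0.8, so only the first 80% of the loaded instances are kept; the ARFF file name is made up.

train = ranker.load_training_set('training_data.arff', encoding='UTF-8')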
Example #8
def parse(path, crawl = False):
    if crawl == True:
        raise StandardError()
    ret = DataSet()
    filenames = os.listdir(path)
    for filename in filenames:
        #TODO : check validity
        (fname, width, height, chans, bboxes) \
            = parse_file(os.path.join(path, filename))
        fname = os.path.basename(fname)
        for bbox in bboxes:
            ret.add_obj(fname, bbox, height, width)
    return ret
Example #9
def ResultsToXY(sets,x,y,foreach=[]):
    """ combines observable x and y to build a list of DataSet with y vs x
 
    this function is used to collect data from a hierarchy of DataSet objects, to prepare plots or evaluation.
    the inner-most list has to contain one DataSet with props['observable'] = x and one props['observable'] = y,
    this will be the pair x-y used in the collection.

    The parameters are:
      sets:    hierarchy of datasets where the inner-most list must contain to pair x-y
      x:       the name of the observable to be used as x-value of the collected results 
      y:       the name of the observable to be used as y-value of the collected results 
      foreach: an optional list of properties used for grouping the results. A separate DataSet object is created for each unique set of values of the specified parameers.

    The function returns a list of DataSet objects.
    """
    
    dd = depth(sets)
    if dd < 2:
        raise Exception('The input hierarchy does not provide a unique pair x-y. The input structure has to be a list of lists as minimum. pyalps.groupSets might help you.')
    
    hgroups = flatten(sets, fdepth=-1)
    
    foreach_sets = {}
    for gg in hgroups:
        xset = None
        yset = None
        for d in gg:
            if d.props['observable'] == x:
                xset = d
            if d.props['observable'] == y:
                yset = d
        if xset is None or yset is None:
            continue
        
        common_props = dict_intersect([d.props for d in gg])
        fe_par_set = tuple((common_props[m] for m in foreach))
        
        if not fe_par_set in foreach_sets:
            foreach_sets[fe_par_set] = DataSet()
            foreach_sets[fe_par_set].props = common_props
            foreach_sets[fe_par_set].props['xlabel'] = x
            foreach_sets[fe_par_set].props['ylabel'] = y
        
        if len(xset.y) == len(yset.y):
            foreach_sets[fe_par_set].x = np.concatenate((foreach_sets[fe_par_set].x, xset.y))
            foreach_sets[fe_par_set].y = np.concatenate((foreach_sets[fe_par_set].y, yset.y))
        elif len(xset.y) == 1:
            foreach_sets[fe_par_set].x = np.concatenate((foreach_sets[fe_par_set].x, np.array( [xset.y[0]]*len(yset.y) )))
            foreach_sets[fe_par_set].y = np.concatenate((foreach_sets[fe_par_set].y, yset.y))
    
    for k, res in foreach_sets.items():
        order = np.argsort(res.x, kind = 'mergesort')
        res.x = res.x[order]
        res.y = res.y[order]
        res.props['label'] = ''
        for p in foreach:
            res.props['label'] += '%s = %s ' % (p, res.props[p])
        
    return foreach_sets.values()
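A hedged sketch of how ResultsToXY is typically driven; 'sets' is assumed to be a list of lists of DataSet objects (for example prepared with pyalps.groupSets, as the error message above suggests), and the observable names 'T' and 'Magnetization' and the grouping property 'L' are placeholders.

curves = ResultsToXY(sets, 'T', 'Magnetization', foreach=['L'])
for curve in curves:
    print('%s: %d points' % (curve.props['label'], len(curve.x)))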
Example #10
    def load_test_data(self, sessions_df):
        data_df = read_from_csv(self.task_core.test_data_file, self.task_core.n_seed
                                #, max_rows=50000
                                )

        cache_file = os.path.join(self.task_core.cache_dir, 'features_test_' + str(len(data_df.index)) + '.p')
        if os.path.isfile(cache_file):
            print('Loading test features from file')
            x = DataSet.load_from_file(cache_file)
        else:
            x = ds_from_df(data_df, sessions_df, True)
            print('saving test features to file')
            DataSet.save_to_file(x, cache_file)

        return x
Example #11
 def __reduce__(self):
     # FIXME: This does actually not feel right: We have to use the DataSet
     # method here, although we inherit from sequential dataset. 
     _, _, state, _, _ = DataSet.__reduce__(self)
     creator = self.__class__
     args = self.statedim, self.actiondim
     return creator, args, state, iter([]), iter({})
Example #12
    def _trim_data(self, extension_fraction=None, max_interval=None):
        """
        Toss out data outside of the (extended) view range, as well as points closer than max_interval seconds apart.
        """
        if extension_fraction is None:
            start_stamp = self._start_stamp
            end_stamp   = self._end_stamp
        else:
            extension = rospy.Duration((self._end_stamp - self._start_stamp).to_sec() * extension_fraction)
            if extension.to_sec() >= self._start_stamp.to_sec():
                start_stamp = rospy.Time(0, 1)
            else:
                start_stamp = self._start_stamp - extension
            end_stamp = self._end_stamp + extension

        min_x = (start_stamp - self._timeline.start_stamp).to_sec()
        max_x = (end_stamp   - self._timeline.start_stamp).to_sec()

        for series in list(self._data.keys()):
            points     = self._data[series].points
            num_points = len(points)

            trimmed_points = []

            if num_points > 0 and points[0][0] < max_x and points[-1][0] > min_x:
                first_index = None
                last_x = None
                for i, (x, y) in enumerate(points):
                    if x >= min_x:
                        trimmed_points.append((x, y))
                        first_index = i
                        last_x = x
                        break

                if first_index is not None:
                    for i, (x, y) in enumerate(points[first_index + 1:]):
                        if x > max_x:
                            break

                        if (max_interval is None) or (x - last_x >= max_interval):
                            trimmed_points.append((x, y))
                            last_x = x

            new_data = DataSet()
            new_data.set(trimmed_points)

            self._data[series] = new_data
Example #13
    def load_train_data(self, sessions_df):
        data_df = read_from_csv(self.task_core.data_file, self.task_core.n_seed
                                #, max_rows=50000
                                )

        cache_file = os.path.join(self.task_core.cache_dir, 'features_train_' + str(len(data_df.index)) + '.p')
        if os.path.isfile(cache_file):
            print('Loading train features from file')
            x = DataSet.load_from_file(cache_file)
        else:
            x = ds_from_df(data_df, sessions_df, False)
            print('saving train features to file')
            DataSet.save_to_file(x, cache_file)

        labels = data_df['country_destination'].values
        y = le_.transform(labels)
        return x, y
Example #14
def ds_from_df(data_df, sessions_df, is_test):
    print('ds_from_df <<')
    data_df = add_features(data_df)
    data_df = add_sessions_features(data_df, sessions_df)
    if not is_test:
        data_df = data_df.drop(['country_destination'], axis=1)
    print('ds_from_df >>')
    return DataSet.create_from_df(data_df)
Example #15
 def exp_(self):
     #"""
     data = DataSet()
     self.quick = DataSet()
     data.dataimport("D:\Dropbox\St Andrews\IT\IS5189 MSc Thesis\\02 Data\InnoCentive_Challenge_9933493_training_data.csv")
     data.labelencode(columns=self.configLE)
     xtest, xtrain, ytest, ytrain = data.split(quick=True)
     self.quick.import_split(xtest, xtrain, ytest, ytrain)
     self.output_str("10 percent of original dataset loaded (into train. Testset is 90 percent).")
     rows_train = len(xtrain)
     self.feedback("Challenge data loaded. self.quick init with " + str(rows_train) + " rows.")
     correlation_list, descstats = self.quick.correlation()
     self._output_last(correlation_list)
     #print(test)
     #a = test.sort_values(by='Correlation', ascending=True).head(20)
     #b = test.sort_values(by='Correlation',ascending=False).head(20)
     #print(a)
     #print(b)
     #print(descstats)
     #self.quick.descstats()
     #"""
     #Clock.schedule_once(lambda dt: self.feedback("this is good"), -1)
     #descstats = data.descstats(self.configLE)
     ############################################################
     # df is short for DataFrame , to make it more readable when manipulating the Pandas DataFrame.
     # Might be easier (and is shorter) to read by developers as an in house var name.
     threshold = 0.7
     df = correlation_list[correlation_list['Correlation'] > threshold]
     df = df.sort_values(by='Correlation',ascending=False)
     column_a_b = df['Var1']
     column_a_b = column_a_b.append(df['Var2'])
     print(df[df['Var1'] == 'C31'])
     print(column_a_b.value_counts())
     #print(df.head(10))
     print(pd.crosstab(df['Var1'], df['Var2']))
Example #16
 def exp_quick_load(self):
     self.output_str("Import.")
     global data
     data = DataSet()
     data.dataimport("D:\Dropbox\St Andrews\IT\IS5189 MSc Thesis\\02 Data\InnoCentive_Challenge_9933493_training_data.csv")
     self.loaded = True
     self.output_str("Label Encode.")
     data.labelencode(columns=self.configLE)
     self.output_str("Split (quick = True).")
     data.split(target_column_name=self.configCV['target_value'], test_set_size=self.configCV['test_set_size'],
                seed=self.configCV['seed'], random_state_is=self.configCV['random_state_is'],quick=True)
     self.update_overview(trainrows=len(data.X_train), testrows=len(data.X_test),
                          ncols=len(data.X_train.columns.values))
     self.output_str("Function 'exp_quick_load()' finished running.")
     data.descstats(self.configLE,write=True,workdir=self.configGeneral['workdir'])
Example #17
 def __init__(self):
     conf = Configuration()
     self.ptext = TextProcess(conf)
     self.ds = DataSet(conf)
     self.mongo = MongoDB(self.ds.db,self.ds.collection)
     self.tweet=""
     self.tokens = ""
     self.i = 0
     self.enable_translation = self.ptext.translation
     self.translation_store = self.ptext.translation_store
Example #18
 def __init__(self,conf,q):
     self.ptext = TextProcess(conf)
     self.ds = DataSet(conf)
     self.cleaner = KeyCleaner()
     self.enable_translation = self.ptext.translation
     self.translation_store = self.ptext.translation_store
     self.tweets = q         # Tweets queue
     self.tweet = ""
     self.tokens = ""
     self.i = 0
     Thread.__init__(self)
Example #19
def parse(filen, crawl = False):
    if crawl == True:
        raise StandardError()
    file = open(filen, "r")
    ret = DataSet()
    for line in file:
        line = line.strip().rstrip()
        splited = line.split()
        filename = splited[0]
        (left_eye_x, left_eye_y, right_eye_x, right_eye_y,
         nose_x, nose_y, left_corner_mouth_x, left_corner_mouth_y,
         center_mouth_x, center_mouth_y, right_corner_mouth_x,
         right_corner_mouth_y) = tuple([float(a) for a in splited[1:]])
        ret.add_obj(filename, EyesNoseMouth(Point(left_eye_x, left_eye_y),
                                            Point(right_eye_x, right_eye_y),
                                            Point(nose_x, nose_y),
                                            Point(left_corner_mouth_x, left_corner_mouth_y),
                                            Point(center_mouth_x, center_mouth_y),
                                            Point(right_corner_mouth_x, right_corner_mouth_y)))
    file.close()
    return ret
Example #20
def get_blend_feature_or_load_from_cache(
    classifier,
    scale,
    classes_count,
    x_train,
    y_train,
    x_test,
    feature_prefix,
    random_state,
    cache_dir,
    n_folds,
    bagging_count,
):

    file_suffix = "_cl" + str(classes_count) + "_" + feature_prefix + "fld" + str(n_folds) + "_bag" + str(bagging_count)
    cache_file_train = os.path.join(cache_dir, "f_train_" + str(len(x_train.ids_)) + file_suffix + ".p")
    cache_file_test = os.path.join(cache_dir, "f_test_" + str(len(x_test.ids_)) + file_suffix + ".p")

    if os.path.isfile(cache_file_train) and os.path.isfile(cache_file_test):
        print("loading features " + feature_prefix + " from files")
        feature_train = DataSet.load_from_file(cache_file_train)
        feature_test = DataSet.load_from_file(cache_file_test)
    else:
        feature_train, feature_test = get_blend_feature(
            classifier, scale, classes_count, x_train, y_train, x_test, feature_prefix, random_state, n_folds
        )
        print("saving features " + feature_prefix + " to files")
        DataSet.save_to_file(feature_train, cache_file_train)
        DataSet.save_to_file(feature_test, cache_file_test)

    return feature_train, feature_test
Example #21
def parse(filen, crawl = False):
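    # NOTE: hratio, wratio, whratio, min_area, max_area and parse_confidence_min are not
    # defined in this snippet; in the original module they are presumably module-level
    # settings that control bounding-box rescaling and area filtering.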
    file = open(filen, "r")
    ret = DataSet()
    for line in file:
        line = line.strip().rstrip()
        splited = line.split()
        filename = splited[0]
        # filename = filename[filename.rfind("/")+1:]
        # filename = filename[:filename.rfind(".")]
        height = int(splited[1])
        width = int(splited[2])
        class_id = int(splited[3])
        (confidence, x, y, x2, y2) = tuple([float(a) for a in splited[4:]])
        #if confidence > parse_confidence_min: #TODO
        if hratio != None:
            height = y2 - y
            height2 = height * hratio
            y += (height - height2) / 2.0
            y2 = y + height2
        if wratio != None:
            width = x2 - x
            width2 = width * wratio
            x += (width - width2) / 2.0
            x2 = x + width2
        if whratio != None:
            height = y2 - y
            width = x2 - x
            width2 = height * whratio
            x += (width - width2) / 2.0
            x2 = x + width2
        bb = BoundingBox(x, y, x2, y2)
        area = bb.area()
        if (min_area == None or area >= min_area) and \
                (max_area == None or area <= max_area):
            ret.add_obj(filename, bb)
    file.close()
    # print summary
    print 'Dataset ' + str(filen) + ' has ' + str(len(ret)) + ' images and ' \
          + str(ret.get_nobjs()) + ' positive objects.'
    return ret
Example #22
  def train(self, learning_rate, training_epochs, batch_size, keep_prob):
    # Load dataset for training and testing
    self.dataset = DataSet()

    # Define size of output
    self.Y = tf.placeholder(tf.float32, [None, 10], name='Y')
    # Define cost function
    self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.Y))
    # Define optimization method
    self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.cost)

    # Start logger
    if self.log:
        tf.summary.scalar('cost', self.cost)
        self.merged = tf.summary.merge_all()
        self.train_writer = tf.summary.FileWriter('./log_train', self.sess.graph)

    self.sess.run(tf.global_variables_initializer())
    self.sess.run(tf.local_variables_initializer())

    print('Training...')
    weights = []
    # For each epoch, feed training data and perform updating parameters
    for epoch in range(training_epochs):
        avg_cost = 0
        # Number of batches = size of training set / batch_size
        total_batch = int(self.dataset.get_train_set_size() / batch_size)

        # For each batch 
        for i in range(total_batch + 1):
            # Get next batch to feed to the network
            batch_xs, batch_ys = self.dataset.next_batch(batch_size)
            feed_dict = {
                self.X: batch_xs.reshape([batch_xs.shape[0], 28, 28, 1]),
                self.Y: batch_ys,
                self.keep_prob: keep_prob
            }

            weights, summary, c, _ = self.sess.run([self.parameters, self.merged, self.cost, self.optimizer],
                                                   feed_dict=feed_dict)
            avg_cost += c / total_batch

        if self.log:
            self.train_writer.add_summary(summary, epoch + 1)

        print('Epoch:', '%02d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

    print('Training finished!')

    saver = tf.train.Saver()
    save_path = saver.save(self.sess, model_dir + "/mnist_lenet.ckpt")
    print("Trainned model is saved in file: %s" % save_path)
Example #23
    def __init__(self, inp, target):
        """Initialize an empty supervised dataset.

        Pass `inp` and `target` to specify the dimensions of the input and
        target vectors."""
        DataSet.__init__(self)
        if isscalar(inp):
            # add input and target fields and link them
            self.addField('input', inp)
            self.addField('target', target)
        else:
            self.setField('input', inp)
            self.setField('target', target)

        self.linkFields(['input', 'target'])

        # reset the index marker
        self.index = 0

        # the input and target dimensions
        self.indim = self.getDimension('input')
        self.outdim = self.getDimension('target')
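This constructor closely matches the PyBrain SupervisedDataSet interface; if that is indeed the surrounding class, a minimal usage sketch looks like this (the XOR-style samples are illustrative).

from pybrain.datasets import SupervisedDataSet

ds = SupervisedDataSet(2, 1)            # scalar dims: 2 input features, 1 target
ds.addSample((0, 0), (0,))
ds.addSample((0, 1), (1,))
print(len(ds), ds.indim, ds.outdim)     # 2 samples, indim=2, outdim=1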
Example #24
def get_region(region):

    """
        The main endpoint to get the information on the given region
    
        :param region: The genomic region who's data is to be extracted. ( chrom:start-end )
        :type: str
        
        ADDITIONAL PARAMETERS of the endpoint :
        These parameters are to be added to the query url as so : /region/<string:region> **?param=<string>**

        :param dataset: Name of the dataset in which the region's data is to be fetched.
        :type: str


        :return: A JSONify dict with the the formated data under the "response" key.
        :rtype: dict

    """

    return_data = defaultdict(list)

    query_string = request.query_string.decode("utf-8")
    querys = query_string.split("&")


    datasets = []
    r = Region(region.split(":")[0], region.split(":")[1].split("-")[0],region.split("-")[1])
    for query in querys:
        if query.split("=")[0] == "dataset":
            dataset = DataSet(query.split("=")[1])

            data = dataset.get_region(r)
            dataset_name = os.path.splitext(os.path.basename(query.split("=")[1]))[0]

            return_data[dataset_name] = data


    return jsonify({"response":return_data, "sucess": 1})
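An illustrative request against the endpoint described in the docstring, assuming the Flask route is mounted at /region/<string:region>; the host, region and dataset name are placeholders.

import requests

resp = requests.get('http://localhost:5000/region/chr1:10000-20000?dataset=my_dataset')
print(resp.json()['response'])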
Example #25
 def __init__(self, statedim, actiondim):
     """ initialize the reinforcement dataset, add the 3 fields state, action and 
         reward, and create an index marker. This class is basically a wrapper function
         that renames the fields of SupervisedDataSet into the more common reinforcement
         learning names. Instead of 'episodes' though, we deal with 'sequences' here. """
     DataSet.__init__(self)
     # add 3 fields: input, target, importance
     self.addField('state', statedim)
     self.addField('action', actiondim)
     self.addField('reward', 1)
     # link these 3 fields
     self.linkFields(['state', 'action', 'reward'])
     # reset the index marker
     self.index = 0
     # add field that stores the beginning of a new episode
     self.addField('sequence_index', 1)
     self.append('sequence_index', 0)
     self.currentSeq = 0
     self.statedim = statedim
     self.actiondim = actiondim
 
     # the input and target dimensions (for compatibility)
     self.indim = self.statedim
     self.outdim = self.actiondim
Example #26
 def __init__(self, config):
     self.config = config
     self.data = DataSet(self.config)
     self.add_placeholders()
     self.summarizer = tf.summary
     self.net = Network(config)
     self.saver = tf.train.Saver()
     self.epoch_count, self.second_epoch_count = 0, 0
     self.outputs, self.prob = self.net.neural_search()
     self.hyperparams = self.net.gen_hyperparams(self.outputs)
     self.hype_list = [1 for i in range(self.config.hyperparams)] #[7, 7, 24, 5, 5, 36, 3, 3, 48, 64]
     self.reinforce_loss = self.net.REINFORCE(self.prob)
     self.tr_cont_step = self.net.train_controller(self.reinforce_loss, self.val_accuracy)
     self.cNet, self.y_pred = self.init_child(self.hype_list)
     self.cross_loss, self.accuracy, self.tr_model_step = self.grow_child()
     self.init = tf.global_variables_initializer()
     self.local_init = tf.local_variables_initializer()
Example #27
class TweetDB():
    def __init__(self):
        conf = Configuration()
        self.ptext = TextProcess(conf)
        self.ds = DataSet(conf)
        self.cleaner = KeyCleaner()   # needed by process_tweets() below, which calls self.cleaner.unset_tweet_keys()
        self.mongo = MongoDB(self.ds.db,self.ds.collection)
        self.tweet=""
        self.tokens = ""
        self.i = 0
        self.enable_translation = self.ptext.translation
        self.translation_store = self.ptext.translation_store

    def get_tweet_from_db(self):
        where = {
                    "text":{"$exists":"true"},
                    "geo.coordinates":{"$exists":"true"}
                }
        select = {"text":1,"source":1,"geo":1, "user":1,"retweet_count":1,"created_at":1}
        results = self.mongo.find(where,select)
        return results

    def process_tweets(self):
        tweets = self.get_tweet_from_db()
        for rawTweet in tweets:
            if "text" in rawTweet:
                tokens = {}
                self.ptext.set_tweet_text(rawTweet['text'])
                self.ptext.set_tweet_source(rawTweet['source'])
                self.ptext.process_text()
                rawTweet['source'] = self.ptext.get_tweet_source()
                rawTweet['text'] = self.ptext.get_tweet_text()
                self.tokens = self.ptext.get_tweet_tokens()
                tokens['tokens'] = self.tokens
                rawTweet.update(tokens)
                self.tweet = self.cleaner.unset_tweet_keys(rawTweet)

                if not self.ptext.get_translate_status():
                    self.ds.output_tweet(self.tweet)
                    self.i +=  1
                else:
                    if self.translation_store:
                        if self.enable_translation:
                            if not self.ptext.get_translate_failed():
                                self.ds.output_tweet(self.tweet)
                                self.i +=  1
                        else:
                            self.ds.output_tweet(self.tweet)
                            self.i +=  1


    def get_tweet_count(self):
        return self.i
Example #28
 def load(self, path, filename):
     global last_path
     global last_filename
     global data
     last_path = path
     last_filename  = filename[0]
     try:
         data = DataSet()
         data.dataimport(filename[0])
         self.loaded = True
     except (RuntimeError, TypeError, NameError):
         data.dprint("Error: most likely not a csv file.")
     self.output_str("Successfully loaded the data set.")
     self.feedback("Fileimport completed")
     if self.configGeneral['desc_stats_on_load']:
         data.descstats(self.configLE)
         self.output_str("Descriptive statistics performed.")
     ncols = len(data.information())
     # Get the filename and cut it to fit the GUI..
     # Filename only used to remind the user of which dataset has been loaded.
     head, tail = os.path.split(filename[0])
     fname = tail[:5]+ "." + tail[-4:]
     self.update_overview(fname=fname,ncols=ncols)
     self.dismiss_popup()
Example #29
class ProcessTweets(Thread):
    def __init__(self,conf,q):
        self.ptext = TextProcess(conf)
        self.ds = DataSet(conf)
        self.cleaner = KeyCleaner()
        self.enable_translation = self.ptext.translation
        self.translation_store = self.ptext.translation_store
        self.tweets = q         # Tweets queue
        self.tweet = ""
        self.tokens = ""
        self.i = 0
        Thread.__init__(self)
        
    def run(self):
        while True:
            rawTweet = self.tweets.get()
            if "text" in rawTweet:
                tokens = {}
                self.ptext.set_tweet_text(rawTweet['text'])
                self.ptext.set_tweet_source(rawTweet['source'])
                self.ptext.process_text()
                rawTweet['source'] = self.ptext.get_tweet_source()
                rawTweet['text'] = self.ptext.get_tweet_text()
                self.tokens = self.ptext.get_tweet_tokens()
                tokens['tokens'] = self.tokens
                rawTweet.update(tokens)
                self.tweet = self.cleaner.unset_tweet_keys(rawTweet)

                if not self.ptext.get_translate_status():
                    self.ds.output_tweet(self.tweet)
                    self.i +=  1
                else:
                    if self.translation_store:
                        if self.enable_translation:
                            if not self.ptext.get_translate_failed():
                                self.ds.output_tweet(self.tweet)
                                self.i +=  1
                        else:
                            self.ds.output_tweet(self.tweet)
                            self.i +=  1

                self.tweets.task_done()

    def get_tweet_count(self):
        return self.i
Example #30
    def train(self, user_limit, start_learning_rate, training_steps, decay_rate):
        # data set
        train_x, train_y = DataSet(user_limit, self.time_step).lstm_train()

        # error and optimize function
        with tf.name_scope('train'):
            error = tf.reduce_mean(tf.abs(self.prd - self.y))
            tf.summary.scalar('error', error)
            # Dynamic learning rate
            global_step = tf.placeholder(tf.int16, name='global_step')
            learning_rate = tf.train.exponential_decay(start_learning_rate, global_step, training_steps, decay_rate)
            tf.summary.scalar('learning_rate', learning_rate)
            update_op = tf.train.AdamOptimizer(learning_rate).minimize(error)

        # Run session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            # Merge summaries
            merged = tf.summary.merge_all()
            summary_writer = tf.summary.FileWriter('saved_models/%s_%d/' % (self.model_name, self.hidden_unit),
                                                   sess.graph)

            # Initialize global variables
            sess.run(tf.global_variables_initializer())

            # Initialize mapping matrix
            map_file = tf.train.latest_checkpoint('saved_models/Prl_SVD_%d/' % self.map_size)
            self.map_saver.restore(sess, map_file)

            # Start learning
            err_sum = 0
            turns = 0
            data_len = len(train_x)
            for start in range(0, data_len * training_steps, self.batch_size):
                end = start + self.batch_size
                curr_step = start // data_len

                if curr_step == end // data_len:
                    feed_dict = {
                        self.x: train_x[start % data_len:end % data_len],
                        self.y: train_y[start % data_len:end % data_len],
                        global_step: curr_step
                    }
                    _, curr_err, _ = sess.run([update_op, error, learning_rate], feed_dict=feed_dict)
                    err_sum += curr_err
                    turns += 1
                else:
                    feed_dict = {
                        self.x: train_x[start % data_len:] + train_x[:end % data_len],
                        self.y: train_y[start % data_len:] + train_y[:end % data_len],
                        global_step: curr_step
                    }
                    _, curr_err, curr_lr, summary = sess.run([update_op, error, learning_rate, merged],
                                                             feed_dict=feed_dict)
                    err_sum += curr_err
                    turns += 1

                    # Write summaries
                    summary_writer.add_summary(summary, global_step=curr_step)

                    print('Step %d: error = %g, learning rate = %g' % (curr_step, err_sum / turns, curr_lr))
                    err_sum = 0
                    turns = 0

            # Save model
            saver = tf.train.Saver()
            saver.save(sess, 'saved_models/%s_%d/' % (self.model_name, self.hidden_unit), global_step=training_steps)
Example #31
class Model:
    def __init__(self, args):
        self.dataName = args.dataName
        self.dataSet = DataSet(self.dataName)
        self.shape = self.dataSet.shape
        self.maxRate = self.dataSet.maxRate
        self.train = self.dataSet.train
        self.test = self.dataSet.test
        self.negNum = args.negNum
        #############
        self.initializer = args.initializer
        self.activation_func = args.activation
        self.regularizer_rate = args.regularizer
        self.inference()
        self.dropout = args.dropout
        self.embed_size = args.embed_size
        #############
        #        self.testNeg = self.dataSet.getTestNeg(self.test, 99)
        self.maxEpochs = args.maxEpochs
        self.batchSize = args.batchSize
        self.topK = args.topK
        self.earlyStop = args.earlyStop
        self.add_embedding_matrix()

        self.add_placeholders()
        self.add_model()
        self.add_loss()
        self.lr = args.lr
        self.add_train_step()
        self.checkPoint = args.checkPoint
        self.init_sess()

    def inference(self):
        """ Initialize important settings """
        self.regularizer = tf.contrib.layers.l2_regularizer(
            self.regularizer_rate)

        if self.initializer == 'Normal':
            self.initializer = tf.truncated_normal_initializer(stddev=0.01)
        elif self.initializer == 'Xavier_Normal':
            self.initializer = tf.contrib.layers.xavier_initializer()
        else:
            self.initializer = tf.glorot_uniform_initializer()

        if self.activation_func == 'ReLU':
            self.activation_func = tf.nn.relu
        elif self.activation_func == 'Leaky_ReLU':
            self.activation_func = tf.nn.leaky_relu
        elif self.activation_func == 'ELU':
            self.activation_func = tf.nn.elu

    def add_placeholders(self):
        self.user = tf.placeholder(shape=(None, ),
                                   dtype=tf.int32,
                                   name="userid")
        self.item = tf.placeholder(shape=(None, ),
                                   dtype=tf.int32,
                                   name="itemid")
        self.rate = tf.placeholder(shape=(None, ),
                                   dtype=tf.float32,
                                   name='rate')
        self.drop = tf.placeholder(tf.float32, name="drop")

    def add_embedding_matrix(self):
        self.user_Embedding = tf.Variable(tf.truncated_normal(
            shape=[self.shape[0], self.embed_size],
            dtype=tf.float32,
            mean=0.0,
            stddev=0.01),
                                          name="user_Embedding")
        self.item_Embedding = tf.Variable(tf.truncated_normal(
            shape=[self.shape[1], self.embed_size],
            dtype=tf.float32,
            mean=0.0,
            stddev=0.01),
                                          name="item_Embedding")

    def add_model(self):
        self.user_input = tf.nn.embedding_lookup(self.user_Embedding,
                                                 self.user)
        self.item_input = tf.nn.embedding_lookup(self.item_Embedding,
                                                 self.item)

        with tf.name_scope("MNN"):
            self.interaction = tf.concat([self.user_input, self.item_input],
                                         axis=-1,
                                         name='interaction')

            self.layer1_MLP = tf.layers.dense(
                inputs=self.interaction,
                units=self.embed_size,
                activation=self.activation_func,
                kernel_initializer=self.initializer,
                kernel_regularizer=self.regularizer,
                name='layer1_MLP')
            self.layer1_MLP = tf.layers.dropout(self.layer1_MLP,
                                                rate=self.dropout)

            self.layer2_MLP = tf.layers.dense(
                inputs=self.layer1_MLP,
                units=self.embed_size // 2,
                activation=self.activation_func,
                kernel_initializer=self.initializer,
                kernel_regularizer=self.regularizer,
                name='layer2_MLP')
            self.layer2_MLP = tf.layers.dropout(self.layer2_MLP,
                                                rate=self.dropout)
            # Dropout rate: the fraction of units to drop during training; 0 means no dropout, 1 drops the entire layer.

            self.layer3_MLP = tf.layers.dense(
                inputs=self.layer2_MLP,
                units=self.embed_size // 4,
                activation=self.activation_func,
                kernel_initializer=self.initializer,
                kernel_regularizer=self.regularizer,
                name='layer3_MLP')
            self.layer3_MLP = tf.layers.dropout(self.layer3_MLP,
                                                rate=self.dropout)
            self.logits = tf.layers.dense(inputs=self.layer3_MLP,
                                          units=1,
                                          activation=None,
                                          kernel_initializer=self.initializer,
                                          kernel_regularizer=self.regularizer,
                                          name='predict')
            self.logits_dense = tf.reshape(self.logits, [-1])

    def add_loss(self):
        losses = tf.square(self.rate - self.logits_dense)
        self.loss = tf.reduce_sum(losses)

    def add_train_step(self):
        '''
        global_step = tf.Variable(0, name='global_step', trainable=False)
        self.lr = tf.train.exponential_decay(self.lr, global_step,
                                             self.decay_steps, self.decay_rate, staircase=True)
        '''
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_step = optimizer.minimize(self.loss)

    def init_sess(self):
        self.config = tf.ConfigProto()
        self.config.gpu_options.allow_growth = True
        self.config.allow_soft_placement = True
        self.sess = tf.Session(config=self.config)
        self.sess.run(tf.global_variables_initializer())

        self.saver = tf.train.Saver()
        if os.path.exists(self.checkPoint):
            [os.remove(f) for f in os.listdir(self.checkPoint)]
        else:
            os.mkdir(self.checkPoint)

    def run(self):
        best_hr = -1
        best_NDCG = -1
        best_epoch = -1
        loss = np.inf
        print("Start Training!")
        for epoch in range(self.maxEpochs):
            #            print("="*20+"Epoch ", epoch, "="*20)
            loss_temp = self.run_epoch(self.sess)
            if loss_temp < loss:
                loss = loss_temp
            else:
                break
            print("Epoch:", epoch, "loss:", loss_temp)
            self.saver.save(self.sess, self.checkPoint + 'model.ckpt')
        print("Training complete!")

    def run_epoch(self, sess, verbose=10):
        train_u, train_i, train_r = self.dataSet.getInstances(self.train)

        # train_set = {'user':train_u,"item":train_i,'rate':train_r}
        # dataset = tf.data.Dataset.from_tensor_slices(train_set)
        # dataset = dataset.shuffle(100000).batch(self.batchSize)
        # iterator = tf.data.Iterator.from_structure(dataset.output_types,
        #                                            dataset.output_shapes)
        # sess.run(iterator.make_initializer(dataset))
        train_len = len(train_u)
        shuffled_idx = np.random.permutation(np.arange(train_len))
        train_u = train_u[shuffled_idx]
        train_i = train_i[shuffled_idx]
        train_r = train_r[shuffled_idx]

        num_batches = len(train_u) // self.batchSize + 1

        losses = []
        for i in range(num_batches):
            min_idx = i * self.batchSize
            max_idx = np.min([train_len, (i + 1) * self.batchSize])
            train_u_batch = np.array(train_u[min_idx:max_idx])
            train_i_batch = train_i[min_idx:max_idx]
            train_r_batch = train_r[min_idx:max_idx]
            print("ssdsdsdds", train_u_batch.shape)
            print(train_u_batch)

            feed_dict = self.create_feed_dict(train_u_batch, train_i_batch,
                                              train_r_batch, self.drop)
            _, tmp_loss = sess.run([self.train_step, self.loss],
                                   feed_dict=feed_dict)
            losses.append(tmp_loss)
        loss = np.mean(losses)
        print("\nMean loss in this epoch is: {}".format(loss))
        return loss

    def create_feed_dict(self, u, i, r=None, drop=None):
        return {self.user: u, self.item: i, self.rate: r, self.drop: drop}

    def evaluate(self, sess, topK):
        def getHitRatio(ranklist, targetItem):
            for item in ranklist:
                if item == targetItem:
                    return 1
            return 0

        def getNDCG(ranklist, targetItem):
            for i in range(len(ranklist)):
                item = ranklist[i]
                if item == targetItem:
                    return math.log(2) / math.log(i + 2)
            return 0

        hr = []
        NDCG = []
        testUser = self.testNeg[0]
        testItem = self.testNeg[1]
        for i in range(len(testUser)):
            target = testItem[i][0]
            feed_dict = self.create_feed_dict(testUser[i], testItem[i])
            predict = sess.run(self.y_, feed_dict=feed_dict)

            item_score_dict = {}

            for j in range(len(testItem[i])):
                item = testItem[i][j]
                item_score_dict[item] = predict[j]

            ranklist = heapq.nlargest(topK,
                                      item_score_dict,
                                      key=item_score_dict.get)

            tmp_hr = getHitRatio(ranklist, target)
            tmp_NDCG = getNDCG(ranklist, target)
            hr.append(tmp_hr)
            NDCG.append(tmp_NDCG)
        return np.mean(hr), np.mean(NDCG)
Example #32
#               and DATASET3.TXT are within the DATASETS folder and that
#               the paths specified in these files for database and query
#               images folders are correct.
# Author      : Antoni Burguera ([email protected])
# History     : 27-June-2019 - Creation
# Citation    : Please, refer to the README file to know how to properly cite
#               us if you use this software.
###############################################################################

from dataset import DataSet
import matplotlib.pyplot as plt
import sys

# Load three datasets
print('[[ LOADING DATASETS ]]')
dataSet1 = DataSet('DATASETS/DATASET1.TXT')
dataSet2 = DataSet('DATASETS/DATASET2.TXT')
dataSet3 = DataSet('DATASETS/DATASET3.TXT')
print('[[DATASETS LOADED ]]\n\n')

# Let's print the dataSet1 info
print('[[ PRINTING DATASET1 INFO ]]')
dataSet1.print()
print('[[ DATASET1 PRINTED ]]\n\n')

# Let's compare dataSet1 to itself
print('[[ COMPARING DATASET1 TO ITSELF ]]')
dataSet1.compare(dataSet1)
print('[[ COMPARED ]]\n\n')

# Let's compare dataSet1 to dataSet3
Example #33

def get_train_batch(x, y, batch_size=10):
    '''
    Refactored batch generator.
    '''
    while 1:
        idx = np.random.randint(0, len(y), batch_size)
        x1 = get_im_cv2(x[0][idx])
        x2 = get_im_cv2(x[1][idx])
        y_train = y[idx]
        yield [x1, x2], y_train


# Load the image paths
data = DataSet()
images = data.data[:10000]
labels = data.labels[:10000]

same_face = []
diff_face = []

for i in range(9999):
    # If two consecutive images share the same label, use them as a "same face" pair
    if labels[i] == labels[i + 1]:
        same_face.append([images[i], images[i + 1]])
    # Otherwise, use them as a "different face" pair
    else:
        diff_face.append([images[i], images[i + 1]])

# Convert to numpy.ndarray so the data can be fed into the Keras-built network
Example #34
    def fit(self, X_train, Y_train, X_val, Y_val, n_epoch=100):
        # initialize log directory
        if tf.gfile.Exists(self.logdir):
            tf.gfile.DeleteRecursively(self.logdir)
        tf.gfile.MakeDirs(self.logdir)

        # load some training params
        n_batch = self.opt_params['batch_size']

        # create saver
        self.saver = tf.train.Saver()

        # summarization
        summary = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(self.logdir, self.sess.graph)

        # load data into DataSet
        train_data = DataSet(X_train, Y_train)
        val_data = DataSet(X_val, Y_val)

        # train the model
        start_time = time.time()
        step, epoch = 0, train_data.epochs_completed
        while train_data.epochs_completed < n_epoch:

            step += 1

            # load the batch
            # alpha = min((n_epoch - train_data.epochs_completed) / 200, 1.)
            # alpha = 1.0 if epoch < 100 else 0.1
            alpha = 1.0
            batch = train_data.next_batch(n_batch)
            feed_dict = self.load_batch(batch, alpha)

            # take training step
            tr_objective = self.train(feed_dict)
            # tr_obj_snr = 20 * np.log10(1. / np.sqrt(tr_objective) + 1e-8)
            # if step % 50 == 0:
            #   print step, tr_objective, tr_obj_snr

            # log results at the end of each epoch
            if train_data.epochs_completed > epoch:
                epoch = train_data.epochs_completed
                end_time = time.time()

                tr_l2_loss, tr_l2_snr = self.eval_err(X_train,
                                                      Y_train,
                                                      n_batch=n_batch)
                va_l2_loss, va_l2_snr = self.eval_err(X_val,
                                                      Y_val,
                                                      n_batch=n_batch)

                print "Epoch {} of {} took {:.3f}s ({} minibatches)".format(
                    epoch, n_epoch, end_time - start_time,
                    len(X_train) // n_batch)
                print "  training l2_loss/segsnr:\t\t{:.6f}\t{:.6f}".format(
                    tr_l2_loss, tr_l2_snr)
                print "  validation l2_loss/segsnr:\t\t{:.6f}\t{:.6f}".format(
                    va_l2_loss, va_l2_snr)

                # compute summaries for overall loss
                objectives_summary = tf.Summary()
                objectives_summary.value.add(tag='tr_l2_loss',
                                             simple_value=tr_l2_loss)
                objectives_summary.value.add(tag='tr_l2_snr',
                                             simple_value=tr_l2_snr)
                objectives_summary.value.add(tag='va_l2_snr',
                                             simple_value=va_l2_snr)

                # compute summaries for all other metrics
                summary_str = self.sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.add_summary(objectives_summary, step)

                # write summaries and checkpoints
                summary_writer.flush()
                self.saver.save(self.sess,
                                self.checkpoint_root,
                                global_step=step)

                # restart clock
                start_time = time.time()
Example #35
    graph = tf.get_default_graph()

    X = graph.get_tensor_by_name("X:0")
    Y = graph.get_tensor_by_name("Y:0")
    keep_prob = graph.get_tensor_by_name("keep_prob:0")
    logits = graph.get_tensor_by_name("fc2/logits:0")
    softmax = graph.get_tensor_by_name("softmax:0")

    probs, chars = sess.run([logits, softmax], feed_dict={X: character_image.reshape((1, 28, 28, 1)), keep_prob: 1})

    probs = (np.exp(probs) / np.sum(np.exp(probs))) * 100    
    idx = np.argmax(chars)
    return (probs[0, idx], idx)


ds = DataSet(test_prob=1, one_hot=False)
characters = DataGenerator().get_list_characters()

x, y = ds.next_batch_test(1)

print('x.shape', x.shape)
print('y.shape', y.shape)

prob, idx = predict(x)

print('Input character: ', characters[int(y[0])])
print('Predicted: ', characters[idx], ' with probability = ', prob, '%')
print('Result: ', characters[int(y[0])] == characters[idx])
print('-' * 10)
Example #36
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import cv2
from dataset import DataSet

batch_size = 100
sample_size = 100
epochs = 3001
steps = 1000
Z_dimension = 100
dataset = DataSet(test_prob=0, one_hot=False)


def conv2d(X,
           filters,
           kernel_size=2,
           strides=2,
           padding='same',
           is_training=True):
    X = tf.layers.conv2d(X,
                         filters,
                         kernel_size,
                         strides=strides,
                         padding=padding)
    X = tf.layers.batch_normalization(X, training=is_training)
    X = tf.nn.leaky_relu(X)
    return X


def deconv(Z,
Example #37
    def execute_runs(self, mode, num_runs, resume=False):
        """
        Executes several training runs, each with different parameters and saves the results
        :param mode: experiment mode.
            MODE_FULL randomizes all parameters including the input data, per run
            MODE_PSA_RUNS generates different datasets and runs the psa separately for each
        :param num_runs: number of runs per experiment to add to the output
        :param resume: whether to resume the runs. if True, the runs will continue until there are num_runs records.
        :return:
        """
        iter_index = -1
        while True:
            iter_index += 1
            if mode == self.MODE_FULL:
                if iter_index == 1:
                    break
                out_dir = '../output/full'
                self.create_dir(out_dir, clean=not resume)
                curr_data = None
            elif mode == self.MODE_PSA_RUNS:
                if iter_index >= len(self.mode_psa_datasets):
                    break
                noise = self.fixed_noise
                dataset_name = self.mode_psa_datasets[iter_index]
                out_dir = '../output/' + dataset_name + '_' + str(noise)
                self.create_dir(out_dir, clean=not resume)
                input_filename = out_dir + '/input.txt'
                if resume and os.path.exists(input_filename):
                    curr_data = DataSet.create_from_file(input_filename)
                    curr_data.noise = noise
                    curr_data.dataset_name = dataset_name
                    assert(curr_data.num_samples() == Run.num_samples)
                else:
                    curr_data = DataSet(dataset_name, num_samples=Run.num_samples, noise=noise)
                    curr_data.save_to_file(input_filename)
            else:
                print("Invalid mode:" + str(mode))
                return

            run_id = 0
            index_filename = out_dir + '/runsInfo.txt'
            print('index table: ' + index_filename)
            if resume and os.path.exists(index_filename):
                index_table = np.genfromtxt(index_filename, dtype=None, delimiter='\t', names=True, autostrip=False)
                if len(index_table) > 0 and 'ID' in index_table.dtype.fields:
                    run_id = index_table['ID'][-1] + 1
                print('Resuming from ID {}'.format(run_id))

            write_header = (not os.path.exists(index_filename)) or (not resume)
            # open the runsInfo.txt index file; its header row is written below if needed
            f_runs = open(index_filename, 'a+' if resume else 'w+')
            all_param_info = \
                ([['ID', 'ID', self.PARAM_TYPE_OUTPUT, 'ID'],
                  ['imagePath', 'Image path', self.PARAM_TYPE_OUTPUT, 'Output image path']] +
                 self.param_info() +
                 [['epoch', 'Epoch', self.PARAM_TYPE_INT, 'Number of Epochs (of processing all training data)'],
                  ['iteration', 'Iterations', self.PARAM_TYPE_INT, 'Number of Iterations (of processing a batch)'],
                  ['success', 'Success', self.PARAM_TYPE_OUTPUT, 'Whether the training finished successfully'],
                  ['total_time', 'Total time (ms)', self.PARAM_TYPE_OUTPUT, 'Total time at this epoch'],
                  ['mean_time', 'Mean time (ms)', self.PARAM_TYPE_OUTPUT, 'Mean time per epoch'],
                  ['train_loss', 'Training loss', self.PARAM_TYPE_OUTPUT, 'Training loss at epoch'],
                  ['test_loss', 'Test loss', self.PARAM_TYPE_OUTPUT, 'Test loss at epoch'],
                  ['train_TPR', 'TPR for train', self.PARAM_TYPE_OUTPUT, 'True positive rate for training data'],
                  ['train_FPR', 'FPR for train', self.PARAM_TYPE_OUTPUT, 'False positive rate for training data'],
                  # ['train_TNR', 'TNR for train', self.PARAM_TYPE_OUTPUT, 'True negative rate for training data'],
                  # ['train_FNR', 'FNR for train', self.PARAM_TYPE_OUTPUT, 'False negative Rate for training data'],
                  ['test_TPR', 'TPR for test', self.PARAM_TYPE_OUTPUT, 'True positive rate for test data'],
                  ['test_FPR', 'FPR for test', self.PARAM_TYPE_OUTPUT, 'False positive rate for test data'],
                  # ['test_TNR', 'TNR for test', self.PARAM_TYPE_OUTPUT, 'True negative rate for test data'],
                  # ['test_FNR', 'FNR for test', self.PARAM_TYPE_OUTPUT, 'False negative Rate for test data'],
                  ])

            # save the paramInfo.txt
            with open(out_dir + '/paramInfo.txt', 'w') as fpi:
                fpi.write('\t'.join(self.param_info_header()) + '\n')
                fpi.write('\n'.join(['\t'.join(i) for i in all_param_info]))

            # write the header row of runsInfo.txt
            if write_header:
                f_runs.write('\t'.join([i[0] for i in all_param_info]) + '\n')
                f_runs.flush()
            images_dir = out_dir + '/images'
            runs_dir = out_dir + '/runs'

            self.create_dir(images_dir, clean=not resume)
            self.create_dir(runs_dir, clean=not resume)

            while run_id < num_runs:
                if curr_data is None:
                    self.randomize_data()  # randomize the data every time
                else:
                    self.data = curr_data  # reuse the same data
                self.randomize_training_params()
                # print the parameters
                print('configuration (%d of %d)' % (int(run_id / len(self.epochs_per_config)) + 1,
                                                    int(num_runs / len(self.epochs_per_config))))
                print(', '.join(a[0] + ': ' + a[1] for a in zip(self.param_names(), self.param_str())))

                prev_step = 0
                total_time = 0
                for epoch in self.epochs_per_config:
                    curr_step = int(epoch * self.data.num_samples() / self.nn.batch_size)
                    # curr_step = epoch # in the online demo epoch == iter: https://github.com/tensorflow/playground/blob/67cf64ffe1fc53967d1c979d26d30a4625d18310/src/playground.ts#L898

                    time_start = time.time()

                    # train the network
                    success = True
                    try:
                        train_loss, test_loss = self.nn.train(self.data, restart=False, num_steps=curr_step - prev_step)
                    except Exception:
                        train_loss, test_loss = 1, 1
                        success = False

                    total_time += (time.time() - time_start) * 1000.0
                    mean_time = total_time / epoch

                    try:
                        train_tpr, train_fpr, test_tpr, test_fpr = self.calc_tpr_fpr()
                    except Exception:
                        train_tpr, train_fpr, test_tpr, test_fpr = 0, 1, 0, 1
                        success = False

                    print('(epoch: %d, step: %d), '
                          '(total_time: %g, mean_time: %g), '
                          '(training loss: %g, test loss: %g), '
                          '(train_tpr: %g, train_fpr: %g test_tpr: %g, test_fpr: %g)' %
                          (epoch, curr_step,
                           round(total_time, 2), round(mean_time, 2),
                           round(train_loss, 2), round(test_loss, 2),
                           round(train_tpr, 2), round(train_fpr, 2), round(test_tpr, 2), round(test_fpr, 2)))

                    image_filename = images_dir + '/' + str(run_id) + ".png"
                    run_filename = runs_dir + '/' + str(run_id) + ".txt"
                    self.save_plot(image_filename)
                    self.save_current_run(run_filename)

                    f_runs.write('\t'.join(
                        [str(run_id),
                         image_filename[len(out_dir)+1:]] +
                        self.param_str() +
                        [str(epoch),
                         str(curr_step),
                         str(success),
                         str(round(total_time, 3)),
                         str(round(mean_time, 3)),
                         str(round(train_loss, 3)),
                         str(round(test_loss, 3)),
                         str(round(train_tpr, 3)),
                         str(round(train_fpr, 3)),
                         str(round(test_tpr, 3)),
                         str(round(test_fpr, 3)),
                         ]) +
                                 '\n')
                    f_runs.flush()
                    prev_step = curr_step
                    run_id += 1
                    if run_id >= num_runs:
                        break
            f_runs.close()
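A minimal driver sketch for the execute_runs method above (illustrative only: it assumes the Run class defined in Example #45 below, and the num_runs value is arbitrary):

if __name__ == '__main__':
    runner = Run()
    # MODE_FULL: a single output directory; the data is re-randomized for every run
    runner.execute_runs(Run.MODE_FULL, num_runs=40)
    # MODE_PSA_RUNS: one directory per dataset in mode_psa_datasets, fixed noise,
    # resumable until num_runs records exist in runsInfo.txt
    runner.execute_runs(Run.MODE_PSA_RUNS, num_runs=40, resume=True)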
Example #38
0
class Model(object):
    def __init__(self, config):
        self.epoch_count = 0
        self.config = config
        self.data = DataSet(config)
        self.add_placeholders()
        self.summarizer = tf.summary
        self.net = Network(config, self.summarizer)
        self.optimizer = self.config.solver.optimizer
        self.y_pred = self.net.prediction(self.x, self.keep_prob)
        self.loss = self.net.loss_function(self.x, self.y, self.keep_prob)
        self.accuracy = self.net.accuracy(self.y_pred, self.y)
        self.summarizer.scalar("accuracy", self.accuracy)
        self.summarizer.scalar("loss", self.loss)
        self.train = self.net.train_step(self.loss)
        self.B = self.net.B
        self.A = self.net.A
        self.n_epoch_to_decay = list(range(800, 20000, 1000))[::-1]
        self.next_epoch_to_decay = self.n_epoch_to_decay.pop()
        self.saver = tf.train.Saver()
        self.init = tf.global_variables_initializer()
        self.local_init = tf.local_variables_initializer()
        self.kf = KFold(n_splits=10, random_state=0, shuffle=True)

    def add_placeholders(self):
        self.x = tf.placeholder(tf.float32, shape=[None, self.config.features_dim])
        self.y = tf.placeholder(tf.float32, shape=[None, self.config.labels_dim])
        self.keep_prob = tf.placeholder(tf.float32)

    def train_epoch(self, sess, summarizer):
        merged_summary = self.summarizer.merge_all()
        err, accuracy = list(), list()
        X, Y = self.data.get_train()
        for train, val in self.kf.split(X, y=Y):
            feed_dict = {self.x: X[train], self.y: Y[train], self.keep_prob: self.config.solver.dropout}
            # one optimization step is run on each fold's training split
            summ, _, loss_, accuracy_ = sess.run([merged_summary, self.train,
                                                  self.loss, self.accuracy], feed_dict=feed_dict)
            summarizer.add_summary(summ)
            err.append(loss_)
            accuracy.append(accuracy_)
        return np.mean(err), np.mean(accuracy)

    def do_eval(self, sess, data):
        if data == "validation":
            err, accuracy = list(), list()
            X, Y = self.data.get_validation()
            for train, val in self.kf.split(X, y=Y):
                feed_dict = {self.x: X[val], self.y: Y[val], self.keep_prob: 1}
                loss_, Y_pred, accuracy_ = sess.run([self.loss, self.y_pred, self.accuracy], feed_dict=feed_dict)
                metrics = evaluate(predictions=Y_pred, labels=Y[val])
                err.append(loss_)
                accuracy.append(accuracy_)
            return np.mean(err), np.mean(accuracy), metrics

        if data == "test":
            X, Y = self.data.get_test()
            feed_dict = {self.x: X, self.y: Y, self.keep_prob: 1}
            loss_, Y_pred, accuracy_ = sess.run([self.loss, self.y_pred, self.accuracy], feed_dict=feed_dict)
            metrics = evaluate(predictions=Y_pred, labels=Y)
            return loss_, accuracy_, metrics

    def fit(self, sess, summarizer):
        sess.run(self.init)
        sess.run(self.local_init)
        max_epochs = self.config.max_epochs
        self.epoch_count = 0
        max_micro_f1 = 0
        max_macro_f1 = 0
        while self.epoch_count < max_epochs:
            if self.config.load:
                break
            loss_train, accuracy_train = self.train_epoch(sess, summarizer['train'])
            loss_val, accuracy_val, metrics_val = self.do_eval(sess, "validation")
            if self.epoch_count == self.next_epoch_to_decay:
                if len(self.n_epoch_to_decay) == 0:
                    self.next_epoch_to_decay = -1
                else:
                    self.next_epoch_to_decay = self.n_epoch_to_decay.pop()
                self.config.learning_rate *= self.config.lr_decay_factor
                print('Decaying learning rate ...')
                print(self.config.learning_rate)
            
            if max_micro_f1 < metrics_val['micro_f1'] and max_macro_f1 < metrics_val['macro_f1']:
                print(self.config.ckptdir_path)
                print("cur_max_Mi-F1 = %g, cur_max_Ma-F1 = %g, cur_epoch = %g." % (
                    metrics_val['micro_f1'], metrics_val['macro_f1'], self.epoch_count))
                self.saver.save(sess, self.config.ckptdir_path + "model.ckpt")
            max_micro_f1 = max(max_micro_f1, metrics_val['micro_f1'])
            max_macro_f1 = max(max_macro_f1, metrics_val['macro_f1'])

            if self.epoch_count % 5 == 0:
                print("After %d training epoch(s), Training : Loss = %g, Validation : Loss = %g." % (
                self.epoch_count, loss_train, loss_val))
                print("train_accuracy = %g, val_accuracy = %g." % (accuracy_train, accuracy_val))
                print("Micro-F1 = %g, Macro-F1 = %g." % (metrics_val['micro_f1'], metrics_val['macro_f1']))
            self.epoch_count += 1
        returnDict = {"train_loss": loss_train, "val_loss": loss_val, "train_accuracy": accuracy_train,
                      "val_accuracy": accuracy_val}
        return returnDict

    def add_summaries(self, sess):
        if self.config.load or self.config.debug:
            path_ = os.path.join("../results/tensorboard" + self.config.dataset_name)
        else:
            path_ = os.path.join("../bin/results/tensorboard" + self.config.dataset_name)
        summary_writer_train = tf.summary.FileWriter(path_ + "/train", sess.graph)
        summary_writer_val = tf.summary.FileWriter(path_ + "/val", sess.graph)
        summary_writer_test = tf.summary.FileWriter(path_ + "/test", sess.graph)
        summary_writers = {'train': summary_writer_train, 'val': summary_writer_val, 'test': summary_writer_test}
        return summary_writers
Example #39
0
def model():
    X_indices = tf.placeholder(tf.int64, name='X_indices', shape=None)
    X_data = tf.placeholder(tf.float32, name='X_data', shape=None)
    X_shape = tf.placeholder(tf.int64, name='X_shape', shape=None)
    '''
    Y_indices = tf.placeholder(tf.int64, name='Y_indices', shape=None)
    Y_data = tf.placeholder(tf.float32, name='Y_data', shape=None)
    Y_shape = tf.placeholder(tf.int64, name='Y_shape', shape=None)
    '''

    X = tf.SparseTensor(indices=X_indices, values=X_data, dense_shape=X_shape)
    #Y = tf.SparseTensor(indices=Y_indices, values=Y_data, dense_shape=Y_shape)
    Y = tf.placeholder(tf.float32, shape=[None, label_dim])
    Wx1 = tf.Variable(tf.random_normal(shape=[feature_dim, 700]))
    bx1 = tf.Variable(tf.random_normal(shape=[700]))
    Wx2 = tf.Variable(tf.random_normal(shape=[700, 983]))
    bx2 = tf.Variable(tf.random_normal(shape=[983]))

    act = tf.nn.relu
    hx1 = act(dot(X, Wx1) + bx1)
    hxe = dot(hx1, Wx2, sparse=False) + bx2
    print(hxe.get_shape())
    loss = ce_loss(hxe, Y)
    patk = tf.metrics.sparse_precision_at_k(labels=tf.cast(Y, tf.int64),
                                            predictions=tf.nn.sigmoid(hxe),
                                            k=3)

    train = tf.train.GradientDescentOptimizer(0.01).minimize(loss)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        for epoch in range(200):
            el, c = 0.0, 0
            dataobj = DataSet("./data/delicious/delicious-train", batch_size)
            for x_train, y_train, dummy in dataobj.next_batch(
                    "train", sparse_features=True, sparse_labels=False):
                x_props, y_props = get_sparse_props(
                    x_train), None  #get_sparse_props(y_train)
                feed = {
                    X_indices: x_props[0],
                    X_data: x_props[1],
                    X_shape: x_props[2],
                    Y: y_train
                }  #, Y_indices : y_props[0], Y_data : y_props[1], Y_shape : y_props[2]}
                pl, _ = sess.run([loss, train], feed_dict=feed)
                el += pl
                c += 1
                print("Epoch #{} Loss : {}".format(epoch, pl), end='\r')
            test_obj = DataSet("./data/delicious/delicious-test", 3185)
            x_test, y_test = test_obj.get_test()
            x_props, y_props = get_sparse_props(
                x_test), None  #get_sparse_props(y_test)
            feed = {
                X_indices: x_props[0],
                X_data: x_props[1],
                X_shape: x_props[2],
                Y: y_test
            }  #Y_indices : y_props[0], Y_data : y_props[1], Y_shape : y_props[2]}
            pk = sess.run(patk, feed_dict=feed)
            output = "Epoch #{} Loss : {}, P@K : {}".format(epoch, el / c, pk)
            with open("train_test.log", "a+") as f:
                f.write(output)
            print(output)
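The helpers dot, ce_loss and get_sparse_props are not shown in this snippet; a plausible minimal version, assuming SciPy sparse feature matrices and TF 1.x, might look like this (names and behavior are assumptions, not the original implementation):

import numpy as np
import tensorflow as tf


def get_sparse_props(mat):
    # Decompose a scipy.sparse matrix into the (indices, values, dense_shape)
    # triple expected by tf.SparseTensor.
    coo = mat.tocoo()
    indices = np.stack([coo.row, coo.col], axis=1).astype(np.int64)
    values = coo.data.astype(np.float32)
    dense_shape = np.array(coo.shape, dtype=np.int64)
    return indices, values, dense_shape


def dot(a, b, sparse=True):
    # Matrix product that works for either a SparseTensor or a dense tensor.
    return tf.sparse_tensor_dense_matmul(a, b) if sparse else tf.matmul(a, b)


def ce_loss(logits, labels):
    # Mean sigmoid cross-entropy, suitable for the multi-label setting above.
    return tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))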
Example #40
0
def train(FLAGS):
    # read data
    dataset = DataSet(fpath=FLAGS.train_file,
                      seqlen=FLAGS.seq_len,
                      n_classes=FLAGS.num_classes,
                      num_feature=FLAGS.num_feature,
                      is_raw=FLAGS.is_raw,
                      need_shuffle=True)
    # set character set size
    FLAGS.charset_size = dataset.charset_size

    with tf.Graph().as_default():
        # get placeholders
        global_step = tf.placeholder(tf.int32)
        placeholders = get_placeholders(FLAGS)

        # prediction
        pred, layers = inference(placeholders['data'],
                                 FLAGS,
                                 for_training=True)
        # loss
        # slim.losses.softmax_cross_entropy(pred, placeholders['labels'])
        # class_weight = tf.constant([[1.0, 5.0]])
        # weight_per_label = tf.transpose( tf.matmul(placeholders['labels']
        #                        , tf.transpose(class_weight)) )
        # loss = tf.multiply(weight_per_label,
        #         tf.nn.softmax_cross_entropy_with_logits(labels=placeholders['labels'], logits=pred))
        # loss = tf.losses.compute_weighted_loss(loss)

        tf.losses.softmax_cross_entropy(placeholders['labels'], pred)
        loss = tf.losses.get_total_loss()

        # accuracy
        _acc_op = tf.equal(tf.argmax(pred, 1),
                           tf.argmax(placeholders['labels'], 1))
        acc_op = tf.reduce_mean(tf.cast(_acc_op, tf.float32))

        # optimization
        train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(loss)
        # train_op = tf.train.RMSPropOptimizer( FLAGS.learning_rate ).minimize( loss )

        # Create a saver.
        saver = tf.train.Saver(max_to_keep=None)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            if tf.train.checkpoint_exists(FLAGS.prev_checkpoint_path):
                if FLAGS.fine_tuning:
                    logging('%s: Fine Tuning Experiment!' % (datetime.now()),
                            FLAGS)
                    restore_variables = slim.get_variables_to_restore(
                        exclude=FLAGS.fine_tuning_layers)
                    restorer = tf.train.Saver(restore_variables)
                else:
                    restorer = tf.train.Saver()
                restorer.restore(sess, FLAGS.prev_checkpoint_path)
                logging(
                    '%s: Pre-trained model restored from %s' %
                    (datetime.now(), FLAGS.prev_checkpoint_path), FLAGS)
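                # recover the step counter from the checkpoint filename
                # (TF checkpoints end in "-<global_step>", e.g. "model.ckpt-1234")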
                step = int(FLAGS.prev_checkpoint_path.split('/')[-1].split('-')[-1]) + 1
            else:
                step = 0

            # iter epoch
            # for data, labels in dataset.iter_batch( FLAGS.batch_size, 5 ):
            for data, labels in dataset.iter_once(FLAGS.batch_size):
                start_time = time.time()
                _, loss_val, acc_val = sess.run(
                    [train_op, loss, acc_op],
                    feed_dict={
                        placeholders['data']: data,
                        placeholders['labels']: labels,
                        global_step: step
                    })
                duration = time.time() - start_time

                assert not np.isnan(loss_val), 'Model diverge'

                # logging
                if step > 0 and step % FLAGS.log_interval == 0:
                    examples_per_sec = FLAGS.batch_size / float(duration)
                    format_str = (
                        '%s: step %d, loss = %.2f, acc = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
                    logging(
                        format_str % (datetime.now(), step, loss_val, acc_val,
                                      examples_per_sec, duration), FLAGS)

                # save model
                if step > 0 and step % FLAGS.save_interval == 0:
                    saver.save(sess, FLAGS.checkpoint_path, global_step=step)

                # counter
                step += 1

            # save for last
            saver.save(sess, FLAGS.checkpoint_path, global_step=step - 1)
Example #41
0
 def setUp(self):
     '''
     Executed prior to each test.
     '''
     self.ds = DataSet('test', NoSchema)
     return
Example #42
0
            k = int(train_set_idx[i][0])
            l = int(train_set_idx[j][0])
            doc1 = self.documents[k]
            doc2 = self.documents[l]
            val = self.kernel_obj(doc1, doc2)
            ret[i, j] = val
            ret[j, i] = val
        return ret

    def save_kernel(self):
        self.kernel_obj.save_kernel_entry()


if __name__ == '__main__':
    from dataset import DataSet
    data_set = DataSet()

    # make a small subset for testing
    train_set = data_set.train_set
    train_labels = data_set.train_labels
    test_set = data_set.test_set
    test_labels = data_set.test_labels

    test_model = StringSVM("test_k5_lambda0.8", 5, 0.8)

    try:
        test_model.train(train_set, train_labels)
    except Exception as e:
        # re-raise exception
        raise e
    finally:
Example #43
0
def train():
	unrelated_vs_all = LinearSVC(penalty='l2', loss='squared_hinge', dual=True, tol=0.0001, C=1.0, multi_class='ovr', fit_intercept=True, intercept_scaling=1, class_weight=None, verbose=0, random_state=None, max_iter=1000)
	disagree_vs_all = LinearSVC(penalty='l2', loss='squared_hinge', dual=True, tol=0.0001, C=1.0, multi_class='ovr', fit_intercept=True, intercept_scaling=1, class_weight=None, verbose=0, random_state=None, max_iter=1000)
	agree_vs_all = LinearSVC(penalty='l2', loss='squared_hinge', dual=True, tol=0.0001, C=1.0, multi_class='ovr', fit_intercept=True, intercept_scaling=1, class_weight=None, verbose=0, random_state=None, max_iter=1000)
	# create the training set with lemmatized bodies
	training_set = DataSet("csv/train_stances_csc483583.csv", "csv/lemmatized_bodies.csv")
	# create an original set that has original bodies
	orig_set = DataSet("csv/train_stances_csc483583.csv", "csv/train_bodies.csv")
	stances = training_set.stances
	articles = training_set.articles
	orig_articles = orig_set.articles

	similarity_vectors = []
	similarity_labels = []
	agree_labels = []

	negation_vectors = []
	negation_labels = []

	count = 0
	stanceVal = 0

	for stance in stances:
		count += 1
		print("Training article number: " + str(count))
		headline = stance['Headline']
		bodyID = stance['Body ID']
		#get lemmatized body from DataSet created with lemmatized_bodies.csv
		body_lemmas = articles[bodyID]
		#get the original body from DataSet created with train_bodies.csv
		orig_body = orig_articles[bodyID]
		stance = stance['Stance']
		#get the scores from the features
		similarity_score, similar_sentences, max_similarity, negation_avg = similarity_feature(headline, body_lemmas, orig_body)
		neg = max_similarity.get('Negates')
		if neg is None:
			neg = 0

		max_score = max_similarity.get('Score')
		if max_score is None:
			max_score = 0.0

		similarity_vectors.append([similarity_score, max_score])
		if(stance == 'unrelated'):
			similarity_labels.append(1)
		else:
			similarity_labels.append(2)

		if(stance == 'agree'):
			agree_labels.append(1)
		else:
			agree_labels.append(2)

		negation_vectors.append([negation_avg])
		if(stance == 'disagree'):
			negation_labels.append(1)
		else:
			negation_labels.append(2)

	np_sim_vectors = np.array(similarity_vectors)
	np_sim_labels = np.array(similarity_labels)
	unrelated_vs_all.fit(np_sim_vectors, np_sim_labels)
	save_object(unrelated_vs_all, 'unrelated_vs_all.pkl')

	np_neg_vectors = np.array(negation_vectors)
	np_neg_labels = np.array(negation_labels)
	disagree_vs_all.fit(np_neg_vectors, np_neg_labels)
	save_object(disagree_vs_all, 'disagree_vs_all.pkl')

	np_agree_labels = np.array(agree_labels)
	agree_vs_all.fit(np_sim_vectors, np_agree_labels)
	save_object(agree_vs_all, 'agree_vs_all.pkl')
Example #44
0
'''

from __future__ import print_function
import numpy as np 
import tensorflow as tf
from keras.layers.convolutional import UpSampling2D
from keras import optimizers
import math
import os
os.sys.path.append('../')
import dataset.DataSet as DB
os.environ["CUDA_VISIBLE_DEVICES"] ="0"

dropout = 0.5

infodata = DB.Get5Class(ratio = 0.7, tag_array = [0, 0, 1, 1, 1], label_arry=[0, 0, 0, 0, 1], nlabel=0)
x_scale_train, x_scale_test, _, _ = infodata.GetScaleData()

N = infodata.GetSizeTrain()
D = infodata.GetDim()
num_classes = infodata.GetNumClass()

num_fc_1 = 500
learning_rate = 1e-4  
batch_size = 15

training_iters = 2100000
display_step = 14

# tf Graph input
x = tf.placeholder(tf.float32, [None, D])
Example #45
0
class Run:
    """
    A single run
    """

    num_samples = 200  # always a fixed number of data points
    noise_values = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
    perc_train_values = [10, 20, 30, 40, 50, 60, 70, 80, 90]  # percentage of training to test
    range_batch_size = [1, 30]
    learning_rates = [0.00001, 0.0001, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0]
    regularization_rates = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1.0, 3.0, 10.0]
    range_hidden = [0, 6]
    range_hidden_neuron = [1, 8]
    epochs_per_config = [50, 100, 200, 400]  # number of epochs to run each nnet configuration for
    activations_names = Classifier.activations_names
    regularization_names = Classifier.regularization_names
    fixed_feature_ids = None

    # for debug:
    """
    num_samples = 200  # always a fixed number of data points
    noise_values = [25]
    perc_train_values = [50]  # percentage of training to test
    range_batch_size = [10, 11]
    learning_rates = [0.1]
    regularization_rates = [3.0]
    range_hidden = [3, 3]
    range_hidden_neuron = [4, 4]
    epochs_per_config = [400]  # number of epochs to run each nnet configuration for
    activations_names = [Classifier.ACTIVATION_TANH]
    regularization_names = [Classifier.REGULARIZATION_NONE]
    fixed_feature_ids = [DataSet.FEATURE_X1SQ, DataSet.FEATURE_X2SQ, DataSet.FEATURE_SIN_X1, DataSet.FEATURE_SIN_X2]
    """

    PARAM_TYPE_INT = 'int'
    PARAM_TYPE_DOUBLE = 'double'
    PARAM_TYPE_STR = 'string'
    PARAM_TYPE_OUTPUT = 'output'

    MODE_FULL = 'full'  # a single directory, with randomized data for each run
    MODE_PSA_RUNS = 'psa_runs'  # a few randomized data, in separate directories

    fixed_noise = 25

    # mode_psa_datasets = DataSet.all_data_names
    mode_psa_datasets = [DataSet.DATA_SPIRAL, DataSet.DATA_XOR, DataSet.DATA_CIRCLE, DataSet.DATA_GAUSS]  # debug

    def __init__(self):
        self.data = None
        self.nn = None

    def randomize_data(self, dataset_name=None, noise=None):
        """
        Build dataset with randomized parameters
        :param dataset_name: dataset name. if None, will randomize
        :param noise: noise [0 .. 50]. if None will randomly pick
        :return: None
        """
        # dataset parameters
        dataset_name = random.choice(DataSet.all_data_names) if dataset_name is None else dataset_name
        noise = random.choice(self.noise_values) if noise is None else noise
        self.data = DataSet(dataset_name, self.num_samples, noise)

    def randomize_training_params(self):
        """
        Creates classifier and network with randomized parameters
        :return: None
        """
        self.nn = Classifier()
        self.nn.perc_train = random.choice(self.perc_train_values)
        self.nn.batch_size = random.randint(*self.range_batch_size)
        self.nn.learning_rate = random.choice(self.learning_rates)
        self.nn.neurons_per_layer = [random.randint(*self.range_hidden_neuron)
                                     for _ in range(random.randint(*self.range_hidden))]
        self.nn.activation_h = random.choice(self.activations_names)
        self.nn.regularization_type = random.choice(self.regularization_names)
        self.nn.regularization_rate = random.choice(self.regularization_rates)

        # select which input features to use
        if self.fixed_feature_ids is not None:
            self.nn.features_ids = self.fixed_feature_ids
        else:
            # random
            feature_bits = random.randint(0, pow(2, DataSet.NUM_FEATURES) - 1)  # random feature bitmask
            self.nn.features_ids = [i for i in range(DataSet.NUM_FEATURES) if feature_bits & pow(2, i) != 0]

        self.nn.build()

    @staticmethod
    def param_info_header():
        return ['label', 'name', 'type', 'info']

    def param_info(self):
        max_hidden = self.range_hidden[1]
        return ([['data', 'Data', self.PARAM_TYPE_STR, 'Which dataset do you want to use?'],
                 ['noise', 'Noise', self.PARAM_TYPE_INT, 'Noise'],
                 ['training_ratio', 'Training Ratio', self.PARAM_TYPE_INT, 'Ratio of training to test data'],
                 ['batch_size', 'Batch Size', self.PARAM_TYPE_INT, 'Batch Size']] +
                [[f, f, self.PARAM_TYPE_INT, f] for f in DataSet.feature_idx_to_name] +
                [['layer_count', 'Layers Count', self.PARAM_TYPE_INT, 'Number of hidden layers'],
                 ['neuron_count', 'Neurons Count', self.PARAM_TYPE_INT, 'Total number of neurons in hidden layers']] +
                [['H'+str(i), 'H'+str(i), self.PARAM_TYPE_INT, 'H'+str(i)] for i in range(1, max_hidden + 1)] +
                [['learning_rate', 'Learning rate', self.PARAM_TYPE_DOUBLE, 'Learning rate'],
                 ['activation', 'Activation', self.PARAM_TYPE_STR, 'Activation'],
                 ['regularization', 'Regularization', self.PARAM_TYPE_STR, 'Regularization'],
                 ['regularization_rate', 'Regularization rate', self.PARAM_TYPE_DOUBLE, 'Regularization rate']])

    def param_names(self):
        """
        returns array of string names for the parameters. matching 1-to-1 with param_str
        :return:
        """
        info = self.param_info()
        return [info[i][0] for i in range(len(info))]

    def param_str(self):
        """
        returns array of parameter values in string format. matching 1-to-1 to the param_names()
        :return:
        """
        layer_count = len(self.nn.neurons_per_layer)
        max_hidden = self.range_hidden[1]
        return ([self.data.dataset_name,
                 str(self.data.noise),
                 str(self.nn.perc_train),
                 str(self.nn.batch_size)] +
                ['1' if i in self.nn.features_ids else '0' for i in DataSet.all_features] +
                [str(layer_count),
                 str(sum(self.nn.neurons_per_layer))] +
                [str(self.nn.neurons_per_layer[i]) if i < layer_count else '0' for i in range(max_hidden)] +
                [str(self.nn.learning_rate),
                 self.nn.activation_h,
                 self.nn.regularization_type,
                 str(self.nn.regularization_rate)])

    def save_plot(self, filename):
        """
        Generates the plot using the current data and training state
        :param filename: output filename
        :return: None
        """
        # matplotlib.interactive(False)
        # plot the resulting classifier
        colormap = colors.ListedColormap(["#f59322", "#e8eaeb", "#0877bd"])
        x_min, x_max = -6, 6  # grid x bounds
        y_min, y_max = -6, 6  # grid y bounds
        xx, yy = np.meshgrid(np.linspace(x_min, x_max, 300),
                             np.linspace(y_min, y_max, 300))

        data_points = np.c_[xx.ravel(), yy.ravel()]
        data_grid = DataSet(None, len(data_points), 0, data_points=data_points)

        try:
            z = self.nn.predict_labels(data_grid.features).reshape(xx.shape)
        except Exception:
            z = np.zeros(np.shape(xx))
        fig = plt.figure(figsize=(4, 4), dpi=75)
        # plt.imshow(z, cmap=colormap, interpolation='nearest')
        plt.contourf(xx, yy, z, cmap=colormap, alpha=0.8)
        num_training = self.data.num_training(self.nn.perc_train)
        point_color = self.data.labels
        # plot training data points
        plt.scatter(self.data.points[:num_training, 0], self.data.points[:num_training, 1],
                    c=point_color[:num_training], edgecolors='w', s=40, cmap=colormap)
        # plot test data points
        plt.scatter(self.data.points[num_training:, 0], self.data.points[num_training:, 1],
                    c=point_color[num_training:], edgecolors='k', s=30, cmap=colormap)
        plt.xlim(x_min, x_max)
        plt.ylim(y_min, y_max)
        fig.savefig(filename)
        plt.close()

    @staticmethod
    def create_dir(dirname, clean=False):
        """
        Creates the directory if doesn't exist
        :param dirname: directory path
        :param clean: whether to clean the directory
        :return: None
        """
        if clean:
            shutil.rmtree(dirname, ignore_errors=True)

        if not os.path.exists(dirname):
            os.makedirs(dirname)

    def save_current_run(self, filename):
        try:
            yp = self.nn.predict_labels(self.data.features)
        except Exception:
            yp = 1 - self.data.labels
        if Config.SAVE_LABELS_NEG_POS:
            yp = [-1 if label == 0 else 1 for label in yp]
        header = 'label_pred'
        with open(filename, 'w') as f:
            f.write(header + '\n')
            for v in yp:
                f.write(str(v) + '\n')

    def calc_tpr_fpr(self):
        """
        calculates the true positive rate and false positive rate
        :return: [train_tpr, train_fpr, test_tpr, test_fpr]
        """
        labels_pred = self.nn.predict_labels(self.data.features)
        num_training = self.data.num_training(self.nn.perc_train)
        stats = []
        for population in ['train', 'test']:
            if population == 'train':
                y = self.data.labels[:num_training]  # true labels for training
                yp = labels_pred[:num_training]  # predicted labels for training
            else:  # population == 'test'
                y = self.data.labels[num_training:]  # true labels for test
                yp = labels_pred[num_training:]  # predicted labels for test

            num_p = list(y).count(1)  # number of positive labels
            num_n = list(y).count(0)  # number of negative labels
            num_tp = [l == 1 and lp == 1 for l, lp in zip(y, yp)].count(True)  # true positives
            num_fp = [l == 0 and lp == 1 for l, lp in zip(y, yp)].count(True)  # false positives
            # num_tn = [l == 0 and lp == 0 for l, lp in zip(y, yp)].count(True)  # true positives
            # num_fn = [l == 1 and lp == 0 for l, lp in zip(y, yp)].count(True)  # true positives
            tpr = 0 if num_tp == 0 else num_tp/num_p  # true positive rate
            fpr = 0 if num_fp == 0 else num_fp/num_n  # false positive rate
            # tnr = 0 if num_tn == 0 else num_tn/num_n  # true negative rate
            # fnr = 0 if num_fn == 0 else num_fn/num_p  # false negative rate
            stats = stats + [tpr, fpr]
        return stats

    def execute_runs(self, mode, num_runs, resume=False):
        """
        Executes several training runs, each with different parameters and saves the results
        :param mode: experiment mode.
            MODE_FULL randomizes all parameters including the input data, per run
            MODE_PSA_RUNS generates different datasets and runs the psa separately for each
        :param num_runs: number of runs per experiment to add to the output
        :param resume: whether to resume the runs. if True, the runs will continue until there are num_runs records.
        :return:
        """
        iter_index = -1
        while True:
            iter_index += 1
            if mode == self.MODE_FULL:
                if iter_index == 1:
                    break
                out_dir = '../output/full'
                self.create_dir(out_dir, clean=not resume)
                curr_data = None
            elif mode == self.MODE_PSA_RUNS:
                if iter_index >= len(self.mode_psa_datasets):
                    break
                noise = self.fixed_noise
                dataset_name = self.mode_psa_datasets[iter_index]
                out_dir = '../output/' + dataset_name + '_' + str(noise)
                self.create_dir(out_dir, clean=not resume)
                input_filename = out_dir + '/input.txt'
                if resume and os.path.exists(input_filename):
                    curr_data = DataSet.create_from_file(input_filename)
                    curr_data.noise = noise
                    curr_data.dataset_name = dataset_name
                    assert(curr_data.num_samples() == Run.num_samples)
                else:
                    curr_data = DataSet(dataset_name, num_samples=Run.num_samples, noise=noise)
                    curr_data.save_to_file(input_filename)
            else:
                print("Invalid mode:" + str(mode))
                return

            run_id = 0
            index_filename = out_dir + '/runsInfo.txt'
            print('index table: ' + index_filename)
            if resume and os.path.exists(index_filename):
                index_table = np.genfromtxt(index_filename, dtype=None, delimiter='\t', names=True, autostrip=False)
                if len(index_table) > 0 and 'ID' in index_table.dtype.fields:
                    run_id = index_table['ID'][-1] + 1
                print('Resuming from ID {}'.format(run_id))

            write_header = (not os.path.exists(index_filename)) or (not resume)
            # open the runsInfo.txt index file; its header row is written below if needed
            f_runs = open(index_filename, 'a+' if resume else 'w+')
            all_param_info = \
                ([['ID', 'ID', self.PARAM_TYPE_OUTPUT, 'ID'],
                  ['imagePath', 'Image path', self.PARAM_TYPE_OUTPUT, 'Output image path']] +
                 self.param_info() +
                 [['epoch', 'Epoch', self.PARAM_TYPE_INT, 'Number of Epochs (of processing all training data)'],
                  ['iteration', 'Iterations', self.PARAM_TYPE_INT, 'Number of Iterations (of processing a batch)'],
                  ['success', 'Success', self.PARAM_TYPE_OUTPUT, 'Whether the training finished successfully'],
                  ['total_time', 'Total time (ms)', self.PARAM_TYPE_OUTPUT, 'Total time at this epoch'],
                  ['mean_time', 'Mean time (ms)', self.PARAM_TYPE_OUTPUT, 'Mean time per epoch'],
                  ['train_loss', 'Training loss', self.PARAM_TYPE_OUTPUT, 'Training loss at epoch'],
                  ['test_loss', 'Test loss', self.PARAM_TYPE_OUTPUT, 'Test loss at epoch'],
                  ['train_TPR', 'TPR for train', self.PARAM_TYPE_OUTPUT, 'True positive rate for training data'],
                  ['train_FPR', 'FPR for train', self.PARAM_TYPE_OUTPUT, 'False positive rate for training data'],
                  # ['train_TNR', 'TNR for train', self.PARAM_TYPE_OUTPUT, 'True negative rate for training data'],
                  # ['train_FNR', 'FNR for train', self.PARAM_TYPE_OUTPUT, 'False negative Rate for training data'],
                  ['test_TPR', 'TPR for test', self.PARAM_TYPE_OUTPUT, 'True positive rate for test data'],
                  ['test_FPR', 'FPR for test', self.PARAM_TYPE_OUTPUT, 'False positive rate for test data'],
                  # ['test_TNR', 'TNR for test', self.PARAM_TYPE_OUTPUT, 'True negative rate for test data'],
                  # ['test_FNR', 'FNR for test', self.PARAM_TYPE_OUTPUT, 'False negative Rate for test data'],
                  ])

            # save the paramInfo.txt
            with open(out_dir + '/paramInfo.txt', 'w') as fpi:
                fpi.write('\t'.join(self.param_info_header()) + '\n')
                fpi.write('\n'.join(['\t'.join(i) for i in all_param_info]))

            # write the header row of runsInfo.txt
            if write_header:
                f_runs.write('\t'.join([i[0] for i in all_param_info]) + '\n')
                f_runs.flush()
            images_dir = out_dir + '/images'
            runs_dir = out_dir + '/runs'

            self.create_dir(images_dir, clean=not resume)
            self.create_dir(runs_dir, clean=not resume)

            while run_id < num_runs:
                if curr_data is None:
                    self.randomize_data()  # randomize the data every time
                else:
                    self.data = curr_data  # reuse the same data
                self.randomize_training_params()
                # print the parameters
                print('configuration (%d of %d)' % (int(run_id / len(self.epochs_per_config)) + 1,
                                                    int(num_runs / len(self.epochs_per_config))))
                print(', '.join(a[0] + ': ' + a[1] for a in zip(self.param_names(), self.param_str())))

                prev_step = 0
                total_time = 0
                for epoch in self.epochs_per_config:
                    curr_step = int(epoch * self.data.num_samples() / self.nn.batch_size)
                    # curr_step = epoch # in the online demo epoch == iter: https://github.com/tensorflow/playground/blob/67cf64ffe1fc53967d1c979d26d30a4625d18310/src/playground.ts#L898

                    time_start = time.time()

                    # train the network
                    success = True
                    try:
                        train_loss, test_loss = self.nn.train(self.data, restart=False, num_steps=curr_step - prev_step)
                    except Exception:
                        train_loss, test_loss = 1, 1
                        success = False

                    total_time += (time.time() - time_start) * 1000.0
                    mean_time = total_time / epoch

                    try:
                        train_tpr, train_fpr, test_tpr, test_fpr = self.calc_tpr_fpr()
                    except Exception:
                        train_tpr, train_fpr, test_tpr, test_fpr = 0, 1, 0, 1
                        success = False

                    print('(epoch: %d, step: %d), '
                          '(total_time: %g, mean_time: %g), '
                          '(training loss: %g, test loss: %g), '
                          '(train_tpr: %g, train_fpr: %g test_tpr: %g, test_fpr: %g)' %
                          (epoch, curr_step,
                           round(total_time, 2), round(mean_time, 2),
                           round(train_loss, 2), round(test_loss, 2),
                           round(train_tpr, 2), round(train_fpr, 2), round(test_tpr, 2), round(test_fpr, 2)))

                    image_filename = images_dir + '/' + str(run_id) + ".png"
                    run_filename = runs_dir + '/' + str(run_id) + ".txt"
                    self.save_plot(image_filename)
                    self.save_current_run(run_filename)

                    f_runs.write('\t'.join(
                        [str(run_id),
                         image_filename[len(out_dir)+1:]] +
                        self.param_str() +
                        [str(epoch),
                         str(curr_step),
                         str(success),
                         str(round(total_time, 3)),
                         str(round(mean_time, 3)),
                         str(round(train_loss, 3)),
                         str(round(test_loss, 3)),
                         str(round(train_tpr, 3)),
                         str(round(train_fpr, 3)),
                         str(round(test_tpr, 3)),
                         str(round(test_fpr, 3)),
                         ]) +
                                 '\n')
                    f_runs.flush()
                    prev_step = curr_step
                    run_id += 1
                    if run_id >= num_runs:
                        break
            f_runs.close()
Example #46
0
 def __init__(self, data_name):
     self.dataset = DataSet.load_dataset(name=data_name)
Example #47
0
        output_sentence = []

        for di in range(config.MAX_LENGTH):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == config.EOS_token:
                break
            else:
                output_sentence.append(output_lang.index2word[topi.item()])
            decoder_input = topi.squeeze().detach()

    return ' '.join(output_sentence)


dataset = DataSet(config.input_lang, config.target_lang, config.path)
dataset.prepareData()
encoder = EncoderRNN(dataset.input_lang.n_words, config.hidden_size)
decoder = AttnDecoderRNN(config.hidden_size, dataset.target_lang.n_words,
                         config.MAX_LENGTH)
encoder.load_state_dict(
    torch.load(config.curPath + 'annotation_encoder.pth',
               map_location=config.eval_device))
decoder.load_state_dict(
    torch.load(config.curPath + 'annotation_decoder.pth',
               map_location=config.eval_device))

code = sys.argv[1]
#code = input()

input_tensor = dataset.tensorFromSentence(code, dataset.input_lang)
Example #48
0
num_epochs = 50

feature_extract = True

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model_ft, input_size = initialize_model(model_name,
                                        num_classes,
                                        feature_extract,
                                        use_pretrained=True)
#print(model_ft)

# Create training and validation datasets
image_datasets = {
    x: DataSet(data_dir[x], input_size, num_classes, x == 'train')
    for x in ['train', 'val']
}
# Create training and validation dataloaders
dataloaders_dict = {
    x: torch.utils.data.DataLoader(image_datasets[x],
                                   batch_size=batch_size,
                                   shuffle=True,
                                   num_workers=16)
    for x in ['train', 'val']
}

for x in ['train', 'val']:
    print('dataset size', x, len(image_datasets[x]))
    print('loader size', x, len(dataloaders_dict[x]))
    # svm.test()

    # SIFT_SVM = SIFT_SupportVectorMachine()
    # SIFT_SVM.setInputs(
    # DataSet(vectors_path=None, images_path="datasets/Train/TrainImages", labels_path="datasets/Train/trainLbls.csv"),
    # DataSet(vectors_path=None, images_path="datasets/Validate/ValidationImages", labels_path="datasets/Validate/valLbls.csv"),
    # DataSet(vectors_path=None, images_path="datasets/Test/TestImages")
    # )
    #     SIFT_SVM.train()

    #     NN = NearestNeighbour()
    # NN.setInputs(
    # DataSet("datasets/Train/trainVectors.csv",
    # labels_path="datasets/Train/trainLbls.csv", normalize = False),
    # DataSet("datasets/Validate/valVectors.csv",
    # labels_path="datasets/Validate/valLbls.csv", normalize = False),
    # DataSet("datasets/Test/testVectors.csv", normalize = False)
    # )
    #     NN.validate()

    SIFT_NN = SIFT_NearestNeighbour()
    SIFT_NN.setInputs(
        DataSet(vectors_path=None,
                images_path="datasets/Train/TrainImages",
                labels_path="datasets/Train/trainLbls.csv"),
        DataSet(vectors_path=None,
                images_path="datasets/Validate/ValidationImages",
                labels_path="datasets/Validate/valLbls.csv"),
        DataSet(vectors_path=None, images_path="datasets/Test/TestImages"))
    SIFT_NN.validate()
class Model(object):
    def __init__(self, config):
        self.epoch_count = 0
        self.config = config
        self.data = DataSet(config)
        self.add_placeholders()
        self.summarizer = tf.summary
        self.net = Network(config, self.summarizer)
        self.optimizer = self.config.solver.optimizer
        self.y_pred = self.net.prediction(self.x, self.keep_prob)
        self.loss = self.net.loss(self.x, self.y, self.keep_prob)
        self.accuracy = self.net.accuracy(tf.nn.sigmoid(self.y_pred), self.y)
        self.patk = self.net.patk(self.y, self.y_pred)
        self.summarizer.scalar("accuracy", self.accuracy)
        self.summarizer.scalar("loss", self.loss)
        self.train = self.net.train_step(self.loss)
        self.saver = tf.train.Saver()
        self.init = tf.global_variables_initializer()
        self.local_init = tf.local_variables_initializer()

    def add_placeholders(self):
        self.x = tf.placeholder(tf.float32,
                                shape=[None, self.config.features_dim])
        self.y = tf.placeholder(tf.float32,
                                shape=[None, self.config.labels_dim])
        self.keep_prob = tf.placeholder(tf.float32)
        #self.k = int()

    def run_epoch(self, sess, data, summarizer, epoch):
        err = list()
        i, p_k, y_pred, Y = 0, None, None, None
        step = epoch
        merged_summary = self.summarizer.merge_all()
        for X, Y, tot in self.data.next_batch(data):
            feed_dict = {
                self.x: X,
                self.y: Y,
                self.keep_prob: self.config.solver.dropout
            }
            if not self.config.load:
                summ, _, y_pred, loss = sess.run([
                    merged_summary, self.train,
                    tf.nn.sigmoid(self.y_pred), self.loss
                ],
                                                 feed_dict=feed_dict)
                err.append(loss)
                output = "Epoch ({}) Batch({}) - Loss : {}".format(
                    self.epoch_count, i, loss)
                with open(
                        "../stdout/{}_train.log".format(
                            self.config.project_name), "a+") as log:
                    log.write(output + "\n")
                print("   {}".format(output), end='\r')
            step = int(epoch * tot + i)
            summarizer.add_summary(summ, step)
            i += 1
        #p_k = patk(predictions=y_pred, labels=Y)
        return np.mean(err), step, p_k

    def run_eval(self, sess, data, summary_writer=None, step=0):
        y, y_pred, loss_, metrics, p_k = list(), list(), 0.0, None, None
        accuracy, loss = 0.0, 0.0
        merged_summary = self.summarizer.merge_all()
        i = 0
        for X, Y, tot in self.data.next_batch(data):
            feed_dict = {self.x: X, self.y: Y, self.keep_prob: 1}
            if i == tot - 1 and summary_writer is not None:
                if data == "validation":
                    summ, loss_ = sess.run([merged_summary, self.loss],
                                           feed_dict=feed_dict)
                else:
                    summ, loss_, accuracy_val = sess.run(
                        [merged_summary, self.loss, self.accuracy],
                        feed_dict=feed_dict)
                summary_writer.add_summary(summ, step)
            else:
                if data == "validation":
                    loss_, Y_pred = sess.run(
                        [self.loss, tf.nn.sigmoid(self.y_pred)],
                        feed_dict=feed_dict)
                    p_k = patk(predictions=Y_pred, labels=Y)
                else:
                    loss_, Y_pred, accuracy_val = sess.run(
                        [self.loss,
                         tf.nn.sigmoid(self.y_pred), self.accuracy],
                        feed_dict=feed_dict)
                    metrics = evaluate(predictions=Y_pred, labels=Y)
                    accuracy += accuracy_val  #metrics['accuracy']
            loss += loss_
            i += 1
        if data == "test":
            X, Y = self.data.get_test()
            p_k = patk(
                sess.run(tf.nn.sigmoid(self.y_pred),
                         feed_dict={
                             self.x: X,
                             self.y: Y,
                             self.keep_prob: 1
                         }), Y
            )  # sess.run(self.patk, feed_dict={self.x: X, self.y: Y, self.keep_prob: 1}) #
        return loss / i, accuracy / self.config.batch_size, metrics, p_k

    def add_summaries(self, sess):
        if self.config.load or self.config.debug:
            path_ = "../results/tensorboard"
        else:
            path_ = "../bin/results/tensorboard"
        summary_writer_train = tf.summary.FileWriter(path_ + "/train",
                                                     sess.graph)
        summary_writer_val = tf.summary.FileWriter(path_ + "/val", sess.graph)
        summary_writer_test = tf.summary.FileWriter(path_ + "/test",
                                                    sess.graph)
        summary_writers = {
            'train': summary_writer_train,
            'val': summary_writer_val,
            'test': summary_writer_test
        }
        return summary_writers

    def fit(self, sess, summarizer):
        '''
         - Patience Method :
         + Train for a set number of epochs and, at the configured frequency, evaluate the model on validation data.
         + If the validation loss increases, decrease the patience counter.
         + If patience drops below a certain threshold, divide the learning rate by 10 and switch back to the best saved model.
         + If the learning rate falls below a certain threshold, stop training.
         (A standalone sketch of this schedule appears after this example.)
        '''
        sess.run(self.init)
        sess.run(self.local_init)
        max_epochs = self.config.max_epochs
        patience = self.config.patience
        patience_increase = self.config.patience_increase
        improvement_threshold = self.config.improvement_threshold
        best_validation_loss = 1e6
        self.epoch_count = 0
        best_step, losses, learning_rate = -1, list(), self.config.solver.learning_rate
        while self.epoch_count < max_epochs:
            if (self.config.load == True):
                break
            start_time = time.time()
            average_loss, tr_step, train_patk = self.run_epoch(
                sess, "train", summarizer['train'], self.epoch_count)
            duration = time.time() - start_time
            if not self.config.debug:
                if self.epoch_count % self.config.epoch_freq == 0:
                    val_loss, _, _, val_patk = self.run_eval(
                        sess, "validation", summarizer['val'], tr_step)
                    test_loss, _, metrics, patk = self.run_eval(
                        sess, "test", summarizer['test'], tr_step)
                    output = "=> Training : Loss = {:.2f} | Validation : Loss = {:.2f} | Test : Loss = {:.2f}\n=> Training : P@K = {} | Validation : P@K = {} | Test : P@K  {}".format(
                        average_loss, val_loss, test_loss, train_patk,
                        val_patk, patk)
                    with open("../stdout/validation.log", "a+") as f:
                        output_ = output + "\n=> Test : Coverage = {}, Average Precision = {}, Micro Precision = {}, Micro Recall = {}, Micro F Score = {}".format(
                            metrics['coverage'], metrics['average_precision'],
                            metrics['micro_precision'],
                            metrics['micro_recall'], metrics['micro_f1'])
                        output_ += "\n=> Test : Macro Precision = {}, Macro Recall = {}, Macro F Score = {}\n=> P@K = {}\n\n".format(
                            metrics['macro_precision'],
                            metrics['macro_recall'], metrics['macro_f1'], patk)
                        f.write(output_)
                    print(output)
                    if self.config.have_patience:
                        if val_loss < best_validation_loss:
                            if val_loss < best_validation_loss * improvement_threshold:
                                self.saver.save(
                                    sess, self.config.ckptdir_path +
                                    "/model_best.ckpt")
                                best_validation_loss = val_loss
                                best_step = self.epoch_count
                        else:
                            if patience < 1:
                                self.saver.restore(
                                    sess, self.config.ckptdir_path +
                                    "/model_best.ckpt")
                                if learning_rate <= 0.00001:
                                    print("=> Breaking by Patience Method")
                                    break
                                else:
                                    learning_rate /= 10
                                    patience = self.config.patience
                                    print(
                                        "\033[91m=> Learning rate dropped to {}\033[0m"
                                        .format(learning_rate))
                            else:
                                patience -= 1
            self.epoch_count += 1
        print("=> Best epoch : {}".format(best_step))
        if self.config.debug == True:
            sys.exit()
        test_loss, test_accuracy, test_metrics, p_k = self.run_eval(
            sess, "test", summarizer['test'], tr_step)
        returnDict = {
            "test_loss": test_loss,
            "test_accuracy": test_accuracy,
            'test_metrics': test_metrics,
            "test_pak": p_k
        }
        if self.config.debug == False:
            returnDict["train"] = best_validation_loss
        return returnDict
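For reference, a minimal standalone sketch of the patience schedule described in the fit() docstring above (illustrative only; parameter names and default values are assumptions, not taken from this project):

def patience_schedule(val_losses, patience=5, lr=0.01, min_lr=1e-5, decay=10.0):
    # Return (stopping_epoch, final_lr) for a sequence of validation losses.
    best, budget = float('inf'), patience
    for epoch, loss in enumerate(val_losses):
        if loss < best:
            best = loss              # improvement: remember the best loss
        elif budget > 0:
            budget -= 1              # no improvement: spend one unit of patience
        elif lr > min_lr:
            lr /= decay              # patience exhausted: decay the learning rate
            budget = patience        # and restore the patience budget
        else:
            return epoch, lr         # learning rate already minimal: stop early
    return len(val_losses), lr

The fit() method above additionally saves and restores the best checkpoint around each decay, which this sketch omits.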
Example #51
0
            cur_pix_frames = []
            for i, frame_data in enumerate(self.temp_data):
                printProgressBar(i + self.temp_data.start_frame,
                                 self.temp_data.end_frame,
                                 'Generating temp history plot.')
                cur_pix_frames.append(i + self.temp_data.start_frame)
                cur_pix_history.append(frame_data[pixel])
            pixelTempHistory.append(cur_pix_history)
            frame.append(cur_pix_frames)

        fig6, ax6 = plt.subplots()
        fig6.suptitle('Pixel {} Temperature History:\n'.format(self.pixels))
        ax6.set_xlabel('Frame')
        ax6.set_ylabel('Temperature')
        for history, frames, pixel in zip(pixelTempHistory, frame,
                                          self.pixels):
            ax6.plot(frames,
                     history,
                     label=str(pixel[1]) + ',' + str(pixel[0]))

        plt.legend()


if __name__ == '__main__':
    dataset = DataSet(
        '/home/troy/thermography/4-20_corrected/thermal_cam_temps.npy',
        end_frame=27000)
    plotter = Plots(dataset, [(50, 100), (123, 99)], threshold=500)
    plotter.plot3DBubble()
    plt.show()
Example #52
0
def main(operation='train', code=None):
    step = 30
    input_size = 73
    train_steps = 1000000
    batch_size = 512
    learning_rate = 0.0002
    hidden_size = 16
    nclasses = 1
    validation_size = 700
    keep_rate = 0.7
    selector = [
        "ROCP", "OROCP", "HROCP", "LROCP", "MACD", "RSI", "VROCP", "BOLL",
        "MA", "VMA", "PRICE_VOLUME", "CROSS_PRICE"
    ]

    input_shape = [step,
                   input_size]  # [length of time series, length of feature]

    if operation == 'train':
        dataset_dir = "./dataset"
        train_features = []
        train_labels = []
        val_features = []
        val_labels = []
        for filename in os.listdir(dataset_dir):
            if filename != '000001.csv':
                continue
            print("processing file: " + filename)
            filepath = dataset_dir + "/" + filename
            raw_data = read_sample_data(filepath)
            moving_features, moving_labels = extract_feature(
                raw_data=raw_data,
                selector=selector,
                window=input_shape[0],
                with_label=True,
                flatten=False)
            train_features.extend(moving_features[:-validation_size])
            train_labels.extend(moving_labels[:-validation_size])
            val_features.extend(moving_features[-validation_size:])
            val_labels.extend(moving_labels[-validation_size:])

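        # Transpose the stacked samples with axes (0, 2, 1) so each sample ends up
        # as [window, feature], matching input_shape = [step, input_size].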
        train_features = numpy.transpose(numpy.asarray(train_features),
                                         [0, 2, 1])
        train_labels = numpy.asarray(train_labels)
        train_labels = numpy.reshape(train_labels, [train_labels.shape[0], 1])
        val_features = numpy.transpose(numpy.asarray(val_features), [0, 2, 1])
        val_labels = numpy.asarray(val_labels)
        val_labels = numpy.reshape(val_labels, [val_labels.shape[0], 1])
        train_set = DataSet(train_features, train_labels)
        val_set = DataSet(val_features, val_labels)

        # raw_data = read_sample_data("toy_stock.csv")
        # moving_features, moving_labels = extract_feature(raw_data=raw_data, selector=selector, window=input_shape[0],
        #                                                 with_label=True, flatten=False)
        # moving_features = numpy.asarray(moving_features)
        # moving_features = numpy.transpose(moving_features, [0, 2, 1])
        # moving_labels = numpy.asarray(moving_labels)
        # moving_labels = numpy.reshape(moving_labels, [moving_labels.shape[0], 1])
        # train_set = DataSet(moving_features[:-validation_size], moving_labels[:-validation_size])
        # val_set = DataSet(moving_features[-validation_size:], moving_labels[-validation_size:])

        trader = SmartTrader(step, input_size, learning_rate, hidden_size,
                             nclasses)
        trader.build_graph()
        train(trader,
              train_set,
              val_set,
              train_steps,
              batch_size=batch_size,
              keep_rate=keep_rate)
    elif operation == "predict":
        predict_file_path = "./dataset/000001.csv"
        if code is not None:
            predict_file_path = "./dataset/%s.csv" % code
        print("processing file %s" % predict_file_path)
        raw_data = read_sample_data(predict_file_path)
        moving_features, moving_labels = extract_feature(raw_data=raw_data,
                                                         selector=selector,
                                                         window=input_shape[0],
                                                         with_label=True,
                                                         flatten=False)
        moving_features = numpy.asarray(moving_features)
        moving_features = numpy.transpose(moving_features, [0, 2, 1])
        moving_labels = numpy.asarray(moving_labels)
        moving_labels = numpy.reshape(moving_labels,
                                      [moving_labels.shape[0], 1])
        # train_set = DataSet(moving_features[:-validation_size], moving_labels[:-validation_size])
        val_set = DataSet(moving_features[-validation_size:],
                          moving_labels[-validation_size:])
        predict(val_set,
                step=step,
                input_size=input_size,
                learning_rate=learning_rate,
                hidden_size=hidden_size,
                nclasses=nclasses)

    else:
        print("Operation not supported.")
Example #53
0
def main(args):

    check_path(args)

    # All CIFAR-10 classes (10 in total)
    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
               'ship', 'truck')

    # Dataset
    data_builder = DataBuilder(args)
    dataSet = DataSet(data_builder.train_builder(),
                      data_builder.test_builder(), classes)

    # Select the model
    if args.lenet:
        net = LeNet()
        model_name = args.name_le
    elif args.vgg:
        net = Vgg16_Net()
        model_name = args.name_vgg
    elif args.resnet18:
        net = ResNet18()
        model_name = args.name_res18
    elif args.resnet34:
        net = ResNet34()
        model_name = args.name_res34
    elif args.resnet50:
        net = ResNet50()
        model_name = args.name_res50
    elif args.resnet101:
        net = ResNet101()
        model_name = args.name_res101
    elif args.resnet152:
        net = ResNet152()
        model_name = args.name_res152
    else:
        # Without a fallback, `net` and `model_name` would be undefined below.
        raise ValueError("No model selected; pass one of the model flags.")

    # Cross-entropy loss function
    criterion = nn.CrossEntropyLoss()

    # SGD optimizer
    optimizer = optim.SGD(net.parameters(),
                          lr=args.learning_rate,
                          momentum=args.sgd_momentum,
                          weight_decay=args.weight_decay)

    # Cosine annealing learning-rate schedule
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=150)

    # Path where the model parameters are saved
    model_path = os.path.join(args.model_path, model_name)

    # Start training
    if args.do_train:
        print("Training...")

        trainer = Trainer(net, criterion, optimizer, scheduler,
                          dataSet.train_loader, dataSet.test_loader,
                          model_path, args)

        trainer.train(epochs=args.epoch)
        # t.save(net.state_dict(), model_path)

    # Run evaluation: if --do_train was also given, test with the freshly trained model;
    # otherwise load the saved model and test with it.
    if args.do_eval:
        if not args.do_train and not os.path.exists(model_path):
            print(
                "Sorry, there's no saved model yet, you need to train first.")
            return
        # --do_eval
        if not args.do_train:
            checkpoint = t.load(model_path)
            net.load_state_dict(checkpoint['net'])
            accuracy = checkpoint['acc']
            epoch = checkpoint['epoch']
            print("Using saved model, accuracy : %f  epoch: %d" %
                  (accuracy, epoch))
        tester = Tester(dataSet.test_loader, net, args)
        tester.test()

    if args.show_model:
        if not os.path.exists(model_path):
            print(
                "Sorry, there's no saved model yet, you need to train first.")
            return
        show_model(args)

    if args.do_predict:
        device = t.device("cuda" if t.cuda.is_available() else "cpu")
        checkpoint = t.load(model_path, map_location=device)
        net.load_state_dict(checkpoint['net'])
        predictor = Predictor(net, classes)
        img_path = 'test'
        img_name = [os.path.join(img_path, x) for x in os.listdir(img_path)]
        for img in img_name:
            predictor.predict(img)
Example #54
0
from params import *
from least_squares import LeastSquares
from regularized_least_squares import RegularizedLeastSquares
from lasso import Lasso
from robust_regression import RobustRegression
from bayesian_regression import BayesianRegression
from dataset import DataSet
from common import *

result_sub_path = result_path + 'part1c/'
#get data
gt_data = DataSet(gt_x_path, gt_y_path)
gt_x, gt_y = gt_data.x, gt_data.y
gt_phi = generate_polynomial_features(gt_x)

sample_percent = [10, 25, 50, 75]
num_sub_sample = 5

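# Draw num_sub_sample independent random sub-samples at each sampling percentage.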
sample_data_list = []
for i in range(len(sample_percent)):
    for j in range(num_sub_sample):
        sample_data = DataSet(sample_x_path, sample_y_path, percent_sample=sample_percent[i])
        sample_data_list.append(sample_data)

def deploy_least_square(sample_id):
    sample_data = sample_data_list[sample_id]
    sample_x, sample_y = sample_data.x, sample_data.y
    sample_phi = generate_polynomial_features(sample_x)

    title = 'LEAST SQUARES'
    log = open(result_sub_path + title + '_' + str(sample_id) + '.txt', 'w')
Example #55
0
import torch
import numpy as np
import cv2  # used by convertScaleAbs in test()
from dataset import DataSet
from loss2 import MultiBoxLoss
from ssd300 import SSD
from ssdpytorch.utils.augmentations import SSDAugmentation
from torch.utils.data import DataLoader
from torch.autograd import Variable
import matplotlib.pyplot as plt
from PIL import ImageDraw


model = SSD()
model.cuda()
model.load_state_dict(torch.load('ssd.pth'))
test_dataset = DataSet('MiniSKU/test','MiniSKU/annotations/test.csv', SSDAugmentation(scale_only=True))
test_loader = DataLoader(test_dataset, 1, num_workers=2, collate_fn=test_dataset.collate_fn)

def test(n=5):
    d = []
    for i, (img, boxes, labels) in enumerate(test_loader):
        predicted_locs, predicted_scores = model(img[0].unsqueeze(0).cuda())
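        # Decode the raw SSD outputs into per-image boxes, labels and scores
        # (min_score, max_overlap and top_k control confidence filtering and NMS).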
        det_boxes, det_labels, det_scores = model.detect_objects(predicted_locs, predicted_scores, min_score=0.2,
                                                                 max_overlap=0.5, top_k=200)
        det_boxes = det_boxes[0].to('cpu')
        img = img[0].permute(1,2,0)
        img = torch.squeeze(img)
        print(img.shape)
        dist1 = cv2.convertScaleAbs(img.numpy())
        w, h, _ = dist1.shape
        origin_dims = torch.FloatTensor([w,h,w,h]).unsqueeze(0)
Example #56
0
    def moving_extract(self,
                       window=30,
                       date=None,
                       open_prices=None,
                       close_prices=None,
                       high_prices=None,
                       low_prices=None,
                       volumes=None,
                       N_predict=1,
                       flatten=True):

        self.extract(open_prices=open_prices,
                     close_prices=close_prices,
                     high_prices=high_prices,
                     low_prices=low_prices,
                     volumes=volumes)

        feature_arr = numpy.asarray(self.feature)
        p = 0
        rows = feature_arr.shape[0]
        print("feature dimension: %s" % rows)
        all_data = DataSet([], [], [])
        predict = DataSet([], [], [])

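        # Slide a fixed-size window across time: windows with enough future prices
        # become labelled samples in all_data; the trailing windows, which cannot
        # be labelled yet, go into the predict set.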
        while p + window <= feature_arr.shape[1]:
            # The last self.prospective days cannot produce complete labels
            if feature_arr.shape[1] - (p + window) >= N_predict:
                x = feature_arr[:, p:p + window]
                # Build the label from the closing prices of the next self.prospective days
                y = make_label(close_prices, p + window, self.prospective)
                d = list(date[p + window:p + window + self.prospective])

                if flatten:
                    x = x.flatten("F")
                all_data.features.append(numpy.nan_to_num(x))
                all_data.labels.append(y)
                all_data.date.append(d)

            else:
                x = feature_arr[:, p:p + window]
                if flatten:
                    x = x.flatten("F")
                predict.features.append(numpy.nan_to_num(x))
                predict.date.append(date[p + window - 1])
                predict.closing_price.append(close_prices[p + window - 1])
                predict.last_label.append(close_prices[p + window - 2])
            p += 1

        all_data._features = numpy.asarray(all_data.features)
        all_data._labels = numpy.asarray(all_data.labels)
        all_data._date = numpy.asarray(all_data.date)
        predict._features = numpy.asarray(predict.features)
        predict._date = numpy.asarray(predict.date)
        predict._last_label = numpy.asarray(predict.last_label)
        predict._closing_price = numpy.asarray(predict.closing_price)

        return all_data, predict
Example #57
0
def test():

    BATCH_SIZE = 1
    with tf.Graph().as_default():
        dataset = DataSet(BATCH_SIZE)
        keep_conv = tf.placeholder(tf.float32)
        images, depths, invalid_depths, features = dataset.csv_inputs(
            TEST_FILE)
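        # Two-stage model: a coarse depth prediction that is refined by a second network.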
        coarse = model.inference(images, trainable=False)
        logits = model.inference_refine(images,
                                        coarse,
                                        keep_conv,
                                        trainable=False)

        loss1 = model.loss(coarse, depths, invalid_depths)
        loss2 = model.loss(logits, depths, invalid_depths)
        init_op = tf.global_variables_initializer()  # changed

        # Session
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=LOG_DEVICE_PLACEMENT))  # do not log device placement
        sess.run(init_op)

        coarse_params = {}  # create a new dict
        refine_params = {}

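        # Partition variables into coarse/refine groups by scope name so each
        # sub-network can be restored from its own checkpoint.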
        for variable in tf.all_variables():
            variable_name = variable.name
            #print("parameter: %s" % (variable_name))
            if variable_name.find("/") < 0 or variable_name.count("/") != 1:
                continue
            if variable_name.find('coarse') >= 0:
                coarse_params[variable_name] = variable
            #print("parameter: %s" %(variable_name))
            if variable_name.find('fine') >= 0:
                refine_params[variable_name] = variable

        saver_coarse = tf.train.Saver(coarse_params)
        saver_refine = tf.train.Saver(refine_params)

        # fine-tune
        if FINE_TUNE:
            coarse_ckpt = tf.train.get_checkpoint_state(COARSE_DIR)
            if coarse_ckpt and coarse_ckpt.model_checkpoint_path:
                #print(coarse_ckpt.model_checkpoint_path)
                saver_coarse.restore(sess, coarse_ckpt.model_checkpoint_path)
            refine_ckpt = tf.train.get_checkpoint_state(REFINE_DIR)
            if refine_ckpt and refine_ckpt.model_checkpoint_path:
                #print(refine_ckpt.model_checkpoint_path)
                saver_refine.restore(sess, refine_ckpt.model_checkpoint_path)

        # test
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        index = 0
        ls1 = []
        ls2 = []
        print('\n', '---------Examples---------:')
        for step in range(NumOfTest):
            #print('-----------------------------------------')
            loss_value1, loss_value2, logits_val, coarse_val, images_val, features_ = sess.run(
                [loss1, loss2, logits, coarse, images, features],
                feed_dict={keep_conv: 1})
            ls1.append(loss_value1)
            ls2.append(loss_value2)
            if step % 1 == 0:
                index = index + 1
                print(features_, 'Coarse losses:', loss_value1,
                      'Refine losses:', loss_value2, '\n')
                output_save(coarse_val, logits_val, images_val, index,
                            "data/test")
        ls1m = np.mean(ls1)
        ls2m = np.mean(ls2)
        print('---------Testing Results--------:')
        print('Coarse image mean losses:', ls1m)
        print('Refine image mean losses:', ls2m)
        coord.request_stop()  # ask all threads to stop
        coord.join(threads)  # wait for all threads to finish
        sess.close()
Example #58
0
def main(args):

    config_file = args.config
    test = args.test

    cfg = Config(config_file)

    tr = None
    if test is None:
        tr = DataSet(cfg.tr_data, cfg)
        te = DataSet(cfg.te_data, cfg, sub_sample=1)
        tr0 = DataSet([cfg.tr_data[0]], cfg, sub_sample=1)
        cfg.att = te.sz[1]
    else:
        if test == 'te':
            te = DataSet([cfg.te_data[0]], cfg)
        else:
            te = DataSet([cfg.tr_data[0]], cfg)
        cfg.att = te.sz[1]

    iterations = 10000
    loop = cfg.loop
    print "input attribute", cfg.att, "LR", cfg.lr, 'feature', cfg.feature_len

    n_att = cfg.att
    # n_length = cfg.feature_len
    n_hidden = cfg.nodes[1][-1]
    n_output = cfg.num_output
    hidden0 = ToTensor(np.ones(n_hidden).astype(np.float32))

    mrnn = RNN(n_att, cfg.nodes, n_output, cfg.lr)

    if test:
        mrnn.load_state_dict(torch.load(cfg.netTest[:-3]))
        tr_loss, tr_median = run_test(mrnn, te, cfg, hidden0)
        for a in range(len(tr_loss)):
            print a, tr_loss[a], tr_median[a]

        exit(0)

    if cfg.renetFile:
        mrnn.load_state_dict(torch.load(cfg.renetFile[:-3]))

    t00 = datetime.datetime.now()

    T = 0
    T_err = 0
    for a in range(iterations):

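        # Consume prepared batches from the DataSet until it is exhausted, reshaping
        # each batch to [batch, feature_len, att] before feeding the RNN.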
        tr_pre_data = tr.prepare(multi=1)
        while tr_pre_data:
            for b in tr_pre_data:
                length = len(b[0])
                x = ToTensor(b[0].reshape(length, cfg.feature_len,
                                          cfg.att).astype(np.float32))
                y = ToTensor(b[1].astype(np.float32))
                err = mrnn.train(y, x, hidden0)
                if a % loop == 0 and a > 0:
                    t1 = datetime.datetime.now()
                    print a, (t1 - t00).total_seconds() / 3600.0, T_err / T
                    T_err = 0
                    T = 0
                    torch.save(mrnn.state_dict(), cfg.netFile[:-3])
                T_err += err
                T += 1

            tr_pre_data = tr.get_next()
Example #59
0
def main():
    # Load json file
    ds_task1 = DataSet('dataset/entity.json')
    # ds_task1.json_print()
    hash_type = ds_task1.entity_categories()
Example #60
0
def train(REFINE_TRAIN):
    BATCH_SIZE = 8

    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)
        dataset = DataSet(BATCH_SIZE)
        keep_conv = tf.placeholder(tf.float32)
        images, depths, invalid_depths, features = dataset.csv_inputs(
            TRAIN_FILE)

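        # REFINE_TRAIN trains the refinement network on top of a frozen coarse
        # network; otherwise only the coarse network is trained.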
        if REFINE_TRAIN:
            print("refine train.")
            coarse = model.inference(images, trainable=False)
            logits = model.inference_refine(images, coarse,
                                            keep_conv)  # ??? what is this parameter
        else:
            print("coarse train.")
            logits = model.inference(images)

        loss = model.loss(logits, depths, invalid_depths)
        train_op = op.train(loss, global_step, BATCH_SIZE)
        init_op = tf.global_variables_initializer()  # changed

        # Session
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=LOG_DEVICE_PLACEMENT))  # do not log device placement
        sess.run(init_op)

        # parameters
        coarse_params = {}  # create a new dict
        refine_params = {}

        if REFINE_TRAIN:
            for variable in tf.all_variables():
                variable_name = variable.name
                print("parameter: %s" % (variable_name))
                if variable_name.find("/") < 0 or variable_name.count(
                        "/") != 1:
                    continue
                if variable_name.find('coarse') >= 0:
                    coarse_params[variable_name] = variable
                print("parameter: %s" % (variable_name))
                if variable_name.find('fine') >= 0:
                    refine_params[variable_name] = variable
        else:
            for variable in tf.trainable_variables():
                variable_name = variable.name
                print("parameter: %s" % (variable_name))
                if variable_name.find("/") < 0 or variable_name.count(
                        "/") != 1:
                    continue
                if variable_name.find('coarse') >= 0:
                    coarse_params[variable_name] = variable
                if variable_name.find('fine') >= 0:
                    refine_params[variable_name] = variable

        # define saver
        print(coarse_params)
        saver_coarse = tf.train.Saver(coarse_params)

        if REFINE_TRAIN:
            saver_refine = tf.train.Saver(refine_params)

        # fine-tune
        if FINE_TUNE:
            coarse_ckpt = tf.train.get_checkpoint_state(COARSE_DIR)

            if coarse_ckpt and coarse_ckpt.model_checkpoint_path:
                print(coarse_ckpt.model_checkpoint_path)
                saver_coarse.restore(sess, coarse_ckpt.model_checkpoint_path)
            else:
                print("No Pretrained coarse Model.")

            if REFINE_TRAIN:
                refine_ckpt = tf.train.get_checkpoint_state(REFINE_DIR)
                if refine_ckpt and refine_ckpt.model_checkpoint_path:
                    saver_refine.restore(sess,
                                         refine_ckpt.model_checkpoint_path)
                else:
                    print("No Pretrained refine Model.")

        # train
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        lossli = []
        lossli1 = []
        for step in range(MAX_STEPS):
            index = 0
            lossli = []
            print('-------------------------------')
            for i in range(3000):
                _, loss_value, logits_val, images_val = sess.run(
                    [train_op, loss, logits, images],
                    feed_dict={keep_conv: 0.8})
                if i % 100 == 0:
                    print('[Epoch]:', step, '[iteration]:', i,
                          '[Train losses]:', loss_value)
                lossli.append(loss_value)
                index += 1
            lossli1.append(np.mean(lossli))
            if step % 5 == 0 or (step + 1) == MAX_STEPS:  # also checkpoint on the final epoch
                if REFINE_TRAIN:
                    refine_checkpoint_path = REFINE_DIR + '/model.ckpt'
                    saver_refine.save(sess,
                                      refine_checkpoint_path,
                                      global_step=step)
                else:
                    coarse_checkpoint_path = COARSE_DIR + '/model.ckpt'
                    saver_coarse.save(sess,
                                      coarse_checkpoint_path,
                                      global_step=step)
        plt.figure()
        plt.plot(lossli1)
        plt.xlabel("Epoch")
        plt.ylabel("Train_loss")
        plt.title("Train_Loss for Each Epoch")
        plt.savefig("train_loss.jpg")  # save after labelling so the labels appear in the image
        coord.request_stop()  # ask all threads to stop
        coord.join(threads)  # wait for all threads to finish
        sess.close()