Example #1
def train(net, lr, trainloader, epoch):
  """
  Train SSD

  @args
    net: (nn.Module) network
    lr: (float) learning rate
    trainloader: (DataLoader) dataloader
    epoch: (int) training epoch
  """
  net.train()
  optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4)
  criterion = MultiBoxLoss(num_classes=config[args.dataset]['num_classes']+1)

  progress_bar = ProgressBar(total=len(trainloader))
  train_loss = 0
  torch.set_printoptions(threshold=10000)
  for batch_idx, (images, loc_targets, conf_targets) in enumerate(trainloader):
    images = Variable(images.cuda())
    loc_targets = Variable(loc_targets.cuda())
    conf_targets = Variable(conf_targets.cuda())

    optimizer.zero_grad()
    loc_preds, conf_preds = net(images)
    loc_loss, conf_loss, loss = criterion(loc_preds, loc_targets, conf_preds, conf_targets)
    loss.backward()
    optimizer.step()

    writer.add_scalar('train/loss_loc', loc_loss, batch_idx + epoch * len(trainloader))
    writer.add_scalar('train/loss_conf', conf_loss, batch_idx + epoch * len(trainloader))
    writer.add_scalar('train/loss_total', loss, batch_idx + epoch * len(trainloader))

    train_loss += loss.item()
    progress_bar.move(leftmsg="training epoch " + str(epoch), rightmsg="loss: %.6f" % (train_loss/(batch_idx+1)))
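The ProgressBar used above comes from a utils module that is not shown. A minimal stand-in consistent with how it is constructed (total=...) and driven (move(leftmsg=..., rightmsg=...)) might look like the following sketch; it illustrates the assumed interface, not the project's actual class.

import sys

class ProgressBar:
    def __init__(self, total):
        self.total = total    # number of batches expected
        self.current = 0      # batches processed so far

    def move(self, leftmsg="", rightmsg=""):
        # Advance one step and redraw a single status line in place.
        self.current += 1
        filled = int(40 * self.current / self.total)
        bar = "#" * filled + "-" * (40 - filled)
        sys.stdout.write("\r%s [%s] %s" % (leftmsg, bar, rightmsg))
        sys.stdout.flush()
        if self.current >= self.total:
            sys.stdout.write("\n")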
Example #2
def export_to_xml_in_folder(source, destination=Defaults.MUNIN_XML_FOLDER):
    """
    Calls "rrdtool dump" to convert RRD database files in "source" folder to XML representation
    Converts all *.rrd files in source folder
    """
    assert os.path.exists(source)
    try:
        os.makedirs(destination)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    filelist = [("", os.path.join(source, file)) for file in os.listdir(source) if file.endswith(".rrd")]
    nb_files = len(filelist)
    progress_bar = ProgressBar(nb_files)

    print "Exporting {0} RRD databases:".format(nb_files)

    for domain, file in filelist:
        src = os.path.join(source, domain, file)
        dst = os.path.join(destination, "{0}-{1}".format(domain, file).replace(".rrd", ".xml"))
        progress_bar.update()

        code = subprocess.check_call(['rrdtool', 'dump', src, dst])

    return nb_files
Example #3
def export_to_xml_in_folder(source, destination=Defaults.MUNIN_XML_FOLDER):
    """
    Calls "rrdtool dump" to convert RRD database files in "source" folder to XML representation
    Converts all *.rrd files in source folder
    """
    assert os.path.exists(source)
    try:
        os.makedirs(destination)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    filelist = [("", os.path.join(source, file)) for file in os.listdir(source)
                if file.endswith(".rrd")]
    nb_files = len(filelist)
    progress_bar = ProgressBar(nb_files)

    print("Exporting {0} RRD databases:".format(nb_files))

    for domain, file in filelist:
        src = os.path.join(source, domain, file)
        dst = os.path.join(
            destination, "{0}-{1}".format(domain,
                                          file).replace(".rrd", ".xml"))
        progress_bar.update()

        code = subprocess.check_call(['rrdtool', 'dump', src, dst])

    return nb_files
Example #4
def load_images(
    path,
    preprocessor=None,
    limit=None,
):
    images = []
    images_id = next(os.walk(path))[2]
    size = limit if limit != None else len(images_id)
    print(f"Loading {size} images")

    prog = ProgressBar(100, size)

    for id in range(size):
        name = images_id[id]

        filename = path + "/" + name
        image = load_img(filename, target_size=(224, 224))
        image = img_to_array(image)
        image = image.reshape(
            (1, image.shape[0], image.shape[1], image.shape[2]))

        if preprocessor != None:
            image = preprocessor.preprocess_input(image)

        image_id = name.split('.')[0]
        images.append([image_id, image])

        prog.update(id)

    print("Loading complete")
    return images
Example #5
def parallel_test(model_cls,
                  model_kwargs,
                  checkpoint,
                  dataset,
                  data_func,
                  gpus,
                  worker_per_gpu=1):
    ctx = multiprocessing.get_context('spawn')
    idx_queue = ctx.Queue()
    result_queue = ctx.Queue()
    num_workers = len(gpus) * worker_per_gpu
    workers = [
        ctx.Process(target=worker_func,
                    args=(model_cls, model_kwargs, checkpoint, dataset,
                          data_func, gpus[i % len(gpus)], idx_queue,
                          result_queue)) for i in range(num_workers)
    ]
    for w in workers:
        w.daemon = True
        w.start()

    for i in range(len(dataset)):
        idx_queue.put(i)

    results = {}
    prog_bar = ProgressBar(task_num=len(dataset))
    for _ in range(len(dataset)):
        img_id, res = result_queue.get()
        results[img_id] = format_ret(res)
        prog_bar.update()
    print('\n')
    for worker in workers:
        worker.terminate()

    return results
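parallel_test only wires up the queues and worker processes; the worker_func it spawns is not shown. A sketch consistent with how it is called here, and with results coming back as (img_id, res) pairs, is given below. The checkpoint loading and the model call convention are assumptions; the real implementation may differ.

import torch

def worker_func(model_cls, model_kwargs, checkpoint, dataset, data_func,
                gpu_id, idx_queue, result_queue):
    # Build the model once per worker and pin it to this worker's GPU.
    model = model_cls(**model_kwargs)
    model.load_state_dict(torch.load(checkpoint, map_location='cpu'))
    model.cuda(gpu_id).eval()
    with torch.no_grad():
        while True:  # the parent terminates the daemon workers when done
            idx = idx_queue.get()
            data = data_func(dataset[idx], gpu_id)  # prepare one sample
            result = model(**data)
            result_queue.put((idx, result))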
Example #6
    def _load(self):
        log('[{time}] loading from {path}'.format(
            time=get_time(), path=self._source_path))
        for i, label_tag in enumerate(self._label_tags):
            path = os.path.join(self._source_path, label_tag)
            files = sample(
                os.listdir(path)[self._start:self._end], self._max_num
            ) if self._max_num > 0 else os.listdir(path)[self._start:self._end]
            print('start: {}, end: {}'.format(self._start, self._end))
            print(len(files))
            pbar = ProgressBar(len(files))
            for j, filename in enumerate(files):
                filepath = os.path.join(path, filename)
                try:
                    with open(filepath, 'r') as f:
                        log_sequence = json.load(f)
                        feature = self._sequence2feature(log_sequence)
                        self._data_ids.append(
                            filepath.split('/')[-1].split('.')[0].split('_')[0])
                        self._feature_data.append(feature)
                        self._label_data.append(i)
                except:
                    log('[{time}] Failed to load file {filepath}'.format(
                        time=get_time(), filepath=filepath))
                    print('[{time}] Failed to load file {filepath}'.format(
                        time=get_time(), filepath=filepath))
                pbar.updateBar(j)
Example #7
    def generate(self):
        progress_bar = ProgressBar(self.settings.nb_rrd_files)

        self.add_header(self.settings)

        for domain in self.settings.domains:
            for host in self.settings.domains[domain].hosts:
                row = self.add_row("{0} / {1}".format(domain, host))
                for plugin in self.settings.domains[domain].hosts[
                        host].plugins:
                    _plugin = self.settings.domains[domain].hosts[
                        host].plugins[plugin]
                    panel = row.add_panel(
                        _plugin.settings["graph_title"] or plugin, plugin)

                    for field in _plugin.fields:
                        query = panel.add_query(field)
                        if "label" in _plugin.fields[field].settings:
                            query.alias = _plugin.fields[field].settings[
                                "label"]
                        progress_bar.update()

                    panel.width = 12 // self.settings.grafana['graph_per_row']
                    panel.process_graph_settings(_plugin.settings)
                    panel.process_graph_thresholds(_plugin.fields)
                    panel.process_graph_types(_plugin.fields)
Example #8
    def _preprocess(self):
        self.lang = Lang()
        for text in self._texts:
            self.lang.index_text(text)

        for text in self._texts:
            indexes = indexes_from_text(self.lang, text)
            indexes.append(EOT_token)
            padded_indexes = pad_indexes(indexes, self._max_text_length,
                                         PAD_token)
            self._indexed_texts.append(padded_indexes)

        self._indexed_texts = np.stack(self._indexed_texts, axis=0)

        bar = ProgressBar(len(self._audio_files) - 1, unit='')
        for (audio_files_read, audio_file) in enumerate(self._audio_files):
            # (n_mels, T), (1+n_fft/2, T)
            mel, mag = compute_spectrograms(audio_file)
            padded_mel = pad_time_dim(mel, self._max_audio_length, 0)
            padded_mag = pad_time_dim(mag, self._max_audio_length, 0)
            self._mels.append(padded_mel.transpose())
            self._mags.append(padded_mag.transpose())

            bar.update(audio_files_read)

        self._mels = np.stack(self._mels, axis=0)
        self._mags = np.stack(self._mags, axis=0)
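The pad_indexes and pad_time_dim helpers called above are not shown; minimal versions consistent with how they are used (padding token ids to a fixed length, and padding a (features, T) spectrogram along its time axis) might look like this sketch. Names and exact behavior are assumptions.

import numpy as np

def pad_indexes(indexes, max_length, pad_value):
    # Pad a 1-D list of token ids with pad_value up to max_length.
    return np.asarray(indexes + [pad_value] * (max_length - len(indexes)))

def pad_time_dim(spec, max_length, pad_value):
    # Pad a (features, T) spectrogram along the time axis up to max_length frames.
    n_features, t = spec.shape
    padded = np.full((n_features, max_length), pad_value, dtype=spec.dtype)
    padded[:, :t] = spec
    return padded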
Example #9
def discover_from_www(settings):
    """
    Builds a Munin dashboard structure (domain/host/plugins) by reading the HTML files
    rather than listing the cache folder, because the latter is likely to contain old data
    """

    # delayed  import since this function should not be used in the "normal" case
    try:
        from bs4 import BeautifulSoup
    except ImportError:
        try:
            from BeautifulSoup import BeautifulSoup
        except ImportError:
            print "Please install BeautifulSoup to use this program"
            print "  pip install beautifulsoup4 or easy_install beautifulsoup4"
            sys.exit(1)

    folder = settings.paths['www']

    print "Reading Munin www cache: ({0})".format(folder)
    with open(os.path.join(folder, "index.html")) as f:
        root = BeautifulSoup(f.read())

    domains = root.findAll("span", {"class": "domain"})

    # hosts and domains are at the same level in the tree so let's open the file
    for domain in domains:
        with open(os.path.join(folder, domain.text, "index.html")) as f:
            domain_root = BeautifulSoup(f.read())

        links = domain_root.find(id="content").findAll("a")
        progress_bar = ProgressBar(len(links), title=domain.text)

        for link in links:
            progress_bar.update()

            elements = link.get("href").split("/")
            if len(elements) < 2 \
                or elements[0].startswith("..") \
                or elements[-1].startswith("index"):
                continue

            if len(elements) == 2:
                host, plugin = elements[0], elements[1]
            elif len(elements) == 3:
                # probably a multigraph, we'll be missing the plugin part
                # we won't bother reading the html file for now and guess it from the RRD database later
                host, plugin = elements[0], ".".join(elements[1:3])
            else:
                print "Unknown structure"
                continue

            plugin = plugin.replace(".html", "")
            settings.domains[domain.text].hosts[host].plugins[plugin].is_multigraph = (len(elements) == 3)
            settings.domains[domain.text].hosts[host].plugins[plugin].settings = {
                'graph_title': link.text,
            }
            settings.nb_plugins += 1

    return settings
Example #10
    def train(self, data):
        N = int(math.ceil(len(data) / self.batch_size))
        cost = 0

        x = np.ndarray([self.batch_size, self.edim], dtype=np.float32)
        time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        target = np.zeros([self.batch_size, self.nwords]) # one-hot-encoded
        context = np.ndarray([self.batch_size, self.mem_size])

        x.fill(self.init_hid)
        for t in xrange(self.mem_size):
            time[:,t].fill(t)

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar('Training', max=N)

        for idx in xrange(N):
            if self.show: bar.next()
            for b in xrange(self.batch_size):
                m = random.randrange(self.mem_size, len(data))
                target[b][data[m]] = 1
                context[b] = data[m - self.mem_size:m]

            loss, self.step = self.sess.run([self.loss,
                                             self.global_step],
                                             feed_dict={
                                                 self.input: x,
                                                 self.time: time,
                                                 self.target: target,
                                                 self.context: context})
            cost += loss

        if self.show: bar.finish()
        return cost/N/self.batch_size
Example #11
def plot_traj(trajs,
              fig_size=(6, 6),
              color="mediumpurple",
              size=5,
              title='',
              is_plot_line=False,
              od_only=False,
              offset=None):
    """plot the traj
    """
    if offset is None:
        offset = [0, 0]
    p = ProgressBar(len(trajs), '绘制轨迹图')
    plt.figure(figsize=fig_size)
    for i in range(len(trajs)):
        p.update(i)
        traj = np.array(trajs[i])
        if od_only:
            traj = [traj[0], traj[-1]]
        x = [x[0] + np.random.uniform(-offset[0], offset[0]) for x in traj]
        y = [y[1] + np.random.uniform(-offset[1], offset[1]) for y in traj]

        if od_only:
            if is_plot_line:
                plt.plot(x[0], y[0], c=color)
                plt.plot(x[1], y[1], c="yellowgreen")
            plt.scatter(x[0], y[0], c=color, s=size)
            plt.scatter(x[1], y[1], c="yellowgreen", s=size)
        else:
            if is_plot_line:
                plt.plot(x, y, c=color)
            plt.scatter(x, y, c=color, s=size)
    plt.title(title)
    plt.show()
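A toy call with synthetic trajectories, to show the expected input shape (each trajectory is a sequence of (x, y) points) and how offset adds a little jitter. Purely illustrative; the values are made up.

trajs = [
    [(0.0, 0.0), (0.5, 0.2), (1.0, 0.8)],
    [(0.2, 0.9), (0.6, 0.5), (1.1, 0.1)],
]
plot_traj(trajs, size=10, title='toy trajectories', is_plot_line=True,
          offset=[0.05, 0.05])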
Example #12
def remove_noise(dataset, features, verbose=False):
    sample_0 = dataset['input'][0]
    _, _, F = sample_0.shape
    print 'Removing noise: ',
    pbar = ProgressBar(len(dataset['input']) * (F - 1))
    for f in range(F):
        if (features[f] == 'time'): continue  # no need to filter time
        for sample in dataset['input']:
            y = sample[:, 0, f]
            # compute FT of the feature f
            w = scipy.fftpack.rfft(y)
            # compute mean frequency
            mean = np.mean(np.abs(w))
            # set the threshold to double the mean
            thr = 2 * mean
            # remove high frequency components
            cutoff_idx = np.abs(w) < thr
            w[cutoff_idx] = 0
            # return to time domain by doing inverseFFT
            y = scipy.fftpack.irfft(w)
            sample[:, 0, f] = y
            # update progress bar
            pbar.next()
    # return
    return None
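The core of remove_noise is a thresholded real-FFT filter. The same idea applied to a standalone 1-D signal, as an illustrative sketch:

import numpy as np
import scipy.fftpack

t = np.linspace(0, 1, 500)
y = np.sin(2 * np.pi * 5 * t) + 0.3 * np.random.randn(500)  # 5 Hz tone plus noise

w = scipy.fftpack.rfft(y)            # real FFT of the signal
thr = 2 * np.mean(np.abs(w))         # threshold at twice the mean magnitude
w[np.abs(w) < thr] = 0               # drop the weak (mostly noise) components
y_filtered = scipy.fftpack.irfft(w)  # back to the time domain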
Example #13
    def stability_derivatives(self):
        prog = ProgressBar('Instantiating Stability Object')
        derivatives = StabilityDerivatives(
            u=self.initial_trim_case.u,
            w=self.initial_trim_case.w,
            q=0,
            theta_f=self.initial_trim_case.fuselage_tilt,
            collective_pitch=self.initial_trim_case.collective_pitch,
            longitudinal_cyclic=self.initial_trim_case.longitudinal_cyclic)
        prog.update(100)
        return derivatives
Example #14
    def import_from_xml_folder(self, folder):
        raise DeprecationWarning

        # build file list and grouping if necessary
        file_list = os.listdir(folder)
        grouped_files = defaultdict(list)
        errors = []
        progress_bar = ProgressBar(len(file_list))

        for file in file_list:
            fullname = os.path.join(folder, file)
            parts = file.replace(".xml", "").split("-")
            series_name = ".".join(parts[0:-2])
            if self.settings.influxdb['group_fields']:
                grouped_files[series_name].append((parts[-2], fullname))
            else:
                grouped_files[".".join([series_name, parts[-2]])].append(('value', fullname))

        if self.settings.interactive:
            show = raw_input("Would you like to see the prospective series and columns? y/[n]: ") or "n"
            if show in ("y", "Y"):
                for series_name in sorted(grouped_files):
                    print("  - {2}{0}{3}: {1}".format(series_name, [name for name, _ in grouped_files[series_name]], Color.GREEN, Color.CLEAR))

        print("Importing {0} XML files".format(len(file_list)))
        for series_name in grouped_files:
            data = []
            keys_name = ['time']
            values = defaultdict(list)
            for field, file in grouped_files[series_name]:
                progress_bar.update()

                keys_name.append(field)

                content = read_xml_file(file)
                [values[key].append(value) for key, value in content.items()]

            # join data with time as first column
            data.extend([[k]+v for k, v in values.items()])

            try:
                pass
                # self.upload_values(series_name, keys_name, data)
            except Exception as e:
                errors.append(str(e))
                continue

            try:
                self.validate_record(series_name, keys_name)
            except Exception as e:
                errors.append("Validation error in {0}: {1}".format(series_name, e))

        if errors:
            print("The following errors were detected while importing:")
            for error in errors:
                print("  {0} {1}".format(Symbol.NOK_RED, error))
Example #15
def write2files(init_path, file_list, D, write_file_num=14650):
    """写入文件
    """
    rand_ind = random.sample([i for i in range(len(file_list))],
                             write_file_num)  # 随机抽取轨迹
    p2 = ProgressBar(write_file_num, '写入文件')
    for i in range(write_file_num):
        p2.update(i)
        with open(init_path + file_list[rand_ind[i]], 'w') as f2:
            for step in D[rand_ind[i]]:
                f2.writelines(str(step[0]) + ',' + str(step[1]) + '\n')
Example #16
def check_popularity():
    people = dbs.query(Person).filter(Person.count == None).all()
    pb = ProgressBar(people.__len__())

    for person in people:
        pb.update_print(people.index(person))
        references = dbs.query(
            PeopleRel.count).filter(PeopleRel.person == person.name).all()
        count = sum(i[0] for i in references)
        person.count = int(count)
    dbs.commit()
Example #17
def extract_features(images, model):
    features = dict()
    count = 0
    prog = ProgressBar(100, len(images))

    for id, image in images:
        feature = model.predict(image, verbose=0)
        features[id] = feature
        count += 1
        prog.update(count)

    return features
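A hypothetical end-to-end use of load_images() together with extract_features(); the model choice (VGG16 fc2 features) and the path are assumptions, not taken from the examples.

from tensorflow.keras.applications import vgg16
from tensorflow.keras.models import Model

base = vgg16.VGG16()
model = Model(inputs=base.inputs, outputs=base.layers[-2].output)  # 4096-d fc2 features
images = load_images("data/images", preprocessor=vgg16, limit=100)  # vgg16 module provides preprocess_input
features = extract_features(images, model)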
Example #18
def markov_model(trajectory, N, epsilon):
    """basic description

    detailed description

    Args:
        trajectory: 轨迹数据(二维数组)
        N         : 二级网格数
        epsilon   : 隐私预算

    Returns:
        O_: 中间点转移概率矩阵

    """
    O_ = np.zeros([N, N])  # 建立N*N的转移概率矩阵
    for t in trajectory:
        O_0 = np.zeros([N, N])
        for i in range(len(t) - 1):
            curr_point = t[i]
            next_point = t[i + 1]
            O_0[curr_point][next_point] += 1
        O_0 = O_0 / (len(t) - 1)  # transition probabilities of this trajectory
        O_ += O_0

    line_all = []
    p = ProgressBar(N, 'building the intermediate-point transition matrix')
    for i in range(N):
        p.update(i)
        score = 0
        for j in range(N):
            # add Laplace noise
            # sensitivity = 1
            # randomDouble = random.random() - 0.5
            # noise = - (sensitivity / epsilon) * signum(randomDouble) * math.log(
            #     1 - 2 * abs(randomDouble))

            noise = np.random.laplace(0, 1 / epsilon)
            # noise = 0.00000000000000000000000001
            O_[i][j] += noise
            if O_[i][j] < 0:
                O_[i][j] = 0
            score += O_[i][j]
        line_all.append(score)

    # compute X, normalize each row
    for i in range(N):
        O_[i] /= line_all[i]

    sns.heatmap(data=O_, square=True)
    plt.show()

    return O_
Example #19
def one_run(projects_train, projects_test, K, outlier_threshold, granularity):
    rmse_failed_run = []
    rmse_success_run = []
    rmse_run = []
    accuracy_run = []
    relative_time = np.linspace(0.025, 1, 20)
    bar = ProgressBar(end_value=len(relative_time), text="Time steps", count=True)
    bar.start()
    for i, rel_t in enumerate(relative_time):
        # Data
        t = int(rel_t * 999)
        samples = subsample(t, granularity)
        t = len(samples)
        T = 999

        # Remove outliers
        projects_train_filtered = [p for p in projects_train if np.all((p.money[T] - outlier_threshold) <= 0) and np.all((p.money[samples] - outlier_threshold) <= 0)]
        projects_test_filtered = [p for p in projects_test if np.all((p.money[T] - outlier_threshold) <= 0) and np.all((p.money[samples] - outlier_threshold) <= 0)]

        X_train = np.ndarray(shape=(len(projects_train_filtered), t), buffer=np.array([p.money[samples] for p in projects_train_filtered]), dtype=float)
        y_train = np.expand_dims(np.array([p.money[T] for p in projects_train_filtered]), axis=1)
        X_test = np.ndarray(shape=(len(projects_test_filtered), t), buffer=np.array([p.money[samples] for p in projects_test_filtered]), dtype=float)
        y_test = np.expand_dims(np.array([p.money[T] for p in projects_test_filtered]), axis=1)

        #X_max = np.max(X_train, axis=0)
        #X_train = X_train / X_max[np.newaxis, :]
        #X_test = X_test / X_max[np.newaxis, :]

        # Hyperparameters
        beta = 0.0001
        epsilon = 1e0
        lam = 0
        iterations = 50
        random_restarts = None

        mls = LeastSquaresMixture(X_train, y_train,
                                  K=K, beta=beta, lam=lam,
                                  iterations=iterations, epsilon=epsilon, random_restarts=random_restarts)
        mls.train(verbose=False)
        #print(mls)

        rmse_failed, rmse_success, rmse, accuracy = mls.evaluate(X_test, y_test, verbose=False)
        rmse_failed_run.append(rmse_failed)
        rmse_success_run.append(rmse_success)
        rmse_run.append(rmse)
        accuracy_run.append(accuracy)

        bar.update(i)

    print(accuracy_run)

    return rmse_failed_run, rmse_success_run, rmse_run, accuracy_run
Example #20
def get_most_common(a1, a2):
    temp_dict1 = {}
    temp_dict2 = {}
    pb = ProgressBar(worksum=len(a1), auto_display=False)
    pb.startjob()
    num = 0
    for s1, s2 in zip(a1, a2):
        num += 1
        pb.complete(1)
        if args.max_words != -1 and (len(s1) > args.max_words
                                     or len(s2) > args.max_words):
            continue
        for w1 in s1:
            temp_dict1.setdefault(w1, 0)
            temp_dict1[w1] += 1
        for w2 in s2:
            temp_dict2.setdefault(w2, 0)
            temp_dict2[w2] += 1

        if num % 32 == 0:
            pb.display_progress_bar()
    sorted1 = sorted(temp_dict1.items(), key=lambda i: i[1], reverse=True)
    sorted2 = sorted(temp_dict2.items(), key=lambda i: i[1], reverse=True)
    #print(sorted1[:100])
    #print(sorted2[:100])
    return [i[0] for i in sorted1[:args.vac_dict_ch]
            ], [i[0] for i in sorted2[:args.vac_dict_en]]
Example #21
    def train(self,
              train,
              validation,
              num_epochs=None,
              learning_rate=0.01,
              threshold=0.001):
        """Train the FFNN with gradient descent. Dynamic stopping on lowest validation error.

        Training runs over the given number of epochs. If None are given, then training runs
        until the threshold (change in validation error) is reached over multiple consecutive
        iterations. This dynamic stopping also occurs if validation error begins to increase.
        When dynamic stopping is used, the network finalizes the best weights found of the
        duration of training.
        """

        num_epochs_iter = num_epochs if num_epochs else 600  # 600 set to max epochs
        dynamic_stopping = False if num_epochs else True  # Dynamically halt if num_epochs unspec.
        retries = 0
        err = self.evaluate(validation)

        progress_bar = ProgressBar()
        for epoch in range(num_epochs_iter):
            last_err = err
            for i in range(len(train)):
                progress_bar.refresh(i / len(train))
                sample = choice(train)  # Randomly sample training data

                # Update weights based on the chosen sample
                self.prepare_network()
                self.propagate_input(sample.features)
                self.propagate_error(sample.label)
                self.update_weights(sample, learning_rate, momentum=0.3)

            progress_bar.refresh(1.0)
            progress_bar.clear()

            # Evaluate validation error
            err = self.evaluate(validation)
            print('Epoch {} validation error: {:.4f}'.format(epoch, err))
            if dynamic_stopping:
                if last_err - err < threshold:
                    if err <= last_err:  # Still improved, but below threshold
                        self.save_network_weights(err)

                    retries += 1
                    if retries >= 100:
                        epochs_ran = epoch
                        break
                else:
                    self.save_network_weights(err)
                    retries = 0
        else:
            epochs_ran = num_epochs_iter  # Loop did not stop early

        if dynamic_stopping:
            self.finalize_network_weights(
            )  # Finalize weights to best validation error

        return epochs_ran
Example #22
    def run(self, mode, X, Y, batch_size, optimizer=None, clip=None):
        self.reset_states(batch_size)
        if optimizer:
            self.train(True)
        else:
            self.eval()

        nbatches = X.size(0) // batch_size

        pb = ProgressBar(mode, self.epoch, nbatches)
        _total_time = 0
        _total_loss = 0
        _total_word = 0

        L = nn.CrossEntropyLoss(size_average=False)

        for index in range(nbatches):
            begin = index * batch_size
            end = begin + batch_size

            # Start
            if optimizer:
                start = time.time()
                x = Variable(X[begin:end], requires_grad=False)
                t = Variable(Y[begin:end], requires_grad=False)
            else:
                start = time.time()
                x = Variable(X[begin:end], requires_grad=False, volatile=True)
                t = Variable(Y[begin:end], requires_grad=False, volatile=True)
            y = self(x)
            loss = L(y, t.view(-1))

            if optimizer:
                if clip:
                    torch.nn.utils.clip_grad_norm(self.parameters(), clip)
                self.zero_grad()
                loss.backward()
                optimizer.step()
            # End
            time_per_batch = time.time() - start
            _total_time += time_per_batch
            _total_loss += loss.cpu().data.numpy()[0]
            _total_word += float(numpy.prod(t.size()))
            pb.update([('ppl', numpy.exp(_total_loss / _total_word),
                        lambda x: x),
                       ('wps', _total_word / _total_time, lambda x: x)])

        print
        return numpy.exp(_total_loss / _total_word), _total_word / _total_time
Example #23
    def __init__(self, args):
        self.initial_lr = args.learning_rate
        self.lr = args.learning_rate
        self.test_only = args.test_only
        self.dump_statistics = args.dump_statistics
        self.modelName = args.model
        self.experiment = args.experiment
        self.log_path = args.log_path
        self.save_path = args.save_path

        if not os.path.isdir(self.log_path):
            os.makedirs(self.log_path)

        self.logger = Logger(
            '%s/%s_%s.csv' % (self.log_path, self.modelName, args.experiment),
            'epoch, time, learning_rate, tr_loss, tr_acc, val_loss, val_acc')
        self.progress_bar = ProgressBar()
        self.chrono = Chrono()

        self.trainset, self.testset, self.trainloader, self.testloader = dataloader(
        )

        print('==> Building model..')
        self.ae = AutoEncoder()
        self.model = getattr(models, self.modelName)()

        if self.modelName == 'bit':
            self.model.load_from(
                numpy.load('./state_dicts/%s.npz' % self.modelName))

        if torch.cuda.is_available():
            self.ae = torch.nn.DataParallel(self.ae)
            self.model = torch.nn.DataParallel(self.model)
            torch.backends.cudnn.benchmark = True

        self.optimizer = torch.optim.SGD(self.model.parameters(),
                                         lr=self.lr,
                                         momentum=0.9)

        self.load_ae()
        if args.resume or self.test_only or self.dump_statistics:
            self.load()

        self.criterion = torch.nn.CrossEntropyLoss()
        self.criterion = get_torch_vars(self.criterion, False)

        self.ae = get_torch_vars(self.ae, False)
        self.model = get_torch_vars(self.model, False)
Example #24
def get_all_ranking(save_to):
    from utils import ProgressBar
    fout = open(save_to, 'w')

    images = Image.objects.all()
    progress = ProgressBar(len(images) * len(images), 20)
    for target in images:
        searcher = Searcher(target)
        searcher.run()
        results = []
        for _, image in searcher.results:
            results.append((image.origin_id, len(results)))
            progress.update()
        results.sort()
        print >> fout, ' '.join(str(x) for _, x in results)
    print('Finished. Written to file "{}"'.format(save_to))
Example #25
def test(test_data_loader, model):
    srocc = SROCC()
    plcc = PLCC()
    rmse = RMSE()
    len_test = len(test_data_loader)
    pb = ProgressBar(len_test, show_step=True)

    print("Testing")

    model.eval()
    with torch.no_grad():
        for i, ((img, ref), score) in enumerate(test_data_loader):
            img, ref = img.cuda(), ref.cuda()
            output = model(img, ref).cpu().data.numpy()
            score = score.data.numpy()

            srocc.update(score, output)
            plcc.update(score, output)
            rmse.update(score, output)

            pb.show(
                i, "Test: [{0:5d}/{1:5d}]\t"
                "Score: {2:.4f}\t"
                "Label: {3:.4f}".format(i + 1, len_test, float(output),
                                        float(score)))

    print("\n\nSROCC: {0:.4f}\n"
          "PLCC: {1:.4f}\n"
          "RMSE: {2:.4f}".format(srocc.compute(), plcc.compute(),
                                 rmse.compute()))
Example #26
File: iqa.py  Project: Gavinylk/CNN-FRIQA
def train(train_loader, model, criterion, optimizer, epoch):
    losses = AverageMeter()
    len_train = len(train_loader)
    pb = ProgressBar(len_train-1)

    print("Training")

    # Switch to train mode
    model.train()
    criterion.cuda()
    for i, ((img,ref), score) in enumerate(train_loader):
        img, ref, score = img.cuda(), ref.cuda(), score.squeeze().cuda()

        # Compute output
        output = model(img, ref)
        loss = criterion(output, score)

        # Measure accuracy and record loss
        losses.update(loss.data, img.shape[0])

        # Compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        pb.show(i, '[{0:5d}/{1:5d}]\t'
                'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                .format(i, len_train, loss=losses))
Example #27
    def create_examples(self, lines, example_type, cached_file, save_cache):
        '''
        Creates examples for data
        '''
        label_list = self.get_labels()
        if cached_file and cached_file.exists():
            logger.info("Loading examples from cached file %s", cached_file)
            examples = torch.load(cached_file)
        else:
            pbar = ProgressBar(n_total=len(lines), desc='create examples')
            examples = []
            for i, line in enumerate(lines):
                #if i > 20: break  # for quick debug
                guid = '%s-%d' % (example_type, i)
                label = line['tags']
                text_a = line['info']
                text_b = None
                match = line["cira_match"]

                if self.test_mode == 4 and sum(match) < 4:
                    continue
                else:
                    examples.append(
                        InputExample(guid=guid,
                                     text_a=text_a,
                                     text_b=text_b,
                                     label=label,
                                     match=match))
                pbar(step=i)

            if save_cache:
                logger.info("Saving examples into cached file %s", cached_file)

                torch.save(examples, cached_file)
        return examples
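InputExample is defined elsewhere in that project; a minimal container matching the fields used above would be something like this (an assumption, not the project's actual class):

class InputExample(object):
    def __init__(self, guid, text_a, text_b=None, label=None, match=None):
        self.guid = guid      # unique example id, e.g. 'train-42'
        self.text_a = text_a  # primary text (the 'info' field)
        self.text_b = text_b  # optional second segment
        self.label = label    # the 'tags' field
        self.match = match    # the 'cira_match' vector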
Example #28
File: iqa.py  Project: Gavinylk/CNN-FRIQA
def validate(val_loader, model, criterion, show_step=False):
    losses = AverageMeter()
    srocc = SROCC()
    len_val = len(val_loader)
    pb = ProgressBar(len_val-1, show_step=show_step)

    print("Validation")

    # Switch to evaluate mode
    model.eval()

    with torch.no_grad():
        for i, ((img,ref), score) in enumerate(val_loader):
            img, ref, score = img.cuda(), ref.cuda(), score.squeeze().cuda()

            # Compute output
            output = model(img, ref)
            
            loss = criterion(output, score)
            losses.update(loss.data, img.shape[0])

            output = output.cpu().data
            score = score.cpu().data
            srocc.update(score.numpy(), output.numpy())

            pb.show(i, '[{0:5d}/{1:5d}]\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                    'Output {out:.4f}\t'
                    'Target {tar:.4f}\t'
                    .format(i, len_val, loss=losses, 
                    out=output, tar=score))


    return float(1.0-srocc.compute())  # losses.avg
Example #29
    def _write_per_dir(self):
        """
        write outputs of each directory to a different file
        """

        bag_of_words = BagOfWordsParser(self._words_filter)
        reviews_per_dir = defaultdict(list)

        flags = Reviewer._get_flags_text(self._output_per_dir,
                                         self._output_in_svm_light,
                                         self._output_in_tfdidf)

        for dir in map(os.path.abspath, self._dirs):
            numoffiles = len(os.listdir(dir))
            filename = Reviewer._get_filename(dir, flags)

            reviews = reviews_per_dir[filename]
            prefix = "crunching reviews for '{}'".format(filename)

            with ProgressBar(prefix, numoffiles) as pb:
                for review in bag_of_words.parse_dir(dir):
                    pb.report()
                    reviews.append(review)

            assert numoffiles == len(reviews)

        # we update here because the bag of words is not updated yet
        # when we're still reading the files in the directories
        for filename, reviews in reviews_per_dir.items():
            self._write(filename, bag_of_words, reviews)
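This example drives ProgressBar as a context manager with a report() method; a minimal stand-in consistent with that usage (hypothetical, not the project's class):

import sys

class ProgressBar(object):
    def __init__(self, prefix, total):
        self.prefix = prefix
        self.total = total
        self.done = 0

    def __enter__(self):
        return self

    def report(self):
        # Count one processed item and redraw the status line.
        self.done += 1
        sys.stdout.write("\r{}: {}/{}".format(self.prefix, self.done, self.total))
        sys.stdout.flush()

    def __exit__(self, exc_type, exc_value, traceback):
        sys.stdout.write("\n")
        return False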
Example #30
def get_all_ranking(save_to):
    from utils import ProgressBar
    fout = open(save_to, 'w')

    images = Image.objects.all()
    progress = ProgressBar(len(images) * len(images), 20)
    for target in images:
        searcher = Searcher(target)
        searcher.run()
        results = []
        for _, image in searcher.results:
            results.append((image.origin_id, len(results)))
            progress.update()
        results.sort()
        print >> fout, ' '.join(str(x) for _, x in results)
    print('Finished. Written to file "{}"'.format(save_to))
Example #31
def trip_distribution(trajectory, N, epsilon):
    """
    Estimate the trip (origin/destination) distribution.

    Args:
        trajectory: trajectory data (2-D array of grid indices)
        N         : number of second-level grid cells
        epsilon   : privacy budget

    Returns:
        R: transition probability matrix

    """
    R = np.zeros((N, N))  # build a transition probability matrix over the grid cells
    for t in trajectory:
        if len(t) > 1:
            sta = t[0]
            end = t[-1]
            R[sta][end] += 1
        else:
            print(t)

    count = np.sum(R)
    print(count)

    p = ProgressBar(N, 'building the transition matrix')
    for i in range(N):
        p.update(i)
        for j in range(N):
            # add Laplace noise
            # sensitivity = 1
            # randomDouble = random.random() - 0.5
            # noise = - (sensitivity / epsilon) * signum(randomDouble) * math.log(
            #     1 - 2 * abs(randomDouble))
            noise = np.random.laplace(0, 1 / epsilon)

            R[i][j] += noise

            if R[i][j] < 0:
                R[i][j] = 0

            # whether |D| should be recomputed after adding the noise is debatable
            # count += R[i][j]
    R /= count

    return R
Example #32
def markov_model(trajectory, N, epsilon):
    """
    Markov mobility model.

    Args:
        trajectory: trajectory data (2-D array of grid indices)
        N         : number of second-level grid cells
        epsilon   : privacy budget

    Returns:
        O_: intermediate-point transition probability matrix

    """
    O_ = np.zeros((N, N))  # build the N*N transition probability matrix
    for t in trajectory:
        O_sub = np.zeros((N, N))
        for i in range(len(t) - 1):
            curr_point = t[i]
            next_point = t[i + 1]
            O_sub[curr_point][next_point] += 1
        O_sub /= (len(t) - 1)  # transition probabilities of this trajectory
        O_ += O_sub

    p = ProgressBar(N, 'generating the intermediate-point transition matrix')
    for i in range(N):
        p.update(i)
        for j in range(N):
            noise = np.random.laplace(0, 1 / epsilon)  # add Laplace noise
            O_[i][j] += noise

            if O_[i][j] < 0:
                O_[i][j] = 0

    # compute X
    row_sum = [sum(O_[i]) for i in range(N)]
    for j in range(N):
        O_[j] /= row_sum[j]

    # plot a heatmap of the matrix
    sns.heatmap(data=O_, square=True)
    plt.title('mobility model construction matrix (epsilon=%s)' %
              str(used_pair[0]))
    plt.show()

    return O_
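A toy call with made-up grid indices and privacy budget. Note that used_pair, referenced in the plt.title call above, is a module-level value in that project, so something has to define it before calling:

used_pair = (1.0,)  # assumed module-level value used in the plot title
toy_trajectories = [
    [0, 1, 2, 1],
    [2, 3, 3, 0],
    [1, 2, 3, 2, 0],
]
O_ = markov_model(toy_trajectories, N=4, epsilon=1.0)
print(O_.sum(axis=1))  # every row of the noisy transition matrix is normalized to ~1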
Example #33
def spectrogram2wav(spectrogram):
    '''
    spectrogram: [t, f], i.e. [t, nfft // 2 + 1]
    '''
    spectrogram = spectrogram.T  # [f, t]
    X_best = copy.deepcopy(spectrogram)  # [f, t]
    bar = ProgressBar(hp.n_iter, unit='')
    for i in range(hp.n_iter):
        bar.update(i)
        X_t = invert_spectrogram(X_best)
        est = librosa.stft(
            X_t, hp.n_fft, hp.hop_length, win_length=hp.win_length)  # [f, t]
        phase = est / np.maximum(1e-8, np.abs(est))  # [f, t]
        X_best = spectrogram * phase  # [f, t]
    X_t = invert_spectrogram(X_best)

    return np.real(X_t)
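spectrogram2wav is a Griffin-Lim loop; the invert_spectrogram helper it relies on is not shown. A sketch consistent with the librosa calls already used above (hp holds the STFT hyper-parameters):

import librosa

def invert_spectrogram(spectrogram):
    # spectrogram: [f, t] complex STFT; return the time-domain signal.
    return librosa.istft(spectrogram, hop_length=hp.hop_length,
                         win_length=hp.win_length, window="hann")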
Example #34
def predict(f_enc, f_dec, samples, batches, mat, max_len, header):
    '''
    Sample words and compute the prediction error
    '''
    preds = []
    errs = []
    progress = ProgressBar(numpy.sum([len(batch) for batch in batches]), 20,
                           header)
    for batch in batches:
        x, mask_x, y, mask_y = load_batch(samples, batch, mat)
        [prev_h] = f_enc(x, mask_x)

        n_steps = mask_x.sum(0)
        n_samples = x.shape[1]
        sents = numpy.zeros((n_samples, max_len), 'int32')
        # First step - No embedded word is fed into the decoder
        sents[:, 0], prev_h = f_dec(numpy.asarray([-1] * n_samples, 'int32'),
                                    prev_h)
        n_ends = n_steps - (sents[:, 0] == 0)

        for i in range(1, max_len - 1):
            prev_words = sents[:, i - 1]
            if not n_ends.any():
                break

            next_words, prev_h = f_dec(prev_words, prev_h)
            sents[:, i] = next_words * (n_ends > 0)
            n_ends -= (next_words == 0) * (n_ends > 0)

        for i in range(n_samples):
            idx = 0
            while idx < max_len and n_steps[i] > 0:
                if sents[i, idx] == 0:
                    n_steps[i] -= 1
                idx += 1
            preds.append(sents[i, :idx].tolist())

        y = numpy.concatenate(
            [y, numpy.zeros((max_len - len(y), n_samples), 'int32')]).T
        mask_y = numpy.concatenate(
            [mask_y,
             numpy.zeros((max_len - len(mask_y), n_samples), 'int32')]).T
        errs.extend(((sents != y) * mask_y * 1.).sum(1) / mask_y.sum(1))
        progress.disp(errs, ' ERR')

    return preds, numpy.mean(errs)
Example #35
def evaluate(epoch, model, val_loader, criterion, log_path):
    model.eval()
    val_progressor = ProgressBar(log_path,
                                 mode="Val  ",
                                 epoch=epoch,
                                 total_epoch=config.epochs,
                                 model_name=config.model_name,
                                 total=len(val_loader))
    losses = AverageMeter()
    top1 = AverageMeter()

    with torch.no_grad():
        for index, (data, label) in enumerate(val_loader):
            val_progressor.current = index
            data = Variable(data).cuda()
            label = Variable(torch.from_numpy(np.asarray(label))).cuda()
            output = model(data)
            loss = criterion(output, label)

            p_top1, p_top2 = accuracy(output, label, topk=(1, 2))
            losses.update(loss.item(), data.size(0))
            top1.update(p_top1[0], data.size(0))
            val_progressor.current_loss = losses.avg
            val_progressor.current_top1 = top1.avg
            val_progressor()
            #print('epoch %d validate iteration %d: loss: %.3f' % (epoch + 1, index + 1, it_loss.data))
            #correct += (output == label).sum()
        val_progressor.done()
    return losses.avg, top1.avg
Example #36
    def train(self, data):
        N = int(math.ceil(len(data) / self.batch_size))
        cost = 0

        x = np.ndarray([self.batch_size, self.edim], dtype=np.float32) # batch_size * internal_state_dimension
        time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        target = np.zeros([self.batch_size, self.nwords]) # one-hot-encoded
        context = np.ndarray([self.batch_size, self.mem_size]) # 128 * 100

        x.fill(self.init_hid)
        for t in range(self.mem_size):
            time[:,t].fill(t)
        '''
        time = array([[ 0,  1,  2, ..., 97, 98, 99], 
        ..., 
       	[ 0,  1,  2, ..., 97, 98, 99]], dtype=int32) 128 * 100
        '''

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar('Train', max=N)

        for idx in range(N):
            if self.show: bar.next()
            target.fill(0)
            for b in range(self.batch_size):
                # generate a randome number for 100 to the length of data
                m = random.randrange(self.mem_size, len(data))
                # for this batch b, the target data[m] is set to be one
                target[b][data[m]] = 1
                # the context is range from (m - self.mem_size) to m
                context[b] = data[m - self.mem_size:m]

            _, loss, self.step = self.sess.run([self.optim,
                                                self.loss,
                                                self.global_step],
                                                feed_dict={
                                                    self.input: x,
                                                    self.time: time,
                                                    self.target: target,
                                                    self.context: context})
            cost += np.sum(loss)

        if self.show: bar.finish()
        return cost/N/self.batch_size
Example #37
    def evaluate(self, samples):
        """Evaluate a set of samples using RMSE."""
        ssq_error = 0
        progress_bar = ProgressBar()
        for i, sample in enumerate(samples):
            progress_bar.refresh(i / len(samples))
            ssq_error += self.sq_error(sample)
        progress_bar.refresh(1.0)
        progress_bar.clear()

        return sqrt(ssq_error / len(samples))
Example #38
    def train(self, data):
        n_batch = int(math.ceil(len(data) / self.batch_size))
        cost = 0

        u = np.ndarray([self.batch_size, self.edim], dtype=np.float32)      # (N, 150) Will fill with 0.1
        T = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)    # (N, 100) Will fill with 0..99
        target = np.zeros([self.batch_size, self.nwords])                   # one-hot-encoded
        sentences = np.ndarray([self.batch_size, self.mem_size])

        u.fill(self.init_u)   # (N, 150) Fill with 0.1 since we do not need query in the language model.
        for t in range(self.mem_size):   # (N, 100) 100 memory cell with 0 to 99 time sequence.
            T[:,t].fill(t)

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar('Train', max=n_batch)

        for idx in range(n_batch):
            if self.show:
                bar.next()
            target.fill(0)      # (128, 10,000)
            for b in range(self.batch_size):
                # We random pick a word in our data and use that as the word we need to predict using the language model.
                m = random.randrange(self.mem_size, len(data))
                target[b][data[m]] = 1                       # Set the one hot vector for the target word to 1

                # (N, 100). Say we pick word 1000, we then fill the memory using words 1000-150 ... 999
                # We fill Xi (sentence) with 1 single word according to the word order in data.
                sentences[b] = data[m - self.mem_size:m]

            _, loss, self.step = self.sess.run([self.optim,
                                                self.loss,
                                                self.global_step],
                                                feed_dict={
                                                    self.u: u,
                                                    self.T: T,
                                                    self.target: target,
                                                    self.sentences: sentences})
            cost += np.sum(loss)

        if self.show:
            bar.finish()
        return cost/n_batch/self.batch_size
Example #39
def export_to_xml(settings):
    progress_bar = ProgressBar(settings.nb_rrd_files)

    try:
        os.makedirs(settings.paths['xml'])
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    for domain, host, plugin, field in settings.iter_fields():
        _field = settings.domains[domain].hosts[host].plugins[plugin].fields[field]

        if _field.rrd_found:
            progress_bar.update()

            code = subprocess.check_call(['rrdtool', 'dump', _field.rrd_filename, _field.xml_filename])
            if code == 0:
                _field.rrd_exported = True

    return progress_bar.current
Example #40
def predict(f_enc, f_dec, samples, batches, mat, max_len, header):
    '''
    Sample words and compute the prediction error
    '''
    preds = []
    errs = []
    progress = ProgressBar(numpy.sum([len(batch) for batch in batches]), 20, header)
    for batch in batches:
        x, mask_x, y, mask_y = load_batch(samples, batch, mat)
        [prev_h] = f_enc(x, mask_x)

        n_steps = mask_x.sum(0)
        n_samples = x.shape[1]
        sents = numpy.zeros((n_samples, max_len), 'int32')
        # First step - No embedded word is fed into the decoder
        sents[:, 0], prev_h = f_dec(numpy.asarray([-1] * n_samples, 'int32'), prev_h)
        n_ends = n_steps - (sents[:, 0] == 0)

        for i in range(1, max_len - 1):
            prev_words = sents[:, i - 1]
            if not n_ends.any():
                break

            next_words, prev_h = f_dec(prev_words, prev_h)
            sents[:, i] = next_words * (n_ends > 0)
            n_ends -= (next_words == 0) * (n_ends > 0)

        for i in range(n_samples):
            idx = 0
            while idx < max_len and n_steps[i] > 0:
                if sents[i, idx] == 0:
                    n_steps[i] -= 1
                idx += 1
            preds.append(sents[i, : idx].tolist())

        y = numpy.concatenate([y, numpy.zeros((max_len - len(y), n_samples), 'int32')]).T
        mask_y = numpy.concatenate([mask_y, numpy.zeros((max_len - len(mask_y), n_samples), 'int32')]).T
        errs.extend(((sents != y) * mask_y * 1.).sum(1) / mask_y.sum(1))
        progress.disp(errs, ' ERR')

    return preds, numpy.mean(errs)
Example #41
async def quote_many(num_quotes=1, conn_limit=20, progress=None, step=10):
    if progress is None:
        progress = ProgressBar()
        progress.max = num_quotes // step

    logger.info('Process total %d quotes with max %d concurrent connections'
                % (num_quotes, conn_limit))
    logger.debug('... progress bar increment step size: %d coroutines' % step)

    semaphore = asyncio.Semaphore(conn_limit)

    coro_to_fut = asyncio.ensure_future
    futures = [
        coro_to_fut(quote_with_lock(semaphore))
        for i in range(num_quotes)
    ]

    t_start = datetime.today()
    for ith, fut in enumerate(asyncio.as_completed(futures), 1):
        if ith % step == 0:
            progress.next()
        await fut
    t_end = datetime.today()
    progress.finish()

    logger.info('All coroutines complete in {:.2f} seconds'.format(
        (t_end - t_start).total_seconds()
    ))
    quotes = [fut.result() for fut in futures]
    return quotes
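quote_with_lock is not shown in this example; a minimal coroutine consistent with how it is scheduled (one semaphore-limited HTTP request per call) might look like this, with the URL and the parsing being placeholders:

import aiohttp

async def quote_with_lock(semaphore):
    # The shared semaphore caps the number of simultaneous connections.
    async with semaphore:
        async with aiohttp.ClientSession() as session:
            async with session.get('https://example.com/quote') as resp:
                return await resp.text()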
Example #42
    def gen_words(self, data, dummy_idx, N):
        # N = int(math.ceil(len(data) / self.batch_size))
        data = data.copy()
        x = np.ndarray([self.batch_size, self.edim], dtype=np.float32)
        time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        target = np.zeros([self.batch_size, self.nwords])  # one-hot-encoded
        context = np.ndarray([self.batch_size, self.mem_size])

        x.fill(self.init_hid)
        for t in range(self.mem_size):
            time[:, t].fill(t)

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar('Generating', max=N)

        for n in range(N):
            if self.show:
                bar.next()
            context = np.zeros([self.batch_size, self.mem_size]) + dummy_idx

            min_len = min(len(data), self.mem_size)
            context[:, -min_len:] = data[-min_len:]

            prediction = self.sess.run(self.output, feed_dict={
                    self.input: x,
                    self.time: time,
                    self.target: target,
                    self.context: context})

            predicted_word_index = np.argmax(prediction[0])
            print(predicted_word_index)
            data = np.append(data, predicted_word_index)

        if self.show:
            bar.finish()
        return data
Example #43
    def generate(self):
        progress_bar = ProgressBar(self.settings.nb_rrd_files)

        self.add_header(self.settings)

        for domain in self.settings.domains:
            for host in self.settings.domains[domain].hosts:
                row = self.add_row("{0} / {1}".format(domain, host))
                for plugin in self.settings.domains[domain].hosts[host].plugins:
                    _plugin = self.settings.domains[domain].hosts[host].plugins[plugin]
                    panel = row.add_panel(_plugin.settings["graph_title"] or plugin, plugin)

                    for field in _plugin.fields:
                        query = panel.add_query(field)
                        if "label" in _plugin.fields[field].settings:
                            query.alias = _plugin.fields[field].settings["label"]
                        progress_bar.update()

                    panel.width = 12//self.settings.grafana['graph_per_row']
                    panel.process_graph_settings(_plugin.settings)
                    panel.process_graph_thresholds(_plugin.fields)
                    panel.process_graph_types(_plugin.fields)
Example #44
    def test(self, data, label='Test'):
        n_batch = int(math.ceil(len(data) / self.batch_size))
        cost = 0

        u = np.ndarray([self.batch_size, self.edim], dtype=np.float32)
        T = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        target = np.zeros([self.batch_size, self.nwords]) # one-hot-encoded
        sentences = np.ndarray([self.batch_size, self.mem_size])

        u.fill(self.init_u)
        for t in range(self.mem_size):
            T[:,t].fill(t)

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar(label, max=n_batch)

        m = self.mem_size
        for idx in range(n_batch):
            if self.show:
                bar.next()
            target.fill(0)
            for b in range(self.batch_size):
                target[b][data[m]] = 1
                sentences[b] = data[m - self.mem_size:m]
                m += 1

                if m >= len(data):
                    m = self.mem_size

            loss = self.sess.run([self.loss], feed_dict={self.u: u,
                                                         self.T: T,
                                                         self.target: target,
                                                         self.sentences: sentences})
            cost += np.sum(loss)

        if self.show:
            bar.finish()
        return cost/n_batch/self.batch_size
Example #45
    def test(self, data, label='Test'):
        N = int(math.ceil(len(data) / self.batch_size))
        cost = 0

        x = np.ndarray([self.batch_size, self.edim], dtype=np.float32)
        time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        target = np.zeros([self.batch_size, self.nwords])  # one-hot-encoded
        context = np.ndarray([self.batch_size, self.mem_size])

        x.fill(self.init_hid)
        for t in range(self.mem_size):
            time[:, t].fill(t)

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar(label, max=N)

        m = self.mem_size
        for idx in range(N):
            if self.show:
                bar.next()
            target.fill(0)
            for b in range(self.batch_size):
                target[b][data[m]] = 1
                context[b] = data[m - self.mem_size:m]
                m += 1

                if m >= len(data):
                    m = self.mem_size

            loss = self.sess.run([self.loss], feed_dict={self.input: x,
                                                         self.time: time,
                                                         self.target: target,
                                                         self.context: context})
            cost += np.sum(loss)

        if self.show:
            bar.finish()
        return cost/N/self.batch_size
Example #46
    def train(self, data):
        N = int(math.ceil(len(data) / self.batch_size)) # math.ceil : returns smallest integer not less than x.
        cost = 0

        x = np.ndarray([self.batch_size, self.edim], dtype=np.float32)
        time = np.ndarray([self.batch_size, self.mem_size], dtype=np.int32)
        target = np.zeros([self.batch_size, self.nwords]) # one-hot-encoded
        context = np.ndarray([self.batch_size, self.mem_size])

        x.fill(self.init_hid) # initialization: unlike the QA task there is no question, so use a constant vector of 0.1 (and no embedding for x)
        for t in range(self.mem_size):
            time[:,t].fill(t) # [[0,1,2,3,4,...,mem_size] ... ]

        if self.show:
            from utils import ProgressBar
            bar = ProgressBar('Train', max=N)

        for idx in range(N):
            if self.show: bar.next()
            target.fill(0)
            for b in range(self.batch_size):
                m = random.randrange(self.mem_size, len(data)) # pick one position from 100 up to len(data)
                target[b][data[m]] = 1 # the randomly picked word becomes the prediction target
                context[b] = data[m - self.mem_size:m] # use the preceding 100 words as the context

            _, loss, self.step = self.sess.run([self.optim,
                                                self.loss,
                                                self.global_step],
                                                feed_dict={
                                                    self.input: x, # the constant vector fixed at 0.1
                                                    self.time: time, # memory slot lookup for the temporal encoding
                                                    self.target: target, # one-hot encoded word to predict (the 101st)
                                                    self.context: context}) # the preceding 100 words
            cost += np.sum(loss)

        if self.show: bar.finish()
        return cost/N/self.batch_size
Example #47
async def save_profiles(names):
    conn = aiohttp.TCPConnector(limit=50, verify_ssl=False)
    with aiohttp.ClientSession(connector=conn) as session:
        ps = [Profile(name, session) for name in names]
        futures = [asyncio.ensure_future(p.get_info()) for p in ps]
        futures += [asyncio.ensure_future(p.get_publications()) for p in ps]

        progress, step = ProgressBar(), 10
        progress.max = len(futures) // step

        for i, future in enumerate(asyncio.as_completed(futures), 1):
            if i % step == 0:
                progress.next()
            await future
        progress.finish()

    return [future.result() for future in futures]
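A minimal driver for the coroutine above, shown only as a sketch: it assumes the Profile class from this snippet is importable and an aiohttp version that accepts the plain with-block used above; the pre-3.8 get_event_loop style matches the era of the code.

import asyncio

names = ['Ada Lovelace', 'Alan Turing']   # placeholder names, not from the original code
loop = asyncio.get_event_loop()
results = loop.run_until_complete(save_profiles(names))
print('Collected {0} results'.format(len(results)))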
示例#48
0
def quote_many(num_quotes=1, conn_limit=20, progress=None, step=10):
    if progress is None:
        progress = ProgressBar()
        progress.max = num_quotes // step

    logger.info('Process total %d quotes with max %d concurrent connections'
                % (num_quotes, conn_limit))
    logger.debug('... progress bar increment step size: %d coroutines' % step)

    semaphore = asyncio.Semaphore(conn_limit)

    # wrap coroutines with future
    # For Python 3.4.4+, asyncio.ensure_future(...)
    # will wrap coro as Task and keep input the same
    # if it is already Future.
    try:
        coro_to_fut = asyncio.ensure_future
    except AttributeError:
        logger.warning('asyncio.ensure_future requires Python 3.4.4+. '
                       'Fall back to asyncio.async')
        coro_to_fut = asyncio.async
    futures = [
        coro_to_fut(quote_with_lock(semaphore))
        for i in range(num_quotes)
    ]

    t_start = datetime.today()
    for ith, fut in enumerate(asyncio.as_completed(futures), 1):
        if ith % step == 0:
            progress.next()
        yield from fut
    t_end = datetime.today()
    progress.finish()

    logger.info('All coroutines complete in {:.2f} seconds'.format(
        (t_end - t_start).total_seconds()
    ))
    quotes = [fut.result() for fut in futures]
    return quotes
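Because quote_many is a generator-based coroutine (yield from), it can be handed straight to the event loop under the snippet's Python 3.4-era asyncio. A sketch of a driver, assuming quote_with_lock and logger are defined elsewhere in the same module:

import asyncio

loop = asyncio.get_event_loop()
quotes = loop.run_until_complete(quote_many(num_quotes=100, conn_limit=20))
print('Fetched {0} quotes'.format(len(quotes)))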
示例#49
0
def predict(f_enc, f_dec, samples, batches, mat, beam_size, max_len, header):
    '''
    Sample words and compute the prediction error
    '''
    preds = []
    errs = []
    progress = ProgressBar(numpy.sum([len(batch) for batch in batches]), 20, header)
    for batch in batches:
        x, mask_x, y, mask_y = load_batch(samples, batch, mat)
        [init_h] = f_enc(x, mask_x)

        n_steps = mask_x.sum(0)
        n_samples = x.shape[1]
        prev_sents = numpy.zeros((beam_size, n_samples, max_len), 'int32')
        # First step - No embedded word is fed into the decoder
        prev_words = numpy.asarray([-1] * n_samples, 'int32')
        prev_sents[:, :, 0], prev_log_prob, prev_h = f_dec(prev_words, init_h)
        prev_h = numpy.tile(prev_h, (beam_size, 1, 1))
        prev_n_ends = n_steps - (prev_sents[:, :, 0] == 0)

        for i in range(1, max_len - 1):
            # build independent hypothesis lists per sample ([[]] * n would alias a single shared list)
            hypo_sents = [[] for _ in range(n_samples)]
            hypo_log_prob = [[] for _ in range(n_samples)]
            hypo_h = [[] for _ in range(n_samples)]
            hypo_n_ends = [[] for _ in range(n_samples)]
            has_hypos = numpy.asarray([False] * n_samples)
            for j in range(beam_size):
                if not prev_n_ends[j].any():
                    continue

                next_words, next_log_prob, next_h = f_dec(prev_sents[j, :, i - 1], prev_h[j])
                for k in range(n_samples):
                    if prev_n_ends[j, k] > 0:
                        has_hypos[k] = True
                        next_sents = numpy.tile(prev_sents[j, k], (beam_size, 1))
                        next_sents[:, i] = next_words[:, k]
                        hypo_sents[k].extend(next_sents)
                        hypo_log_prob[k].extend(next_log_prob[:, k] + prev_log_prob[j, k])
                        hypo_h[k].extend([next_h[k]] * beam_size)
                        hypo_n_ends[k].extend(prev_n_ends[j, k] - (next_words[:, k] == 0))
                    else:
                        hypo_sents[k].append(prev_sents[j, k].copy())
                        hypo_log_prob[k].append(prev_log_prob[j, k])
                        hypo_h[k].append(prev_h[j, k].copy())
                        hypo_n_ends[k].append(0)

            if not has_hypos.any():
                break

            for j in range(n_samples):
                if not has_hypos[j]:
                    continue

                indices = numpy.argsort(hypo_log_prob[j])[: -beam_size - 1: -1]
                for k in range(beam_size):
                    prev_sents[k, j] = hypo_sents[j][indices[k]]
                    prev_log_prob[k, j] = hypo_log_prob[j][indices[k]]
                    prev_h[k, j] = hypo_h[j][indices[k]]
                    prev_n_ends[k, j] = hypo_n_ends[j][indices[k]]

        sents = prev_sents[prev_log_prob.argmax(0), numpy.arange(n_samples)]
        for i in range(n_samples):
            idx = 0
            while idx < max_len and n_steps[i] > 0:
                if sents[i, idx] == 0:
                    n_steps[i] -= 1
                idx += 1
            preds.append(sents[i, : idx].tolist())

        y = numpy.concatenate([y, numpy.zeros((max_len - len(y), n_samples), 'int32')]).T
        mask_y = numpy.concatenate([mask_y, numpy.zeros((max_len - len(mask_y), n_samples), 'int32')]).T
        errs.extend(((sents != y) * mask_y * 1.).sum(1) / mask_y.sum(1))
        progress.disp(errs, ' ERR')

    return preds, numpy.mean(errs)
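The slice [: -beam_size - 1: -1] applied to the argsort result keeps the beam_size highest-scoring hypotheses, best first. A self-contained check with made-up scores:

import numpy

beam_size = 3
scores = numpy.array([0.1, 0.7, 0.3, 0.9, 0.2])        # hypothetical log-probabilities
best = numpy.argsort(scores)[: -beam_size - 1: -1]     # top beam_size indices, descending
print(best)                                            # [3 1 2]
print(scores[best])                                    # [0.9 0.7 0.3]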
示例#50
0
def main(
    # Dataset Configuration
    path_train='../train.json',             # Path to load training set
    path_val='../val.json',                 # Path to load validation set
    path_mat_train='../VGG19_train.npy',    # Path of image features of training set
    path_mat_val='../VGG19_val.npy',        # Path of image features of validation set
    max_samples_train=0,                    # Max number of samples in training set
    max_samples_val=0,                      # Max number of samples in validation set
    # Model Configuration
    n_dim_img=4096,                         # Dimension of image feature
    n_dim_txt=250,                          # Dimension of word embedding
    n_dim_enc=1000,                         # Number of hidden units in encoder
    n_dim_dec=1000,                         # Number of hidden units in decoder
    batch_size_train=64,                    # Batch size in training
    batch_size_test=64,                     # Batch size in validation
    optimizer=adadelta,                     # [sgd|adam|adadelta|rmsprop], sgd not recommended
    lrate=0.0002,                           # Learning rate for optimizer
    max_epochs=1000,                        # Maximum number of epoch to run
    patience=10,                            # Number of epoch to wait before early stop if no progress
    # Frequency
    ratio_val=1.,                           # Validation frequency - Validate model after trained by this ratio of data
    ratio_save=1.,                          # Save frequency - Save the best parameters after trained by this ratio of data
    # Save & Load
    path_load=None,                         # Path to load a previously trained model
    path_save='model',                      # Path to save the models
):
    '''
    Main function
    '''
    print('Loading data...')
    n_dim_vocab = 0                                             # Vocabulary size
    samples_train, mat_train, n_dim_vocab = load_data(path_train, path_mat_train, n_dim_vocab, max_samples_train)
    samples_val, mat_val, n_dim_vocab = load_data(path_val, path_mat_val, n_dim_vocab, max_samples_val)

    print('\ttraining:   %6d samples' % len(samples_train))
    print('\tvalidation: %6d samples' % len(samples_val))

    t_params = OrderedDict()
    best_params = None
    costs = []
    if path_load:
        best_params = OrderedDict(numpy.load(path_load))
        costs.extend(best_params['costs'])
        del best_params['costs']
        init_t_params(best_params, t_params)

    print('Building model...')
    f_cost, f_update = build_model(t_params, n_dim_img, n_dim_txt, n_dim_enc, n_dim_dec, n_dim_vocab, optimizer)

    print('Training...')
    time_start = time.time()
    batches_val = get_batches(len(samples_val), batch_size_test)
    n_epochs = 0
    n_samples = 0
    n_bad_costs = 0
    n_stops = 0
    next_val = ratio_val * len(samples_train)
    next_save = max(ratio_save * len(samples_train), next_val)
    while n_epochs < max_epochs:
        n_epochs += 1
        batches_train = get_batches(len(samples_train), batch_size_train, True)
        pgb_train = ProgressBar(len(batches_train), 20, 'EPOCH %4d ' % n_epochs)
        costs_train = []
        for batch_train in batches_train:
            n_samples += len(batch_train)
            get_cost(f_cost, samples_train, batch_train, mat_train, costs_train, pgb_train, f_update, lrate)

            if n_samples >= next_val:
                next_val += ratio_val * len(samples_train)
                pgb_train.pause()
                pgb_val = ProgressBar(len(batches_val), 20, 'VALIDATION ')
                costs_val = []
                for batch_val in batches_val:
                    get_cost(f_cost, samples_val, batch_val, mat_val, costs_val, pgb_val)
                costs.append(numpy.mean(costs_val))

                if best_params is None or costs[-1] <= numpy.min(costs):
                    best_params = params_unzip(t_params)
                    n_bad_costs = 0
                else:
                    n_bad_costs += 1
                    if n_bad_costs > patience:
                        n_stops += 1
                        print('WARNING: early stop for %d time(s)!' % n_stops)
                        params_zip(best_params, t_params)
                        n_bad_costs = 0

            if path_save and n_samples >= next_save:
                next_save = max(next_save + ratio_save * len(samples_train), next_val)
                pgb_train.pause()
                print('Saving model...')
                if best_params is not None:
                    params = best_params
                else:
                    params = params_unzip(t_params)
                numpy.savez(path_save, costs=costs, **params)
                numpy.savez('%s_%f' % (path_save, costs_train[-1]), costs=costs, **params_unzip(t_params))

    time_end = time.time()
    print('Training finished')
    print('TIME: %9.3f sec    EPOCHS: %4d    SPEED: %9.3f sec/epoch' % (time_end - time_start, n_epochs, (time_end - time_start) / n_epochs))

    if best_params is not None:
        params_zip(best_params, t_params)
    else:
        best_params = params_unzip(t_params)

    print('Saving final model...')
    if path_save:
        numpy.savez(path_save, costs=costs, **best_params)

    print('Done.')
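get_batches is not included in this snippet; judging only from its call sites above (a sample count, a batch size, and an optional shuffle flag), a minimal stand-in might look like the following sketch:

import numpy

def get_batches(n_samples, batch_size, shuffle=False):
    # hypothetical helper inferred from how it is called; the original implementation may differ
    indices = numpy.arange(n_samples)
    if shuffle:
        numpy.random.shuffle(indices)
    return [indices[i: i + batch_size] for i in range(0, n_samples, batch_size)]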
示例#51
0
    def import_from_xml(self):
        print("\nUploading data to InfluxDB:")
        progress_bar = ProgressBar(self.settings.nb_rrd_files*3)  # nb_files * (read + upload + validate)
        errors = []

        def _upload_and_validate(measurement, tags, fields, packed_values):
            try:
                self.write_series(measurement, tags, fields, packed_values)
            except Exception as e:
                errors.append((Symbol.NOK_RED, "Error writing {0} to InfluxDB: {1}".format(measurement, e)))
                return
            finally:
                progress_bar.update(len(fields)-1)  # 'time' column ignored

            try:
                self.validate_record(measurement, fields)
            except Exception as e:
                errors.append((Symbol.WARN_YELLOW, "Validation error in {0}: {1}".format(measurement, e)))
            finally:
                progress_bar.update(len(fields)-1)  # 'time' column ignored

        try:
            assert self.client and self.valid
        except:
            raise Exception("Not connected to an InfluxDB server")
        else:
            print("  {0} Connection to database \"{1}\" OK".format(Symbol.OK_GREEN, self.settings.influxdb['database']))

        if self.settings.influxdb['group_fields']:
            """
            In "group_fields" mode, all fields of a same plugin (ex: system, user, nice, idle... of CPU usage)
             will be represented as columns of the same time series in InfluxDB.

             Schema will be:
                +----------------------+-------+----------+----------+-----------+
                |   time_series_name   | col_0 |  col_1   |  col_2   | col_3 ... |
                +----------------------+-------+----------+----------+-----------+
                | domain.host.plugin   | time  | metric_1 | metric_2 | metric_3  |
                | acadis.org.tesla.cpu | time  | system   | user     | nice      |
                | ...                  |       |          |          |           |
                +----------------------+-------+----------+----------+-----------+
            """
            for domain, host, plugin in self.settings.iter_plugins():
                _plugin = self.settings.domains[domain].hosts[host].plugins[plugin]
                measurement = plugin
                tags = {
                    "domain": domain,
                    "host": host,
                    "plugin": plugin
                }
                if _plugin.is_multigraph:
                    tags["is_multigraph"] = True
                    print(host, plugin)

                field_names = ['time']
                values = defaultdict(list)
                values_with_time = []

                for field in _plugin.fields:
                    _field = _plugin.fields[field]

                    if _field.rrd_exported:
                        field_names.append(field)
                        try:
                            content = read_xml_file(_field.xml_filename)
                        except Exception as e:
                            errors.append((Symbol.WARN_YELLOW, "Could not read file for {0}: {1}".format(field, e)))
                        else:
                            [values[key].append(value) for key, value in content.items()]

                            # keep track of influxdb storage info to allow 'fetch'
                            _field.influxdb_measurement = measurement
                            _field.influxdb_field = field
                            _field.xml_imported = True

                    # update progress bar [######      ] 42 %
                    progress_bar.update()

                # join data with time as first column
                values_with_time.extend([[k]+v for k, v in values.items()])

                _upload_and_validate(measurement, tags, field_names, values_with_time)

        else:  # non grouping
            """
            In "non grouped" mode, all fields of a same plugin will have a dedicated time series and the values
             will be written to a 'value' column

             Schema will be:
                +-----------------------------+-------+-------+
                |      time_series_name       | col_0 | col_1 |
                +-----------------------------+-------+-------+
                | domain.host.plugin.metric_1 | time  | value |
                | domain.host.plugin.metric_2 | time  | value |
                | acadis.org.tesla.cpu.system | time  | value |
                | ...                         |       |       |
                +-----------------------------+-------+-------+
            """
            for domain, host, plugin, field in self.settings.iter_fields():
                _field = self.settings.domains[domain].hosts[host].plugins[plugin].fields[field]
                if not _field.rrd_exported:
                    continue
                measurement = field
                tags = {
                    "domain": domain,
                    "host": host,
                    "plugin": plugin
                }
                field_names = ['time', 'value']
                values = defaultdict(list)
                values_with_time = []

                _field.influxdb_measurement = measurement
                _field.influxdb_field = 'value'

                content = read_xml_file(_field.xml_filename)
                [values[key].append(value) for key, value in content.items()]
                _field.xml_imported = True
                progress_bar.update()

                # join data with time as first column
                values_with_time.extend([[k]+v for k, v in values.items()])
                _upload_and_validate(measurement, tags, field_names, values_with_time)

        for error in errors:
            print("  {} {}".format(error[0], error[1]))
示例#52
0
def discover_from_rrd(settings, insert_missing=True, print_missing=False):
    """
    Builds a Munin dashboard structure (domain/host/plugins) by listing the files in the RRD folder

    http://munin-monitoring.org/wiki/MuninFileNames:
    /var/lib/munin/SomeGroup/foo.example.com-cpu-irq-d.rrd
               --------- --------------- --- --- -
                   |            |         |   |  `-- Data type (a = absolute, c = counter, d = derive, g = gauge)
                   |            |         |   `----- Field name / data source: 'irq'
                   |            |         `--------- Plugin name: 'cpu'
                   |            `------------------- Node name: 'foo.example.com'
                   `-------------------------------- Group name: 'SomeGroup'
    """

    folder = settings.paths['munin']
    print "Reading Munin RRD cache: ({0})".format(folder)

    not_inserted = defaultdict(dict)

    for domain in os.listdir(folder):
        if not os.path.isdir(os.path.join(folder, domain)):
            #domains are represented as folders
            continue

        if not insert_missing and not domain in settings.domains:
            #skip unknown domains (probably no longer wanted)
            continue

        files = os.listdir(os.path.join(folder, domain))
        progress_bar = ProgressBar(len(files), title=domain)
        for filename in files:
            progress_bar.update()

            path = os.path.join(folder, domain, filename)
            if os.path.isdir(path) or not path.endswith(".rrd"):
                # not a RRD database
                continue

            parts = os.path.splitext(filename)[0].split('-')
            length = len(parts)

            if length < 4:
                print "Error:", filename, parts, length
                continue

            host, plugin, field, datatype = parts[0], ".".join(parts[1:-2]), parts[-2], parts[-1]

            if not insert_missing and (not host in settings.domains[domain].hosts or not plugin in settings.domains[domain].hosts[host].plugins):
                if not host in not_inserted[domain]:
                    not_inserted[domain][host] = set()
                not_inserted[domain][host].add(plugin)
                continue

            plugin_data = settings.domains[domain].hosts[host].plugins[plugin]
            try:
                assert os.path.exists(os.path.join(folder, domain, "{0}-{1}-{2}-{3}.rrd".format(host, plugin.replace(".", "-"), field, datatype[0])))
            except AssertionError:
                print "{0} != {1}-{2}-{3}-{4}.rrd".format(filename, host, plugin, field, datatype[0])
                plugin_data.fields[field].rrd_found = False
            else:
                plugin_data.fields[field].rrd_found = True
                plugin_data.fields[field].rrd_filename = os.path.join(settings.paths['munin'], domain, filename)
                plugin_data.fields[field].xml_filename = os.path.join(settings.paths['xml'], domain, filename.replace(".rrd", ".xml"))
                plugin_data.fields[field].settings = {
                    "type": DATA_TYPES[datatype]
                }
                settings.nb_fields += 1

    if print_missing and len(not_inserted):
        print "The following plugin databases were ignored"
        for domain, hosts in not_inserted.items():
            print "  - Domain {0}:".format(domain)
            for host, plugins in hosts.items():
                print "    {0} Host {1}: {2}".format(Symbol.NOK_RED, host, ", ".join(plugins))

    return settings
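A quick standalone check of the host/plugin/field/datatype split, using the example filename from the docstring above:

import os

filename = 'foo.example.com-cpu-irq-d.rrd'
parts = os.path.splitext(filename)[0].split('-')
host, plugin, field, datatype = parts[0], ".".join(parts[1:-2]), parts[-2], parts[-1]
print("{0} | {1} | {2} | {3}".format(host, plugin, field, datatype))
# foo.example.com | cpu | irq | d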