Example #1
def discover_from_www(settings):
    """
    Builds a Munin dashboard structure (domain/host/plugins) by reading the HTML files
    rather than listing the cache folder, because the latter is likely to contain old data
    """

    # delayed import since this function should not be used in the "normal" case
    try:
        from bs4 import BeautifulSoup
    except ImportError:
        try:
            from BeautifulSoup import BeautifulSoup
        except ImportError:
            print "Please install BeautifulSoup to use this program"
            print "  pip install beautifulsoup4 or easy_install beautifulsoup4"
            sys.exit(1)

    folder = settings.paths['www']

    print "Reading Munin www cache: ({0})".format(folder)
    with open(os.path.join(folder, "index.html")) as f:
        root = BeautifulSoup(f.read())

    domains = root.findAll("span", {"class": "domain"})

    # hosts and domains are at the same level in the tree so let's open the file
    for domain in domains:
        with open(os.path.join(folder, domain.text, "index.html")) as f:
            domain_root = BeautifulSoup(f.read())

        links = domain_root.find(id="content").findAll("a")
        progress_bar = ProgressBar(len(links), title=domain.text)

        for link in links:
            progress_bar.update()

            elements = link.get("href").split("/")
            if len(elements) < 2 \
                or elements[0].startswith("..") \
                or elements[-1].startswith("index"):
                continue

            if len(elements) == 2:
                host, plugin = elements[0], elements[1]
            elif len(elements) == 3:
                # probably a multigraph; the plugin part is missing
                # don't bother reading the HTML file for now; guess it from the RRD database later
                host, plugin = elements[0], ".".join(elements[1:3])
            else:
                print "Unknown structure"
                continue

            plugin = plugin.replace(".html", "")
            settings.domains[domain.text].hosts[host].plugins[plugin].is_multigraph = (len(elements) == 3)
            settings.domains[domain.text].hosts[host].plugins[plugin].settings = {
                'graph_title': link.text,
            }
            settings.nb_plugins += 1

    return settings
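These snippets come from unrelated projects, each with its own ProgressBar helper, so constructor signatures vary across the examples (a plain total, task_num=..., end_value=..., or even just a title string). As a rough reference, a minimal bar compatible with the most common pattern used below (construct with a total, call update() once per item, finish with done() or finish()) could look like the following; this is an illustrative sketch, not any of the original helpers:

import sys


class ProgressBar(object):
    """Minimal text progress bar (illustrative sketch only)."""

    def __init__(self, total, title='', width=40):
        self.total = max(int(total), 1)
        self.title = title
        self.width = width
        self.current = 0

    def update(self, value=None):
        # support both call styles seen in the examples: update() and update(i)
        self.current = self.current + 1 if value is None else value + 1
        filled = self.width * min(self.current, self.total) // self.total
        sys.stdout.write('\r{0} [{1}{2}] {3}/{4}'.format(
            self.title, '#' * filled, '.' * (self.width - filled),
            min(self.current, self.total), self.total))
        sys.stdout.flush()

    def done(self, newline=True):
        if newline:
            sys.stdout.write('\n')

    finish = done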
Example #2
 def make_pdb(self, bar_msg=''):
     """
     Returns a pdb-like formatted string. bar_msg is a string with message to show at ProgressBar initialization.
     bar_msg = '' disables the bar.
     :param bar_msg: str
     :return: str
     """
     models = self.models()
     if bar_msg:
         bar = ProgressBar(len(models), bar_msg)
     else:
         bar = None
     if len(models) == 1:
         s = self.__repr__()
     else:
         s = ''
         for m in models:
             s += 'MODEL%9i\n' % m[0].model
             s += m.__repr__()
             s += '\nENDMDL\n'
             if bar:
                 bar.update()
     if bar:
         bar.done(False)
     return s
Example #3
    def _preprocess(self):
        self.lang = Lang()
        for text in self._texts:
            self.lang.index_text(text)

        for text in self._texts:
            indexes = indexes_from_text(self.lang, text)
            indexes.append(EOT_token)
            padded_indexes = pad_indexes(indexes, self._max_text_length,
                                         PAD_token)
            self._indexed_texts.append(padded_indexes)

        self._indexed_texts = np.stack(self._indexed_texts, axis=0)

        bar = ProgressBar(len(self._audio_files) - 1, unit='')
        for (audio_files_read, audio_file) in enumerate(self._audio_files):
            # (n_mels, T), (1+n_fft/2, T)
            mel, mag = compute_spectrograms(audio_file)
            padded_mel = pad_time_dim(mel, self._max_audio_length, 0)
            padded_mag = pad_time_dim(mag, self._max_audio_length, 0)
            self._mels.append(padded_mel.transpose())
            self._mags.append(padded_mag.transpose())

            bar.update(audio_files_read)

        self._mels = np.stack(self._mels, axis=0)
        self._mags = np.stack(self._mags, axis=0)
Example #4
 def fit(self, train_df, regressors=None):
     print("Fitting...")
     progress_bar = ProgressBar(len(train_df.columns))
     for item in train_df.columns:
         self.models[item] = Prophet(
             yearly_seasonality=self.yearly_seasonality,
             weekly_seasonality=self.weekly_seasonality,
             daily_seasonality=self.daily_seasonality,
             **self.prophet_config)
         target = train_df[item].dropna()
         if self.use_boxcox:
             idx = target.index
             target, self.lmbda_boxcox[item] = boxcox(target)
             target = pd.Series(target, index=idx)
         target.index.name = "ds"
         target.name = "y"
         if self.country_holidays is not None:
             self.models[item].add_country_holidays(country_name=self.country_holidays)
         if regressors is not None:
             target = pd.merge(target, regressors, left_index=True, right_index=True, how="left")
             for reg in regressors.columns:
                 self.models[item].add_regressor(reg)
         target = target.reset_index()
         self.models[item].fit(target)
         progress_bar.update()
     progress_bar.finish()
     return self.models
Example #5
    def generate(self):
        progress_bar = ProgressBar(self.settings.nb_rrd_files)

        self.add_header(self.settings)

        for domain in self.settings.domains:
            for host in self.settings.domains[domain].hosts:
                row = self.add_row("{0} / {1}".format(domain, host))
                for plugin in self.settings.domains[domain].hosts[
                        host].plugins:
                    _plugin = self.settings.domains[domain].hosts[
                        host].plugins[plugin]
                    panel = row.add_panel(
                        _plugin.settings["graph_title"] or plugin, plugin)

                    for field in _plugin.fields:
                        query = panel.add_query(field)
                        if "label" in _plugin.fields[field].settings:
                            query.alias = _plugin.fields[field].settings[
                                "label"]
                        progress_bar.update()

                    panel.width = 12 // self.settings.grafana['graph_per_row']
                    panel.process_graph_settings(_plugin.settings)
                    panel.process_graph_thresholds(_plugin.fields)
                    panel.process_graph_types(_plugin.fields)
Example #6
    def train(self, ts, cm, batchsz=1):
        self.model.train()

        start_time = time.time()

        steps = int(math.floor(len(ts) / float(batchsz)))

        shuffle = np.random.permutation(np.arange(steps))
        pg = ProgressBar(steps)
        cm.reset()

        total_loss = 0
        for i in range(steps):
            self.optimizer.zero_grad()
            si = shuffle[i]
            x, y = self._batch(ts, si, batchsz)
            pred = self.model(x)
            loss = self.crit(pred, y)
            total_loss += loss.data[0]
            loss.backward()
            self._add_to_cm(cm, y, pred)
            self.optimizer.step()
            pg.update()
        pg.done()

        duration = time.time() - start_time
        total_corr = cm.get_correct()
        total = cm.get_total()

        print('Train (Loss %.4f) (Acc %d/%d = %.4f) (%.3f sec)' %
              (float(total_loss) / total, total_corr, total,
               float(total_corr) / total, duration))
        print(cm)
Example #7
 def predict(self, steps):
     print("Forecasting...")
     progress_bar = ProgressBar(len(self.models.items()))
     self.fcst_ds = pd.date_range(
         start=self.train_ds.min(), 
         freq="D", 
         periods=len(self.train_ds)+steps)[-365:]
     for item, model in self.models.items():
         pred = model.predict(
             exogenous=fourier(
                 steps, 
                 seasonality=self.seasonality, 
                 n_terms=self.n_fourier_terms),
             n_periods=steps, 
             return_conf_int=True,
             alpha=(1.0 - self.confidence_interval))
         self.fcst[item] = pd.DataFrame(
             {"yhat":pred[0],
              "yhat_lower":pred[1][:,0],
              "yhat_upper":pred[1][:,1]},
             index=self.fcst_ds)
         if self.use_boxcox:
             self.fcst[item] = inv_boxcox(
                 self.fcst[item], 
                 self.lmbda_boxcox[item])
         progress_bar.update()
     progress_bar.finish()
     return pd.concat(self.fcst, axis=1)
Example #8
def parallel_test(model_cls,
                  model_kwargs,
                  checkpoint,
                  dataset,
                  data_func,
                  gpus,
                  worker_per_gpu=1):
    ctx = multiprocessing.get_context('spawn')
    idx_queue = ctx.Queue()
    result_queue = ctx.Queue()
    num_workers = len(gpus) * worker_per_gpu
    workers = [
        ctx.Process(target=worker_func,
                    args=(model_cls, model_kwargs, checkpoint, dataset,
                          data_func, gpus[i % len(gpus)], idx_queue,
                          result_queue)) for i in range(num_workers)
    ]
    for w in workers:
        w.daemon = True
        w.start()

    for i in range(len(dataset)):
        idx_queue.put(i)

    results = {}
    prog_bar = ProgressBar(task_num=len(dataset))
    for _ in range(len(dataset)):
        img_id, res = result_queue.get()
        results[img_id] = format_ret(res)
        prog_bar.update()
    print('\n')
    for worker in workers:
        worker.terminate()

    return results
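worker_func is external to this example. In similar multi-GPU test harnesses it is a loop that binds one GPU, pulls indices from idx_queue forever (the parent terminates the workers once all results are collected), and pushes (index, result) pairs onto result_queue. A hedged sketch of what it might look like; load_checkpoint and the data handling are assumptions, since the original helper is not shown:

import torch

def worker_func(model_cls, model_kwargs, checkpoint, dataset, data_func,
                gpu_id, idx_queue, result_queue):
    model = model_cls(**model_kwargs)
    load_checkpoint(model, checkpoint, map_location='cpu')  # assumed helper
    torch.cuda.set_device(gpu_id)
    model.cuda()
    model.eval()
    with torch.no_grad():
        while True:  # the parent process calls .terminate() when done
            idx = idx_queue.get()
            data = dataset[idx]
            result = model(**data_func(data, gpu_id))
            result_queue.put((idx, result))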
Example #9
    def train(self, ts, batchsz):
        self.model.train()

        start_time = time.time()

        steps = int(math.floor(len(ts) / float(batchsz)))
        shuffle = np.random.permutation(np.arange(steps))
        total_loss = total = 0
        pg = ProgressBar(steps)
        for i in range(steps):
            self.optimizer.zero_grad()

            si = shuffle[i]
            ts_i = data.batch(ts, si, batchsz, long_tensor_alloc, tensor_shape,
                              tensor_max)
            src, dst, tgt = self._wrap(ts_i)
            pred = self.model((src, dst))
            loss = self.crit(pred, tgt)
            total_loss += loss.data[0]
            loss.backward()
            torch.nn.utils.clip_grad_norm(self.model.parameters(), self.clip)

            total += self._total(tgt)
            self.optimizer.step()
            pg.update()
        pg.done()
        duration = time.time() - start_time

        avg_loss = float(total_loss) / total

        print('Train (Loss %.4f) (Perplexity %.4f) (%.3f sec)' %
              (avg_loss, np.exp(avg_loss), duration))
Example #10
def plot_traj(trajs,
              fig_size=(6, 6),
              color="mediumpurple",
              size=5,
              title='',
              is_plot_line=False,
              od_only=False,
              offset=None):
    """plot the traj
    """
    if offset is None:
        offset = [0, 0]
    p = ProgressBar(len(trajs), 'Plotting trajectories')
    plt.figure(figsize=fig_size)
    for i in range(len(trajs)):
        p.update(i)
        traj = np.array(trajs[i])
        if od_only:
            traj = [traj[0], traj[-1]]
        x = [x[0] + np.random.uniform(-offset[0], offset[0]) for x in traj]
        y = [y[1] + np.random.uniform(-offset[1], offset[1]) for y in traj]

        if od_only:
            if is_plot_line:
                # connect origin and destination with a single line
                plt.plot([x[0], x[1]], [y[0], y[1]], c=color)
            plt.scatter(x[0], y[0], c=color, s=size)
            plt.scatter(x[1], y[1], c="yellowgreen", s=size)
        else:
            if is_plot_line:
                plt.plot(x, y, c=color)
            plt.scatter(x, y, c=color, s=size)
    plt.title(title)
    plt.show()
Example #11
    def rmsd_matrix(self, msg=''):
        """
        Calculates the RMSD matrix, with no fitting, for all pairs of models in the trajectory.
        :return: np.array
        """
        def rmsd(m1, m2, ml):
            return np.sqrt(np.sum((m1 - m2)**2) / ml)

        model_length = len(self.template)
        models = self.coordinates.reshape(-1, model_length, 3)
        dim = len(models)
        result = np.zeros((dim, dim))
        if msg:
            bar = ProgressBar((dim * dim - dim) // 2, msg=msg)
        else:
            bar = None
        for i in range(dim):
            for j in range(i + 1, dim):
                if bar:
                    bar.update()
                result[i, j] = result[j, i] = rmsd(models[i], models[j],
                                                   model_length)
        if bar:
            bar.done(True)
        return result
Example #12
def load_images(
    path,
    preprocessor=None,
    limit=None,
):
    images = []
    images_id = next(os.walk(path))[2]
    size = limit if limit is not None else len(images_id)
    print(f"Loading {size} images")

    prog = ProgressBar(100, size)

    for id in range(size):
        name = images_id[id]

        filename = path + "/" + name
        image = load_img(filename, target_size=(224, 224))
        image = img_to_array(image)
        image = image.reshape(
            (1, image.shape[0], image.shape[1], image.shape[2]))

        if preprocessor is not None:
            image = preprocessor.preprocess_input(image)

        image_id = name.split('.')[0]
        images.append([image_id, image])

        prog.update(id)

    print("Loading complete")
    return images
Example #13
def export_to_xml_in_folder(source, destination=Defaults.MUNIN_XML_FOLDER):
    """
    Calls "rrdtool dump" to convert RRD database files in "source" folder to XML representation
    Converts all *.rrd files in source folder
    """
    assert os.path.exists(source)
    try:
        os.makedirs(destination)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    filelist = [("", os.path.join(source, file)) for file in os.listdir(source)
                if file.endswith(".rrd")]
    nb_files = len(filelist)
    progress_bar = ProgressBar(nb_files)

    print("Exporting {0} RRD databases:".format(nb_files))

    for domain, file in filelist:
        src = os.path.join(source, domain, file)
        dst = os.path.join(
            destination, "{0}-{1}".format(domain,
                                          file).replace(".rrd", ".xml"))
        progress_bar.update()

        code = subprocess.check_call(['rrdtool', 'dump', src, dst])

    return nb_files
Example #15
    def train(self, ts, cm, dropout, batchsz=1):

        total_loss = 0
        start_time = time.time()
        steps = int(math.floor(len(ts)/float(batchsz)))
        shuffle = np.random.permutation(np.arange(steps))
        pg = ProgressBar(steps)
        cm.reset()

        for i in range(steps):
            si = shuffle[i]
            ts_i = data.batch(ts, si, batchsz)
            feed_dict = self.model.ex2dict(ts_i, 1.0 - dropout)

            _, step, summary_str, lossv, guess = self.sess.run(
                [self.train_op, self.global_step, self.summary_op, self.loss,
                 self.model.best], feed_dict=feed_dict)
            self.train_writer.add_summary(summary_str, step)
            total_loss += lossv
            cm.add_batch(ts_i.y, guess)
            pg.update()

        pg.done()
        total = cm.get_total()
        total_corr = cm.get_correct()
        duration = time.time() - start_time

        print('Train (Loss %.4f) (Acc %d/%d = %.4f) (%.3f sec)' %
              (float(total_loss) / total, total_corr, total,
               float(total_corr) / total, duration))
        print(cm)
Example #16
    def train(self, ts):
        self.model.train()

        start_time = time.time()

        steps = int(len(ts))
        shuffle = np.random.permutation(np.arange(steps))

        total_loss = total_corr = total = 0
        pg = ProgressBar(steps)
        for i in range(steps):
            self.optimizer.zero_grad()
            si = shuffle[i]
            src, dst, tgt = self._wrap(ts[si])
            pred = self.model((src, dst))
            loss = self.crit(pred, tgt)
            total_loss += loss.data[0]
            loss.backward()

            total_corr += self._right(pred, tgt)
            total += self._total(tgt)
            self.optimizer.step()
            pg.update()
        pg.done()
        duration = time.time() - start_time

        avg_loss = float(total_loss) / total

        print(
            'Train (Loss %.4f) (Perplexity %.4f) (Acc %d/%d = %.4f) (%.3f sec)'
            % (avg_loss, np.exp(avg_loss), total_corr, total,
               float(total_corr) / total, duration))
Example #17
 def stability_derivatives(self):
     prog = ProgressBar('Instantiating Stability Object')
     derivatives = StabilityDerivatives(u=self.initial_trim_case.u, w=self.initial_trim_case.w, q=0,
                                        theta_f=self.initial_trim_case.fuselage_tilt,
                                        collective_pitch=self.initial_trim_case.collective_pitch,
                                        longitudinal_cyclic=self.initial_trim_case.longitudinal_cyclic)
     prog.update(100)
     return derivatives
Example #18
    def specific_entropy(self):
        """ Allows look-up of reference Specific Entropy `s` of the ambient flow depending on the ambient static
        temperature :pr:attr:`engine_in.ambient.t_static` and ambient static pressure
        `py:attr:`engine_in.ambient.p_static`. If a cached value is already present in the look-up table then the online
        calculator won't be accessed. Otherwise, this process takes some time as a request to the server has to be made.

        :return: Specific Entropy in SI Joule per kilogram Kelvin [J/kg K]
        :rtype: float
        """
        prog = ProgressBar('Fetching Entropy Value')
        temp = self.engine_in.ambient.t_static - 273.15
        pres = self.engine_in.ambient.p_static / 1000.
        tol = {'temperature': 10., 'pressure': 1.}  # Tolerances for cache search procedure

        # Opening cache as read-only file to look-up stored values
        with open(DIRS['ENTROPY_TABLE_DIR']) as cache:
            entries = cache.readlines()[1:]
            valid_entries = []
            for entry in entries:
                try:
                    cached_temp, cached_pres, cached_s0 = entry.split('\t')
                    cached_temp, cached_pres, cached_s0 = float(cached_temp), float(cached_pres), float(cached_s0)
                    temp_error, pres_error = temp - cached_temp, pres - cached_pres
                    # a cached entry is usable only when both errors are within tolerance
                    if abs(temp_error) < tol['temperature'] and abs(pres_error) < tol['pressure']:
                        # [0] Temperature [1] Pressure [2] Entropy [3] Squared Error
                        valid_entries += [(cached_temp, cached_pres, cached_s0, (temp_error + pres_error)**2)]
                except ValueError:
                    continue  # skip malformed cache lines

        if len(valid_entries) == 0:  # no valid entries found, so the online database must be accessed
            with requests.Session() as session:
                prog.update(25, 'Loading Session')
                calculator_url = 'http://www.irc.wisc.edu/properties/'
                data = {'units': 'International',
                        'fluid': 'Dry Air',
                        'parameter1': 'T',  # State 1 = Temperature
                        'parameter2': 'P',  # State 2 = Pressure
                        'state1': '{}'.format(temp),  # Degrees Celsius
                        'state2': '{}'.format(pres),  # Absolute Pressure in kPa
                        'calculate': 'Calculate Properties'}
                prog.update(35, 'Posting Request to Database')
                response = session.post(calculator_url, data=data)
                prog.update(90, 'Parsing Response')
                result_hdr = ('<!-- Calculation Results -->', '</table')  # Tuple of start [0] end [1] strings

                raw = response.text.split(result_hdr[0])[-1].split(result_hdr[1])[0]
                filtered = raw.replace('\t', '').replace('\n', '').split('Entropy: ')[-1].split(' J')[0]
                s0 = float(filtered)  # Entropy in J/kg K

                # Writing value to cache and returning
                with open(DIRS['ENTROPY_TABLE_DIR'], 'a') as cache:
                    cache.write('\n{:.10f}\t{:.10f}\t{:.10f}'.format(temp, pres, s0))
                prog.update(100, 'Complete')
                return s0
        else:
            closest_match = sorted(valid_entries, key=lambda x: x[-1])[0]  # Returns entry with minimum sq. Error
            prog.update(100, 'Complete')
            return closest_match[2]
Example #19
def main(argv=None):
    with tf.Session() as sess:
        data_dir = FLAGS.data_dir
        files = [os.path.join(data_dir, item) for item in os.listdir(data_dir)]
        # files = random.sample(files,  800)
        images = tf.placeholder(tf.float32,
                                [None, RESIZE_FINAL, RESIZE_FINAL, 3])
        logits = inference(
            images,
            False,
            num_classes=2,
            num_blocks=[3, 4, 6, 3],  # defaults to 50-layer network
            use_bias=False,  # defaults to using batch norm
            bottleneck=True)
        init = tf.global_variables_initializer()
        resnet_variables = tf.global_variables()
        saver = tf.train.Saver(resnet_variables)
        saver.restore(sess, os.path.join(FLAGS.model_dir, FLAGS.ckpt_file))

        softmax_output = tf.nn.softmax(logits)
        writer = None  # stays None when no output file is requested
        if FLAGS.target:
            print('Creating output file %s' % FLAGS.target)
            output = open(os.path.join(FLAGS.data_dir, FLAGS.target), 'w')
            writer = csv.writer(output)
            writer.writerow(('file', 'label', 'score'))

        num_batches = int(math.ceil(len(files) / float(MAX_BATCH_SZ)))  # ceil over the quotient
        pg = ProgressBar(num_batches)
        for j in range(num_batches):
            start_offset = j * MAX_BATCH_SZ
            end_offset = min((j + 1) * MAX_BATCH_SZ, len(files))

            batch_image_files = files[start_offset:end_offset]
            images_ = []
            for file in batch_image_files:
                print(file)
                image_buffer = tf.read_file(file)
                bbox = []
                image = image_preprocessing(image_buffer, [], False)
                images_.append(image)
            image_batch = tf.stack(images_)
            batch_results = sess.run(softmax_output,
                                     feed_dict={images: image_batch.eval()})
            batch_sz = batch_results.shape[0]

            for i in range(batch_sz):
                output_i = batch_results[i]
                best_i = np.argmax(output_i)

                best_choice = (label_list[best_i], output_i[best_i])
                if writer is not None:
                    f = batch_image_files[i]
                    writer.writerow(
                        (f, best_choice[0], '%.2f' % best_choice[1]))
            pg.update()
        pg.done()
Example #20
    def import_from_xml_folder(self, folder):
        raise DeprecationWarning

        # build file list and grouping if necessary
        file_list = os.listdir(folder)
        grouped_files = defaultdict(list)
        errors = []
        progress_bar = ProgressBar(len(file_list))

        for file in file_list:
            fullname = os.path.join(folder, file)
            parts = file.replace(".xml", "").split("-")
            series_name = ".".join(parts[0:-2])
            if self.settings.influxdb['group_fields']:
                grouped_files[series_name].append((parts[-2], fullname))
            else:
                grouped_files[".".join([series_name, parts[-2]])].append(('value', fullname))

        if self.settings.interactive:
            show = raw_input("Would you like to see the prospective series and columns? y/[n]: ") or "n"
            if show in ("y", "Y"):
                for series_name in sorted(grouped_files):
                    print("  - {2}{0}{3}: {1}".format(series_name, [name for name, _ in grouped_files[series_name]], Color.GREEN, Color.CLEAR))

        print("Importing {0} XML files".format(len(file_list)))
        for series_name in grouped_files:
            data = []
            keys_name = ['time']
            values = defaultdict(list)
            for field, file in grouped_files[series_name]:
                progress_bar.update()

                keys_name.append(field)

                content = read_xml_file(file)
                for key, value in content.items():
                    values[key].append(value)

            # join data with time as first column
            data.extend([[k]+v for k, v in values.items()])

            try:
                pass
                # self.upload_values(series_name, keys_name, data)
            except Exception as e:
                errors.append(str(e))
                continue

            try:
                self.validate_record(series_name, keys_name)
            except Exception as e:
                errors.append("Validation error in {0}: {1}".format(series_name, e))

        if errors:
            print("The following errors were detected while importing:")
            for error in errors:
                print("  {0} {1}".format(Symbol.NOK_RED, error))
Example #21
def write2files(init_path, file_list, D, write_file_num=14650):
    """Write trajectories to files.
    """
    rand_ind = random.sample(range(len(file_list)),
                             write_file_num)  # randomly sample trajectories
    p2 = ProgressBar(write_file_num, 'Writing files')
    for i in range(write_file_num):
        p2.update(i)
        with open(init_path + file_list[rand_ind[i]], 'w') as f2:
            for step in D[rand_ind[i]]:
                f2.writelines(str(step[0]) + ',' + str(step[1]) + '\n')
Example #22
def extract_features(images, model):
    features = dict()
    count = 0
    prog = ProgressBar(100, len(images))

    for id, image in images:
        feature = model.predict(image, verbose=0)
        features[id] = feature
        count += 1
        prog.update(count)

    return features
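Examples #12 and #22 appear to belong to the same image-captioning pipeline (both use the ProgressBar(100, size) style). A plausible way they compose; the VGG16 model choice and the "data/images" folder are illustrative assumptions:

from keras.applications import vgg16

model = vgg16.VGG16(weights="imagenet", include_top=False)
images = load_images("data/images", preprocessor=vgg16, limit=500)
features = extract_features(images, model)  # dict: image_id -> feature vector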
Example #23
def markov_model(trajectory, N, epsilon):
    """Builds the intermediate-point transition probability matrix.

    Counts transitions between consecutive points, adds Laplace noise, and row-normalizes.

    Args:
        trajectory: trajectory data (2-D array)
        N         : number of second-level grid cells
        epsilon   : privacy budget

    Returns:
        O_: intermediate-point transition probability matrix

    """
    O_ = np.zeros([N, N])  # N*N transition probability matrix
    for t in trajectory:
        O_0 = np.zeros([N, N])
        for i in range(len(t) - 1):
            curr_point = t[i]
            next_point = t[i + 1]
            O_0[curr_point][next_point] += 1
        O_0 = O_0 / (len(t) - 1)  # transition probabilities of this trajectory
        O_ += O_0

    line_all = []
    p = ProgressBar(N, 'Building intermediate-point transition matrix')
    for i in range(N):
        p.update(i)
        score = 0
        for j in range(N):
            # add Laplace noise (inverse-CDF equivalent kept for reference):
            # sensitivity = 1
            # randomDouble = random.random() - 0.5
            # noise = - (sensitivity / epsilon) * signum(randomDouble) * math.log(
            #     1 - 2 * abs(randomDouble))

            noise = np.random.laplace(0, 1 / epsilon)
            O_[i][j] += noise
            if O_[i][j] < 0:
                O_[i][j] = 0
            score += O_[i][j]
        line_all.append(score)

    # row-normalize
    for i in range(N):
        O_[i] /= line_all[i]

    sns.heatmap(data=O_, square=True)
    plt.show()

    return O_
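The commented-out block above is inverse-CDF (inverse transform) sampling of the same Laplace distribution that np.random.laplace draws from; with sensitivity 1, both produce Lap(0, 1/epsilon) noise. A small sketch of the correspondence (the function name is illustrative):

import math
import random

def laplace_noise_inverse_cdf(epsilon, sensitivity=1.0):
    # u ~ Uniform(-0.5, 0.5); invert the Laplace CDF with scale b = sensitivity / epsilon:
    # noise = -b * sign(u) * ln(1 - 2|u|)
    u = random.random() - 0.5
    b = sensitivity / epsilon
    return -b * math.copysign(1.0, u) * math.log(1 - 2 * abs(u))

# equivalent NumPy draw when sensitivity is 1:
# noise = np.random.laplace(0, 1 / epsilon)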
Example #24
def one_run(projects_train, projects_test, K, outlier_threshold, granularity):
    rmse_failed_run = []
    rmse_success_run = []
    rmse_run = []
    accuracy_run = []
    relative_time = np.linspace(0.025, 1, 20)
    bar = ProgressBar(end_value=len(relative_time), text="Time steps", count=True)
    bar.start()
    for i, rel_t in enumerate(relative_time):
        # Data
        t = int(rel_t * 999)
        samples = subsample(t, granularity)
        t = len(samples)
        T = 999

        # Remove outliers
        projects_train_filtered = [p for p in projects_train if np.all((p.money[T] - outlier_threshold) <= 0) and np.all((p.money[samples] - outlier_threshold) <= 0)]
        projects_test_filtered = [p for p in projects_test if np.all((p.money[T] - outlier_threshold) <= 0) and np.all((p.money[samples] - outlier_threshold) <= 0)]

        X_train = np.ndarray(shape=(len(projects_train_filtered), t), buffer=np.array([p.money[samples] for p in projects_train_filtered]), dtype=float)
        y_train = np.expand_dims(np.array([p.money[T] for p in projects_train_filtered]), axis=1)
        X_test = np.ndarray(shape=(len(projects_test_filtered), t), buffer=np.array([p.money[samples] for p in projects_test_filtered]), dtype=float)
        y_test = np.expand_dims(np.array([p.money[T] for p in projects_test_filtered]), axis=1)

        #X_max = np.max(X_train, axis=0)
        #X_train = X_train / X_max[np.newaxis, :]
        #X_test = X_test / X_max[np.newaxis, :]

        # Hyperparameters
        beta = 0.0001
        epsilon = 1e0
        lam = 0
        iterations = 50
        random_restarts = None

        mls = LeastSquaresMixture(X_train, y_train,
                                  K=K, beta=beta, lam=lam,
                                  iterations=iterations, epsilon=epsilon, random_restarts=random_restarts)
        mls.train(verbose=False)
        #print(mls)

        rmse_failed, rmse_success, rmse, accuracy = mls.evaluate(X_test, y_test, verbose=False)
        rmse_failed_run.append(rmse_failed)
        rmse_success_run.append(rmse_success)
        rmse_run.append(rmse)
        accuracy_run.append(accuracy)

        bar.update(i)

    print(accuracy_run)

    return rmse_failed_run, rmse_success_run, rmse_run, accuracy_run
Example #25
    def run(self, mode, X, Y, batch_size, optimizer=None, clip=None):
        self.reset_states(batch_size)
        if optimizer:
            self.train(True)
        else:
            self.eval()

        nbatches = X.size(0) // batch_size

        pb = ProgressBar(mode, self.epoch, nbatches)
        _total_time = 0
        _total_loss = 0
        _total_word = 0

        L = nn.CrossEntropyLoss(size_average=False)

        for index in range(nbatches):
            begin = index * batch_size
            end = begin + batch_size

            # Start
            if optimizer:
                start = time.time()
                x = Variable(X[begin:end], requires_grad=False)
                t = Variable(Y[begin:end], requires_grad=False)
            else:
                start = time.time()
                x = Variable(X[begin:end], requires_grad=False, volatile=True)
                t = Variable(Y[begin:end], requires_grad=False, volatile=True)
            y = self(x)
            loss = L(y, t.view(-1))

            if optimizer:
                if clip:
                    torch.nn.utils.clip_grad_norm(self.parameters(), clip)
                self.zero_grad()
                loss.backward()
                optimizer.step()
            # End
            time_per_batch = time.time() - start
            _total_time += time_per_batch
            _total_loss += loss.cpu().data.numpy()[0]
            _total_word += float(numpy.prod(t.size()))
            pb.update([('ppl', numpy.exp(_total_loss / _total_word),
                        lambda x: x),
                       ('wps', _total_word / _total_time, lambda x: x)])

        print
        return numpy.exp(_total_loss / _total_word), _total_word / _total_time
Example #26
def get_all_ranking(save_to):
    from utils import ProgressBar
    fout = open(save_to, 'w')

    images = Image.objects.all()
    progress = ProgressBar(len(images) * len(images), 20)
    for target in images:
        searcher = Searcher(target)
        searcher.run()
        results = []
        for _, image in searcher.results:
            results.append((image.origin_id, len(results)))
            progress.update()
        results.sort()
        print >> fout, ' '.join(str(x) for _, x in results)
    print('Finished. Written to file "{}"'.format(save_to))
Example #28
def trip_distribution(trajectory, N, epsilon):
    """

    Computes the trip distribution.

    Args:
        trajectory: trajectory data (2-D array)
        N         : number of second-level grid cells
        epsilon   : privacy budget

    Returns:
        R: transition probability matrix

    """
    R = np.zeros((N, N))  # transition probability matrix over grid cells
    for t in trajectory:
        if len(t) > 1:
            sta = t[0]
            end = t[-1]
            R[sta][end] += 1
        else:
            print(t)

    count = np.sum(R)
    print(count)

    p = ProgressBar(N, 'Building transition matrix')
    for i in range(N):
        p.update(i)
        for j in range(N):
            # add Laplace noise (inverse-CDF equivalent kept for reference):
            # sensitivity = 1
            # randomDouble = random.random() - 0.5
            # noise = - (sensitivity / epsilon) * signum(randomDouble) * math.log(
            #     1 - 2 * abs(randomDouble))
            noise = np.random.laplace(0, 1 / epsilon)

            R[i][j] += noise

            if R[i][j] < 0:
                R[i][j] = 0

            # unclear whether |D| should be recomputed after adding noise
            # count += R[i][j]
    R /= count

    return R
Example #29
def markov_model(trajectory, N, epsilon):
    """

    Markov model.

    Args:
        trajectory: trajectory data (2-D array)
        N         : number of second-level grid cells
        epsilon   : privacy budget

    Returns:
        O_: intermediate-point transition probability matrix

    """
    O_ = np.zeros((N, N))  # N*N transition probability matrix
    for t in trajectory:
        O_sub = np.zeros((N, N))
        for i in range(len(t) - 1):
            curr_point = t[i]
            next_point = t[i + 1]
            O_sub[curr_point][next_point] += 1
        O_sub /= (len(t) - 1)  # transition probabilities of this trajectory
        O_ += O_sub

    p = ProgressBar(N, 'Building intermediate-point transition matrix')
    for i in range(N):
        p.update(i)
        for j in range(N):
            noise = np.random.laplace(0, 1 / epsilon)  # add Laplace noise
            O_[i][j] += noise

            if O_[i][j] < 0:
                O_[i][j] = 0

    # row-normalize
    row_sum = [sum(O_[i]) for i in range(N)]
    for j in range(N):
        O_[j] /= row_sum[j]

    # plot a heatmap of the matrix
    sns.heatmap(data=O_, square=True)
    plt.title('mobility model construction matrix (epsilon=%s)' %
              str(used_pair[0]))
    plt.show()

    return O_
Example #30
def spectrogram2wav(spectrogram):
    '''
    spectrogram: [t, f], i.e. [t, nfft // 2 + 1]
    '''
    spectrogram = spectrogram.T  # [f, t]
    X_best = copy.deepcopy(spectrogram)  # [f, t]
    bar = ProgressBar(hp.n_iter, unit='')
    for i in range(hp.n_iter):
        bar.update(i)
        X_t = invert_spectrogram(X_best)
        est = librosa.stft(
            X_t, hp.n_fft, hp.hop_length, win_length=hp.win_length)  # [f, t]
        phase = est / np.maximum(1e-8, np.abs(est))  # [f, t]
        X_best = spectrogram * phase  # [f, t]
    X_t = invert_spectrogram(X_best)

    return np.real(X_t)
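invert_spectrogram is an external helper in this example; in Griffin-Lim implementations of this shape it is typically a thin wrapper around librosa.istft, matching the stft parameters used above. This is an assumption, since the helper is not shown:

import librosa

def invert_spectrogram(spectrogram):
    """Inverse STFT; spectrogram is [f, t], returns a 1-D waveform."""
    return librosa.istft(spectrogram, hop_length=hp.hop_length,
                         win_length=hp.win_length, window="hann")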
Example #31
 def predict(self, steps, freq="D", regressors=None):
     print("Forecasting...")
     progress_bar = ProgressBar(len(self.models.items()))
     for item, model in self.models.items():
         future = model.make_future_dataframe(steps, freq=freq).set_index("ds")
         if regressors is not None:
             future = pd.merge(future, regressors, left_index=True, right_index=True, how="left")
         pred = model.predict(future.reset_index()).set_index("ds")
         pred = pred[["yhat", "yhat_lower", "yhat_upper"]]
         self.fcst[item] = pred
         if self.use_boxcox:
             self.fcst[item] = inv_boxcox(
                 self.fcst[item], 
                 self.lmbda_boxcox[item])
         progress_bar.update()
     progress_bar.finish()
     fcst_df = pd.concat(self.fcst, axis=1).sort_index(axis=1)
     return fcst_df
Example #32
def route_length_estimate(trajectory, A, lo, hi, epsilon, sensitivity):
    """

    Trajectory length estimation.

    Args:
        trajectory : trajectory data (2-D array)
        A          : first-level grid dimension (C = A * A cells)
        lo, hi     : accepted trajectory length bounds
        epsilon    : privacy budget
        sensitivity: sensitivity of the score function

    Returns:
        L_array: for each (start, end) cell pair, a length picked by the exponential mechanism

    """
    C = A * A
    L_matrix = [[] for _ in range(C)]  # L matrix
    L_array = []

    for t in trajectory:
        lenT = len(t)
        if lenT > hi:
            continue
        if lenT < 2 or lo > lenT:
            continue

        row = t[0]
        col = t[-1]
        l_index = row * A + col  # flatten to a 1-D index
        L_matrix[l_index].append(lenT)

    p = ProgressBar(C, 'Computing median trajectory length matrix')
    for i in range(C):
        p.update(i)
        score_arr = []
        K = L_matrix[i].copy()  # all trajectory lengths for this (start, end) pair
        K.sort()  # ascending order
        if len(K) < 1:
            L_array.append(0)
            continue
        m_index = len(K) / 2  # index of the median
        for j in range(len(K)):
            score_arr.append(-abs(j - m_index))  # score function
        r_index = exp_mechanism(score_arr, len(K), epsilon, sensitivity)
        # print(K, '--->', K[r_index])
        L_array.append(K[r_index])

    return L_array
Example #33
def trip_distribution(trajectory, N, epsilon):
    """

    Computes the trip distribution.

    Args:
        trajectory: trajectory data (2-D array)
        N         : number of second-level grid cells
        epsilon   : privacy budget

    Returns:
        R: transition probability matrix

    """
    R = np.zeros((N, N))  # transition probability matrix over grid cells
    for t in trajectory:
        if len(t) > 1:
            sta = t[0]
            end = t[-1]
            R[sta][end] += 1

    count = 0

    p = ProgressBar(N, 'Building transition matrix')
    for i in range(N):
        p.update(i)
        for j in range(N):
            # add Laplace noise via the inverse CDF
            sensitivity = 1
            randomDouble = random.random() - 0.5
            noise = -(sensitivity / epsilon) * signum(randomDouble) * math.log(
                1 - 2 * abs(randomDouble))

            R[i][j] += noise

            if R[i][j] < 0:
                R[i][j] = 0

            count += R[i][j]
    R /= count

    return R
Example #34
def export_to_xml(settings):
    progress_bar = ProgressBar(settings.nb_rrd_files)

    try:
        os.makedirs(settings.paths['xml'])
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    for domain, host, plugin, field in settings.iter_fields():
        _field = settings.domains[domain].hosts[host].plugins[plugin].fields[field]

        if _field.rrd_found:
            progress_bar.update()

            code = subprocess.check_call(['rrdtool', 'dump', _field.rrd_filename, _field.xml_filename])
            if code == 0:
                _field.rrd_exported = True

    return progress_bar.current
Example #37
def one_run(projects_train, projects_test, outlier_threshold):
    rmse_run = []
    accuracy_run = []
    relative_time = np.linspace(0.025, 1, 20)
    bar = ProgressBar(end_value=len(relative_time), text="Time steps", count=True)
    bar.start()
    for i, rel_t in enumerate(relative_time):
        # n_samples = 1
        # t0 = 1
        # t1 = 500
        # samples = subsample(t0, t1, n_samples)
        samples = rel_t * 1000 - 1
        t = 1
        T = 999
        ARD = False

        projects_train = [p for p in projects_train if
                          p.money[T] * p.goal < outlier_threshold and p.money[samples] * p.goal < outlier_threshold]
        projects_test = [p for p in projects_test if
                         p.money[T] * p.goal < outlier_threshold and p.money[samples] * p.goal < outlier_threshold]

        X_train = np.ndarray(shape=(len(projects_train), t),
                             buffer=np.array([p.money[samples] * p.goal for p in projects_train]), dtype=float)
        y_train = np.expand_dims(np.array([p.money[T] * p.goal for p in projects_train]), axis=1)
        X_test = np.ndarray(shape=(len(projects_test), t),
                            buffer=np.array([p.money[samples] * p.goal for p in projects_test]), dtype=float)
        y_test = np.expand_dims(np.array([p.money[T] * p.goal for p in projects_test]), axis=1)

        kernel = GPy.kern.RBF(input_dim=t, ARD=ARD)
        m = GPy.models.GPRegression(X_train, y_train, kernel)
        m.optimize()

        rmse, accuracy = evaluate(X_test, y_test, projects_test, m)
        rmse_run.append(rmse)
        accuracy_run.append(accuracy)

        bar.update(i)

    return rmse_run, accuracy_run
Example #38
def discover_from_rrd(settings, insert_missing=True, print_missing=False):
    """
    Builds a Munin dashboard structure (domain/host/plugins) by listing the files in the RRD folder

    http://munin-monitoring.org/wiki/MuninFileNames:
    /var/lib/munin/SomeGroup/foo.example.com-cpu-irq-d.rrd
               --------- --------------- --- --- -
                   |            |         |   |  `-- Data type (a = absolute, c = counter, d = derive, g = gauge)
                   |            |         |   `----- Field name / data source: 'irq'
                   |            |         `--------- Plugin name: 'cpu'
                   |            `------------------- Node name: 'foo.example.com'
                   `-------------------------------- Group name: 'SomeGroup'
    """

    folder = settings.paths['munin']
    print "Reading Munin RRD cache: ({0})".format(folder)

    not_inserted = defaultdict(dict)

    for domain in os.listdir(folder):
        if not os.path.isdir(os.path.join(folder, domain)):
            # domains are represented as folders
            continue

        if not insert_missing and domain not in settings.domains:
            # skip unknown domains (probably no longer wanted)
            continue

        files = os.listdir(os.path.join(folder, domain))
        progress_bar = ProgressBar(len(files), title=domain)
        for filename in files:
            progress_bar.update()

            path = os.path.join(folder, domain, filename)
            if os.path.isdir(path) or not path.endswith(".rrd"):
                # not a RRD database
                continue

            parts = os.path.splitext(filename)[0].split('-')
            length = len(parts)

            if length < 4:
                print "Error:", filename, parts, length
                continue

            host, plugin, field, datatype = parts[0], ".".join(parts[1:-2]), parts[-2], parts[-1]

            if not insert_missing and (host not in settings.domains[domain].hosts or plugin not in settings.domains[domain].hosts[host].plugins):
                if host not in not_inserted[domain]:
                    not_inserted[domain][host] = set()
                not_inserted[domain][host].add(plugin)
                continue

            plugin_data = settings.domains[domain].hosts[host].plugins[plugin]
            try:
                assert os.path.exists(os.path.join(folder, domain, "{0}-{1}-{2}-{3}.rrd".format(host, plugin.replace(".", "-"), field, datatype[0])))
            except AssertionError:
                print "{0} != {1}-{2}-{3}-{4}.rrd".format(filename, host, plugin, field, datatype[0])
                plugin_data.fields[field].rrd_found = False
            else:
                plugin_data.fields[field].rrd_found = True
                plugin_data.fields[field].rrd_filename = os.path.join(settings.paths['munin'], domain, filename)
                plugin_data.fields[field].xml_filename = os.path.join(settings.paths['xml'], domain, filename.replace(".rrd", ".xml"))
                plugin_data.fields[field].settings = {
                    "type": DATA_TYPES[datatype]
                }
                settings.nb_fields += 1

    if print_missing and len(not_inserted):
        print "The following plugin databases were ignored"
        for domain, hosts in not_inserted.items():
            print "  - Domain {0}:".format(domain)
            for host, plugins in hosts.items():
                print "    {0} Host {1}: {2}".format(Symbol.NOK_RED, host, ", ".join(plugins))

    return settings
Example #39
    def import_from_xml(self):
        print("\nUploading data to InfluxDB:")
        progress_bar = ProgressBar(self.settings.nb_rrd_files*3)  # nb_files * (read + upload + validate)
        errors = []

        def _upload_and_validate(measurement, tags, fields, packed_values):
            try:
                self.write_series(measurement, tags, fields, packed_values)
            except Exception as e:
                errors.append((Symbol.NOK_RED, "Error writing {0} to InfluxDB: {1}".format(measurement, e)))
                return
            finally:
                progress_bar.update(len(fields)-1)  # 'time' column ignored

            try:
                self.validate_record(measurement, fields)
            except Exception as e:
                errors.append((Symbol.WARN_YELLOW, "Validation error in {0}: {1}".format(measurement, e)))
            finally:
                progress_bar.update(len(fields)-1)  # 'time' column ignored

        try:
            assert self.client and self.valid
        except AssertionError:
            raise Exception("Not connected to an InfluxDB server")
        else:
            print("  {0} Connection to database \"{1}\" OK".format(Symbol.OK_GREEN, self.settings.influxdb['database']))

        if self.settings.influxdb['group_fields']:
            """
            In "group_fields" mode, all fields of a same plugin (ex: system, user, nice, idle... of CPU usage)
             will be represented as columns of the same time series in InfluxDB.

             Schema will be:
                +----------------------+-------+----------+----------+-----------+
                |   time_series_name   | col_0 |  col_1   |  col_2   | col_3 ... |
                +----------------------+-------+----------+----------+-----------+
                | domain.host.plugin   | time  | metric_1 | metric_2 | metric_3  |
                | acadis.org.tesla.cpu | time  | system   | user     | nice      |
                | ...                  |       |          |          |           |
                +----------------------+-------+----------+----------+-----------+
            """
            for domain, host, plugin in self.settings.iter_plugins():
                _plugin = self.settings.domains[domain].hosts[host].plugins[plugin]
                measurement = plugin
                tags = {
                    "domain": domain,
                    "host": host,
                    "plugin": plugin
                }
                if _plugin.is_multigraph:
                    tags["is_multigraph"] = True
                    print(host, plugin)

                field_names = ['time']
                values = defaultdict(list)
                values_with_time = []

                for field in _plugin.fields:
                    _field = _plugin.fields[field]

                    if _field.rrd_exported:
                        field_names.append(field)
                        try:
                            content = read_xml_file(_field.xml_filename)
                        except Exception as e:
                            errors.append((Symbol.WARN_YELLOW, "Could not read file for {0}: {1}".format(field, e)))
                        else:
                            for key, value in content.items():
                                values[key].append(value)

                            # keep track of influxdb storage info to allow 'fetch'
                            _field.influxdb_measurement = measurement
                            _field.influxdb_field = field
                            _field.xml_imported = True

                    # update progress bar [######      ] 42 %
                    progress_bar.update()

                # join data with time as first column
                values_with_time.extend([[k]+v for k, v in values.items()])

                _upload_and_validate(measurement, tags, field_names, values_with_time)

        else:  # non grouping
            """
            In "non grouped" mode, all fields of a same plugin will have a dedicated time series and the values
             will be written to a 'value' column

             Schema will be:
                +-----------------------------+-------+-------+
                |      time_series_name       | col_0 | col_1 |
                +-----------------------------+-------+-------+
                | domain.host.plugin.metric_1 | time  | value |
                | domain.host.plugin.metric_2 | time  | value |
                | acadis.org.tesla.cpu.system | time  | value |
                | ...                         |       |       |
                +-----------------------------+-------+-------+
            """
            for domain, host, plugin, field in self.settings.iter_fields():
                _field = self.settings.domains[domain].hosts[host].plugins[plugin].fields[field]
                if not _field.rrd_exported:
                    continue
                measurement = field
                tags = {
                    "domain": domain,
                    "host": host,
                    "plugin": plugin
                }
                field_names = ['time', 'value']
                values = defaultdict(list)
                values_with_time = []

                _field.influxdb_measurement = measurement
                _field.influxdb_field = 'value'

                content = read_xml_file(_field.xml_filename)
                for key, value in content.items():
                    values[key].append(value)
                _field.xml_imported = True
                progress_bar.update()

                # join data with time as first column
                values_with_time.extend([[k]+v for k, v in values.items()])
                _upload_and_validate(measurement, tags, field_names, values_with_time)

        for error in errors:
            print("  {} {}".format(error[0], error[1]))