Example #1
def predict(saved_model, song):
	# Load the saved model and compile it for inference.
	model = compile_and_load(saved_model)
	if os.path.isdir(song):
		# Directory input: separate every .wav file into a predictions/ subfolder.
		predictions_dir = os.path.join(song, 'predictions')
		os.mkdir(predictions_dir)
		for filename in os.listdir(song):
			if filename.split('.')[-1] == 'wav':
				separate_song(model, os.path.join(song, filename), predictions_dir)
	else:
		# Single-file input: write the result into a local predictions/ folder.
		separate_song(model, song, 'predictions')
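A hypothetical call, assuming compile_and_load and separate_song come from the surrounding project; the paths below are placeholders:

# Separate every .wav file inside an album directory.
predict('models/separator.h5', 'data/album')
# Or separate a single file into a local predictions/ folder.
predict('models/separator.h5', 'data/track01.wav')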
Example #2
def apply_model(model_dir, datagen, train=True, output_write_path='test/predictions'):
	# Load the model without compiling, then compile it with the project's own settings.
	model = tf.keras.models.load_model(model_dir, compile=False)
	model = compile_model(model)

	if train:
		# Fit on each batch from the generator and checkpoint the model after every batch.
		for x, y in datagen:
			model.fit(x, y)
			model.save(model_dir)
	else:
		# Predict on each batch and write the results under output_write_path.
		if not os.path.isdir(output_write_path):
			os.mkdir(output_write_path)
		for x in datagen:
			y = model.predict(x)
			Y = 
Example #3
def create_list_of_files(abs_path, file_list=None):
    # Recursively collect the paths of all files under abs_path.
    if file_list is None:
        file_list = []

    for file_name in os.listdir(abs_path):
        file_path = os.path.join(abs_path, file_name)
        if not os.path.isdir(file_path):
            # file_path is a regular file; record it.
            file_list.append(file_path)
        else:
            # file_path is a directory; recurse into it.
            create_list_of_files(file_path, file_list)

    return file_list
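A minimal usage sketch; the directory path is a placeholder:

# Walk a (hypothetical) project directory and print every file it contains.
for path in create_list_of_files('/tmp/project'):
    print(path)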
Example #4
    def restore(self, snapshotName, overwrite=False):
        # Get the configuration of the snapshot
        snapshot = self.configManager.getSnapshot(snapshotName)
        if snapshot:
            # See if we already have an existing instance with this name
            matchingInstances = [
                i for i in self.configManager.instances()
                if i['name'] == snapshot['instance-name']
            ]
            if len(matchingInstances) == 0 or (len(matchingInstances) == 1
                                               and overwrite):
                # In case the instance already exists, wipe it
                if len(matchingInstances) > 0:
                    self.instanceManager.remove(snapshot['instance-name'])

                # Create a new instance
                self.instanceManager.create(snapshot['instance-name'],
                                            snapshot['version'])

                # Remove unused files / folders
                for file in snapshot['removed-elements']:
                    if os.path.isdir(file):
                        os.rmdir(file)
                    else:
                        os.remove(file)

                # Unzip the backup that we had
                shutil.unpack_archive(
                    snapshot.getPath(),
                    self.instanceManager.getInstancePath(
                        snapshot['instance-name']))

            else:
                self.logger.error(
                    'An instance with the name [{}] already exists, aborting ...'
                    .format(snapshot['instance-name']))
        else:
            self.logger.error(
                'The snapshot name [{}] is invalid'.format(snapshotName))
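A hypothetical call, assuming manager is an instance of the surrounding class:

# Restore the snapshot, replacing an existing instance of the same name if present.
manager.restore('nightly-backup', overwrite=True)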
Example #5
    def save(self, path):
        # Incomplete stub: the directory check currently has no effect.
        if os.path.isdir(path):
            pass
Example #6
def make_plots(outputdir, dirs, loss_delta):
    colors = plot.get_separate_colors(len(dirs))
    dirs.sort()
    count_plot = plot.Plotter(colors)
    select_and_train_plot = plot.Plotter(colors)
    time_plot = plot.Plotter(colors)
    mae_plot = plot.Plotter(colors)
    ymax = float('-inf')
    for d in dirs:
        curdir = os.path.join(outputdir, d)
        data = np.array(get_data(curdir))
        # for the first document, read off first dimension (the labeled set
        # counts)
        counts = data[0,0,:]
        # set up a 2D matrix with each experiment on its own row and each
        # experiment's pR^2 results in columns
        ys_mat = data[:,-1,:]
        ys_medians, ys_means, ys_errs_minus, ys_errs_plus = get_stats(ys_mat)
        ys_errs_plus_max = max(ys_errs_plus + ys_means)
        if ys_errs_plus_max > ymax:
            ymax = ys_errs_plus_max
        # set up a 2D matrix with each experiment on its own row and each
        # experiment's time results in columns
        times_mat = data[:,1,:]
        times_medians, times_means, times_errs_minus, times_errs_plus = \
                get_stats(times_mat)
        count_plot.plot(
            counts,
            ys_means,
            d,
            ys_medians,
            [ys_errs_minus, ys_errs_plus])
        time_plot.plot(
            times_means,
            ys_means,
            d,
            ys_medians,
            [ys_errs_minus, ys_errs_plus],
            times_medians,
            [times_errs_minus, times_errs_plus])
        select_and_train_mat = data[:,2,:]
        sandt_medians, sandt_means, sandt_errs_minus, sandt_errs_plus = \
            get_stats(select_and_train_mat)
        select_and_train_plot.plot(
            counts,
            sandt_means,
            d,
            sandt_medians,
            [sandt_errs_minus, sandt_errs_plus])
        # get mae results
        losses = []
        for maedir in os.listdir(curdir):
            curmaedir = os.path.join(curdir, maedir)
            if os.path.isdir(curmaedir):
                losses.append([])
                for i in range(len(counts)):
                    maedata = np.loadtxt(os.path.join(curmaedir, str(i)))
                    # generalized zero-one loss
                    losses[-1].append(np.sum(maedata < loss_delta) / len(maedata))
        losses = np.array(losses)
        mae_medians, mae_means, mae_errs_minus, mae_errs_plus = \
            get_stats(losses)
        mae_plot.plot(
            counts,
            mae_means,
            d,
            mae_medians,
            [mae_errs_minus, mae_errs_plus])
    corpus = os.path.basename(outputdir)
    count_plot.set_xlabel('Number of Labeled Documents')
    count_plot.set_ylabel('pR$^2$')
    count_plot.set_ylim([-0.05, ymax])
    count_plot.savefig(os.path.join(outputdir, corpus+'.counts.pdf'))
    time_plot.set_xlabel('Time elapsed (seconds)')
    time_plot.set_ylabel('pR$^2$')
    time_plot.set_ylim([-0.05, ymax])
    time_plot.savefig(os.path.join(outputdir,
        corpus+'.times.pdf'))
    select_and_train_plot.set_xlabel('Number of Labeled Documents')
    select_and_train_plot.set_ylabel('Time to select and train')
    select_and_train_plot.savefig(os.path.join(outputdir,
        corpus+'.select_and_train.pdf'))
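A hypothetical invocation; the output directory, run names, and threshold are placeholders and assume the directory layout that get_data expects:

# Compare two experiment runs stored under results/newsgroups and write the PDFs there.
make_plots('results/newsgroups', ['baseline', 'active'], loss_delta=0.5)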
Example #7
def create_pem_bundle(destination,
                      urls=None,
                      resource_dir=None,
                      set_env_var=True):
    """create a PEM formatted certificate bundle from the specified resources

    Args:
        destination(str, required):
            pathname for created pem bundle file
        urls(iterable, optional, default=None):
            passed to `download_resources` if specified, else the existing contents of `resource_dir` are processed;
            `urls` and/or `resource_dir` must be specified
        resource_dir(str, optional, default=None):
            location of resources to process; passed to `download_resources` along with `urls` if both specified, else
            a temporary location is utilized
        set_env_var(bool, optional, default=True):
            determines whether the `DOD_CA_CERTS_PEM_PATH` environment variable is set to the pathname of the
            created pem bundle

    Returns:
        pathname of created pem bundle file
    """
    if resource_dir is not None:
        assert os.path.exists(resource_dir)
        assert os.path.isdir(resource_dir)
    else:
        assert urls is not None  # `urls` or `resource_dir` must be specified

    if urls is not None:
        resource_dir = download_resources(urls, resource_dir)

    # create empty bytes stream
    pem_bundle = "# Bundle Created: {} \n".format(datetime.now()).encode()
    # get file list
    files = sorted(os.listdir(resource_dir))
    # Process CAs first, then roots.
    for cert_type in ['ca', 'root']:
        for file in files:
            if any(file.endswith(ext) for ext in cert_exts):
                fpath = os.path.join(resource_dir, file)
                if file.lower().find(cert_type) > -1 and os.path.isfile(fpath):
                    with open(fpath, 'rb') as f:
                        contents = f.read()
                        try:
                            cert = load_der_x509_certificate(
                                contents, backend=default_backend())
                        except ValueError:
                            try:
                                cert = load_pem_x509_certificate(
                                    contents, backend=default_backend())
                            except ValueError:
                                log.warning(
                                    'Unable to load public key from: {}'.
                                    format(file))
                                continue
                        # add cert's info and public key in PEM format to the bytes stream
                        pem_bundle += describe_cert(cert).encode()
                        pem_bundle += cert.public_bytes(Encoding.PEM)
    destination = os.path.abspath(destination)
    with open(destination, 'wb') as f:
        f.write(pem_bundle)

    if set_env_var:
        os.environ['DOD_CA_CERTS_PEM_PATH'] = destination
        log.info('Set DOD_CA_CERTS_PEM_PATH environment variable')

    return destination
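A hypothetical usage sketch; the destination filename and resource URL are placeholders:

# Download the certificate resources, build the bundle, and export its path.
bundle = create_pem_bundle('dod-ca-bundle.pem',
                           urls=['https://example.com/certificates.zip'])
print(bundle)                                  # absolute path of the new bundle
print(os.environ['DOD_CA_CERTS_PEM_PATH'])     # set because set_env_var defaults to True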
Example #8
import os
import sys

from scipy.io.wavfile import write as wavwrite
import tensorflow as tf

batch_size = 8
_WINDOW_LEN = 16384

tfrecord_fp, out_dir = sys.argv[1:]

if not os.path.isdir(out_dir):
    os.makedirs(out_dir)


def _mapper(example_proto):
    features = {
        'id': tf.FixedLenSequenceFeature([], tf.string, allow_missing=True),
        'label': tf.FixedLenSequenceFeature([], tf.string, allow_missing=True),
        'slice': tf.FixedLenFeature([], tf.int64),
        'samples': tf.FixedLenSequenceFeature([1],
                                              tf.float32,
                                              allow_missing=True)
    }
    example = tf.parse_single_example(example_proto, features)

    wav_id = tf.reduce_join(example['id'], 0)
    wav_label = tf.reduce_join(example['label'], 0)
    wav_slice = example['slice']
    wav = example['samples']
    wav_len = tf.shape(wav)[0]
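The script reads the TFRecord path and an output directory from sys.argv, so a run would look like python dump_wavs.py data.tfrecord out/ (script and file names are placeholders).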
Example #9
def process_xyz_files(data,
                      process_file_fn,
                      file_ext=None,
                      file_idx_list=None,
                      stack=True):
    """
    Take a set of datafiles and apply a predefined data processing script to each
    one. Data can be stored in a directory, tarfile, or zipfile. An optional
    file extension can be added.

    Parameters
    ----------
    data : str
        Complete path to datafiles. Files must be in a directory, tarball, or zip archive.
    process_file_fn : callable
        Function to process files. Can be defined externally.
        Must input a file, and output a dictionary of properties, each of which
        is a torch.tensor. Dictionary must contain at least three properties:
        {'num_elements', 'charges', 'positions'}
    file_ext : str, optional
        Optionally add a file extension if multiple types of files exist.
    file_idx_list : iterable of int, optional
        Optionally add a file filter to check a file index is in a
        predefined list, for example, when constructing a train/valid/test split.
    stack : bool, optional
        If True, pad the per-molecule tensors and stack them into batched tensors.
    """
    logging.info('Processing data file: {}'.format(data))
    if tarfile.is_tarfile(data):
        tardata = tarfile.open(data, 'r')
        files = tardata.getmembers()

        readfile = lambda data_pt: tardata.extractfile(data_pt)

    elif os.path.isdir(data):
        files = os.listdir(data)
        files = [os.path.join(data, file) for file in files]

        readfile = lambda data_pt: open(data_pt, 'r')

    else:
        raise ValueError('Can only read from directory or tarball archive!')

    # Use only files that end with specified extension.
    if file_ext is not None:
        files = [file for file in files if file.endswith(file_ext)]

    # Use only files that match desired filter.
    if file_idx_list is not None:
        files = [
            file for idx, file in enumerate(files) if idx in file_idx_list
        ]

    # Now loop over files using readfile function defined above
    # Process each file accordingly using process_file_fn

    molecules = []

    for file in files:
        with readfile(file) as openfile:
            molecules.append(process_file_fn(openfile))

    # Check that all molecules have the same set of items in their dictionary:
    props = molecules[0].keys()
    assert all(
        props == mol.keys() for mol in
        molecules), 'All molecules must have same set of properties/keys!'

    # Convert list-of-dicts to dict-of-lists
    molecules = {prop: [mol[prop] for mol in molecules] for prop in props}

    # If stacking is desirable, pad and then stack.
    if stack:
        molecules = {
            key: pad_sequence(val, batch_first=True)
            if val[0].dim() > 0 else torch.stack(val)
            for key, val in molecules.items()
        }

    return molecules
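A minimal usage sketch; parse_xyz and the archive name are hypothetical stand-ins for a project-specific parser and dataset:

import torch

def process_xyz(openfile):
    # Hypothetical parser returning per-atom charges and positions for one XYZ file.
    charges, positions = parse_xyz(openfile)
    return {'num_elements': torch.tensor(len(charges)),
            'charges': torch.tensor(charges),
            'positions': torch.tensor(positions)}

molecules = process_xyz_files('qm9.tar.gz', process_xyz, file_ext='.xyz')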