Example #1
 def test_run_jobs(self):
     """
     Checks whether run_jobs returns exactly one network.
     """
     inputs = {
         'biom_file': None,
         'cluster': None,
         'otu_meta': None,
         'prefix': None,
         'sample_data': None,
         'split': None,
         'tax_table': [(testloc[:-17] + 'otu_tax.txt')],
         'fp': testloc + '/data',
         'otu_table': [(testloc[:-17] + 'otu_otus.txt')],
         'tools': ['conet'],
         'conet_bash': None,
         'spiec': None,
         'conet': (os.path.dirname(massoc.__file__)[:-6] + 'tests\\CoNet3'),
         'spar_pval': None,
         'spar_boot': None,
         'levels': ['family'],
         'prev': ['20'],
         'name': ['test'],
         'cores': None,
         'min': ['10'],
         'spar': None
     }
     batch = Batch(testbiom, inputs)
     netbatch = Nets(batch)
     jobs = get_joblist(netbatch)
     netbatch.collapse_tax()
     netbatch.write_bioms()
     orig_ids, obs_ids = netbatch._prepare_conet()
     filenames = netbatch.get_filenames()
     network = run_jobs(spar=inputs['spar'],
                        conet=inputs['conet'],
                        orig_ids=orig_ids,
                        obs_ids=obs_ids,
                        job=jobs[0],
                        filenames=filenames)
     x = inputs['name'][0]
     # remove the intermediate BIOM files written for each taxonomic level
     for level in ('species', 'genus', 'family', 'order', 'class', 'phylum'):
         filename = netbatch.inputs['fp'] + '/' + x + '_' + level + '.hdf5'
         call("rm " + filename, shell=True)
     call("rm " + inputs['fp'] + '/' + inputs['tools'][0] + '_' +
          inputs['name'][0] + '_' + inputs['levels'][0] + '.hdf5', shell=True)
     self.assertEqual(len(network), 1)
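
The shell-based cleanup above depends on an external rm command, so it only works where one is available and where call() is given shell=True. A cross-platform alternative (a sketch, not part of the original tests) deletes the files directly:

    import os

    def remove_if_exists(path):
        """Delete a file, ignoring it if it is already gone."""
        try:
            os.remove(path)
        except FileNotFoundError:
            pass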
Example #2
 def test_run_parallel(self):
     """Checks if the run_parallel function works without raising an error."""
     inputs = {
         'biom_file': None,
         'cluster': None,
         'otu_meta': None,
         'prefix': None,
         'sample_data': None,
         'split': None,
         'tax_table': [(testloc[:-17] + 'otu_tax.txt')],
         'fp': testloc,
         'otu_table': [(testloc[:-17] + 'otu_otus.txt')],
         'tools': ['conet'],
         'spiec': None,
         'conet': (os.path.dirname(massoc.__file__)[:-6] +
                   'tests\\CoNet3'),  # cannot be used in general testing
         'conet_bash': None,
         'spar_pval': None,
         'spar_boot': None,
         'levels': ['family'],
         'prev': 20,
         'min': 10,
         'name': ['test'],
         'cores': None,
         'rar': None,
         'spar': None
     }
     batch = Batch(testbiom, inputs)
     batch.collapse_tax()
     batch.inputs['procbioms'] = dict()
     batch.inputs['procbioms']['family'] = dict()
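     # the path below is hard-coded to the original author's machine and will not resolve elsewhere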
     batch.inputs['procbioms']['family'][
         'test'] = 'C://Users//u0118219//Documents//massoc//test_family.hdf5'
     netbatch = Nets(batch)
     netbatch = run_parallel(netbatch)
     # 'x' was undefined in the original snippet; derive it from the inputs
     x = inputs['name'][0]
     filename = netbatch.inputs['fp'] + '/' + x + '_otu.hdf5'
     call("rm " + filename, shell=True)
     filename = netbatch.inputs['fp'] + '/' + x + '_family.hdf5'
     call("rm " + filename, shell=True)
     filename = netbatch.inputs['fp'] + '/conet_family_test.txt'
     call("rm " + filename, shell=True)
     self.assertEqual(len(netbatch.networks), 1)
Example #3
 def test_get_joblist(self):
     """
     Checks whether get_joblist returns a joblist
     in the appropriate format:
     a list of dicts, each with exactly one key.
     """
     inputs = {
         'biom_file': None,
         'cluster': None,
         'otu_meta': None,
         'prefix': None,
         'sample_data': None,
         'split': None,
         'tax_table': [(testloc + 'otu_tax.txt')],
         'fp': testloc,
         'otu_table': [(testloc + 'otu_otus.txt')],
         'tools': ['spiec-easi', 'conet'],
         'spiec': ['somefile.txt'],
         'conet': None,
         'spar_pval': None,
         'spar_boot': None,
         'levels': ['family', 'class'],
         'prev': ['20'],
         'name': ['test'],
         'cores': None
     }
     batch = Batch(testbiom, inputs)
     netbatch = Nets(batch)
     jobs = get_joblist(netbatch)
     # 'x' was undefined in the original snippet; derive it from the inputs
     x = inputs['name'][0]
     filename = netbatch.inputs['fp'] + '/' + x + '_otu.hdf5'
     call("rm " + filename, shell=True)
     filename = netbatch.inputs['fp'] + '/' + x + '_family.hdf5'
     call("rm " + filename, shell=True)
     filename = netbatch.inputs['fp'] + '/' + x + '_class.hdf5'
     call("rm " + filename, shell=True)
     filename = netbatch.inputs['fp'] + '/spiec-easi_family_test.txt'
     call("rm " + filename, shell=True)
     self.assertEqual(len(jobs), 6)
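
Given the docstring's contract, a stricter check could assert the shape of each job directly. This is a sketch based on that contract rather than on captured massoc output, and would sit at the end of the test method:

     for job in jobs:
         self.assertIsInstance(job, dict)
         self.assertEqual(len(job), 1)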
Example #4
def run_network(inputs, publish=False):
    """
    Pipes functions from the different massoc modules to run complete network inference.

    :param inputs: Dictionary of inputs.
    :param publish: If True, publishes messages to be received by GUI.
    :return:
    """
    _create_logger(inputs['fp'])
    old_inputs = read_settings(inputs['fp'] + '/settings.json')
    old_inputs.update(inputs)
    inputs = old_inputs
    # reload the previously processed BIOM files
    filestore = read_bioms(inputs['procbioms'])
    bioms = Batch(filestore, inputs)
    bioms = Nets(bioms)
    if inputs['tools'] is not None:
        logger.info('Tools to run with default settings: ' + str(inputs['tools']) + ' ')
    # construct one network filename per (tool, level, name) combination
    network_names = list()
    for tool in bioms.inputs['tools']:
        for level in bioms.inputs['levels']:
            for name in bioms.inputs['name']:
                filename = bioms.inputs['fp'] + '/' + tool + '_' + level + '_' + name + '.txt'
                network_names.append(filename)
    bioms.inputs['network'] = network_names
    if publish:
        pub.sendMessage('update', msg='Starting network inference. This may take some time!')
    try:
        logger.info('Running network inference...  ')
        networks = run_parallel(bioms)
        networks.write_networks()
    except Exception:
        logger.warning('Failed to complete network inference.  ', exc_info=True)
        # 'networks' is unbound if inference failed, so stop here
        return
    write_settings(networks.inputs)
    if publish:
        pub.sendMessage('update', msg="Finished running network inference!")
    logger.info('Finished running network inference.  ')
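
A minimal invocation sketch for run_network; the keys are inferred from the function body, the paths are placeholders, and a settings.json written by an earlier pipeline step must already exist at 'fp':

    inputs = {
        'fp': '/tmp/massoc_run',
        'tools': ['spiec-easi'],
        'levels': ['family'],
        'name': ['test'],
        'procbioms': {'family': {'test': '/tmp/massoc_run/test_family.hdf5'}},
    }
    run_network(inputs, publish=False)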
Example #5
# the snippet starts mid-definition; the opening of the inputs dict is restored here
inputs = {
    'tax_table': None,
    'fp': testloc,
    'name': ['test'],
    'otu_table': None,
    'tools': ['spiec-easi'],
    'spiec': None,
    'conet': (os.path.dirname(massoc.__file__)[:-6] + 'tests\\CoNet3'),
    'spar': (os.path.dirname(massoc.__file__)[:-6] + 'tests\\SparCC'),
    'spar_pval': None,
    'spar_boot': None,
    'levels': ['otu', 'order'],
    'prev': ['20'],
    'cores': ['4'],
    'neo4j': [(os.path.dirname(massoc.__file__)[:-6] + 'tests\\neo4j')]
}
netbatch = Nets(Batch(testbiom, inputs))

filenames = list()
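# intermediate BIOM files written per taxonomic level, collected here (presumably for cleanup after the tests)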
for x in inputs['name']:
    for level in ('otu', 'species', 'genus', 'family', 'order', 'class', 'phylum'):
        filenames.append(netbatch.inputs['fp'][0] + '/' + x + '_' + level + '.hdf5')


class TestNetWrap(unittest.TestCase):
    """Tests netwrap.
    More specifically, checks the ability to call network inference tools.
    """
Example #6
# the snippet starts mid-definition; the opening of the inputs dict is restored here
inputs = {
    'otu_table': None,
    'tools': ['spiec-easi'],
    'spiec': None,
    'conet': None,
    'spar': None,
    'spar_pval': None,
    'spar_boot': None,
    'levels': ['otu', 'order'],
    'prev': ['20'],
    'cores': ['4'],
    'neo4j': os.path.dirname(massoc.__file__)[:-6] + 'tests\\neo4j',
    'address': 'bolt://localhost:7687',
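    # the credential values below were redacted in the source snippet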
    'username': '******',
    'password': '******'
}
networks = Nets(Batch(deepcopy(testbiom), inputs))
g = nx.Graph()
nodes = ["GG_OTU_1", "GG_OTU_2", "GG_OTU_3", "GG_OTU_4", "GG_OTU_5"]
g.add_nodes_from(nodes)
g.add_edges_from([("GG_OTU_1", "GG_OTU_2"), ("GG_OTU_2", "GG_OTU_5"),
                  ("GG_OTU_3", "GG_OTU_4")])
g["GG_OTU_1"]["GG_OTU_2"]['weight'] = 1.0
g["GG_OTU_2"]["GG_OTU_5"]['weight'] = 1.0
g["GG_OTU_3"]["GG_OTU_4"]['weight'] = -1.0
networks.networks['test_g'] = g

f = nx.Graph()
f.add_nodes_from(nodes)
f.add_edges_from([("GG_OTU_1", "GG_OTU_2"), ("GG_OTU_2", "GG_OTU_3"),
                  ("GG_OTU_3", "GG_OTU_4")])
f["GG_OTU_1"]["GG_OTU_2"]['weight'] = 1.0
Example #7
def run_neo4j(inputs, publish=False):
    """
    Starts and carries out operations on the Neo4j database.

    :param inputs: Dictionary of inputs.
    :param publish: If True, publishes messages to be received by GUI.
    :return:
    """
    _create_logger(inputs['fp'])
    # overwritten settings should be retained
    old_inputs = read_settings(inputs['fp'] + '/settings.json')
    # check whether login details are already stored in the settings
    logins = dict()
    if 'username' in old_inputs:
        logins = dict((k, old_inputs[k]) for k in ('username', 'password', 'address', 'neo4j'))
    old_inputs.update(inputs)
    inputs = old_inputs
    if 'pid' in inputs:
        existing_pid = pid_exists(inputs['pid'])
    else:
        existing_pid = False
    if not inputs['neo4j']:
        inputs.update(logins)
    checks = str()
    if inputs['job'] == 'start':
        if not existing_pid:
            start_database(inputs, publish)
            existing_pid = True
        else:
            logger.info("Database is already running.  ")
    elif inputs['job'] == 'quit':
        if not existing_pid:
            logger.info("No database open.  ")
        else:
            try:
                if publish:
                    pub.sendMessage('update', msg='Getting PID...')
                # A lingering Java process places a lock on the database:
                # terminating the subprocess does NOT terminate the Java process,
                # so the store lock would otherwise have to be deleted manually.
                # That manual fix differs between Linux and Windows machines,
                # but the PID-based solution below should be platform-independent.
                # Current solution:
                # get the parent PID of the subprocess,
                # use psutil to get the child PIDs,
                # and kill the child PIDs too.
                parent_pid = inputs['pid']
                parent = Process(parent_pid)
                children = parent.children(recursive=True)
                for child in children:
                    child.kill()
                # apparently killing the children also kills the parent
            except Exception:
                logger.warning("Failed to close database.  ", exc_info=True)
    elif inputs['job'] == 'clear':
        if not existing_pid:
            start_database(inputs, publish)
            existing_pid = True
        try:
            if publish:
                pub.sendMessage('update', msg='Clearing database...')
            importdriver = ImportDriver(user=inputs['username'],
                                        password=inputs['password'],
                                        uri=inputs['address'], filepath=inputs['fp'])
            importdriver.clear_database()
            importdriver.close()
        except Exception:
            logger.warning("Failed to clear database.  ", exc_info=True)
    elif inputs['job'] == 'write':
        if not existing_pid:
            start_database(inputs, publish)
            existing_pid = True
        try:
            if publish:
                pub.sendMessage('update', msg='Accessing database...')
            importdriver = ImportDriver(user=inputs['username'],
                                        password=inputs['password'],
                                        uri=inputs['address'], filepath=inputs['fp'])
            importdriver.export_network(path=inputs['fp'])
            importdriver.close()
        except Exception:
            logger.warning("Failed to write database to graphml file.  ", exc_info=True)
    elif inputs['job'] == 'cyto':
        if not existing_pid:
            start_database(inputs, publish)
            existing_pid = True
        try:
            if publish:
                pub.sendMessage('update', msg='Accessing database...')
            importdriver = ImportDriver(user=inputs['username'],
                                        password=inputs['password'],
                                        uri=inputs['address'], filepath=inputs['fp'])
            importdriver.export_cyto()
            importdriver.close()
        except Exception:
            logger.warning("Failed to export networks to Cytoscape.  ", exc_info=True)
    else:
        if not existing_pid:
            start_database(inputs, publish)
            existing_pid = True
        if publish:
            pub.sendMessage('update', msg='Uploading files to database...')
        filestore = None
        if inputs['procbioms']:
            filestore = read_bioms(inputs['procbioms'])
        # ask users for additional input
        bioms = Batch(filestore, inputs)
        bioms = Nets(bioms)
        for file in inputs['network']:
            network = _read_network(file)
            bioms.add_networks(network, file)
        importdriver = None
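        # give the Neo4j server time to finish starting before opening a driver connection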
        sleep(12)
        importdriver = ImportDriver(user=inputs['username'],
                                    password=inputs['password'],
                                    uri=inputs['address'], filepath=inputs['fp'])
        # importdriver.clear_database()
        try:
            # pub.sendMessage('update', msg='Uploading BIOM files...')
            logger.info("Uploading BIOM files...")
            itemlist = list()
            for level in inputs['procbioms']:
                for item in inputs['procbioms'][level]:
                    name = inputs['procbioms'][level][item]
                    biomfile = load_table(name)
                    importdriver.convert_biom(biomfile=biomfile, exp_id=name)
                    itemlist.append(name)
            checks += 'Successfully uploaded the following items and networks to the database: \n'
            for item in itemlist:
                checks += (item + '\n')
            checks += '\n'
            logger.info(checks)
        except Exception:
            logger.warning("Failed to upload BIOM files to Neo4j database.  ", exc_info=True)
        try:
            # pub.sendMessage('update', msg='Uploading network files...')
            logger.info('Uploading network files...  ')
            for item in bioms.networks:
                network = bioms.networks[item]
                # try to split filename to make a nicer network id
                subnames = item.split('/')
                if len(subnames) == 1:
                    subnames = item.split('\\')
                name = subnames[-1].split('.')[0]
                importdriver.convert_networkx(network=network, network_id=name, mode='weight')
                itemlist.append(item)
        except Exception:
            logger.warning('Unable to upload network files to Neo4j database. ', exc_info=True)
            checks += 'Unable to upload network files to Neo4j database.\n'
        if publish:
            pub.sendMessage('database_log', msg=checks)
        importdriver.close()
    logger.info('Completed database operations!  ')
    write_settings(inputs)
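
run_neo4j dispatches on inputs['job']. A lifecycle sketch follows, with every value a placeholder and the keys inferred from the branches above; note that the 'quit' branch only acts when a 'pid' entry is present, which the settings.json read at the top of the function would normally supply:

    base = {'fp': '/tmp/massoc_run', 'neo4j': '/opt/neo4j',
            'address': 'bolt://localhost:7687',
            'username': 'neo4j', 'password': 'secret'}
    run_neo4j({**base, 'job': 'start'})   # launch the database
    run_neo4j({**base, 'job': 'clear'})   # wipe its contents
    run_neo4j({**base, 'job': 'quit'})    # terminate the lingering Java processes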