def test_run_jobs(self):
    """
    Checks whether run_jobs really returns only 1 network.
    """
    inputs = {'biom_file': None,
              'cluster': None,
              'otu_meta': None,
              'prefix': None,
              'sample_data': None,
              'split': None,
              'tax_table': [(testloc[:-17] + 'otu_tax.txt')],
              'fp': testloc + '/data',
              'otu_table': [(testloc[:-17] + 'otu_otus.txt')],
              'tools': ['conet'],
              'conet_bash': None,
              'spiec': None,
              'conet': (os.path.dirname(massoc.__file__)[:-6] + 'tests\\CoNet3'),
              'spar_pval': None,
              'spar_boot': None,
              'levels': ['family'],
              'prev': ['20'],
              'name': ['test'],
              'cores': None,
              'min': ['10'],
              'spar': None}
    batch = Batch(testbiom, inputs)
    netbatch = Nets(batch)
    jobs = get_joblist(netbatch)
    netbatch.collapse_tax()
    netbatch.write_bioms()
    orig_ids, obs_ids = netbatch._prepare_conet()
    filenames = netbatch.get_filenames()
    network = run_jobs(spar=inputs['spar'], conet=inputs['conet'],
                       orig_ids=orig_ids, obs_ids=obs_ids,
                       job=jobs[0], filenames=filenames)
    # remove the BIOM files and the network file written during the test
    x = inputs['name'][0]
    for level in ['species', 'genus', 'family', 'order', 'class', 'phylum']:
        filename = netbatch.inputs['fp'] + '/' + x + '_' + level + '.hdf5'
        call(("rm " + filename), shell=True)
    call(("rm " + inputs['fp'] + '/' + inputs['tools'][0] + '_' +
          inputs['name'][0] + '_' + inputs['levels'][0] + '.hdf5'), shell=True)
    self.assertEqual(len(network), 1)

def test_normalize_transform(self):
    """Is the transformed batch file different from the original one?"""
    inputs = {'biom_file': None,
              'cluster': ['Affinity'],
              'nclust': ['4'],
              'otu_meta': None,
              'otu_table': ['otu_bananas.txt'],
              'prefix': None,
              'sample_data': None,
              'split': 'Rocket Science',
              'tax_table': ['tax_bananas.txt'],
              'name': ['test'],
              'fp': os.path.dirname(massoc.__file__)[:-7].replace('\\', '/')}
    batch = Batch(testbiom, inputs)
    clrbatch = batch.normalize_transform(mode="clr")
    self.assertFalse(batch.otu['test'] == clrbatch.otu['test'])

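# The test above relies on Batch.normalize_transform(mode="clr") returning a
# transformed copy rather than altering the original counts. Purely as an
# illustration of what a centred log-ratio (CLR) transform computes, a minimal
# sketch follows; the pseudocount of 1 is an assumption made here, and massoc's
# own implementation may treat zeros differently.
import numpy as np

def clr_sketch(counts):
    """Log of each count divided by the geometric mean of its sample (row)."""
    counts = np.asarray(counts, dtype=float) + 1  # pseudocount to avoid log(0)
    logged = np.log(counts)
    return logged - logged.mean(axis=1, keepdims=True)

if __name__ == '__main__':
    print(clr_sketch([[10, 5, 0], [2, 8, 4]]))  # each row sums to ~0 after CLR
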
def test_split_biom(self):
    """Does 'split_biom' correctly split a biom file according to sample data properties?"""
    inputs = {'biom_file': None,
              'cluster': None,
              'otu_meta': None,
              'otu_table': ['otu_bananas.txt'],
              'prefix': None,
              'sample_data': None,
              'split': 'BODY_SITE',
              'tax_table': ['tax_bananas.txt'],
              'name': ['test'],
              'fp': (os.path.dirname(massoc.__file__)[:-6] + 'tests')}
    batch = Batch(deepcopy(testbiom), inputs)
    batch.split_biom()
    self.assertEqual(len(batch.otu), 3)

def test_collapse_tax(self):
    """Does the function for collapsing by taxonomy correctly add additional dictionaries?"""
    inputs = {'biom_file': None,
              'cluster': None,
              'otu_meta': None,
              'otu_table': ['otu_bananas.txt'],
              'prefix': None,
              'sample_data': None,
              'split': ['BODY_SITE'],
              'tax_table': ['tax_bananas.txt'],
              'levels': ['otu', 'genus'],
              'fp': (os.path.dirname(massoc.__file__)[:-6] + 'tests'),
              'name': ['test']}
    batch = Batch(testbiom, inputs)
    batch.collapse_tax()
    self.assertEqual(len(batch.genus), 1)

def test_run_parallel(self):
    """Checks if the run_parallel function works without raising an error."""
    inputs = {'biom_file': None,
              'cluster': None,
              'otu_meta': None,
              'prefix': None,
              'sample_data': None,
              'split': None,
              'tax_table': [(testloc[:-17] + 'otu_tax.txt')],
              'fp': testloc,
              'otu_table': [(testloc[:-17] + 'otu_otus.txt')],
              'tools': ['conet'],
              'spiec': None,
              # local CoNet install; cannot be used in general testing
              'conet': (os.path.dirname(massoc.__file__)[:-6] + 'tests\\CoNet3'),
              'conet_bash': None,
              'spar_pval': None,
              'spar_boot': None,
              'levels': ['family'],
              'prev': 20,
              'min': 10,
              'name': ['test'],
              'cores': None,
              'rar': None,
              'spar': None}
    batch = Batch(testbiom, inputs)
    batch.collapse_tax()
    batch.inputs['procbioms'] = dict()
    batch.inputs['procbioms']['family'] = dict()
    # machine-specific path to a previously processed BIOM file
    batch.inputs['procbioms']['family']['test'] = \
        'C://Users//u0118219//Documents//massoc//test_family.hdf5'
    netbatch = Nets(batch)
    netbatch = run_parallel(netbatch)
    # remove the files written during the test
    x = inputs['name'][0]
    filename = netbatch.inputs['fp'] + '/' + x + 'otu.hdf5'
    call(("rm " + filename), shell=True)
    filename = netbatch.inputs['fp'] + '/' + x + '_family.hdf5'
    call(("rm " + filename), shell=True)
    filename = netbatch.inputs['fp'] + '/conet_family_test.txt'
    call(("rm " + filename), shell=True)
    self.assertEqual(len(netbatch.networks), 1)

def test_prev_filter(self):
    """Does the prevalence filter correctly reduce the number of taxa in a table?"""
    inputs = {'biom_file': None,
              'cluster': 'Affinity',
              'nclust': ['4'],
              'otu_meta': None,
              'otu_table': ['otu_bananas.txt'],
              'prefix': None,
              'sample_data': None,
              'split': 'Rocket Science',
              'tax_table': ['tax_bananas.txt'],
              'prev': 40,
              'name': ['test'],
              'fp': (os.path.dirname(massoc.__file__)[:-6] + 'tests')}
    batch = Batch(deepcopy(testbiom), inputs)
    batch.prev_filter()
    self.assertEqual(batch.otu['test'].shape[0], 4)

def test_cluster_bioms_spectral(self):
    """Does 'cluster_bioms.py' correctly cluster a biom file and
    split the file into multiple subsets of the data?"""
    inputs = {'biom_file': None,
              'cluster': 'Spectral',
              'otu_meta': None,
              'nclust': 4,
              'otu_table': ['otu_bananas.txt'],
              'prefix': None,
              'sample_data': None,
              'split': 'TRUE',
              'tax_table': ['tax_bananas.txt'],
              'name': ['test'],
              'fp': (os.path.dirname(massoc.__file__)[:-6] + 'tests')}
    batch = Batch(deepcopy(testbiom), inputs)
    np.random.seed(8888)
    batch.cluster_biom()
    self.assertEqual(len(batch.otu), 4)

def test_norm_machine(self):
    """While data is normalized in the machine learning function,
    it should NOT be returned as normalized count data."""
    inputs = {'biom_file': None,
              'cluster': 'K-means',
              'otu_meta': None,
              'nclust': 4,
              'otu_table': ['otu_bananas.txt'],
              'prefix': None,
              'sample_data': None,
              'split': None,
              'tax_table': ['tax_bananas.txt'],
              'name': ['test'],
              'fp': (os.path.dirname(massoc.__file__)[:-6] + 'tests')}
    batch = Batch(deepcopy(testbiom), inputs)
    batch.cluster_biom()
    self.assertEqual(batch.otu['test']._data[1, 1],
                     testbiom['otu']['test']._data[1, 1])

def test_prev_filter_qual(self):
    """Does the prevalence filter remove the correct taxon?"""
    inputs = {'biom_file': None,
              'cluster': 'Affinity',
              'nclust': 4,
              'otu_meta': None,
              'otu_table': ['otu_bananas.txt'],
              'prefix': None,
              'sample_data': None,
              'split': 'Rocket Science',
              'tax_table': ['tax_bananas.txt'],
              'prev': 40,
              'name': ['test'],
              'fp': (os.path.dirname(massoc.__file__)[:-6] + 'tests')}
    batch = Batch(deepcopy(testbiom), inputs)
    rawtable = batch.otu['test'].matrix_data
    batch.prev_filter(mode='prev')
    newtable = batch.otu['test'].matrix_data
    self.assertEqual((rawtable.sum(axis=1)[0] + rawtable.sum(axis=1)[4]),
                     newtable.sum(axis=1)[3])

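# The two prevalence tests above check Batch.prev_filter from opposite angles
# (how many taxa remain, and which taxon is dropped). As a standalone sketch, a
# prevalence filter keeps only taxa observed in at least a given fraction of
# samples; the 40 used in the tests corresponds to a 40% threshold. The helper
# below is an illustration with made-up counts, not massoc's implementation.
import numpy as np

def prevalence_filter_sketch(counts, threshold=0.4):
    """Keep rows (taxa) that are non-zero in at least `threshold` of the columns (samples)."""
    counts = np.asarray(counts)
    prevalence = (counts > 0).mean(axis=1)
    return counts[prevalence >= threshold]

if __name__ == '__main__':
    table = np.array([[5, 0, 0, 0, 0],   # present in 1/5 samples: removed
                      [3, 2, 1, 0, 4]])  # present in 4/5 samples: kept
    print(prevalence_filter_sketch(table, threshold=0.4))
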
def test_get_joblist(self):
    """
    Checks whether the joblist function returns a joblist in the appropriate format:
    a list of dicts, each with only 1 key.
    """
    inputs = {'biom_file': None,
              'cluster': None,
              'otu_meta': None,
              'prefix': None,
              'sample_data': None,
              'split': None,
              'tax_table': [(testloc + 'otu_tax.txt')],
              'fp': testloc,
              'otu_table': [(testloc + 'otu_otus.txt')],
              'tools': ['spiec-easi', 'conet'],
              'spiec': ['somefile.txt'],
              'conet': None,
              'spar_pval': None,
              'spar_boot': None,
              'levels': ['family', 'class'],
              'prev': ['20'],
              'name': ['test'],
              'cores': None}
    batch = Batch(testbiom, inputs)
    netbatch = Nets(batch)
    jobs = get_joblist(netbatch)
    # remove the files written during the test
    x = inputs['name'][0]
    filename = netbatch.inputs['fp'] + '/' + x + 'otu.hdf5'
    call(("rm " + filename), shell=True)
    filename = netbatch.inputs['fp'] + '/' + x + '_family.hdf5'
    call(("rm " + filename), shell=True)
    filename = netbatch.inputs['fp'] + '/' + x + '_class.hdf5'
    call(("rm " + filename), shell=True)
    filename = netbatch.inputs['fp'] + '/spiec-easi_family_test.txt'
    call(("rm " + filename), shell=True)
    self.assertEqual(len(jobs), 6)

def run_network(inputs, publish=False):
    """
    Pipes functions from the different massoc modules to run complete network inference.

    :param inputs: Dictionary of inputs.
    :param publish: If True, publishes messages to be received by GUI.
    :return:
    """
    _create_logger(inputs['fp'])
    old_inputs = read_settings(inputs['fp'] + '/settings.json')
    old_inputs.update(inputs)
    inputs = old_inputs
    # handler to file
    filestore = read_bioms(inputs['procbioms'])
    bioms = Batch(filestore, inputs)
    bioms = Nets(bioms)
    if inputs['tools'] is not None:
        logger.info('Tools to run with default settings: ' + str(inputs['tools']) + ' ')
    bioms.inputs['network'] = list()
    network_names = list()
    for tool in bioms.inputs['tools']:
        for level in bioms.inputs['levels']:
            for name in bioms.inputs['name']:
                filename = bioms.inputs['fp'] + '/' + tool + '_' + level + '_' + name + '.txt'
                network_names.append(filename)
    bioms.inputs['network'] = network_names
    if publish:
        pub.sendMessage('update', msg='Starting network inference. This may take some time!')
    try:
        logger.info('Running network inference... ')
        networks = run_parallel(bioms)
        networks.write_networks()
    except Exception:
        logger.warning('Failed to complete network inference. ', exc_info=True)
    write_settings(networks.inputs)
    if publish:
        pub.sendMessage('update', msg="Finished running network inference!")
    logger.info('Finished running network inference. ')

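# Illustrative call only: run_network reads the settings.json and processed BIOM
# files that an earlier get_input run wrote to inputs['fp'], so the values below
# are placeholders rather than working settings. For every combination of tool,
# taxonomic level and file name it expects (and later writes) a network file
# named <fp>/<tool>_<level>_<name>.txt.
if __name__ == '__main__':
    placeholder_inputs = {'fp': '/tmp/massoc_run',   # folder that already contains settings.json
                          'tools': ['spiec-easi'],   # tools to run with default settings
                          'levels': ['family'],
                          'name': ['test']}
    run_network(placeholder_inputs, publish=False)
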
def test_rarefy(self):
    """The rarefaction function should remove samples below a
    certain read count and then perform rarefaction."""
    inputs = {'biom_file': None,
              'cluster': 'K-means',
              'otu_meta': None,
              'nclust': 4,
              'otu_table': ['otu_bananas.txt'],
              'prefix': None,
              'sample_data': None,
              'split': None,
              'tax_table': ['tax_bananas.txt'],
              'rar': 'True',
              'name': ['test'],
              'fp': (os.path.dirname(massoc.__file__)[:-6] + 'tests')}
    batch = Batch(deepcopy(testbiom), inputs)
    rawsums = batch.otu['test'].sum(axis='sample')
    batch.rarefy()
    newsums = batch.otu['test'].sum(axis='sample')
    self.assertGreater(np.mean(rawsums), np.mean(newsums))

def test_rarefy_qual(self):
    """The rarefaction function should remove samples below a
    certain read count and then perform rarefaction.
    Are the lowest values of the table equal to the specified rarefaction depth?"""
    inputs = {'biom_file': None,
              'cluster': 'K-means',
              'otu_meta': None,
              'nclust': 4,
              'otu_table': ['otu_bananas.txt'],
              'prefix': None,
              'sample_data': None,
              'split': None,
              'tax_table': ['tax_bananas.txt'],
              'rar': 3,
              'name': ['test'],
              'fp': (os.path.dirname(massoc.__file__)[:-6] + 'tests')}
    batch = Batch(deepcopy(testbiom), inputs)
    batch.rarefy()
    newsums = batch.otu['test'].sum(axis='sample')
    self.assertEqual(np.mean(newsums), 3)

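# The rarefaction tests above only check read counts before and after
# Batch.rarefy. For reference, rarefying a sample to a fixed depth means
# subsampling its reads without replacement; the helper below is a minimal
# numpy illustration of that idea, not the biom-format routine massoc uses.
import numpy as np

def rarefy_sample_sketch(counts, depth, seed=0):
    """Subsample one sample's counts to `depth` reads without replacement."""
    counts = np.asarray(counts, dtype=int)
    reads = np.repeat(np.arange(counts.size), counts)      # one entry per read, labelled by taxon
    rng = np.random.default_rng(seed)
    chosen = rng.choice(reads, size=depth, replace=False)  # draw `depth` reads
    return np.bincount(chosen, minlength=counts.size)

if __name__ == '__main__':
    print(rarefy_sample_sketch([10, 4, 0, 6], depth=3))    # resulting counts sum to 3
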
def test_min(self):
    """The prevalence filter in 'min' mode should correctly remove taxa
    with mean abundances below the specified threshold."""
    inputs = {'biom_file': None,
              'cluster': ['K-means'],
              'otu_meta': None,
              'nclust': ['4'],
              'otu_table': ['otu_bananas.txt'],
              'prefix': None,
              'sample_data': None,
              'split': None,
              'tax_table': ['tax_bananas.txt'],
              'rar': ['True'],
              'min': 3,
              'prev': None,
              'name': ['test'],
              'fp': (os.path.dirname(massoc.__file__)[:-6] + 'tests')}
    batch = Batch(deepcopy(testbiom), inputs)
    rawsums = batch.otu['test'].sum(axis='observation')
    batch.prev_filter(mode='min')
    newsums = batch.otu['test'].sum(axis='observation')
    self.assertEqual((rawsums[0] + rawsums[4]), newsums[3])

          'tax_table': None,
          'fp': testloc,
          'name': ['test'],
          'otu_table': None,
          'tools': ['spiec-easi'],
          'spiec': None,
          'conet': (os.path.dirname(massoc.__file__)[:-6] + 'tests\\CoNet3'),
          'spar': (os.path.dirname(massoc.__file__)[:-6] + 'tests\\SparCC'),
          'spar_pval': None,
          'spar_boot': None,
          'levels': ['otu', 'order'],
          'prev': ['20'],
          'cores': ['4'],
          'neo4j': [(os.path.dirname(massoc.__file__)[:-6] + 'tests\\neo4j')]}
netbatch = Nets(Batch(testbiom, inputs))

filenames = list()
for x in inputs['name']:
    for level in ['otu', 'species', 'genus', 'family', 'order', 'class', 'phylum']:
        filenames.append(netbatch.inputs['fp'][0] + '/' + x + '_' + level + '.hdf5')


class TestNetWrap(unittest.TestCase):
    """Tests netwrap.
    More specifically, checks ability to call network inference tools.

          'otu_table': None,
          'tools': ['spiec-easi'],
          'spiec': None,
          'conet': None,
          'spar': None,
          'spar_pval': None,
          'spar_boot': None,
          'levels': ['otu', 'order'],
          'prev': ['20'],
          'cores': ['4'],
          'neo4j': os.path.dirname(massoc.__file__)[:-6] + 'tests\\neo4j',
          'address': 'bolt://localhost:7687',
          'username': '******',
          'password': '******'}
networks = Nets(Batch(deepcopy(testbiom), inputs))

g = nx.Graph()
nodes = ["GG_OTU_1", "GG_OTU_2", "GG_OTU_3", "GG_OTU_4", "GG_OTU_5"]
g.add_nodes_from(nodes)
g.add_edges_from([("GG_OTU_1", "GG_OTU_2"),
                  ("GG_OTU_2", "GG_OTU_5"),
                  ("GG_OTU_3", "GG_OTU_4")])
g["GG_OTU_1"]["GG_OTU_2"]['weight'] = 1.0
g["GG_OTU_2"]["GG_OTU_5"]['weight'] = 1.0
g["GG_OTU_3"]["GG_OTU_4"]['weight'] = -1.0
networks.networks['test_g'] = g

f = nx.Graph()
f.add_nodes_from(nodes)
f.add_edges_from([("GG_OTU_1", "GG_OTU_2"),
                  ("GG_OTU_2", "GG_OTU_3"),
                  ("GG_OTU_3", "GG_OTU_4")])
f["GG_OTU_1"]["GG_OTU_2"]['weight'] = 1.0

def generate_cluster_figures(self):
    """Generates figures for diagnostics canvas."""
    from massoc.scripts.batch import Batch
    from sklearn.cluster import KMeans, DBSCAN, SpectralClustering, AffinityPropagation
    from sklearn.mixture import GaussianMixture
    from sklearn.metrics import silhouette_score
    from sklearn.decomposition import PCA
    nums = list(range(2, 5))
    try:
        file = self.file_list.GetSelection()
        file = self.file_list.GetString(file)
        x = 'init'
        biomfile = {x: biom.load_table(file)}
        algo = self.cluster_choice.GetSelection()
        algo = self.cluster_choice.GetString(algo)
        inputs = {'biom_file': [file],
                  'cluster': [algo]}
        normbatch = Batch(biomfile, inputs)
        normbatch = normbatch.normalize_transform(mode='clr')
        norm_table = normbatch.otu[x]
        topscore = 0
        bestcluster = [1] * len(norm_table.ids())
        data = csr_matrix.todense(norm_table.matrix_data)
        data = np.matrix.transpose(data)
        data = PCA(n_components=2).fit_transform(data)
        randomclust = np.random.randint(2, size=len(data))
        sh_score = [silhouette_score(data, randomclust)]
        # K-means clustering, tests 2-4 clusters
        if inputs['cluster'][0] == 'K-means':
            for i in nums:
                clusters = KMeans(i).fit_predict(data)
                silhouette_avg = silhouette_score(data, clusters)
                sh_score.append(silhouette_avg)
            topscore = int(np.argmax(sh_score) + 1)
            bestcluster = KMeans(topscore).fit_predict(data)
        # DBSCAN clustering, automatically finds optimal cluster size
        if inputs['cluster'][0] == 'DBSCAN':
            bestcluster = DBSCAN().fit_predict(data)
            topscore = len(set(bestcluster)) - (1 if -1 in bestcluster else 0)
        # Gaussian Mixture Model (gmm) probability distribution
        if inputs['cluster'][0] == 'Gaussian':
            for i in nums:
                fit = GaussianMixture(i).fit(data)
                clusters = fit.predict(data)
                silhouette_avg = silhouette_score(data, clusters)
                sh_score.append(silhouette_avg)
            topscore = int(np.argmax(sh_score) + 1)
            bestfit = GaussianMixture(topscore).fit(data)
            bestcluster = bestfit.predict(data)
        # Spectral Clustering
        if inputs['cluster'][0] == 'Spectral':
            for i in nums:
                clusters = SpectralClustering(i).fit_predict(data)
                silhouette_avg = silhouette_score(data, clusters)
                sh_score.append(silhouette_avg)
            topscore = int(np.argmax(sh_score) + 1)
            bestcluster = SpectralClustering(topscore).fit_predict(data)
        # Affinity Propagation clustering
        if inputs['cluster'][0] == 'Affinity':  # compare the selected algorithm, not the list
            bestcluster = AffinityPropagation().fit_predict(data)
            topscore = len(set(bestcluster)) - (1 if -1 in bestcluster else 0)
        if max(sh_score) < 0.25:
            raise ValueError("Silhouette score too low: please try a different algorithm. "
                             "Your data may not be suitable for clustering.")
        for i in range(topscore):
            mask, = np.where(bestcluster == i)
            for j in mask:
                norm_table._sample_metadata[j]['cluster'] = inputs['cluster'][0] + '_' + str(i)
        x, y = zip(*data)
        self.prev.scatter(x, y, bestcluster)
        self.canvas1.draw()
    except Exception:
        logger.error("Failed to generate figures. ", exc_info=True)

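# generate_cluster_figures picks the number of clusters via the silhouette score:
# it scores k = 2..4 (plus a random-labelling baseline), keeps the best-scoring
# option, and refuses to plot when even the best score stays below 0.25. The
# snippet below isolates that selection step with synthetic data so it can be
# run outside the GUI; the blob parameters are arbitrary.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score

def best_k_by_silhouette(data, k_range=range(2, 5)):
    """Return the k in k_range with the highest silhouette score, plus that score."""
    scores = {k: silhouette_score(data, KMeans(n_clusters=k, n_init=10).fit_predict(data))
              for k in k_range}
    best = max(scores, key=scores.get)
    return best, scores[best]

if __name__ == '__main__':
    data, _ = make_blobs(n_samples=60, centers=3, random_state=0)
    k, score = best_k_by_silhouette(data)
    print(k, round(score, 2))  # should recover the 3 well-separated blobs
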
def get_input(inputs, publish=False):
    """
    Takes all input and returns a dictionary of biom files.
    If tab-delimited files are supplied, these are combined
    into a biom file. File names are used as keys.
    This is mostly a utility wrapper,
    as all biom-related functions are from biom-format.org.

    At the moment, rarefaction is performed after sample splitting.
    This means that samples with uneven sequence counts will not be rarefied to equal depths.

    All files are written to BIOM files, while a settings file is also written to disk
    for use by other massoc commands.

    :param inputs: Dictionary of inputs.
    :param publish: If True, publishes messages to be received by GUI.
    :return:
    """
    # handler to file
    # construct logger after filepath is provided
    _create_logger(inputs['fp'])
    if inputs['biom_file'] is not None:
        logger.info('BIOM file(s) to process: ' + ", ".join(inputs['biom_file']))
    if inputs['otu_table'] is not None:
        logger.info('Tab-delimited OTU table(s) to process: ' + ", ".join(inputs['otu_table']))
    if inputs['tax_table'] is not None:
        if len(inputs['otu_table']) != len(inputs['tax_table']):
            logger.error("Add a taxonomy table for every OTU table!", exc_info=True)
            raise ValueError("Add a taxonomy table for every OTU table!")
    if inputs['sample_data'] is not None:
        if len(inputs['otu_table']) != len(inputs['sample_data']):
            logger.error("Add a sample data table for every OTU table!", exc_info=True)
            raise ValueError("Add a sample data table for every OTU table!")
    if inputs['otu_meta'] is not None:
        if len(inputs['otu_table']) != len(inputs['otu_meta']):
            logger.error("Add a metadata table for every OTU table!", exc_info=True)
            raise ValueError("Add a metadata table for every OTU table!")
    filestore = {}
    if inputs['biom_file'] is None and inputs['network'] is None:
        if inputs['otu_table'] is None and inputs['network'] is None:
            logger.error("Please supply either a biom file"
                         ", a tab-delimited OTU table or a network!", exc_info=True)
            raise ValueError("Please supply either a biom file"
                             ", a tab-delimited OTU table or a network!")
    # Only process count files if present
    i = 0
    if inputs['name'] is None:
        inputs['name'] = list()
        inputs['name'].append('file_')
    if inputs['biom_file'] is not None:
        try:
            for x in inputs['biom_file']:
                biomtab = load_table(x)
                filestore[inputs['name'][i]] = biomtab
                i += 1
        except Exception:
            logger.error("Failed to import BIOM files.", exc_info=True)
    if inputs['otu_table'] is not None:
        try:
            j = 0  # j is used to match sample + tax data to OTU data
            for x in inputs['otu_table']:
                input_fp = x
                sample_metadata_fp = None
                observation_metadata_fp = None
                obs_data = None
                sample_data = None
                biomtab = load_table(input_fp)
                try:
                    sample_metadata_fp = inputs['sample_data'][j]
                    observation_metadata_fp = inputs['tax_table'][j]
                except (TypeError, KeyError):
                    pass
                if sample_metadata_fp is not None:
                    sample_f = open(sample_metadata_fp, 'r')
                    sample_data = MetadataMap.from_file(sample_f)
                    sample_f.close()
                    biomtab.add_metadata(sample_data, axis='sample')
                if observation_metadata_fp is not None:
                    obs_f = open(observation_metadata_fp, 'r')
                    obs_data = MetadataMap.from_file(obs_f)
                    obs_f.close()
                    # for taxonomy collapsing,
                    # metadata variable needs to be a complete list
                    # not separate entries for each tax level
                    for b in list(obs_data):
                        tax = list()
                        for l in list(obs_data[b]):
                            tax.append(obs_data[b][l])
                            obs_data[b].pop(l, None)
                        obs_data[b]['taxonomy'] = tax
                    biomtab.add_metadata(obs_data, axis='observation')
                filestore[inputs['name'][j]] = biomtab
                j += 1
        except Exception:
            logger.warning("Failed to combine input files.", exc_info=True)
    bioms = Batch({'otu': filestore}, inputs)
    # it is possible that there are forbidden characters in the OTU identifiers
    # we can forbid people from using those, or replace those with an underscore
    if inputs['biom_file'] or inputs['otu_table']:
        for name in bioms.otu:
            biomfile = bioms.otu[name]
            taxon_ids = biomfile._observation_ids  # need to be careful with these operations
            taxon_index = biomfile._obs_index  # likely to corrupt BIOM file if done wrong
            new_ids = deepcopy(taxon_ids)
            new_indexes = deepcopy(taxon_index)
            for i in range(0, len(taxon_ids)):
                id = taxon_ids[i]
                new_id = id.replace(" ", "_")
                new_ids[i] = new_id
                new_indexes[new_id] = new_indexes.pop(id)
            biomfile._observation_ids = new_ids
            biomfile._obs_index = new_indexes
            bioms.otu[name] = biomfile
        logger.info('Collapsing taxonomy... ')
        bioms.collapse_tax()
    if inputs['cluster'] is not None:
        if publish:
            pub.sendMessage('update', msg='Clustering BIOM files...')
        logger.info('Clustering BIOM files... ')
        bioms.cluster_biom()
    if inputs['split'] is not None and inputs['split'] != 'TRUE':
        bioms.split_biom()
    if inputs['min'] is not None:
        if publish:
            pub.sendMessage('update', msg='Setting minimum mean abundance...')
        logger.info('Removing taxa below minimum count... ')
        bioms.prev_filter(mode='min')
    if inputs['prev'] is not None:
        if publish:
            pub.sendMessage('update', msg='Setting prevalence filter...')
        logger.info('Setting prevalence filter... ')
        bioms.prev_filter(mode='prev')
    if inputs['rar'] is not None:
        if publish:
            pub.sendMessage('update', msg='Rarefying counts...')
        logger.info('Rarefying counts... ')
        bioms.rarefy()
    bioms.inputs['procbioms'] = dict()
    if inputs['biom_file'] or inputs['otu_table']:
        if 'otu' not in bioms.inputs['levels']:
            # add otu level always
            bioms.inputs['procbioms']['otu'] = dict()
            for name in bioms.inputs['name']:
                biomname = bioms.inputs['fp'] + '/' + name + '_' + 'otu' + '.hdf5'
                bioms.inputs['procbioms']['otu'][name] = biomname
        for level in bioms.inputs['levels']:
            bioms.inputs['procbioms'][level] = dict()
            for name in bioms.inputs['name']:
                biomname = bioms.inputs['fp'] + '/' + name + '_' + level + '.hdf5'
                bioms.inputs['procbioms'][level][name] = biomname
        all_bioms = {**bioms.otu, **bioms.genus, **bioms.family,
                     **bioms.order, **bioms.class_, **bioms.phylum}
        for biomfile in all_bioms:
            if all_bioms[biomfile].shape[0] == 1:
                logger.error("The current preprocessing steps resulted in BIOM files with only 1 row.",
                             exc_info=True)
    if inputs['network'] is not None:
        if publish:
            pub.sendMessage('update', msg='Checking previously generated networks...')
        logger.info('Checking previously generated networks...')
        filelist = deepcopy(inputs['network'])
        for file in filelist:
            network = _read_network(file)
            nodes = len(network.nodes)
            edges = len(network.edges)
            logger.info("This network has " + str(nodes) +
                        " nodes and " + str(edges) + " edges.")
            weight = nx.get_edge_attributes(network, 'weight')
            if len(weight) > 0:
                logger.info('This is a weighted network.')
            else:
                logger.info('This is an unweighted network.')
    try:
        if inputs['biom_file'] or inputs['otu_table']:
            bioms.write_bioms()
            logger.info('BIOM files written to disk. ')
    except Exception:
        logger.warning('Failed to write BIOM files to disk. ', exc_info=True)
    write_settings(bioms.inputs)
    logger.info('Settings file written to disk. ')

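# get_input records where every processed BIOM file was written in a nested
# 'procbioms' dictionary (taxonomic level -> file name -> path), which is what
# run_network and run_neo4j later read back through the settings file. The
# layout below is illustrative only; the paths are placeholders following the
# <fp>/<name>_<level>.hdf5 pattern used above.
example_procbioms = {'otu': {'test': '/tmp/massoc_run/test_otu.hdf5'},
                     'family': {'test': '/tmp/massoc_run/test_family.hdf5'}}
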
def run_neo4j(inputs, publish=False):
    """
    Starts and carries out operations on the Neo4j database.

    :param inputs: Dictionary of inputs.
    :param publish: If True, publishes messages to be received by GUI.
    :return:
    """
    _create_logger(inputs['fp'])
    # overwritten settings should be retained
    old_inputs = read_settings(inputs['fp'] + '/settings.json')
    # handler to file
    # check if password etc is already there
    if 'username' in old_inputs:
        logins = dict((k, old_inputs[k]) for k in ('username', 'password', 'address', 'neo4j'))
    old_inputs.update(inputs)
    inputs = old_inputs
    if 'pid' in inputs:
        existing_pid = pid_exists(inputs['pid'])
    else:
        existing_pid = False
    if not inputs['neo4j']:
        inputs.update(logins)
    checks = str()
    if inputs['job'] == 'start':
        if not existing_pid:
            start_database(inputs, publish)
            existing_pid = True
        else:
            logger.info("Database is already running. ")
    elif inputs['job'] == 'quit':
        if not existing_pid:
            logger.info("No database open. ")
        else:
            try:
                if publish:
                    pub.sendMessage('update', msg='Getting PID...')
                # there is a lingering Java process that places a lock on the database.
                # terminating the subprocess does NOT terminate the Java process,
                # so the store lock has to be deleted manually.
                # This is different for Linux & Windows machines and may not be trivial
                # however, PID solution may be platform-independent
                # CURRENT SOLUTION:
                # get parent PID of subprocess
                # use psutil to get child PIDs
                # kill child PIDs too
                parent_pid = inputs['pid']
                parent = Process(parent_pid)
                children = parent.children(recursive=True)
                for child in children:
                    child.kill()
                # apparently killing the children also kills the parent
            except Exception:
                logger.warning("Failed to close database. ", exc_info=True)
    elif inputs['job'] == 'clear':
        if not existing_pid:
            start_database(inputs, publish)
            existing_pid = True
        try:
            if publish:
                pub.sendMessage('update', msg='Clearing database...')
            importdriver = ImportDriver(user=inputs['username'],
                                        password=inputs['password'],
                                        uri=inputs['address'],
                                        filepath=inputs['fp'])
            importdriver.clear_database()
            importdriver.close()
        except Exception:
            logger.warning("Failed to clear database. ", exc_info=True)
    elif inputs['job'] == 'write':
        if not existing_pid:
            start_database(inputs, publish)
            existing_pid = True
        try:
            if publish:
                pub.sendMessage('update', msg='Accessing database...')
            importdriver = ImportDriver(user=inputs['username'],
                                        password=inputs['password'],
                                        uri=inputs['address'],
                                        filepath=inputs['fp'])
            importdriver.export_network(path=inputs['fp'])
            importdriver.close()
        except Exception:
            logger.warning("Failed to write database to graphml file. ", exc_info=True)
    elif inputs['job'] == 'cyto':
        if not existing_pid:
            start_database(inputs, publish)
            existing_pid = True
        try:
            if publish:
                pub.sendMessage('update', msg='Accessing database...')
            importdriver = ImportDriver(user=inputs['username'],
                                        password=inputs['password'],
                                        uri=inputs['address'],
                                        filepath=inputs['fp'])
            importdriver.export_cyto()
            importdriver.close()
        except Exception:
            logger.warning("Failed to export networks to Cytoscape. ", exc_info=True)
", exc_info=True) else: if not existing_pid: start_database(inputs, publish) existing_pid = True if publish: pub.sendMessage('update', msg='Uploading files to database...') filestore = None if inputs['procbioms']: filestore = read_bioms(inputs['procbioms']) # ask users for additional input bioms = Batch(filestore, inputs) bioms = Nets(bioms) for file in inputs['network']: network = _read_network(file) bioms.add_networks(network, file) importdriver = None sleep(12) importdriver = ImportDriver(user=inputs['username'], password=inputs['password'], uri=inputs['address'], filepath=inputs['fp']) # importdriver.clear_database() try: # pub.sendMessage('update', msg='Uploading BIOM files...') logger.info("Uploading BIOM files...") itemlist = list() for level in inputs['procbioms']: for item in inputs['procbioms'][level]: name = inputs['procbioms'][level][item] biomfile = load_table(name) importdriver.convert_biom(biomfile=biomfile, exp_id=name) itemlist.append(name) checks += 'Successfully uploaded the following items and networks to the database: \n' for item in itemlist: checks += (item + '\n') checks += '\n' logger.info(checks) except Exception: logger.warning("Failed to upload BIOM files to Neo4j database. ", exc_info=True) try: # pub.sendMessage('update', msg='Uploading network files...') logger.info('Uploading network files... ') for item in bioms.networks: network = bioms.networks[item] # try to split filename to make a nicer network id subnames = item.split('/') if len(subnames) == 1: subnames = item.split('\\') name = subnames[-1].split('.')[0] importdriver.convert_networkx(network=network, network_id=name, mode='weight') itemlist.append(item) except Exception: logger.warning('Unable to upload network files to Neo4j database. ', exc_info=True) checks += 'Unable to upload network files to Neo4j database.\n' if publish: pub.sendMessage('database_log', msg=checks) importdriver.close() logger.info('Completed database operations! ') write_settings(inputs)