def make_pass(stressor): #print "Parsing data for '%s'...\n" % stressor experiments = [ DataSet.load(input % stressor) for input in inputs ] reference = experiments[0] for data in experiments: # Find the ratio between the amount of red and green fluorescence that was # detected. This ratio is assumed to be one for most data analysis # purposes, so the raw data needs to be corrected. green, red = 0, 0 for feature in data: red += feature.signal.red.intensity green += feature.signal.green.intensity data.intensity_ratio = red / green data.log_ratio = math.log(red / green, 2) def correction(feature): feature.log_ratio -= data.log_ratio return feature data.apply(correction) for data in experiments: for feature, zero in zip(data, reference): feature.normed_ratio = feature.log_ratio - zero.log_ratio for data in experiments: def irrational(feature): return math.isnan(feature.normed_ratio) def noisy(feature): return (feature.signal.red.signal_to_noise < 1) or \ (feature.signal.green.signal_to_noise < 1) def unnamed(feature): return feature.name in ('None', 'EMPTY') # This filter was proposed by team JKRW. def inconsistent(feature): return feature.regression_quality < 0.5 data.prune(irrational) data.prune(noisy) data.prune(unnamed) data.prune(inconsistent) for data, output in zip(experiments, outputs): print "Saving %d features for '%s'." % (len(data), stressor) data.save(output % stressor) print
def find_interesting_genes(inputs, threshold): print " Restoring pickled data (%d)..." % len(inputs) experiment = [ DataSet.restore(input) for input in inputs ] uninteresting = lambda feature: abs(feature.normed_ratio) < threshold print " Pruning uninteresting data (%d)..." % len(inputs) for timepoint in experiment: timepoint.prune(uninteresting) print " Flattening all timepoints (%d)...\n" % len(inputs) target, others = experiment[0], experiment[1:] target.union(*others) return target
#!/usr/bin/env python # vim: tw=0 import sys from microarray import DataSet try: stress, ours, theirs = sys.argv[1:] except ValueError: print "Usage: display.py <stress> <ours> <theirs>" sys.exit() input = 'pickles/{}/ours={},theirs={}.pkl'.format(stress, ours, theirs) output = 'output/{}/ours={},theirs={}.txt'.format(stress, ours, theirs) # Display a number of useful parameters: #header = '{:<15}{:<15}{:<20}{:<20}{:<20}'.format("Gene_ID", "Gene_Name", "Expression_Level", "Signal_Quality", "Regression") #template = '{0.id:<15}{0.name:<15}{0.normed_ratio:<20}{0.signal.red.signal_to_noise:<20}{0.regression_quality}' # Produce only raw output (for use in database queries): header = "" template = '{0.name}' data = DataSet.restore(input) data.display(template, header, output) print "Formatting %d genes." % len(data)
#!/usr/bin/env python
# vim: tw=0
# Print a table of normalized ratios for the control.1 experiment, one
# column per timepoint.

from microarray import DataSet

pickles = [
        'pickles/control.1/000.pkl',
        'pickles/control.1/030.pkl',
        'pickles/control.1/060.pkl',
        'pickles/control.1/180.pkl' ]

timepoints = [DataSet.restore(path) for path in pickles]

header = '{0.path}'
template = '{0.id}\t{0.normed_ratio}'

DataSet.tabulate(header, template, *timepoints)
#!/usr/bin/env python from __future__ import division from microarray import DataSet inputs = [ 'data/A+D.000.gpr', 'data/A+D.030.gpr', 'data/A+D.060.gpr', 'data/A+D.180.gpr' ] for input in inputs: data = DataSet.load(input) print len(data)
#!/usr/bin/env python from __future__ import division import math from microarray import DataSet pickles = ["pickles/A+D.000.pkl", "pickles/A+D.030.pkl", "pickles/A+D.060.pkl", "pickles/A+D.180.pkl"] def log_ratio(feature): return feature.log_ratio def too_extreme(feature): return abs(feature.log_ratio) > 15 header = "{0.path} (R/G = {0.intensity_ratio})" feature = "{0.id:<15} {0.log_ratio}" timepoints = [DataSet.restore(path) for path in pickles] for timepoint in timepoints: timepoint.prune(too_extreme) timepoint.sort(log_ratio, reverse=True) timepoint.truncate(50) DataSet.tabulate(header, feature, *timepoints)