if (sample.id == options.target_sample): target_samples.append(sample) elif options.target_samples_filename: target_sample_ids = [x.strip() for x in open(options.target_samples_filename).readlines()] for target_sample_id in target_sample_ids: for sample in samples: if (sample.id == target_sample_id): target_samples.append(sample) else: print "You must specify a target sample" sys.exit(1) if len(target_samples) ==0: print "Could not find samples!" sys.exit() samples_time = pt.stop() print "Loaded samples (%0.2fs)"%(samples_time) pt.start() rules = load_rules(options.model_filename) rules = rules.remap_feature_to_index(samples) training_time = pt.stop() newrules = [] for rule in rules: keep_rule = False for target_sample in target_samples: if target_sample.satisfies(rule.ls): keep_rule = True if keep_rule:
metavar="FILE") parser.add_option("-t", "--target_class", help="Target class.", metavar="CLASS") (options, args) = parser.parse_args() pt.start() fileio = FileIO() samples = fileio.load_samples(options.samples_filename) classes = fileio.load_classes(options.classes_filename) samples.load_class_labels(classes) samples.set_current_class(options.target_class) target_samples = [] samples_time = pt.stop() print "Loaded samples (%0.2fs)" % (samples_time) pt.start() rules = load_rules(options.model_filename) indexed_rules = rules.remap_feature_to_index(samples) training_time = pt.stop() newsamples = {} for sample in samples: keep_sample = False for rule in indexed_rules: if sample.satisfies(rule.ls): if not newsamples.has_key(sample.id): newsamples[sample.id] = []
error("Please provide a genotype sample file with -s /path/to/genotype.file") errorCount += 1 if not options.input_classes_filename: error("Please provide a phenotype class file with -c /path/to/phenotype.file") errorCount += 1 if not options.target_class: error("Please provide the phenotype target to be predicted with -t \"TRAITNAME\"") errorCount += 1 if errorCount > 0: error("For help on usage, try calling:\n\tpython %s -h" % os.path.basename(sys.argv[0])) exit(1) pt.start() fileio = FileIO() samples = fileio.load_samples(options.input_samples_filename) samples_time = pt.stop() print "Loaded samples (%0.2fs)"%(samples_time) if options.feature_select: print "Selecting top %d features from %s, ordered by %s"%(options.feature_select_top_n,options.feature_select,options.feature_select_score) pt.start() from pica.AssociationRule import load_rules,AssociationRuleSet selected_rules = AssociationRuleSet() rules = load_rules(options.feature_select) rules.set_target_accuracy(options.feature_select_score) selected_rules.extend(rules[:options.feature_select_top_n]) samples = samples.feature_select(selected_rules) print "Finished feature selection (%0.2fs)"%(pt.stop()) classes = fileio.load_classes(options.input_classes_filename) samples.load_class_labels(classes) print samples.get_number_of_features() samples.set_current_class(options.target_class)
) errorCount += 1 if not options.target_class: error( "Please provide the phenotype target to be predicted with -t \"TRAITNAME\"" ) errorCount += 1 if errorCount > 0: error("For help on usage, try calling:\n\tpython %s -h" % os.path.basename(sys.argv[0])) exit(1) pt.start() fileio = FileIO() samples = fileio.load_samples(options.input_samples_filename) samples_time = pt.stop() print "Loaded samples (%0.2fs)" % (samples_time) if options.feature_select: print "Selecting top %d features from %s, ordered by %s" % ( options.feature_select_top_n, options.feature_select, options.feature_select_score) pt.start() from pica.AssociationRule import load_rules, AssociationRuleSet selected_rules = AssociationRuleSet() rules = load_rules(options.feature_select) rules.set_target_accuracy(options.feature_select_score) selected_rules.extend(rules[:options.feature_select_top_n]) samples = samples.feature_select(selected_rules) print "Finished feature selection (%0.2fs)" % (pt.stop()) classes = fileio.load_classes(options.input_classes_filename) samples.load_class_labels(classes)