Exemplo n.º 1
0
			if (sample.id == options.target_sample):
				target_samples.append(sample)
	elif options.target_samples_filename:
		# Read the requested sample ids, one per line. The context manager closes
		# the file as soon as it has been read instead of leaking the handle.
		with open(options.target_samples_filename) as target_ids_file:
			target_sample_ids = [x.strip() for x in target_ids_file]
		# Walk the ids in file order and collect every loaded sample whose id
		# matches; an id listed twice is therefore collected twice.
		for target_sample_id in target_sample_ids:
			for sample in samples:
				if (sample.id == target_sample_id):
					target_samples.append(sample)
	else:
		print "You must specify a target sample"
		sys.exit(1)
	
	if len(target_samples) ==0:
		print "Could not find samples!"
		sys.exit()
	samples_time = pt.stop()
	print "Loaded samples (%0.2fs)"%(samples_time)
	
	# Restart the timer around loading the rule model.
	pt.start()
	
	# Load the rules from the model file, then rebind `rules` to the result of
	# remapping them against the loaded samples.
	rules = load_rules(options.model_filename)
	rules = rules.remap_feature_to_index(samples)
	# NOTE(review): named training_time, but the timed span only loads and
	# remaps rules; its use is not visible in this excerpt.
	training_time = pt.stop()
	# Presumably receives the rules kept by the filter loop that follows -- confirm.
	newrules = []
	
	for rule in rules:
		keep_rule = False
		for target_sample in target_samples:
			if target_sample.satisfies(rule.ls):
				keep_rule = True
		if keep_rule:
Exemplo n.º 2
0
                      metavar="FILE")
    # -t/--target_class: the value is read further down as options.target_class.
    parser.add_option("-t",
                      "--target_class",
                      help="Target class.",
                      metavar="CLASS")

    # Parse the command line into the options object and the leftover arguments.
    (options, args) = parser.parse_args()

    # Time the loading of samples and class labels; pt.stop() supplies the
    # value printed below as elapsed seconds.
    pt.start()
    fileio = FileIO()
    samples = fileio.load_samples(options.samples_filename)
    classes = fileio.load_classes(options.classes_filename)
    # Attach the class labels to the samples and select the class given by -t.
    samples.load_class_labels(classes)
    samples.set_current_class(options.target_class)
    # NOTE(review): target_samples is not used in the visible part of this
    # script -- confirm whether it is still needed.
    target_samples = []
    samples_time = pt.stop()
    print "Loaded samples (%0.2fs)" % (samples_time)

    # Restart the timer around loading the rule model.
    pt.start()

    # Load the rules and remap them against the loaded samples; the remapped
    # result is what the loop below iterates over.
    rules = load_rules(options.model_filename)
    indexed_rules = rules.remap_feature_to_index(samples)
    # NOTE(review): named training_time, but the timed span only loads and
    # remaps rules.
    training_time = pt.stop()
    # Filled by the loop below: keyed by sample.id, each value starts as a list.
    newsamples = {}

    for sample in samples:
        keep_sample = False
        for rule in indexed_rules:
            if sample.satisfies(rule.ls):
                if not newsamples.has_key(sample.id):
                    newsamples[sample.id] = []
Exemplo n.º 3
0
		error("Please provide a genotype sample file with -s /path/to/genotype.file")
		errorCount += 1
	# The phenotype class file and the target trait are both mandatory. Report
	# each missing one and count it, so every problem is shown in a single run
	# before the script aborts.
	if not options.input_classes_filename:
		error("Please provide a phenotype class file with -c /path/to/phenotype.file")
		errorCount += 1
	if not options.target_class:
		error("Please provide the phenotype target to be predicted with -t \"TRAITNAME\"")
		errorCount += 1
	if errorCount > 0:
		error("For help on usage, try calling:\n\tpython %s -h" % os.path.basename(sys.argv[0]))
		# Use sys.exit rather than the site-provided exit(): the latter is meant
		# for the interactive shell and is unavailable when Python runs with -S.
		sys.exit(1)
	
	# Time the loading of the genotype samples; pt.stop() supplies the value
	# printed below as elapsed seconds.
	pt.start()
	fileio = FileIO()
	samples = fileio.load_samples(options.input_samples_filename)
	samples_time = pt.stop()
	print "Loaded samples (%0.2fs)"%(samples_time)
	# Optional feature selection, driven by a rules file given in
	# options.feature_select.
	if options.feature_select:
		print "Selecting top %d features from %s, ordered by %s"%(options.feature_select_top_n,options.feature_select,options.feature_select_score)
		pt.start()
		# Imported here so the dependency is only needed when feature selection
		# is requested.
		from pica.AssociationRule import load_rules,AssociationRuleSet
		selected_rules = AssociationRuleSet()
		rules = load_rules(options.feature_select)
		# Presumably orders the rules by the chosen score so the slice below
		# takes the best ones -- confirm against AssociationRuleSet.
		rules.set_target_accuracy(options.feature_select_score)
		# Keep only the first feature_select_top_n rules.
		selected_rules.extend(rules[:options.feature_select_top_n])
		# Rebind samples to the result of restricting them to the selected rules.
		samples = samples.feature_select(selected_rules)
		print "Finished feature selection (%0.2fs)"%(pt.stop())
	# Class labels are loaded after any feature selection has been applied.
	classes = fileio.load_classes(options.input_classes_filename)
	samples.load_class_labels(classes)
	# Prints the feature count of the (possibly reduced) sample set.
	print samples.get_number_of_features()
	samples.set_current_class(options.target_class)
Exemplo n.º 4
0
        )
        errorCount += 1
    # The target trait is mandatory: report it when missing and count the error.
    if not options.target_class:
        error(
            "Please provide the phenotype target to be predicted with -t \"TRAITNAME\""
        )
        errorCount += 1
    # Abort once all problems have been reported.
    if errorCount > 0:
        error("For help on usage, try calling:\n\tpython %s -h" %
              os.path.basename(sys.argv[0]))
        # Use sys.exit rather than the site-provided exit(): the latter is meant
        # for the interactive shell and is unavailable when Python runs with -S.
        sys.exit(1)

    # Time the loading of the genotype samples; pt.stop() supplies the value
    # printed below as elapsed seconds.
    pt.start()
    fileio = FileIO()
    samples = fileio.load_samples(options.input_samples_filename)
    samples_time = pt.stop()
    print "Loaded samples (%0.2fs)" % (samples_time)
    # Optional feature selection, driven by a rules file given in
    # options.feature_select.
    if options.feature_select:
        print "Selecting top %d features from %s, ordered by %s" % (
            options.feature_select_top_n, options.feature_select,
            options.feature_select_score)
        pt.start()
        # Imported here so the dependency is only needed when feature
        # selection is requested.
        from pica.AssociationRule import load_rules, AssociationRuleSet
        selected_rules = AssociationRuleSet()
        rules = load_rules(options.feature_select)
        # Presumably orders the rules by the chosen score so the slice below
        # takes the best ones -- confirm against AssociationRuleSet.
        rules.set_target_accuracy(options.feature_select_score)
        # Keep only the first feature_select_top_n rules.
        selected_rules.extend(rules[:options.feature_select_top_n])
        # Rebind samples to the result of restricting them to the selected rules.
        samples = samples.feature_select(selected_rules)
        print "Finished feature selection (%0.2fs)" % (pt.stop())
    # Class labels are loaded after any feature selection has been applied.
    classes = fileio.load_classes(options.input_classes_filename)
    samples.load_class_labels(classes)