示例#1
0
def getDataPackage():
    """Build a data package for GDS2545 with "Tumor" and "Normal" classes.

    Parses the SOFT data set and the gene-network GMT file, loads them
    (plus the synonym file) into a ``DataPackager.dataPackager``, and
    assigns samples to the "Tumor" or "Normal" classification according to
    the text of each column-heading description.

    Returns:
        The populated ``DataPackager.dataPackager`` instance.
    """
    softfile = "data/GDS2545.soft.gz"
    gnfile = "data/c2.biocarta.v2.5.symbols.gmt"
    synfile = "data/Homo_sapiens.gene_info.gz"
    gnf = GMTParser.GMTParser(gnfile)

    sp = SOFTParser.SOFTParser(softfile)

    normal = []
    tumor = []
    # Each entry is indexed as (sample id, description): entry[0] is what
    # gets registered with the classification, entry[1] is the searched text.
    for entry in sp.column_heading_info[0]:
        description = entry[1]
        # Substring membership is used instead of ``find(...) > 0`` so that
        # a phrase starting at the very beginning of the description
        # (index 0) is still recognised.
        if 'normal prostate tissue free' in description:
            normal.append(entry[0].strip())
        # NOTE(review): any description containing 'tumor' that does not
        # contain the exact normal phrase above is classified as Tumor --
        # confirm that is intended for every sample group in the data set.
        elif 'tumor' in description:
            tumor.append(entry[0].strip())

    dt = DataCleaner.DataTable()
    dt.getSOFTData(sp)

    dp = DataPackager.dataPackager()
    dp.addGeneNetwork(gnf.getAllNetworks())
    dp.addDataTable(dt)
    dp.addSynonyms(synfile)

    dp.createClassification("Tumor")
    dp.createClassification("Normal")
    for samp in tumor:
        dp.addToClassification("Tumor", dt.dt_id, samp)

    for samp in normal:
        dp.addToClassification("Normal", dt.dt_id, samp)
    return dp
示例#2
0
   # Fragment of a larger routine: the enclosing definition and the parsers
   # `sp` and `sp2` used below are created before the first visible line.
   # Python 2 syntax (print statements).
   sp3 = SOFTParser.SOFTParser("data/GDS3329.soft.gz")
   gnfile = "data/c2.biocarta.v2.5.symbols.gmt"
   # Load each parsed SOFT data set into its own DataTable.
   print "creating Table"
   dt1 = DataCleaner.DataTable()
   print "importing parser"
   dt1.getSOFTData(sp)
   print "gene parser"
   dt2 = DataCleaner.DataTable()
   print "importing parser"
   dt2.getSOFTData(sp2)
   dt3 = DataCleaner.DataTable()
   print "importing parser"
   dt3.getSOFTData(sp3)
   # Parse the gene-network definitions from the GMT file.
   gnf = GMTParser.GMTParser(gnfile)
   # Assemble the package: gene networks first, then the three data tables.
   print "data packager"
   dp = DataPackager.dataPackager()
   dp.addGeneNetwork(gnf.getAllNetworks())
   print "adding data table 1"
   dp.addDataTable(dt1)
   print "adding data table 2"
   dp.addDataTable(dt2)
   dp.addDataTable(dt3)
   # Two classifications: "first" receives the first half of dt1's samples,
   # "post" receives the second half of dt2's and of dt3's samples.
   dp.createClassification("first")
   dp.createClassification("post")
   # `len(...)/2` relies on Python 2 integer division to yield a valid
   # slice index; under Python 3 it would be a float and the slice would fail.
   for val in dt1.getSamples()[:len(dt1.getSamples())/2]:
       dp.addToClassification("first", dt1.dt_id, val)
   for val in dt2.getSamples()[len(dt2.getSamples())/2:]:
       dp.addToClassification("post", dt2.dt_id, val)
   for val in dt3.getSamples()[len(dt3.getSamples())/2:]:
       dp.addToClassification("post", dt3.dt_id, val)
 
示例#3
0
def _write_tfrecord_example(ex, output_filename):
	"""Serialize `ex` and write it as the single record of `output_filename`.

	The writer is closed even when serialization or the write raises.
	"""
	writer = tf.python_io.TFRecordWriter(output_filename)
	try:
		writer.write(ex.SerializeToString())
	finally:
		writer.close()


def gen_TFRecord_from_file(out_dir, out_filename, bag_filename, flip=False):
	"""Convert one rosbag recording into TFRecord file(s).

	Messages are read from the topics listed in the module-level
	`topic_names`: messages on `topic_names[0]` carry an action value in
	`msg.data`, messages on `topic_names[1]` are passed to
	`packager.imgCallback` and messages on `topic_names[2]` to
	`packager.audCallback`.

	For every action message:
	  * `msg.data > 0`: `packager.formatOutput()` is called;
	  * `msg.data == 1`: the current image/audio stacks are remembered as
	    the "first" step and the step counter `p_t` is incremented;
	  * `msg.data > 1`: reading stops, leaving the packager un-reset so its
	    stacks can be written below;
	  * otherwise (and after the `== 1` case) the packager is reset.

	After reading, if at least one `== 1` action was seen, a two-step
	example (remembered stacks + current stacks) is written to
	`out_dir + out_filename + "_" + <first action> + suffix`. A single-step
	example with the current stacks is always written to
	`out_dir + out_filename + "_" + <last action> + suffix`. The suffix is
	".tfrecord", or "_flip.tfrecord" when `flip` is true.

	Args:
		out_dir: Output directory prefix; concatenated directly with
			`out_filename`, so it must already end with a path separator.
		out_filename: Base name of the output file(s); also used as the
			example id and scanned for the letters "z", "g" and "a" to
			derive the image / optical / audio label flags.
		bag_filename: Path of the rosbag file to read.
		flip: Forwarded to `DataPackager(flip=...)` and reflected in the
			output file suffix.

	Returns:
		List of the output file names, in the order they were written.

	Raises:
		ValueError: If the bag contains no message on `topic_names[0]`, so
			there is no action to label the example with.
	"""
	packager = DataPackager(flip=flip)
	bag = rosbag.Bag(bag_filename)

	# The bag is closed in `finally` so it is not leaked when reading,
	# example construction or writing raises.
	try:
		output_filenames = []

		#######################
		##  Get Label Info   ##
		#######################

		example_id = out_filename

		# The five characters preceding ".bag"; only printed for diagnostics.
		file_end = bag_filename.find(".bag")
		label_code = bag_filename[file_end-5:file_end]
		print("")
		print("bag_filename: ", bag_filename)
		print("label_code:", label_code)

		img_lab, opt_lab, aud_lab = 0,0,0
		if "z" in example_id:
			img_lab = 1
		if "g" in example_id:
			opt_lab = 1
		if "a" in example_id:
			aud_lab = 1
		total_lab = (img_lab+opt_lab+aud_lab > 0)

		print(example_id)
		print(img_lab, opt_lab, aud_lab, ':', total_lab)

		end_file = ".tfrecord"
		if flip:
			end_file = "_flip"+end_file

		#######################
		##     READ FILE     ##
		#######################

		p_t = 0

		# Stays None until an action message is seen; checked after the loop.
		last_action = None
		stored_data = None
		for topic, msg, t in bag.read_messages(topics=topic_names):
			if topic == topic_names[0]:

				last_action = str(msg.data)

				if msg.data > 0:
					# perform data pre-processing steps
					packager.formatOutput()

					if msg.data == 1:
						print("packager.getImgStack().shape: ", packager.getImgStack().shape)
						# NOTE(review): if the stacks are numpy arrays, `[:]`
						# yields a view rather than a copy -- confirm that
						# packager.reset() below cannot alter this snapshot.
						stored_data = {
							"img_raw": packager.getImgStack()[:], "img_lab": 0,
							"aud_raw": packager.getAudStack()[:], "aud_lab": 0,
							"p_t": p_t,
							"total_lab": int(last_action),
							"example_id": example_id}
						p_t += 1

					elif msg.data > 1:
						# Stop before the reset so the packager keeps the
						# formatted stacks for the examples written below.
						break

				packager.reset()
			elif topic == topic_names[1]:
				packager.imgCallback(msg)
			elif topic == topic_names[2]:
				packager.audCallback(msg)

		if last_action is None:
			# Without an action message there is nothing to label the example
			# with; fail with a clear message instead of an unbound-name error.
			raise ValueError(
				"no message on action topic %r found in %r"
				% (topic_names[0], bag_filename))

		if p_t > 0:
			# Two-step example: the remembered first step plus the packager's
			# current stacks as the second step.
			ex = make_sequence_example (
				img_raw=stored_data["img_raw"], img_lab=stored_data["img_lab"],
				aud_raw=stored_data["aud_raw"], aud_lab=stored_data["aud_lab"],
				p_t=stored_data["p_t"],
				first_action=stored_data["total_lab"],
				example_id=stored_data["example_id"],
				img_raw2=packager.getImgStack(),
				aud_raw2=packager.getAudStack(),
				second_action=int(last_action))
			# NOTE(review): if the final action is also 1 this name equals the
			# one used below, so the second write replaces this file -- confirm.
			output_filename = out_dir+out_filename+"_"+str(stored_data["total_lab"])+end_file
			output_filenames.append(output_filename)
			_write_tfrecord_example(ex, output_filename)

		# generate TFRecord data
		ex = make_sequence_example (
			img_raw=packager.getImgStack(), img_lab=img_lab,
			aud_raw=packager.getAudStack(), aud_lab=aud_lab,
			p_t=p_t,
			first_action=int(last_action),
			example_id=example_id)
		# `msg` is the last message read from the bag on any topic, which is
		# the action message only when the loop ended through `break`.
		print("last_action:", msg.data, int(last_action))

		# write TFRecord data to file
		output_filename = out_dir+out_filename+"_"+last_action+end_file
		output_filenames.append(output_filename)
		_write_tfrecord_example(ex, output_filename)

		packager.reset()

		return output_filenames
	finally:
		bag.close()