Exemplo n.º 1
0
	def __init__(self, **keywords):
		"""
		Process the keyword options, then set up the database engine.

		keywords is handed to ProcessOptions.process_function_arguments together with
		self.option_default_dict (presumably that call also stores the options as
		attributes on this object, given class_to_have_attr=self -- confirm in
		pymodule.ProcessOptions).

		History:
			2009-4-10 simplified further by moving db-common lines to ElixirDB
			2008-07-31
		"""
		# imported inside the method, not at module level, exactly like the original code
		from pymodule.ProcessOptions import ProcessOptions
		ProcessOptions.process_function_arguments(
			keywords,
			self.option_default_dict,
			error_doc=self.__doc__,
			class_to_have_attr=self
		)
		# __metadata__, __session__ and entities are looked up in the enclosing module scope
		self.setup_engine(
			metadata=__metadata__,
			session=__session__,
			entities=entities
		)
		"""
Exemplo n.º 2
0
 def __init__(self, **keywords):
     """
     Process the keyword options, then set up the database engine.

     The options are checked against self.option_default_dict by
     ProcessOptions.process_function_arguments (presumably it also sets them
     as attributes on this object -- confirm in pymodule.ProcessOptions).
     """
     # imported inside the method, not at module level, as in the original code
     from pymodule.ProcessOptions import ProcessOptions
     ProcessOptions.process_function_arguments(
         keywords, self.option_default_dict,
         error_doc=self.__doc__, class_to_have_attr=self)
     # __metadata__, __session__ and entities come from the enclosing module scope
     self.setup_engine(metadata=__metadata__, session=__session__, entities=entities)
     """
Exemplo n.º 3
0
	def __init__(self, **keywords):
		"""
		Process the keyword options, reset all per-file bookkeeping, then initialise
		the input and the output from self.inputFname and self.outputFname.

		History:
			2012.9.5 set default minDepth=0
			2011-9-27
		"""
		self.ad = ProcessOptions.process_function_arguments(
			keywords, self.option_default_dict,
			error_doc=self.__doc__, class_to_have_attr=self)

		# --- header information ---
		# 2012.5.10 list of column headers (taken from the header line that starts with #CHROM)
		self.header = None
		# 2012.5.10 identical to self.header, but the first entry is "CHROM" rather than "#CHROM"
		self.headerWithoutHash = None

		# --- samples, loci and genotype calls ---
		self.sample_id_ls = []
		# value is the index of the sample's column in genotype_call_matrix
		self.sample_id2index = {}
		self.locus_id_ls = []
		self.locus_id2row_index = {}
		self.locus_id2data = {}
		self.genotype_call_matrix = []

		# --- column layout of the input file ---
		# column index within the file
		self.col_name2index = {}
		self.col_index_individual_name_ls = None
		# column index within the input file, not within the matrix
		self.individual_name2col_index = {}
		# 2012.3.28 every raw line (including '\n') found before the "#CHROM" line
		self.metaInfoLs = []
		# 2012.3.20 list of sample column headers (from sampleStartingColumn onwards)
		self.sampleIDHeader = []

		# input handles are reset before _initializeInput() is called
		self.inf = self.reader = None
		self._initializeInput(self.inputFname)

		# output handles are reset before _initializeOutput() is called
		self.outf = self.writer = None
		self._initializeOutput(self.outputFname)
Exemplo n.º 4
0
	def __init__(self, inputFname=None, **keywords):
		"""
		Open an HDF5 file and either load its content or create a new table in it.

		inputFname: file path, used only when the processed options left
			self.inputFname empty.
		keywords: options handled by ProcessOptions.process_function_arguments
			(presumably the source of self.inputFname, self.openMode, self.tableName,
			self.dtype and self.rowDefinition -- confirm in ProcessOptions).

		With openMode 'r' the existing data is read via _readInData(); with 'w' a
		table is created via createNewTable(). Any other mode only opens the file.
		"""
		self.ad = ProcessOptions.process_function_arguments(
			keywords, self.option_default_dict,
			error_doc=self.__doc__, class_to_have_attr=self)
		# the explicit argument is only a fallback for an empty option value
		self.inputFname = self.inputFname or inputFname

		self.header = None
		# same as header
		self.combinedColIDList = None
		self.combinedColID2ColIndex = None

		mode = self.openMode
		self.hdf5File = h5py.File(self.inputFname, mode)
		self.tableObjectList = []
		self.tablePath2Index = {}

		if mode == 'r':
			self._readInData()
		elif mode == 'w':
			self.createNewTable(
				tableName=self.tableName,
				dtype=self.dtype,
				rowDefinition=self.rowDefinition)

		# 2012.11.16 cursor used for iteration
		self.rowIndexCursor = 0
Exemplo n.º 5
0
	def __init__(self, **keywords):
		"""
		Wrap one HDF5 group: create its dataset skeleton when self.newGroup is set,
		otherwise read in the data that is already there.

		dataMatrixDtype may be a compound (record) numpy dtype, see
			http://docs.scipy.org/doc/numpy/reference/arrays.dtypes.html
			http://docs.scipy.org/doc/numpy/reference/generated/numpy.dtype.html

		Examples of compound dtypes:

			#a record type with a 16-character string (field "name")
			#and a sub-array of two 64-bit floats (field "grades"):
			dt = numpy.dtype([('name', numpy.str_, 16), ('grades', numpy.float64, (2,))])

			my_dtype = numpy.dtype([('field1', 'i'), ('field2', 'f'), ('field3', varLenStrType)])

			#array-protocol type strings; the numbers count bytes, not bits:
			>>> numpy.dtype([('a','f8'),('b','S10')])
			dtype([('a', '<f8'), ('b', '|S10')])

			#tuples: an integer field of shape 3 and a flexible void field of size 10
			#(numpy.int64 here; the numpy.int alias was removed in NumPy 1.24):
			numpy.dtype([('hello',(numpy.int64,3)),('world',numpy.void,10)])

			#dictionaries, two fields named 'gender' and 'age':
			numpy.dtype({'names':['gender','age'], 'formats':['S1',numpy.uint8]})

			#offsets in bytes, here 0 and 25:
			numpy.dtype({'surname':('S25',0),'age':(numpy.uint8,25)})
		"""
		self.ad = ProcessOptions.process_function_arguments(
			keywords, self.option_default_dict,
			error_doc=self.__doc__, class_to_have_attr=self)

		# names of the three datasets kept inside the group
		self.dataMatrixDSName = "dataMatrix"
		self.rowIDListDSName = "rowIDList"
		self.colIDListDSName = "colIDList"

		if self.newGroup:
			self._createDatasetSkeletonForOneGroup(h5Group=self.h5Group, dtype=self.dataMatrixDtype)
		else:
			self._readInData()

		# flag telling whether the next write is the first one (first time = set the whole matrix)
		self.newWrite = True

		self.rowIndexCursor = 0
Exemplo n.º 6
0
	def __init__(self, inputFname=None, **keywords):
		"""
		Prepare a delimited text file for reading or writing.

		inputFname: file path, used only when the processed options left
			self.inputFname empty.
		keywords: options handled by ProcessOptions.process_function_arguments
			(presumably the source of self.inputFname, self.inputFile, self.openMode
			and self.delimiter, which are read right after the call -- confirm in
			ProcessOptions).

		Reading mode (openMode=='r'): a delimiter of None is guessed through
		figureOutDelimiter(); tab- or comma-delimited input is wrapped in csv.reader
		and isRealCSV becomes True, any other input is kept as the plain file object.
		Any other mode: the delimiter defaults to tab and a csv.writer is created.
		"""
		self.ad = ProcessOptions.process_function_arguments(
			keywords, self.option_default_dict,
			error_doc=self.__doc__, class_to_have_attr=self)
		if not self.inputFname:
			self.inputFname = inputFname
		# open the file here only when no file object was supplied
		if self.inputFname and self.inputFile is None:
			self.inputFile = utils.openGzipFile(self.inputFname, openMode=self.openMode)

		# 2013.05.03 alias for easy access
		self.filename = self.inputFname

		self.csvFile = None
		self.isRealCSV = False
		if self.openMode != 'r':
			# writing mode: always goes through csv.writer, tab-delimited by default
			if not self.delimiter:
				self.delimiter = '\t'
			self.csvFile = csv.writer(self.inputFile, delimiter=self.delimiter)
			self.isRealCSV = True
		else:
			# reading mode
			if self.delimiter is None:
				self.delimiter = figureOutDelimiter(self.inputFile)

			if self.delimiter in ('\t', ','):
				self.csvFile = csv.reader(self.inputFile, delimiter=self.delimiter)
				self.isRealCSV = True
			else:
				# other delimiters: hand out the raw file object; isRealCSV stays False
				self.csvFile = self.inputFile
		self.col_name2index = None

		# 2013.08.30 holds the row currently being read
		self._row = None
		# default header pattern: a line that begins with a letter
		self.headerPattern = re.compile(r'^[a-zA-Z]')
		# default comment pattern: a line that begins with '#'
		self.commentPattern = re.compile(r'^#')
		self.comment_row_list = []
Exemplo n.º 7
0
						nonFounderIndividualIDSet=nonFounderIndividualIDSet)
	
	def _reportFamilyStructure(self, noOfParents2FamilyData=None):
		"""
		2013.07.19
		Write a tab-delimited summary of the family structure to stderr.

		noOfParents2FamilyData: mapping from the number of parents to a family-data
			object exposing parentTupleSet, parentIDSet, childIDSet and
			individualIDSet (each must support len()). None or an empty mapping
			results in the header line only.

		Output: one header line, then one line per mapping entry with the key,
		the number of families (size of parentTupleSet) and the sizes of the
		parent, child and individual ID sets.
		"""
		rowTemplate = "\t%s\t%s\t%s\t%s\t%s\n"
		sys.stderr.write(rowTemplate%("parentSetSize", "noOfFamilies", "noOfParents", "noOfKids", "noOfUniqueIndividuals"))
		# the parameter defaults to None; do not fail on it, there is simply nothing to report
		if not noOfParents2FamilyData:
			return
		# items() instead of iteritems(): available on both Python 2 and Python 3 mappings
		for noOfParents, familyData in noOfParents2FamilyData.items():
			sys.stderr.write(rowTemplate%(noOfParents, len(familyData.parentTupleSet), \
										len(familyData.parentIDSet), len(familyData.childIDSet), \
										len(familyData.individualIDSet)))
		
	
	def next(self):
		"""
		Return the next pedigree record read from self.csvFile.

		Returns a PassingData object carrying familyID, individualID, paternalID,
		maternalID, sex and phenotype, in that column order.

		Raises:
			StopIteration: when the underlying reader is exhausted.
			ValueError: when a row does not have exactly six columns (tuple unpacking).
			Any other error raised by the underlying reader is propagated as is.
		"""
		# No blanket try/except here: the reader's own StopIteration ends the iteration,
		# while real failures (malformed CSV, I/O errors, KeyboardInterrupt) are no longer
		# disguised as a normal end of input.
		row = self.csvFile.next()
		if not self.isRealCSV:
			# plain file object: the row is a raw line, split it on whitespace
			row = row.strip().split()
		familyID, individualID, paternalID, maternalID, sex, phenotype = row
		return PassingData(familyID=familyID, individualID=individualID, paternalID=paternalID, \
						maternalID=maternalID, sex=sex, phenotype=phenotype)

if __name__ == '__main__':
	# Script entry point: parse the command line against the option definitions of the
	# main class, construct the object from the parsed long options and run it.
	main_class = PlinkPedigreeFile
	parsedOptions = ProcessOptions(
		sys.argv,
		main_class.option_default_dict,
		error_doc=main_class.__doc__)
	pedigreeFile = main_class(**parsedOptions.long_option2value)
	pedigreeFile.run()