示例#1
0
 def __readInfile__(self):
     # Loads self.infile line by line. The first line is treated as the
     # header: its delimiter is detected with unixpath.getDelim and its
     # fields are handed to __setHeader__. Every later line is split on that
     # same delimiter and passed to __parseLine__.
     delim = None
     with open(self.infile, "r") as handle:
         for idx, raw in enumerate(handle):
             text = raw.strip()
             if idx == 0:
                 # Header row fixes the delimiter for the rest of the file
                 delim = unixpath.getDelim(text)
                 self.__setHeader__(text.split(delim))
                 continue
             self.__parseLine__(text.split(delim))
示例#2
0
 def __setData__(self, infile):
     # Reads a delimited text file into a pandas DataFrame and stores it in
     # self.data under the key returned by unixpath.getFileName(infile).
     #
     # The first line supplies the column names; its delimiter is detected
     # with unixpath.getDelim and reused to split every following line.
     # All values are kept as strings exactly as split from the file.
     delim = None
     # Start with no header so that an empty input file produces an empty
     # DataFrame instead of raising UnboundLocalError below.
     head = None
     rows = []
     name = unixpath.getFileName(infile)
     with open(infile, "r") as f:
         for line in f:
             line = line.strip()
             if delim:
                 rows.append(line.split(delim))
             else:
                 # No delimiter found yet: treat this line as the header
                 delim = unixpath.getDelim(line)
                 head = line.split(delim)
     self.data[name] = pandas.DataFrame(rows, columns=head)
示例#3
0
 def __setTaxa__(self, infile):
     # Populates self.names from the header row of infile and self.taxa
     # from every remaining row.
     print("\n\tReading taxonomy...")
     with open(infile, "r") as handle:
         header = next(handle, None)
         if header is None:
             # Empty file: nothing to store
             return
         header = header.strip()
         delim = unixpath.getDelim(header)
         # Header names exclude the first and the last column
         self.names = header.split(delim)[1:-1]
         for raw in handle:
             fields = raw.strip().split(delim)
             # The second-to-last column is the key; the columns between the
             # first one and the key are stored as the value.
             # NOTE(review): header drops one trailing column, rows drop two
             # -- confirm this offset is intended.
             self.taxa[fields[-2]] = fields[1:-2]
示例#4
0
	def __setDiagnoses__(self):
		# Fills self.diagnoses from self.diagfile, mapping the first column
		# of each data row to its (stripped) last column. The header row is
		# passed to __setHeader__ and determines the delimiter.
		print("\n\tReading diagnosis file...")
		with open(self.diagfile, "r") as handle:
			header = next(handle, None)
			if header is not None:
				header = header.strip()
				delim = unixpath.getDelim(header)
				self.__setHeader__(header.split(delim))
				for raw in handle:
					fields = raw.strip().split(delim)
					self.diagnoses[fields[0]] = fields[-1].strip()
		print("\tExtracted {:,} diagnosis records.".format(len(self.diagnoses)))
示例#5
0
	def setSpecies(self, indir):
		# Reads every path matching indir + "*" and stores one Species
		# object per distinct value found in the last column of a row.
		# Rows with fewer than three fields are ignored; a repeated key is
		# merged into the existing entry through resolveSpecies.
		print("\n\tReading species totals files...")
		for path in glob(indir + "*"):
			delim = None
			with open(path, "r") as handle:
				for idx, raw in enumerate(handle):
					if idx == 0:
						# Delimiter is detected once per file from its first line
						delim = unixpath.getDelim(raw)
					# The first line is parsed as a row as well (it is not skipped)
					fields = self.__getRow__(delim, raw)
					if len(fields) < 3:
						continue
					key = fields[-1]
					if key in self.species:
						self.species[key].resolveSpecies(fields)
					else:
						self.species[key] = Species(self.delim, fields)
 def __readFile__(self, infile):
     # Reads a delimited text file and returns a tuple (ret, h):
     #   ret -- dict mapping the first column of each data row to a list of
     #          the remaining columns
     #   h   -- list of header fields taken from the first line
     # The delimiter is detected from the header with unixpath.getDelim.
     first = True
     ret = {}
     # Default to an empty header so an empty file returns ({}, []) instead
     # of raising UnboundLocalError at the return statement.
     h = []
     print(("\tReading {}...").format(os.path.split(infile)[1]))
     with open(infile, "r") as f:
         for line in f:
             line = line.strip()
             if not first:
                 s = line.split(d)
                 # Store with ID as key
                 ret[s[0]] = s[1:]
             else:
                 d = unixpath.getDelim(line)
                 h = line.split(d)
                 first = False
     return ret, h
示例#7
0
	def __readInfile__(self, infile, diagnosis, malignant):
		# Loads records from infile into self.records, keyed on the first
		# column. The first line fixes the delimiter and, when self.columns
		# has not been set yet, is used to build the Columns object. Data
		# rows whose field count differs from self.columns.length are skipped.
		delim = None
		header_seen = False
		with open(infile, "r") as handle:
			for raw in handle:
				if not header_seen:
					# Header line is split as read (not stripped)
					delim = unixpath.getDelim(raw)
					if self.columns is None:
						self.columns = Columns(raw.split(delim))
					header_seen = True
					continue
				fields = raw.strip().split(delim)
				if len(fields) != self.columns.length:
					continue
				key = fields[0]
				if key not in self.records:
					# First occurrence of this key: create the record, then fill it
					self.records[key] = Record(malignant, self.delim, diagnosis)
					self.records[key].setRecord(self.columns, fields)
				else:
					# Key seen before: merge this row into the existing record
					self.records[key].resolveRecord(self.columns, fields)
示例#8
0
	def mergeDiagnoses(self):
		# Copies self.infile to self.outfile, replacing the "Diagnosis"
		# column of each row with the value returned by __getDiagnosis__
		# for that row's "UID". Prints the number of merged records, i.e.
		# the sum of the counts returned by __getDiagnosis__.
		#
		# Output is always comma-delimited. The header is re-joined with
		# commas as well, so an input file using another delimiter no longer
		# yields a header and data rows with different separators (for
		# comma-delimited input the header is written unchanged).
		first = True
		count = 0
		print("\tMerging full data file with diagnoses...")
		with open(self.outfile, "w") as out, open(self.infile, "r") as f:
			for line in f:
				line = line.strip()
				if first:
					d = unixpath.getDelim(line)
					self.__setHeader__(line.split(d))
					out.write(",".join(line.split(d)) + "\n")
					first = False
					continue
				row = line.split(d)
				uid_idx = self.head["UID"]
				diag_idx = self.head["Diagnosis"]
				# Only merge when both columns exist in this row; short or
				# blank rows are passed through instead of raising IndexError.
				if len(row) > max(uid_idx, diag_idx):
					uid = row[uid_idx].strip()
					row[diag_idx], c = self.__getDiagnosis__(uid, row[diag_idx].strip())
					count += c
				row = self.__checkQuotes__(row)
				out.write(",".join(row) + "\n")
		print(("\tMerged {:,} diagnosis records.").format(count))