def yetanotherscript3(obesityfile, catfile, filetodump):
    """
    Join two csv files on their first column and write the result.

    For every row of ``catfile`` the output row holds its first three
    columns followed by the second and third columns of the row of
    ``obesityfile`` that has the same first column.  When ``obesityfile``
    has no row with that key, ``None`` is used for both extra values.  If a
    key occurs several times in ``obesityfile`` the last occurrence wins.

    Args:
        obesityfile: path of the csv file supplying the two extra columns.
        catfile: path of the csv file whose rows drive the output.
        filetodump: path handed to ``csvwriter.csvwriter`` for the output.

    Raises:
        IndexError: if a row in either input has fewer than three columns.
    """
    # Context managers guarantee both inputs are closed, even on error.
    with open(obesityfile, "r") as ofile, open(catfile, "r") as cfile:
        writer = csvwriter.csvwriter(filetodump)
        try:
            second_by_key = {}
            third_by_key = {}
            for row in csv.reader(ofile, delimiter=","):
                second_by_key[row[0]] = row[1]
                third_by_key[row[0]] = row[2]

            rows = [
                [
                    row[0],
                    row[1],
                    row[2],
                    second_by_key.get(row[0]),
                    third_by_key.get(row[0]),
                ]
                for row in csv.reader(cfile, delimiter=",")
            ]
            writer.writerows(rows)
        finally:
            # Release the output even when reading or writing fails.
            writer.closewriter()
def yetanotherscript(fileToRead, filetodump):
    """
    Recode the second and third columns of a csv file to "0"/"1"/"NA".

    Each output row has three cells:

    * column 1 copied unchanged;
    * column 2 recoded: "0.0" -> "0", "1.0" -> "1", anything else -> "NA";
    * column 3 recoded: "3.0"/"4.0" -> "1", "1.0"/"2.0" -> "0",
      anything else -> "NA".

    Args:
        fileToRead: path of the csv file to read.
        filetodump: path handed to ``csvwriter.csvwriter`` for the output.

    Raises:
        IndexError: if an input row has fewer than three columns.
    """
    second_codes = {"0.0": "0", "1.0": "1"}
    third_codes = {"4.0": "1", "3.0": "1", "1.0": "0", "2.0": "0"}

    # The context manager closes the input file even if a row is malformed.
    with open(fileToRead, "r") as infile:
        writer = csvwriter.csvwriter(filetodump)
        try:
            rows = [
                [
                    row[0],
                    second_codes.get(row[1], "NA"),
                    third_codes.get(row[2], "NA"),
                ]
                for row in csv.reader(infile, delimiter=",")
            ]
            writer.writerows(rows)
        finally:
            writer.closewriter()
def printentropy(self, filename, listpr, key, ngram, agelower, ageupper):
    """
    Write the joint-probability attribute of selected records to a text file.

    Only records for which ``pr.getobese(24, 55, 'MONTHS')`` returns 1 are
    considered.  For each of them, every time attribute that carries a
    ``<key>:JOINTPROB:<ngram>`` value and whose age lies in
    ``[agelower, ageupper]`` (both ends inclusive) is written as
    ``"<age>: <value> "``.  A record that produced at least one entry is
    terminated with a newline, giving one output line per record.

    Args:
        filename: path of the text file to (over)write.
        listpr: iterable of records exposing ``getobese`` and
            ``getalltimeattribute``.
        key: prefix of the attribute name to look up.
        ngram: n-gram order, appended to the attribute name.
        agelower: inclusive lower age bound (anything ``Decimal`` accepts).
        ageupper: inclusive upper age bound (anything ``Decimal`` accepts).
    """
    # These do not change per record, so build them once.
    attrname = key + ':JOINTPROB:' + str(ngram)
    lower = Decimal(agelower)
    upper = Decimal(ageupper)

    # The original also created an unused csvwriter on this same path and
    # never closed it; only the plain file handle is needed.
    with open(filename, 'w') as fhandle:
        for pr in listpr:
            if pr.getobese(24, 55, 'MONTHS') != 1:
                continue
            wrote_entry = False
            for timeattrs in pr.getalltimeattribute():
                jointprob = timeattrs.getattribute(attrname)
                if jointprob is None:
                    continue
                age = timeattrs.getage()
                if age.compare(lower) >= 0 and age.compare(upper) <= 0:
                    fhandle.write(str(age) + ': ')
                    # NOTE(review): `self` is passed as the first argument
                    # here, while printprobabilty calls dc.getstring with
                    # (value, format) only -- confirm which signature is
                    # correct.  Call kept exactly as in the original.
                    fhandle.write(dc.getstring(self, jointprob, '.0000001') + ' ')
                    wrote_entry = True
            if wrote_entry:
                fhandle.write('\n')
def printprobabilty(self, filename, key, ngram):
    """
    Write the relative frequency of every n-gram of one model to a csv file.

    The frequency distribution is taken from ``self.unigrams``,
    ``self.bigrams`` or ``self.trigrams`` (for ``ngram`` 1, 2 or 3), looked
    up by ``key``.  Each output row holds the n-gram joined with spaces and
    its count divided by the distribution's total ``N()``, formatted through
    ``dc.getstring``.  The total is also printed to stdout, as before.

    Args:
        filename: path handed to ``csvwriter.csvwriter`` for the output.
        key: key of the model inside the selected n-gram container.
        ngram: n-gram order; must be 1, 2 or 3.

    Raises:
        ValueError: if ``ngram`` is not 1, 2 or 3.
        KeyError: if the selected container has no model for ``key``.
    """
    # Attribute names rather than the attributes themselves, so only the
    # container that is actually requested is touched.
    container_names = {1: 'unigrams', 2: 'bigrams', 3: 'trigrams'}
    if ngram not in container_names:
        raise ValueError("ngram must be 1, 2 or 3, got %r" % (ngram,))

    model = getattr(self, container_names[ngram]).get(key)
    if model is None:
        raise KeyError(key)
    freqdist = model.freqdist

    samplesize = float(freqdist.N())
    print(samplesize)

    # Validate first, open the output second: a bad call no longer leaves
    # an empty, never-closed output file behind.
    writer = csvwriter.csvwriter(filename)
    try:
        for item in freqdist:
            prob = freqdist.get(item) / samplesize
            writer.writerow([' '.join(item), dc.getstring(prob, '.000000001')])
    finally:
        writer.closewriter()
def csvappender(files, filetodump, skiheader):
    """
    Append two or more csv files into a single csv file.

    Rows are copied in order, file by file.  When ``skiheader`` is true,
    every file is assumed to start with a header row and only the header of
    the first file is copied; the first row of each later file is dropped.

    Args:
        files: iterable of paths of the csv files to concatenate.
        filetodump: path handed to ``csvwriter.csvwriter`` for the output.
        skiheader: if true, skip the first row of every file but the first.
    """
    writer = csvwriter.csvwriter(filetodump)
    try:
        for position, path in enumerate(files):
            # One handle per input, closed as soon as that file is copied.
            with open(path, "r") as infile:
                reader = csv.reader(infile, delimiter=",")
                if skiheader and position > 0:
                    # The default keeps an empty input file from raising
                    # StopIteration.
                    next(reader, None)
                for row in reader:
                    writer.writerow(row)
    finally:
        writer.closewriter()
def mergetwocol(fileToRead, filetodump, skipheader, col1, col2, dumpcolnum):
    """
    Merge two columns of a csv file into a new column.

    For each data row the value of column ``col1`` is used when it is
    non-empty, otherwise the value of column ``col2`` when that is
    non-empty.  The chosen value is inserted at position ``dumpcolnum``;
    rows shorter than ``dumpcolnum`` are first padded with empty cells.
    When both source cells are empty nothing is inserted.

    All column numbers are 1-based.

    Args:
        fileToRead: path of the csv file to read.
        filetodump: path handed to ``csvwriter.csvwriter`` for the output.
        skipheader: if true, the first row is copied through unchanged.
        col1: 1-based number of the preferred source column.
        col2: 1-based number of the fallback source column.
        dumpcolnum: 1-based position at which the merged value is inserted.

    Raises:
        IndexError: if a data row is too short to contain ``col1`` (or
            ``col2`` when it is consulted).
    """
    target = dumpcolnum - 1
    with open(fileToRead, "r") as infile:
        reader = csv.reader(infile, delimiter=",")
        writer = csvwriter.csvwriter(filetodump)
        try:
            rows = []
            if skipheader:
                # The default keeps an empty input from raising
                # StopIteration.
                header = next(reader, None)
                if header is not None:
                    rows.append(header)

            for row in reader:
                merged = list(row)
                # Pad up to dumpcolnum cells; a non-positive count adds
                # nothing.
                merged.extend([""] * (dumpcolnum - len(merged)))

                # NOTE(review): the value is *inserted*, shifting later
                # cells right, and rows with both sources empty (and the
                # header) get no extra cell -- confirm this is intended
                # rather than assignment to the padded cell.
                if row[col1 - 1] != "":
                    merged.insert(target, row[col1 - 1])
                elif row[col2 - 1] != "":
                    merged.insert(target, row[col2 - 1])
                rows.append(merged)

            writer.writerows(rows)
        finally:
            writer.closewriter()
def _fill_forward(values, label):
    """Replace, in place, every "" that follows a run of `label` with `label`."""
    previous = None
    for index, current in enumerate(values):
        if current == "" and previous == label:
            values[index] = label
        else:
            previous = current


def writefeedingvalue(filename, listpr, lowage, upage):
    """
    Write one csv row per record describing its feeding type at each age.

    Each record gets ``int(upage - lowage)`` slots.  For every time
    attribute that has a "DOC" attribute and an age in ``[lowage, upage)``,
    the feeding type is read via ``returnfeedtype`` from the document's
    "Feeding:" text, falling back to "Appetite:" and then "Eating:".  The
    slot for that age is marked as visited and, when a feeding type was
    found, stores it.

    Gaps are then filled: empty slots following "Bottle" become "Bottle",
    and after that empty slots preceding "Breast" become "Breast".

    The output row is the record's MRN and UID followed by
    ``feed, visited`` pairs, one pair per slot.

    NOTE(review): the source was collapsed onto one line, so nesting is
    reconstructed.  The visited flag is assumed to be set for every in-range
    time attribute with a document, whether or not a feeding type was
    found -- confirm against the original layout.

    NOTE(review): slots are indexed by ``int(age)``, not by
    ``int(age) - lowage``, so a non-zero ``lowage`` leaves leading slots
    unused and can raise IndexError -- behaviour kept as in the original.

    Args:
        filename: path handed to ``csvwriter.csvwriter`` for the output.
        listpr: iterable of records exposing ``getalltimeattribute`` and
            ``getgenericattribute``.
        lowage: inclusive lower age bound.
        upage: exclusive upper age bound.
    """
    writer = csvwriter.csvwriter(filename)
    try:
        # Loop-invariant values, computed once.
        slot_count = int(upage - lowage)
        lower = Decimal(lowage)
        upper = Decimal(upage)

        rows = []
        for pr in listpr:
            feedlist = [""] * slot_count
            visit = [False] * slot_count

            for timeattrs in pr.getalltimeattribute():
                age = timeattrs.getage()
                doc = timeattrs.getattribute("DOC")
                if doc is None:
                    continue
                if not (age.compare(lower) >= 0 and age.compare(upper) < 0):
                    continue

                # First heading that yields a feeding type wins.
                value = None
                for heading in ("Feeding:", "Appetite:", "Eating:"):
                    value = returnfeedtype(doc.text.get(heading))
                    if value is not None:
                        break

                # An age that truncates to exactly the slot count (possible
                # when the age span is fractional) goes in the last slot.
                slot = int(age)
                if slot == len(feedlist):
                    slot -= 1

                if value is not None:
                    feedlist[slot] = value
                visit[slot] = True

            # "Bottle" spreads forward in time over empty slots ...
            _fill_forward(feedlist, "Bottle")
            # ... and "Breast" spreads backward, done as a forward pass
            # over the reversed list.
            feedlist.reverse()
            _fill_forward(feedlist, "Breast")
            feedlist.reverse()

            row = [
                str(pr.getgenericattribute("MRN")),
                str(pr.getgenericattribute("UID")),
            ]
            for feed, visited in zip(feedlist, visit):
                row.append(feed)
                row.append(str(visited))
            rows.append(row)

        writer.writerows(rows)
    finally:
        # Release the output even when a record fails to process.
        writer.closewriter()