lineNO += 1 # some cells have " appended outside |^ delimiters (?) so s/,"|^/,|^/ and s/|^",/|^,/ # test: even number of gsub! if len(lead2quote.findall(line)) != len(trail2quote.findall(line)): print "l. 36: CSV has unbalanced double quotes, line # %i" % lineNO ##DEBUG print "len(lead2quote.findall(line)): " + str(len(lead2quote.findall(line))) print "len(trail2quote.findall(line)): " + str(len(trail2quote.findall(line))) sys.exit() ##KILL PRogram if throw excep line = lead2quote.sub(r",~~~", line) line = trail2quote.sub(r"~~~,", line) # remove lead/trail |^, split, check # cells\ line = leaddelim.sub(r"", line) line = traildelim.sub(r"", line) line_exploded = delim_re.split(line, 15) # FIX: pyrb: trail'g "~";; rails: no trail'g # Line # in CSV file is 1-indexed, colNO = 0 while colNO < HEADCOLS: try: if not allblank.match(line_exploded[colNO]): if 255 < len(line_exploded[colNO]): print "!!!! col headg too long: " + line_exploded[colNO] print "row num in XLS: " + repr(lineNO + REAL_1ST_LINE_NUM) if colNO == 0: rowheadgs.initheadgs() ## rowheadgs=[0,"","","",""] rowheadgs.rowheadgs[0] = lineNO + REAL_1ST_LINE_NUM rowheadgs.rowheadgs[1 + colNO] = line_exploded[colNO] # fix case: same-col head'g split over 2 vert cells subseqcolNO = colNO + 1
# Read every row of incsv, normalise its quoting/delimiters and collect the
# split cells in arrayofcells.
# NOTE(review): this section had lost its line breaks; the indentation below
# is reconstructed -- confirm that all diagnostic prints belong to the
# column-count check.
for line in incsv:
    line = line.rstrip()
    lineNO += 1

    # A few cells carry a '"' appended outside the "~~~" delimiters; those are
    # rewritten with lead2quote / trail2quote just below.  Sanity test first:
    # both patterns should match the same number of times (even number of
    # substitutions).  A mismatch is only reported; processing continues.
    if len(lead2quote.findall(line)) != len(trail2quote.findall(line)):
        print("CSV has unbalanced double quotes, line # %i" % lineNO)
    line = lead2quote.sub(r',~~~', line)
    line = trail2quote.sub(r'~~~,', line)

    # Remove the leading and the trailing delimiter, then split into cells
    # and check the number of cells.
    line = traildelim.sub(r'', leaddelim.sub(r'', line))

    ## These 4 chars become &lt; etc: & < > "   -- check / test this method.
    ## Step removed for debug, 9/19/06:
    ## line = mult_rplc(multrplc.htmlDICT,line)
    line_exploded = delim_re.split(line)

    # Debug output for any row whose cell count differs from NUMCOLS; the row
    # is appended to arrayofcells regardless.
    if len(line_exploded) != NUMCOLS:
        print("PROB LIne numb: ")
        print(lineNO)
        print("")
        print("line_exploded: ")
        print(line_exploded)
        print("PROBLEM:: Len(line_exploded_: ")
        print(len(line_exploded))

    # arrayofcells is all data in CSVtest.csv
    arrayofcells.append(line_exploded)

## print "len(arrayofcells)"; print len(arrayofcells)
## print "()()()() arrayofcells"; print arrayofcells   ## debug

############ build cells separated by \n for mysql LOAD IN INFILE
# 1st diffs on newcellrows, e.g. rb each_cons
firstdiff = []