def __init__(self):
    """Create the file parser, four empty lookup dicts and the logger.

    Nothing is parsed here: only an ``rfp.reqboxfileparser`` instance is
    built and the ``unique*`` dictionaries start out empty.
    """
    # Public
    # Parser instance. A sample parse call was left disabled by the author:
    #   self.fp.parsefile("./data/LRCv12.txt")
    self.fp = rfp.reqboxfileparser()

    # One independent dict per "unique" table (never shared objects).
    self.uniquerfi, self.uniquerfn = dict(), dict()
    self.uniquernf, self.uniquergn = dict(), dict()

    # Logger: vlogger bound to stdout, verbosity taken from VERB_MAX.
    self.__verbosity = VERB_MAX
    self.vlog = vlogger(self.__verbosity, sys.stdout)
def __init__(self):
    """Create the model object, reset the input file and all parse flags.

    ``inputfile`` starts as ``None`` and every ``parse*`` attribute starts
    at ``0``; the logger is stored on ``self.logv``.
    """
    # Public
    self.rbm = rbm.reqboxmodel()
    self.inputfile = None

    # Every parse* attribute starts at 0 (ints are immutable, so chained
    # assignment is safe here).
    self.parseall = self.parsefun = 0
    self.parserfi = self.parserfn = 0
    self.parsernf = self.parsergn = 0

    # Logger: vlogger bound to stdout, verbosity taken from VERB_MAX.
    self.__verbosity = VERB_MAX
    self.logv = vlogger(self.__verbosity, sys.stdout)
def __init__(self):
    """Start with no file: empty name, empty containers, no open handles."""
    # Public
    self.filename = ''
    self.funlist = list()
    self.fundict = dict()

    # Logger: vlogger bound to stdout, verbosity taken from VERB_MAX.
    # (An alternative, ``self.vlog = self.__log()``, was left disabled.)
    self.__verbosity = VERB_MAX
    self.vlog = vlogger(self.__verbosity, sys.stdout)

    # mmap state: neither the file object nor the mapping exists yet.
    self.__file = self.__f = None
def __init__(self, filename):
    """Open *filename* and map its whole content read-only into memory.

    Args:
        filename: Path of the input file to open and map.

    Raises:
        OSError: If the file cannot be opened or mapped.
        ValueError: If the mapping cannot be created (e.g. for an empty
            file). The file handle is closed before the error propagates.
    """
    # Public
    self.filename = filename
    self.outfile = 'out.csv'
    self.clist = []

    # Init vlogger
    self.__verbosity = VERB_MAX
    self.vlog = vlogger(self.__verbosity, sys.stdout)
    # (An alternative, ``self.vlog = self.__log()``, was left disabled.)

    # Init mmap
    # The codecs stream is kept on the instance; the mapping itself is built
    # from the raw file descriptor, so reads through it yield bytes.
    self.__file = codecs.open(filename, encoding='utf-8', mode='r')
    self.vlog(VERB_MIN, "opening file: %s" % filename)
    try:
        self.__f = mmap.mmap(self.__file.fileno(), 0, access=mmap.ACCESS_READ)
    except (OSError, ValueError):
        # Do not leak the already-open handle when the mapping fails.
        self.__file.close()
        raise
    self.__f.seek(0)  # rewind
def fixcrlf(sourcefile, destinationfile):
    """Copy *sourcefile* to *destinationfile*, turning bare CRs into CRLF.

    The source is memory-mapped and consumed one ``readline()`` chunk at a
    time. A chunk containing exactly one CR is copied unchanged; any other
    chunk has its bare CRs expanded to CRLF. Progress and a final summary
    are reported through a ``vlogger`` writing to stdout.

    Args:
        sourcefile: Path of the file to read (opened in binary mode).
        destinationfile: Path of the file to write; it is created or
            truncated.

    Raises:
        OSError: If either file cannot be opened or the source cannot be
            mapped.
        ValueError: If the mapping cannot be created (e.g. empty source).
    """
    vlog = vlogger(2, sys.stdout)
    vlog(VERB_MIN, "Inputfile: %s" % sourcefile)

    linecounter = 0  # chunks read from the source
    totallines = 0   # lines accounted for in the destination

    # Only the descriptor of the source is needed (all bytes come from the
    # mmap), so a plain binary open is enough. The context managers make sure
    # the mapping and both files are closed -- and the output flushed -- even
    # when an error occurs part-way through.
    with open(sourcefile, 'rb') as sf, \
            mmap.mmap(sf.fileno(), 0, access=mmap.ACCESS_READ) as sm, \
            open(destinationfile, 'wb') as df:
        loc = 0
        end = sm.size()
        while loc < end:
            fileline = sm.readline()
            vlog(VERB_MED, "parsing line: %d" % linecounter)
            loc += len(fileline)
            count = fileline.count(b'\r')
            if count == 1:
                df.write(fileline)
                totallines += 1
            else:
                # NOTE: chunks without any CR also take this branch; they
                # are written unchanged and add 0 to totallines.
                vlog(VERB_MED, "FIXING multiline: %d lines on source line %d" % (count, linecounter))
                df.write(_expand_bare_cr(fileline))
                totallines += count
            linecounter += 1

    vlog(VERB_MIN, "Inputfile: %s" % sourcefile)
    vlog(VERB_MIN, " * Total number of lines: %d" % linecounter)
    vlog(VERB_MIN, "Outfile: %s" % destinationfile)
    vlog(VERB_MIN, " * Total number of lines: %d" % totallines)


def _expand_bare_cr(chunk):
    """Return *chunk* with each bare CR replaced by CRLF.

    *chunk* is expected to be a single ``readline()`` result, so the only
    CRLF it can contain is the trailing one. That terminator is left intact;
    blindly replacing every CR would turn it into CR LF LF and add a stray
    LF to the output.
    """
    if chunk.endswith(b'\r\n'):
        return chunk[:-2].replace(b'\r', b'\r\n') + b'\r\n'
    return chunk.replace(b'\r', b'\r\n')


def fixdoublecrlf(sourcefile, destinationfile):
    """Placeholder, not implemented: does nothing and returns ``None``.

    Author's notes for the planned implementation:
      * HEX pattern to match: 0D0A0A0D0A
      * Idea: just remove 0A0D0A
    """