if '-' in line: position, person = line[:line.rindex('-')], line[line.rindex('-')+1:] names = [x.strip() for x in person.split()] persons.append({'position':position, 'last':names[-1], 'first':' '.join(names[:-1])}) else: break return persons[::-1]
# NOTE(review): the statement run above is the tail of a function whose
# header and enclosing loop lie outside this view; it is left exactly as found.

# Driver: open the file at txtPath, split it into chunks with textChunker and
# extract the individual fields from every chunk.
# NOTE(review): the original line breaks were lost in this view; the nesting
# below (everything inside the `with`) is an assumption -- confirm.
with open(txtPath,'rb') as readFile:
    chunks = textChunker(readFile)
    rowArray = []
    for chunk in chunks:
        # Encode every element of the chunk as UTF-8 before the string
        # handling below.
        chunk = [x.encode('utf-8') for x in chunk]
        # Replace bullet and middle-dot characters in the first element with
        # a plain hyphen.
        chunk[0] = chunk[0].replace('•','-').replace('·','-')
        # city name, pop, county
        city, pop, county = getCPC(chunk[0])
        # Each helper below receives the whole chunk; their results are only
        # bound to names in the part of the file visible here.
        pb, Zip = poBox(chunk)
        phone, fax = phone_fax(chunk)
        wh = hours(chunk)
        address = getAddress(chunk)
        persons = getPeople(chunk)
] with open(result, 'wb') as writeFile: wD = csv.DictWriter(writeFile, headersList, restval='', extrasaction='raise', dialect='excel') wD.writeheader() personList = [] for f in getFiles(source): chapter = f.split('/')[-1].replace('.txt', '').replace('_', ' ') print chapter.upper() print with open(f, 'rb') as text: chunks = textChunker(text) for chunk in chunks: p = person(chunk) p.category = chapter p.analyse() personList.append(p) p.plotRaw() for person in personList: wD.writerow(person.asDict()) print 'done!'
'web', 'rawText'] with open(result,'wb') as writeFile: wD = csv.DictWriter(writeFile, headersList,restval='', extrasaction='raise', dialect='excel') wD.writeheader() personList =[] for f in getFiles(source): chapter = f.split('/')[-1].replace('.txt','').replace('_',' ') print chapter.upper() print with open(f, 'rb') as text: chunks = textChunker(text) for chunk in chunks: p = person(chunk) p.category = chapter p.analyse() personList.append(p) p.plotRaw() for person in personList: wD.writerow(person.asDict()) print 'done!'
position, person = line[:line.rindex('-')], line[line.rindex('-') + 1:] names = [x.strip() for x in person.split()] persons.append({ 'position': position, 'last': names[-1], 'first': ' '.join(names[:-1]) }) else: break return persons[::-1]
# NOTE(review): the statement run above is the tail of a function whose
# header and enclosing loop lie outside this view; it is left exactly as found.

# Driver: open the file at txtPath, split it into chunks with textChunker and
# extract the individual fields from every chunk.
# NOTE(review): the original line breaks were lost in this view; the nesting
# below (everything inside the `with`) is an assumption -- confirm.
with open(txtPath, 'rb') as readFile:
    chunks = textChunker(readFile)
    rowArray = []
    for chunk in chunks:
        # Encode every element of the chunk as UTF-8 before the string
        # handling below.
        chunk = [x.encode('utf-8') for x in chunk]
        # Replace bullet and middle-dot characters in the first element with
        # a plain hyphen.
        chunk[0] = chunk[0].replace('•', '-').replace('·', '-')
        # city name, pop, county
        city, pop, county = getCPC(chunk[0])
        # Each helper below receives the whole chunk; their results are only
        # bound to names in the part of the file visible here.
        pb, Zip = poBox(chunk)
        phone, fax = phone_fax(chunk)
        wh = hours(chunk)
        address = getAddress(chunk)
        persons = getPeople(chunk)