def process(htmlfile):
    """
    The workhorse function. Iterates through each file, extracts the
    passers from each HTML file and writes them to a CSV file.
    """
    page = open(htmlfile)
    soup = BeautifulSoup(page)
    tables = soup.findAll('table', {'class': ['printable']})

    # Check if the output file exists and append if it does
    outfile = 'passers.csv'
    if os.path.isfile(outfile):
        f = open(outfile, 'ab')
        writer = csvkit.writer(f)
    else:
        f = open(outfile, 'wb')
        writer = csvkit.writer(f)
        writer.writerow(("Name", "Campus", "Course"))

    # Skip the header row of the results table
    for row in tables[0].findAll('tr')[1:]:
        col = row.findAll('td')
        name = col[0].text
        # Strip the 13-character parenthesized suffix from the name
        if name.endswith(")"):
            name = name[:-13]
        campus = col[1].text
        course = col[2].text
        writer.writerow((name, campus, course))

    f.close()
    page.close()
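A minimal driver sketch for process(), assuming the imports the function relies on (os, csvkit, BeautifulSoup) are already in scope; the results/*.html glob pattern is illustrative, not from the original:

import glob

# Append the passers from every downloaded results page to passers.csv
for htmlfile in sorted(glob.glob('results/*.html')):
    process(htmlfile)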
def to_csv(self, path, **kwargs):
    """
    Write this table to a CSV. This method will use csvkit if it is
    available, otherwise it will use Python's builtin csv module.

    ``kwargs`` will be passed through to :meth:`csv.writer`.

    If you are using Python 2 and not using csvkit, this method is not
    unicode-safe.

    :param path: Filepath or file-like object to write to.
    """
    if 'lineterminator' not in kwargs:
        kwargs['lineterminator'] = '\n'

    close = True

    try:
        if hasattr(path, 'write'):
            f = path
            close = False
        else:
            f = open(path, 'w')

        writer = csv.writer(f, **kwargs)
        writer.writerow(self._column_names)

        for row in self._rows:
            writer.writerow(row)
    finally:
        if close:
            f.close()
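A minimal usage sketch for the method above. The FakeTable class is a hypothetical stand-in (not part of the original) exposing the two attributes the method reads, and csv is assumed imported as in the snippet; per the docstring, both a filepath and any file-like object are accepted:

import io

class FakeTable(object):
    """Hypothetical stand-in with the attributes to_csv reads."""
    _column_names = ['id', 'name']
    _rows = [[1, 'a'], [2, 'b']]
    to_csv = to_csv  # bind the function above as a method

table = FakeTable()
table.to_csv('output.csv')   # filepath target

buf = io.StringIO()          # any object with a write() method also works
table.to_csv(buf)
print(buf.getvalue())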
def __init__(self):
    self.argparser = argparse.ArgumentParser(
        description='A command line utility for processing FEC data dumps.'
    )

    self.argparser.add_argument(
        dest='input', action='store',
        help='Path to input CSV.'
    )

    self.argparser.add_argument(
        dest='output', action='store',
        help='Path to output CSV.'
    )

    self.argparser.add_argument(
        '-a', '--amendments',
        dest='keep_amendments', action='store_true',
        help='Keep amendments (instead of filtering them out).'
    )

    self.argparser.add_argument(
        '-o', '--office',
        dest='office', action='store',
        help='Filter output to only a certain office.'
    )

    self.args = self.argparser.parse_args()

    self.amended_ids = set()

    # Read input data
    with open(self.args.input) as f:
        reader = csvkit.reader(f)
        self.header = next(reader)
        rows = list(reader)

    sys.stdout.write('Read %i rows\n' % len(rows))

    # Discover amendments
    if not self.args.keep_amendments:
        for row in rows:
            if row[self.header.index('amn_ind')] != 'N':
                self.amended_ids.add(row[self.header.index('prev_file_num')])

    # Filter data
    output_rows = list(filter(self.filter_row, rows))

    sys.stdout.write('Saving %i rows\n' % len(output_rows))

    # Write output
    with open(self.args.output, 'w') as f:
        writer = csvkit.writer(f)
        writer.writerow(self.header)
        writer.writerows(output_rows)
def to_csv(self, output, **kwargs):
    """
    Serializes the table to CSV and writes it to any file-like object.
    """
    rows = self.to_rows(serialize_dates=True)

    # Insert header row
    rows.insert(0, self.headers())

    csv_writer = writer(output, **kwargs)
    csv_writer.writerows(rows)
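A minimal call sketch for the serializer above, assuming writer is already imported as the snippet expects; FakeTable is a hypothetical stand-in (not from the original) providing what the method calls, and an in-memory buffer is enough to inspect the output:

import io

class FakeTable(object):
    """Hypothetical stand-in exposing what to_csv calls."""
    def headers(self):
        return ['id', 'name']
    def to_rows(self, serialize_dates=False):
        return [[1, 'a'], [2, 'b']]
    to_csv = to_csv  # bind the function above as a method

buf = io.StringIO()
FakeTable().to_csv(buf)
print(buf.getvalue())    # header row followed by the serialized rows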
def test_writer_alias(self):
    output = six.StringIO()
    writer = csvkit.writer(output)
    writer.writerow(['a', 'b', 'c'])
    writer.writerow(['1', '2', '3'])
    writer.writerow(['4', '5', u'ʤ'])

    written = six.StringIO(output.getvalue())

    reader = csvkit.reader(written)
    self.assertEqual(next(reader), ['a', 'b', 'c'])
    self.assertEqual(next(reader), ['1', '2', '3'])
    self.assertEqual(next(reader), ['4', '5', u'ʤ'])
def test_writer_alias(self):
    output = six.StringIO()
    writer = csvkit.writer(output, encoding='utf-8')
    self.assertEqual(writer._eight_bit, True)
    writer.writerow(['a', 'b', 'c'])
    writer.writerow(['1', '2', '3'])
    writer.writerow(['4', '5', u'ʤ'])

    written = six.StringIO(output.getvalue())

    reader = csvkit.reader(written, encoding='utf-8')
    self.assertEqual(next(reader), ['a', 'b', 'c'])
    self.assertEqual(next(reader), ['1', '2', '3'])
    self.assertEqual(next(reader), ['4', '5', u'ʤ'])
def clean(self):
    """
    Cleans the provided source TSV file and writes it out in CSV format.
    """
    # Create the output object
    with open(self.csv_path, 'w') as csv_file:
        # Create the CSV writer
        csv_writer = csvkit.writer(csv_file)

        # Write the headers
        csv_writer.writerow(self.headers)

        # Write out the rows
        for row in self._convert_tsv():
            csv_writer.writerow(row)

    # Log errors if there are any
    if self.log_rows:
        # Log to the terminal
        if self.verbosity > 2:
            msg = ' {} errors logged (not including empty lines)'
            self.failure(msg.format(len(self.log_rows)))

        # Log to the file
        with open(self.error_log_path, 'w') as log_file:
            log_writer = csvkit.writer(log_file, quoting=csv.QUOTE_ALL)
            log_writer.writerow(['headers', 'fields', 'value'])
            log_writer.writerows(self.log_rows)

    # Add counts to raw_file_record
    self.raw_file.clean_columns_count = self.headers_count
    self.raw_file.error_count = len(self.log_rows)
    self.raw_file.clean_records_count = (
        self.raw_file.download_records_count - self.raw_file.error_count
    )

    # Add file sizes to the raw_file_record
    self.raw_file.download_file_size = os.path.getsize(self.tsv_path) or 0
    self.raw_file.clean_file_size = os.path.getsize(self.csv_path) or 0

    # Save it in case it crashes in the next step
    self.raw_file.save()
def export_sources_csv(cursor, output_filename):
    cursor.row_factory = sqlite3.Row
    sql = """
        SELECT author, name, country, volume_number, volume_date, editor,
               edition_date, pages, shelf_number, URL, source_category,
               type, notes
        FROM sources AS s
        WHERE s.slug IN (SELECT DISTINCT source FROM flow_joined)
           OR s.slug IN (SELECT DISTINCT source FROM exchange_rates)"""
    rows = cursor.execute(sql)
    first = next(rows)

    with open(output_filename, 'w') as f:
        dw = csvkit.writer(f)
        # Header: a leading column for the formatted reference, then the raw fields
        dw.writerow(["bibliographic reference"] + list(first.keys()))
        dw.writerow(formatRef(first))
        dw.writerows(formatRef(r) for r in rows)
    return 0
def tsvTOcsv(input_file_name, out_file):
    """
    Converts a TSV-formatted file to CSV; used to make the CSV file that is
    readable by this EEG report feature analyzer.

    :param input_file_name: a TSV file
    :param out_file: a CSV file
    """
    with open(input_file_name, 'r') as tsvin, \
            open(out_file, 'w', newline='') as csvout:
        reader = csv.reader(tsvin, delimiter='\t')
        writer = csv.writer(csvout)
        for row in reader:
            # Skip blank lines
            if len(row) > 0:
                writer.writerow(row)
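A one-line call sketch for the converter above; the file names are illustrative:

# Convert an EEG report TSV into the CSV layout the analyzer expects
tsvTOcsv('report.tsv', 'report.csv')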
def to_csv(self, file_path):
    """
    Store all categories as a CSV file.
    """
    columns = ["id", "label", "parent"]
    with open(file_path, 'w') as f:
        writer = csv.writer(f)
        writer.writerow(columns)
        for category in self.categories:
            if category.parent:
                parent = category.parent.id
            else:
                parent = None
            row = [
                category.id,
                category.label,
                parent,
            ]
            writer.writerow(row)
def generate_dictionary(self):
    dataset_id = self.dataset.id
    dim_id = self.id

    if dim_id in AJAX_API_ENDPOINTS[dataset_id]:
        opts = AJAX_API_ENDPOINTS[dataset_id][dim_id]
        categories = self._categories_from_ajax_api()

        file_dir = os.path.join("vantetider/data", dataset_id)
        file_path = os.path.join(file_dir, dim_id + ".csv")
        if not os.path.exists(file_dir):
            os.makedirs(file_dir)

        with open(file_path, 'w') as f:
            writer = csv.writer(f)
            headers = ["id", "label"]
            if "attributes" in opts:
                headers += [x[1] for x in opts["attributes"]]
            writer.writerow(headers)
            for cat in categories.values():
                row = [getattr(cat, x) for x in headers]
                writer.writerow(row)
def to_csv(self, path, **kwargs):
    """
    Write table to a CSV. Will use csvkit if it is available, otherwise
    will use Python's builtin csv module. ``kwargs`` will be passed
    through to :meth:`csv.writer`.

    Note: if using Python 2 and not using csvkit, this method is not
    unicode-safe.

    :param path: Path to the CSV file to write to.
    """
    if 'lineterminator' not in kwargs:
        kwargs['lineterminator'] = '\n'

    with open(path, 'w') as f:
        writer = csv.writer(f, **kwargs)
        writer.writerow(self._column_names)

        for row in self._data:
            writer.writerow(row)
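Because ``kwargs`` is passed straight through to :meth:`csv.writer`, dialect options can be set at the call site. A sketch, assuming a hypothetical table object that exposes the method above:

# Tab-separated output via csv.writer's `delimiter` option;
# `table` is a hypothetical object with the to_csv method above.
table.to_csv('output.tsv', delimiter='\t')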
def csv_writer(csv_file):
    yield csv.writer(csv_file)
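The one-line generator above has the shape of a pytest fixture (yield the writer, then let teardown run). A sketch of one way it might be wired up, assuming pytest; opening a temporary file inside the fixture is an assumption, not part of the original:

import csv
import pytest

@pytest.fixture
def csv_writer(tmp_path):
    # Yield a writer bound to a temp file; the file closes after the test
    with open(tmp_path / 'out.csv', 'w', newline='') as f:
        yield csv.writer(f)

def test_writes_header(csv_writer):
    csv_writer.writerow(['id', 'label', 'parent'])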
#!/usr/bin/env python
# from http://unix.stackexchange.com/questions/60590/is-there-a-command-line-utility-to-transpose-a-csv-file
import sys

import csvkit as csv

rows = list(csv.reader(sys.stdin))
writer = csv.writer(sys.stdout)

# Emit one output row per input column
for col in range(len(rows[0])):
    writer.writerow([row[col] for row in rows])
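A usage sketch for the transpose filter, written as comments since the script is driven entirely by stdin/stdout; the file name transpose.py is an assumption:

# Usage:
#   printf 'a,b\n1,2\n3,4\n' | python transpose.py
# prints the columns as rows:
#   a,1,3
#   b,2,4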
def csv_writer(response):
    yield csv.writer(response)
def process_files():
    # Get the uploaded files
    isw_name = request.forms.get('isw_name')
    ineligible_attended_file = request.files.get('ineligible_attended')
    ineligible_registered_file = request.files.get('ineligible_registered')
    waitlist_file = request.files.get('waitlist')
    registrants_file = request.files.get('registrants')

    if not (file_ext_ok(ineligible_attended_file.filename) and
            file_ext_ok(ineligible_registered_file.filename) and
            file_ext_ok(waitlist_file.filename) and
            file_ext_ok(registrants_file.filename)):
        return 'Unfortunately, this thing can only handle CSV or XLS files.'

    # Read all the files into lists of Persons
    ineligible_registered = listreader.readRegistered(listreader.getReader(ineligible_registered_file))
    ineligible_attended = listreader.readAttended(listreader.getReader(ineligible_attended_file))
    registrants = listreader.readRegistrants(listreader.getReader(registrants_file))
    waitlist = listreader.readWaitlist(listreader.getReader(waitlist_file))

    # Remove ineligible registrants
    registrants = [person for person in registrants if person not in ineligible_registered]
    registrants = [person for person in registrants if person not in ineligible_attended]

    # All registrants are eligible at this point, so add them into the pool once
    pool = []
    info = {}
    for person in registrants:
        pool.append(person)
        info.setdefault(person.email, person)

    # Insert people into the pool however many times they're on the waitlist
    waitlist = Counter(waitlist)  # Count how many times each person appears in the waitlist
    registrants_in_waitlist = [person for person in registrants if person in waitlist]
    for person in registrants_in_waitlist:
        numTimesWaitlisted = waitlist[person]
        for i in range(numTimesWaitlisted):
            pool.append(person)

    # Randomly assign people in the pool a ranking
    ranking = []
    while len(pool) > 0:
        rand = randrange(len(pool))
        # This person was randomly selected
        randperson = pool[rand]
        ranking.append(randperson)
        # Remove every copy of this person from the pool, so each person
        # appears in the ranking at most once
        pool = [person for person in pool if person != randperson]

    # Write the output file
    output_file_path = "results/" + isw_name + ".csv"
    with open(output_file_path, 'wb') as output_file:
        outputwriter = writer(output_file)
        outputwriter.writerow(["First Name", "Last Name", "Email",
                               "Cell or Home Phone Number",
                               "Address (Street Address)",
                               "Address (Address Line 2)",
                               "Address (City)",
                               "Address (State / Province)",
                               "Address (ZIP / Postal Code)",
                               "Address (Country)",
                               "Institution", "Degree", "Faculty", "Department",
                               "Is this your first time submitting your name for an ISW workshop?",
                               "Have you ever been employed OR received funds from UBC?"])
        for person in ranking:
            user = info.get(person.email)
            outputwriter.writerow([user.firstname, user.lastname, user.email,
                                   user.number, user.address1, user.address2,
                                   user.city, user.state, user.zip,
                                   user.country, user.institution, user.degree,
                                   user.faculty, user.department,
                                   user.firsttime, user.fund])

    return dict(ranking=ranking, output_file=output_file_path)
def handle(self, *args, **options):
    verbosity = options['verbosity']
    if verbosity == '0':
        self.logger.setLevel(logging.ERROR)
    elif verbosity == '1':
        self.logger.setLevel(logging.WARNING)
    elif verbosity == '2':
        self.logger.setLevel(logging.INFO)
    elif verbosity == '3':
        self.logger.setLevel(logging.DEBUG)

    csvfile = options['csv_file']
    encoding = options['encoding']

    csv_out = out = options['out']
    if isinstance(out, str):
        csv_out = open(out, 'wb')

    writer = csv.writer(csv_out, delimiter=';', quotechar='"', encoding=encoding)
    writer.writerow([
        'slug', 'url', 'attivo', 'tema', 'natura', 'cup',
        'programma', 'classificazione_qsn', 'fondo_comunitario',
        'fin_totale_pubblico', 'fin_totale_pubblico_netto',
        'pagamento', 'stato_progetto', 'stato_finanziamenti'
    ])

    locale.setlocale(locale.LC_ALL, 'it_IT.UTF-8')

    with open(csvfile, 'rb') as cfile:
        reader = csv.reader(cfile, delimiter=',', quotechar='"')
        for r in reader:
            slug = None
            url = '-'
            output_r = r

            if not r:
                continue

            url = r[0].strip()
            slug_search = re.search(
                r'^(http://){0,1}(www\.){0,1}opencoesione.gov.it/progetti/('
                r'.*?)/?$',
                url, re.IGNORECASE
            )
            if slug_search:
                slug = slug_search.group(3)

            if slug and '/' not in slug:
                output_r = [slug, r[0]]
                try:
                    p = Progetto.fullobjects.get(slug=slug)
                    is_active = p.active_flag
                    tema = p.tema.tema_superiore.short_label
                    natura = p.classificazione_azione.classificazione_superiore.short_label
                    cup = p.cup
                    programma = ','.join([f.descrizione for f in p.fonti_fin])
                    class_qsn = p.classificazione_qsn.classificazione_superiore.classificazione_superiore.descrizione
                    fondo_com = p.get_fondo_comunitario_display()
                    fin_tot = locale.currency(p.fin_totale_pubblico).replace('Eu', u'€')
                    fin_tot_netto = locale.currency(p.fin_totale_pubblico_netto).replace('Eu', u'€')
                    pagamento = locale.currency(p.pagamento).replace('Eu', u'€')
                    stato_fin = p.get_stato_finanziario_display()
                    stato_prog = p.get_stato_progetto_display()
                    output_r.extend([is_active, tema, natura, cup,
                                     programma, class_qsn, fondo_com,
                                     fin_tot, fin_tot_netto, pagamento,
                                     stato_fin, stato_prog])
                except ObjectDoesNotExist:
                    pass

            self.logger.info(r[0])
            writer.writerow(output_r)
diode_forw = Diode(modif_param("I0", diode_forw.I0),
                   modif_param("eta", diode_forw.eta))
resistance_circ = Resistances(
    modif_param("Rsh", resistance_circ.Rsh),
    modif_param("Rs", resistance_circ.Rs))

# Saving results: creates a folder in a given path and writes the parameters
# and the simulated IV curve to a CSV file
namefile = input("Enter the name of the file: ")
namefolder = "IV_Simulation"
path = input("Enter the path to save the result: ")

# namefile = "TestResults_"  # This block can be used to speed up during testing
# namefolder = "IV_Simulation"
# path = data_path

os.chdir(path)
try:
    os.mkdir(namefolder)
except FileExistsError:
    # The folder already exists; just enter it
    pass
os.chdir(namefolder)

with open(namefile + ".csv", "w") as file:
    writer = csvkit.writer(file, delimiter=";")
    writer.writerows([["I0 [A]", "eta", "Rs [Ohm]", "Rsh [Ohm]"],
                      [diode_forw.I0, diode_forw.eta,
                       resistance_circ.Rs, resistance_circ.Rsh]])
    writer.writerows([["Voltage", "Current"], ["V", "A"]])
    # Pair voltages with currents directly instead of looking up indices,
    # which would break on duplicate voltage values
    for volt, curr in zip(volt_fit, curr_fit):
        writer.writerow([str(volt), str(curr)])
import sys
from collections import defaultdict

import csvkit

writer = csvkit.writer(sys.stdout)

with open(sys.argv[1]) as csv_file:
    for i, row in enumerate(csvkit.reader(csv_file)):
        # First row is the header; every column after the first holds integers
        if i == 0:
            col_count = len(row) - 1
            freqs = [defaultdict(int) for col in range(col_count)]
            continue

        for col in range(col_count):
            freqs[col][int(row[col + 1])] += 1

# Collect every value observed in any column
values = set()
for col in range(col_count):
    values.update(freqs[col].keys())

# Emit one row per value: the value, then its frequency in each column
for val in sorted(values):
    val_freqs = [freqs[col][val] for col in range(col_count)]
    writer.writerow([val] + val_freqs)
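A usage sketch for the frequency script above, as comments since it is invoked from the shell; the file name freq.py is an assumption. The input's first column is treated as a label and every other column as integers:

# Given input.csv:
#   id,a,b
#   r1,1,2
#   r2,1,1
# running `python freq.py input.csv` prints, for each distinct value,
# its frequency in each column:
#   1,2,1
#   2,0,1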
with open('../../csv_data/flows.csv', 'r') as f:
    with open('../../csv_data/new_flows.csv', 'w') as nf:
        flows = csvkit.DictReader(f)
        newFlows = csvkit.DictWriter(nf, flows.fieldnames)
        newFlows.writeheader()
        for flow in flows:
            if flow['source'] in swapSources:
                flow['source'] = swapSources[flow['source']]
            else:
                missingSources.add(flow['source'])
            newFlows.writerow(flow)

with open('../../csv_data/exchange_rates.csv', 'r') as f:
    with open('../../csv_data/new_exchange_rates.csv', 'w') as nf:
        rates = csvkit.DictReader(f)
        newRates = csvkit.DictWriter(nf, rates.fieldnames)
        newRates.writeheader()
        for rate in rates:
            if rate['source'] in swapSources:
                rate['source'] = swapSources[rate['source']]
            else:
                missingSources.add(rate['source'])
            newRates.writerow(rate)

with open('missing_sources.list', 'w') as ms:
    csvkit.writer(ms).writerows([_] for _ in missingSources)

# modify schema (by hand): done
# try to generate the new database
# test and update sources.csv API
# test and update source representation in client (metadata and data tables)
                if endorseCount.text is None:
                    endorseCount.text = 0
                print(endorseCount.text)
                newRow.append(endorseCount.text)
            else:
                newRow.append("")
                print(0)
        except:
            print(0)

        # Save the new row
        print(newRow)
        f = open('csvOutput copy.csv', 'a')
        try:
            writer = csvkit.writer(f)
            writer.writerow(newRow)
        except:
            print("could not write row")
        finally:
            f.close()
    except:
        print("messed up skills")

# 2 goes 1-40
for otherSkills in range(1, 41):
def run():
    if not os.path.isdir(DIR):
        os.mkdir(DIR)

    csvf = open('{}/info.csv'.format(DIR), 'wb')
    csvw = csvkit.writer(csvf)
    csvw.writerow([
        u'設計館帳號',
        u'設計館名稱',
        u'公司名稱 / 姓名',
        u'統編 / 身分證',
        u'地址',
        u'電話',
        u'email',
        u'收款戶名',
        u'收款銀行名稱',
        u'收款帳號',
    ])

    with db as cur:
        cur.execute('''
            select
                sid, name, contact_tel, contact_mobile, identity,
                identity_type, payment_info, receipt_setting, finance_email
            from shop
            where sid in {}
        '''.format(in_operand(SIDS)))
        rows = list(cur)

    for (sid, name, contact_tel, contact_mobile, identity, identity_type,
         payment_info, receipt_setting, finance_email) in rows:

        payment_info = json.loads(payment_info) if payment_info else {}
        receipt_setting = json.loads(receipt_setting) if receipt_setting else {}

        # Shop name
        name_d = json.loads(name)

        # Get the national ID number, invoice title and address
        nationalid = identity
        title = receipt_setting.get('title', '')
        address = receipt_setting.get('address', '')

        # Get the bank fields
        bank_name_n_code = ''
        bank_account = ''
        bank_account_name = ''
        payment_method = payment_info.get('payment_method')
        if payment_method == 'atm':
            bank_code = payment_info['bank']
            bank_name = Payment.TW_BANK_MAP.get(bank_code, '')
            if bank_name:
                bank_name_n_code = u'{} ({})'.format(bank_name, bank_code)
            bank_account = payment_info['account']
            bank_account_name = payment_info['name']

        csvw.writerow([
            sid,
            name_d.get('zh_TW', name_d.get('en', '')),
            title,
            nationalid,
            address,
            contact_tel or contact_mobile or '',
            finance_email,
            bank_account_name,
            bank_name_n_code,
            bank_account
        ])

        if not os.path.isfile('{}/{}_201701-201712.xlsx'.format(DIR, sid)):
            export_bill(sid)

    csvf.close()
#!/usr/bin/env python
# Remove newline chars from CSV "cells"
# Input is taken from stdin and output spit to stdout

import sys

import csvkit

reader = csvkit.reader(sys.stdin)
writer = csvkit.writer(sys.stdout)

for row in reader:
    for i in range(len(row)):
        if isinstance(row[i], str) and "\n" in row[i]:
            row[i] = row[i].replace("\n", '')
    writer.writerow(row)
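A usage sketch for the newline-stripping filter, as comments since it reads stdin and writes stdout; the file name strip_newlines.py is an assumption:

# Usage:
#   python strip_newlines.py < input.csv > output.csv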