示例#1
0
def process(htmlfile):
    """
    Extract the passers table from one HTML file and append the rows to
    ``passers.csv``.

    :param htmlfile: path to an HTML file containing a table with class
                     ``printable`` whose rows are (name, campus, course).

    The header row is written only when the output file does not exist
    yet, so repeated calls accumulate rows from several input files.
    """
    # Context managers guarantee the handles are closed even if parsing
    # or writing raises (the original leaked both on exception).
    with open(htmlfile) as page:
        soup = BeautifulSoup(page)
    tables = soup.findAll('table', {'class': ['printable']})

    # Append when the output already exists; otherwise create it and
    # write the header row first.
    outfile = 'passers.csv'
    is_new = not os.path.isfile(outfile)
    with open(outfile, 'wb' if is_new else 'ab') as f:
        writer = csvkit.writer(f)
        if is_new:
            writer.writerow(("Name", "Campus", "Course"))

        # Skip the header <tr>, then emit one CSV row per table row.
        for row in tables[0].findAll('tr')[1:]:
            col = row.findAll('td')
            name = col[0].text
            # NOTE(review): the magic 13 assumes a fixed-width
            # parenthesised suffix on the name -- confirm against the
            # source HTML.
            if name.endswith(")"):
                name = name[:-13]
            campus = col[1].text
            course = col[2].text
            writer.writerow((name, campus, course))
示例#2
0
文件: table.py 项目: wpdavis/agate
    def to_csv(self, path, **kwargs):
        """
        Write this table to a CSV. This method will use csvkit if it is
        available, otherwise it will use Python's builtin csv module.

        ``kwargs`` will be passed through to :meth:`csv.writer`.

        If you are using Python 2 and not using csvkit, this method is not
        unicode-safe.

        :param path: Filepath or file-like object to write to.
        """
        if 'lineterminator' not in kwargs:
            kwargs['lineterminator'] = '\n'

        # Open the file *before* the try-block: in the original a failing
        # open() left ``f`` unbound and the finally-clause then raised a
        # NameError that masked the real I/O error.
        close = not hasattr(path, 'write')
        f = open(path, 'w') if close else path

        try:
            writer = csv.writer(f, **kwargs)
            writer.writerow(self._column_names)
            writer.writerows(self._rows)
        finally:
            # Close only handles we opened ourselves.
            if close:
                f.close()
示例#3
0
    def __init__(self):
        """
        Parse command-line arguments, read the input CSV, drop amended
        filings (unless ``--amendments`` is given) and write the result.

        Side effects: reads the ``input`` path, writes the ``output``
        path, and prints progress to stdout.
        """
        self.argparser = argparse.ArgumentParser(
            description='A command line utility for processing FEC data dumps.'
        )

        self.argparser.add_argument(
            dest='input', action='store',
            help='Path to input CSV.'
        )

        self.argparser.add_argument(
            dest='output', action='store',
            help='Path to output CSV.'
        )

        self.argparser.add_argument(
            '-a', '--amendments',
            dest='keep_amendments', action='store_true',
            help='Keep amendments (instead of filtering them out).'
        )

        self.argparser.add_argument(
            '-o', '--office',
            dest='office', action='store',
            help='Filter output only a certain office.'
        )

        self.args = self.argparser.parse_args()

        # File numbers of filings that were superseded by an amendment.
        self.amended_ids = set()

        # Read input data
        with open(self.args.input) as f:
            reader = csvkit.reader(f)
            # next(reader) works on Python 2 and 3; reader.next() is
            # Python 2 only.
            self.header = next(reader)
            rows = list(reader)

        sys.stdout.write('Read %i rows\n' % len(rows))

        # Discover amendments: any row whose amendment indicator is not
        # 'N' names the previous filing it replaces.
        if not self.args.keep_amendments:
            for row in rows:
                if row[self.header.index('amn_ind')] != 'N':
                    self.amended_ids.add(row[self.header.index('prev_file_num')])

        # Filter data.  Materialize the result: on Python 3 filter() is
        # lazy and len() below would raise TypeError.
        output_rows = list(filter(self.filter_row, rows))

        sys.stdout.write('Saving %i rows\n' % len(output_rows))

        # Write output
        with open(self.args.output, 'w') as f:
            writer = csvkit.writer(f)
            writer.writerow(self.header)

            writer.writerows(output_rows)
示例#4
0
    def to_csv(self, output, **kwargs):
        """
        Serializes the table to CSV and writes it to any file-like object.
        """
        # Header row first, then the serialized data rows.
        table_rows = [self.headers()]
        table_rows.extend(self.to_rows(serialize_dates=True))

        writer(output, **kwargs).writerows(table_rows)
示例#5
0
文件: test_py3.py 项目: DATAQC/csvkit
    def test_writer_alias(self):
        """csvkit.writer output round-trips through csvkit.reader."""
        expected = [['a', 'b', 'c'], ['1', '2', '3'], ['4', '5', u'ʤ']]

        output = six.StringIO()
        writer = csvkit.writer(output)
        for row in expected:
            writer.writerow(row)

        written = six.StringIO(output.getvalue())

        reader = csvkit.reader(written)
        for row in expected:
            self.assertEqual(next(reader), row)
示例#6
0
    def test_writer_alias(self):
        """Rows written via csvkit.writer are read back unchanged."""
        sample_rows = [['a', 'b', 'c'], ['1', '2', '3'], ['4', '5', u'ʤ']]

        sink = six.StringIO()
        out = csvkit.writer(sink)
        for sample in sample_rows:
            out.writerow(sample)

        replay = six.StringIO(sink.getvalue())

        back = csvkit.reader(replay)
        for sample in sample_rows:
            self.assertEqual(next(back), sample)
示例#7
0
文件: test_py2.py 项目: DATAQC/csvkit
    def test_writer_alias(self):
        """csvkit.writer(encoding=...) round-trips rows via csvkit.reader."""
        expected = [['a', 'b', 'c'], ['1', '2', '3'], ['4', '5', u'ʤ']]

        output = six.StringIO()
        writer = csvkit.writer(output, encoding='utf-8')
        # A utf-8 encoding should select the eight-bit writer internally.
        self.assertEqual(writer._eight_bit, True)
        for row in expected:
            writer.writerow(row)

        written = six.StringIO(output.getvalue())

        reader = csvkit.reader(written, encoding='utf-8')
        for row in expected:
            self.assertEqual(next(reader), row)
示例#8
0
    def test_writer_alias(self):
        """Encoded csvkit.writer output reads back identically."""
        sample_rows = [['a', 'b', 'c'], ['1', '2', '3'], ['4', '5', u'ʤ']]

        sink = six.StringIO()
        out = csvkit.writer(sink, encoding='utf-8')
        # utf-8 must route through the eight-bit implementation.
        self.assertEqual(out._eight_bit, True)
        for sample in sample_rows:
            out.writerow(sample)

        replay = six.StringIO(sink.getvalue())

        back = csvkit.reader(replay, encoding='utf-8')
        for sample in sample_rows:
            self.assertEqual(next(back), sample)
    def clean(self):
        """
        Cleans the provided source TSV file and writes it out in CSV format.

        Side effects: writes ``self.csv_path``, optionally writes
        ``self.error_log_path``, and updates counters/file sizes on
        ``self.raw_file`` (saving it at the end).
        """
        # Create the output object
        with open(self.csv_path, 'w') as csv_file:
            # Create the CSV writer
            csv_writer = csvkit.writer(csv_file)
            # Write the headers
            csv_writer.writerow(self.headers)
            # Write out the rows.  (The original used a list comprehension
            # purely for its side effect; writerows is the idiomatic form.)
            csv_writer.writerows(self._convert_tsv())

        # Log errors if there are any
        if self.log_rows:
            # Log to the terminal
            if self.verbosity > 2:
                msg = '  {} errors logged (not including empty lines)'
                self.failure(msg.format(len(self.log_rows)))

            # Log to the file
            with open(self.error_log_path, 'w') as log_file:
                log_writer = csvkit.writer(log_file, quoting=csv.QUOTE_ALL)
                log_writer.writerow(['headers', 'fields', 'value'])
                log_writer.writerows(self.log_rows)

        # Add counts to raw_file_record
        self.raw_file.clean_columns_count = self.headers_count
        self.raw_file.error_count = len(self.log_rows)
        self.raw_file.clean_records_count = self.raw_file.download_records_count - self.raw_file.error_count

        # Add file size to the raw_file_record
        self.raw_file.download_file_size = os.path.getsize(self.tsv_path) or 0
        self.raw_file.clean_file_size = os.path.getsize(self.csv_path) or 0

        # Save it in case it crashes in the next step
        self.raw_file.save()
示例#10
0
def export_sources_csv(cursor, output_filename):
    """
    Dump the sources referenced by flows or exchange rates to a CSV file.

    :param cursor: sqlite3 cursor on the database to export from.
    :param output_filename: path of the CSV file to (over)write.
    :returns: 0 on success (exceptions propagate to the caller).
    """
    cursor.row_factory = sqlite3.Row
    sql = """
  SELECT author,name,country,volume_number,volume_date,editor,edition_date,pages,shelf_number,URL,source_category,type,notes
  FROM sources as s 
  WHERE s.slug in (SELECT distinct source from flow_joined) OR 
        s.slug in (SELECT distinct source from exchange_rates)"""
    rows = cursor.execute(sql)
    # Fetch one row up front so the header can include its column names.
    first = next(rows)
    with open(output_filename, 'w') as f:
        dw = csvkit.writer(f)
        dw.writerow(["bibliographic reference"] + list(first.keys()))
        dw.writerow(formatRef(first))
        dw.writerows(formatRef(r) for r in rows)
    # The original had an unreachable ``return 1`` after the ``return 0``
    # inside the with-block; that failure path was dead code and is gone.
    return 0
def tsvTOcsv(input_file_name, out_file):
    """
    Convert a TSV file to CSV, skipping blank lines.

    Used to make the csv file that is readable by this EEG report feature
    analyzer.

    :param input_file_name: path to a tab-separated input file
    :param out_file: path of the CSV file to write
    :return: None
    """
    # Text mode with newline='' is what the csv module requires on
    # Python 3; the original opened the files in binary ('rb'/'wb'),
    # which only works on Python 2.  The unused counter ``i`` was dropped.
    with open(input_file_name, newline='') as tsvin, \
            open(out_file, 'w', newline='') as csvout:
        reader = csv.reader(tsvin, delimiter='\t')
        writer = csv.writer(csvout)

        for row in reader:
            # Blank input lines parse to an empty row; drop them.
            if row:
                writer.writerow(row)
示例#12
0
    def to_csv(self, file_path):
        """ Store all categories as a csv file
        """
        header = ["id", "label", "parent"]
        with open(file_path, 'w') as f:
            out = csv.writer(f)
            out.writerow(header)

            for category in self.categories:
                # Root categories have no parent id.
                parent_id = category.parent.id if category.parent else None
                out.writerow([category.id, category.label, parent_id])
示例#13
0
    def generate_dictionary(self):
        """Write a ``<dimension>.csv`` code list for this dimension.

        Only dimensions registered in AJAX_API_ENDPOINTS for the parent
        dataset are exported; files land under ``vantetider/data/<dataset>/``.
        """
        dataset_id = self.dataset.id
        dim_id = self.id
        # Guard clause: nothing to do for unregistered dimensions.
        if dim_id not in AJAX_API_ENDPOINTS[dataset_id]:
            return
        opts = AJAX_API_ENDPOINTS[dataset_id][dim_id]
        categories = self._categories_from_ajax_api()
        file_dir = os.path.join("vantetider/data", dataset_id)
        file_path = os.path.join(file_dir, dim_id + ".csv")
        if not os.path.exists(file_dir):
            os.makedirs(file_dir)
        with open(file_path, 'w') as f:
            out = csv.writer(f)
            headers = ["id", "label"]
            # Optional extra attribute columns declared for this endpoint.
            if "attributes" in opts:
                headers.extend(attr[1] for attr in opts["attributes"])

            out.writerow(headers)
            for cat in categories.values():
                out.writerow([getattr(cat, column) for column in headers])
示例#14
0
文件: table.py 项目: immarvin/agate
    def to_csv(self, path, **kwargs):
        """
        Write table to a CSV. Will use csvkit if it is available, otherwise
        will use Python's builtin csv module. ``args`` and ``kwargs``
        will be passed through to :meth:`csv.writer`.

        Note: if using Python 2 and not using csvkit, this method is not
        unicode-safe.

        :param path: Path to the CSV file to read from.
        """
        # Unix line endings unless the caller overrides.
        kwargs.setdefault('lineterminator', '\n')

        with open(path, 'w') as f:
            out = csv.writer(f, **kwargs)
            out.writerow(self._column_names)
            out.writerows(self._data)
示例#15
0
    def to_csv(self, path, **kwargs):
        """
        Write table to a CSV. Will use csvkit if it is available, otherwise
        will use Python's builtin csv module. ``args`` and ``kwargs``
        will be passed through to :meth:`csv.writer`.

        Note: if using Python 2 and not using csvkit, this method is not
        unicode-safe.

        :param path: Path to the CSV file to read from.
        """
        if 'lineterminator' not in kwargs:
            kwargs['lineterminator'] = '\n'

        output_file = open(path, 'w')
        try:
            csv_writer = csv.writer(output_file, **kwargs)
            # Header first, then every data row in order.
            csv_writer.writerow(self._column_names)
            for data_row in self._data:
                csv_writer.writerow(data_row)
        finally:
            output_file.close()
示例#16
0
def csv_writer(csv_file):
    """Yield a csv.writer bound to *csv_file* (generator-style helper)."""
    writer_obj = csv.writer(csv_file)
    yield writer_obj
示例#17
0
#! /usr/bin/env python
# from http://unix.stackexchange.com/questions/60590/is-there-a-command-line-utility-to-transpose-a-csv-file
# Transpose a CSV read from stdin and write the result to stdout.
import csvkit as csv, sys
rows = list(csv.reader(sys.stdin))
writer = csv.writer(sys.stdout)

# range() replaces the Python-2-only xrange(); the guard avoids an
# IndexError on empty input (rows[0] in the original).
if rows:
    for col in range(0, len(rows[0])):
        writer.writerow([row[col] for row in rows])
示例#18
0
def csv_writer(response):
    """Generator helper: hand back a csv.writer over *response*."""
    yield csv.writer(response)
示例#19
0
def csv_writer(response):
    """Yield a CSV writer wired to the *response* stream."""
    out = csv.writer(response)
    yield out
示例#20
0
def csv_writer(csv_file):
    """Produce (via yield) a csv.writer for *csv_file*."""
    yield csv.writer(csv_file)
示例#21
0
def process_files():
    """Run the ISW lottery.

    Reads four uploaded spreadsheets, drops ineligible registrants,
    weights the remaining ones by waitlist appearances, draws a random
    ranking and writes it to a results CSV.  Returns a dict with the
    ranking and the output path, or an error string for bad file types.
    """
    # Get the uploaded files
    isw_name = request.forms.get('isw_name')
    ineligible_attended_file = request.files.get('ineligible_attended')
    ineligible_registered_file = request.files.get('ineligible_registered')
    waitlist_file = request.files.get('waitlist')
    registrants_file = request.files.get('registrants')
    uploads = (ineligible_attended_file, ineligible_registered_file,
               waitlist_file, registrants_file)
    if not all(file_ext_ok(upload.filename) for upload in uploads):
        return 'Unfortunately, this thing can only handle CSV or XLS files.'

    # Read all the files into lists of Persons
    ineligible_registered = listreader.readRegistered(listreader.getReader(ineligible_registered_file))
    ineligible_attended = listreader.readAttended(listreader.getReader(ineligible_attended_file))
    registrants = listreader.readRegistrants(listreader.getReader(registrants_file))
    waitlist = listreader.readWaitlist(listreader.getReader(waitlist_file))

    # Drop anyone on either ineligibility list
    excluded = ineligible_registered + ineligible_attended
    registrants = [person for person in registrants if person not in excluded]

    # Every remaining registrant enters the pool once; remember each
    # person's details by email for the output stage
    pool = []
    info = {}
    for person in registrants:
        pool.append(person)
        info.setdefault(person.email, person)

    # Add one extra pool entry per waitlist appearance
    waitlist = Counter(waitlist)
    for person in registrants:
        if person in waitlist:
            pool.extend([person] * waitlist[person])

    # Draw the ranking: pick uniformly from the pool, then purge every
    # remaining entry for the selected person so each appears once
    ranking = []
    while pool:
        chosen = pool[randrange(len(pool))]
        ranking.append(chosen)
        pool = [person for person in pool if person != chosen]

    # write the output file
    output_file_path = "results/" + isw_name + ".csv"
    with open(output_file_path, 'wb') as output_file:
        outputwriter = writer(output_file)
        outputwriter.writerow(["First Name", "Last Name", "Email", "Cell or Home Phone Number", "Address (Street Address)", "Address (Address Line 2)",
                               "Address (City)", "Address (State / Province)", "Address (ZIP / Postal Code)", "Address (Country)", "Institution",
                               "Degree", "Faculty", "Department", "Is this your first time submitting your name for an ISW workshop?",
                               "Have you ever been employed OR received funds from UBC?"])
        for person in ranking:
            user = info.get(person.email)
            outputwriter.writerow([user.firstname, user.lastname, user.email, user.number, user.address1, user.address2,
                                   user.city, user.state, user.zip, user.country, user.institution,
                                   user.degree, user.faculty, user.department, user.firsttime, user.fund])

    return dict(ranking=ranking, output_file=output_file_path)
示例#22
0
    def handle(self, *args, **options):
        """
        Django management-command entry point.

        Reads a CSV of opencoesione.gov.it project URLs (``csv_file``
        option), looks each slug up as a ``Progetto`` and writes an
        enriched CSV (``out`` option) with financing/status columns
        appended.  Rows whose slug is unknown pass through unenriched.
        """
        # Map the command's --verbosity flag onto logger levels.
        verbosity = options['verbosity']
        if verbosity == '0':
            self.logger.setLevel(logging.ERROR)
        elif verbosity == '1':
            self.logger.setLevel(logging.WARNING)
        elif verbosity == '2':
            self.logger.setLevel(logging.INFO)
        elif verbosity == '3':
            self.logger.setLevel(logging.DEBUG)

        csvfile = options['csv_file']
        encoding = options['encoding']

        csv_out = out = options['out']

        # ``out`` may be a path or an already-open stream; only open it
        # ourselves when it is a string path.
        if type(out) == str:
            csv_out = open(out, 'wb')

        # NOTE(review): binary mode plus an ``encoding=`` kwarg on the
        # writer implies csvkit's Python-2 unicode CSV API -- confirm
        # ``csv`` here is the csvkit alias.
        writer = csv.writer(csv_out, delimiter=';', quotechar='"', encoding=encoding)
        writer.writerow(['slug', 'url', 'attivo', 'tema', 'natura', 'cup',
                         'programma', 'classificazione_qsn', 'fondo_comunitario',
                         'fin_totale_pubblico', 'fin_totale_pubblico_netto', 'pagamento',
                         'stato_progetto','stato_finanziamenti'])

        # Italian locale so locale.currency() formats amounts below.
        locale.setlocale(locale.LC_ALL, 'it_IT.UTF-8')

        with open(csvfile, 'rb') as cfile:
            reader = csv.reader(cfile, delimiter=',', quotechar='"')
            for r in reader:
                slug = None
                url = '-'
                output_r = r
                if not r:
                    continue

                # Extract the project slug from the opencoesione URL.
                url = r[0].strip()
                slug_search = re.search(
                    '^(http://){0,1}(www\.){0,1}opencoesione.gov.it/progetti/('
                    '.*?)/?$',
                    url, re.IGNORECASE
                )
                if slug_search:
                    slug = slug_search.group(3)

                # A clean slug (no residual path) resets the output row
                # to [slug, original_url] before enrichment.
                if slug and '/' not in slug:
                    output_r = [slug, r[0]]

                try:
                    p = Progetto.fullobjects.get(slug=slug)
                    is_active = p.active_flag
                    tema = p.tema.tema_superiore.short_label
                    natura = p.classificazione_azione.classificazione_superiore\
                        .short_label
                    cup = p.cup
                    programma = ','.join([f.descrizione for f in p.fonti_fin])
                    class_qsn = p.classificazione_qsn.classificazione_superiore.classificazione_superiore.descrizione
                    fondo_com = p.get_fondo_comunitario_display()

                    # Currency fields: locale formatting, then swap the
                    # locale's 'Eu' marker for the euro sign.
                    fin_tot = locale.currency(p.fin_totale_pubblico).replace('Eu', u'€')
                    fin_tot_netto = locale.currency(p.fin_totale_pubblico_netto).replace('Eu', u'€')
                    pagamento = locale.currency(p.pagamento).replace('Eu', u'€')
                    stato_fin = p.get_stato_finanziario_display()
                    stato_prog = p.get_stato_progetto_display()

                    output_r.extend([is_active, tema, natura, cup, programma, class_qsn, fondo_com,
                                     fin_tot, fin_tot_netto, pagamento,
                                     stato_fin, stato_prog])
                except ObjectDoesNotExist:
                    # Unknown slug: emit the original row without extras.
                    pass

                self.logger.info(r[0])
                writer.writerow(output_r)
示例#23
0
            # NOTE(review): fragment of a larger function (its def is not
            # visible here); rebuild the circuit model from user-adjusted
            # parameters.
            diode_forw = Diode(modif_param("I0", diode_forw.I0),
                               modif_param("eta", diode_forw.eta))
            resistance_circ = Resistances(
                modif_param("Rsh", resistance_circ.Rsh),
                modif_param("Rs", resistance_circ.Rs))

    # Saving results:it creates a folder in a given path and write the parameters and simulated IV in a csv file
    namefile = input("Enter the name of the file: ")
    namefolder = "IV_Simulation"
    path = input("Enter the path to save the result: ")
    # namefile = "TestResults_"                                 #This block can be used to speed up during testing
    # namefolder = "IV_Simulation"
    # path = data_path
    os.chdir(path)
    # Create the results folder; the bare except tolerates "already
    # exists" but also hides any other mkdir failure.
    try:
        os.mkdir(namefolder)
        os.chdir(namefolder)
    except:
        os.chdir(namefolder)
    with open(namefile + ".csv", "w") as file:
        writer = csvkit.writer(file, delimiter=";")
        # First the fitted circuit parameters, then the IV curve.
        writer.writerows([["I0 [A]", "eta", "Rs [Ohm]", "Rsh [Ohm]"],
                          [
                              diode_forw.I0, diode_forw.eta,
                              resistance_circ.Rs, resistance_circ.Rsh
                          ]])
        writer.writerows([["Voltage", "Current"], ["V", "A"]])
        # NOTE(review): volt_fit.index(volt) is O(n) per row and breaks
        # on duplicate voltages -- pairing via enumerate would be safer.
        for volt in volt_fit:
            writer.writerow([str(volt), str(curr_fit[volt_fit.index(volt)])])
    # Redundant: the with-block already closed the file ('file' also
    # shadows the builtin).
    file.close()
import csvkit, sys
from collections import defaultdict

# Build per-column frequency tables for a CSV whose first column is a
# label and whose remaining columns hold integers, then print one row
# per distinct value: [value, freq_in_col_1, freq_in_col_2, ...].
writer = csvkit.writer(sys.stdout)
with open(sys.argv[1]) as csv_file:
    # Defaults so a file with no rows produces no output instead of a
    # NameError at the summary step below.
    col_count = 0
    freqs = []
    for i, row in enumerate(csvkit.reader(csv_file)):
        if i == 0:
            # Header row: everything after the first column is data.
            col_count = len(row) - 1
            freqs = [defaultdict(int) for col in range(col_count)]
            continue
        for col in range(col_count):
            freqs[col][int(row[col + 1])] += 1
    # list() around .keys() keeps this working on Python 3, where dict
    # views cannot be summed onto a list (the original raised TypeError).
    values = sum((list(freqs[col].keys()) for col in range(col_count)), [])
    for val in sorted(set(values)):
        val_freqs = [freqs[col][val] for col in range(col_count)]
        row = [val] + val_freqs
        writer.writerow(row)
示例#25
0
    with open('../../csv_data/flows.csv', 'r') as f:
        with open('../../csv_data/new_flows.csv', 'w') as nf:
            flows = csvkit.DictReader(f)
            newFlows = csvkit.DictWriter(nf, flows.fieldnames)
            newFlows.writeheader()
            for flow in flows:
                if flow['source'] in swapSources:
                    flow['source'] = swapSources[flow['source']]
                else:
                    missingSources.add(flow['source'])
                newFlows.writerow(flow)

    with open('../../csv_data/exchange_rates.csv', 'r') as f:
        with open('../../csv_data/new_exchange_rates.csv', 'w') as nf:
            rates = csvkit.DictReader(f)
            newRates = csvkit.DictWriter(nf, rates.fieldnames)
            newRates.writeheader()
            for rate in rates:
                if rate['source'] in swapSources:
                    rate['source'] = swapSources[rate['source']]
                else:
                    missingSources.add(rate['source'])
                newRates.writerow(rate)

    with open('missing_sources.list', 'w') as ms:
        csvkit.writer(ms).writerows([_] for _ in missingSources)

# modify schema (by hand) : done
# try to generate the new database
# test and update sources.csv API
# test and update source representation in client (metadata and data tables)
                                # NOTE(review): deeply nested fragment of a
                                # Python 2 scraper; both ends are truncated.
                                # Missing endorsement counts default to 0.
                                if endorseCount.text is None:
                                    endorseCount.text = 0

                                print endorseCount.text
                                newRow.append(endorseCount.text)
                            else:
                                # No endorsement element: keep column
                                # alignment with an empty cell.
                                newRow.append("")
                                print 0
                        except:
                            # Bare except: any scrape failure counts as 0.
                            print 0

                        # Save New Row
                        print newRow
                        # Append the row; finally guarantees the handle is
                        # closed even when the write fails.
                        f = open('csvOutput copy.csv', 'a')
                        try:
                            writer = csvkit.writer(f)
                            writer.writerow(newRow)

                        except:
                            print"could not write row"

                        finally:
                            f.close()


                    except:
                        print "messed up skills"
                # 2 goes 1-40


                # NOTE(review): loop body truncated in this excerpt.
                for otherSkills in range(1, 41):
示例#27
0
def run():
    """Export per-shop billing/contact info for SIDS to ``DIR/info.csv``
    and generate a bill workbook for any shop that lacks one.

    Side effects: creates DIR if needed, writes the CSV, and may call
    ``export_bill(sid)`` per shop.
    """
    if not os.path.isdir(DIR):
        os.mkdir(DIR)

    csvf = open('{}/info.csv'.format(DIR), 'wb')

    csvw = csvkit.writer(csvf)
    # Header (Traditional Chinese): account, shop name, company/person
    # name, tax/national id, address, phone, email, payee name, bank
    # name, bank account.
    csvw.writerow([
        u'設計館帳號',
        u'設計館名稱',
        u'公司名稱 / 姓名',
        u'統編 / 身分證',
        u'地址',
        u'電話',
        u'email',
        u'收款戶名',
        u'收款銀行名稱',
        u'收款帳號',
    ])

    with db as cur:
        # The original had a stray trailing comma after this call,
        # turning the statement into a pointless one-element tuple.
        cur.execute('''
            select
                sid,
                name,
                contact_tel,
                contact_mobile,
                identity,
                identity_type,
                payment_info,
                receipt_setting,
                finance_email
            from shop
            where
                sid in {}
        '''.format(in_operand(SIDS)))

        rows = list(cur)

    for (sid, name, contact_tel, contact_mobile, identity, identity_type,
         payment_info, receipt_setting, finance_email) in rows:

        # JSON columns may be NULL/empty; fall back to empty dicts.
        payment_info = json.loads(payment_info) if payment_info else {}
        receipt_setting = json.loads(
            receipt_setting) if receipt_setting else {}

        # Shop display name (localized JSON blob).
        name_d = json.loads(name)

        # National id, receipt title and address.
        nationalid = identity
        title = receipt_setting.get('title', '')
        address = receipt_setting.get('address', '')

        # Bank details: only filled in for ATM payout shops.
        bank_name_n_code = ''
        bank_account = ''
        bank_account_name = ''

        payment_method = payment_info.get('payment_method')

        if payment_method == 'atm':

            bank_code = payment_info['bank']
            bank_name = Payment.TW_BANK_MAP.get(bank_code, '')
            if bank_name:
                bank_name_n_code = u'{} ({})'.format(bank_name, bank_code)

            bank_account = payment_info['account']
            bank_account_name = payment_info['name']

        csvw.writerow([
            sid,
            name_d.get('zh_TW', name_d.get('en', '')), title, nationalid,
            address, contact_tel or contact_mobile or '', finance_email,
            bank_account_name, bank_name_n_code, bank_account
        ])

        # Generate the yearly bill workbook once per shop.
        if not os.path.isfile('{}/{}_201701-201712.xlsx'.format(DIR, sid)):
            export_bill(sid)

    csvf.close()
示例#28
0
#!/usr/bin/env python

# Remove newline chars from CSV "cells"
# Input is taken from stdin and output spit to stdout

import csvkit
import sys

reader = csvkit.reader(sys.stdin)
writer = csvkit.writer(sys.stdout)
for row in reader:
    # Strip embedded newlines from every string cell before re-emitting.
    # (str.replace is already a no-op when "\n" is absent, so the
    # original's containment check and index loop were unnecessary.)
    writer.writerow([
        cell.replace("\n", '') if isinstance(cell, str) else cell
        for cell in row
    ])