Example #1
    def convert(self):
        document_body = self._prepare_document()
        with codecs.open(self.target_name, 'w+') as target:
            self.writer = UnicodeWriter(target, FIELDNAMES)
            self._write_header_row()
            for outline in document_body:
                self.process_element(outline)
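Nearly all of these examples assume a UnicodeWriter helper that wraps csv.writer so that unicode rows can be written to a byte stream (several snippets import it from a local unicode_csv module), but the helper itself is never shown. Below is a minimal sketch, closely following the UnicodeWriter recipe from the Python 2 csv module documentation; note that some examples above pass extra positional arguments (FIELDNAMES, a delimiter string), so the exact signature of the helper they actually use may differ.

import codecs
import csv
import cStringIO


class UnicodeWriter(object):
    """CSV writer that encodes unicode rows into a byte stream (Python 2 recipe)."""

    def __init__(self, f, dialect=csv.excel, encoding='utf-8', **kwds):
        # Rows are first written to an in-memory queue, then re-encoded
        # into the target encoding and copied to the real stream.
        self.queue = cStringIO.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writerow(self, row):
        # Assumes every cell is a unicode string, as in the original recipe.
        self.writer.writerow([s.encode('utf-8') for s in row])
        data = self.queue.getvalue().decode('utf-8')
        self.stream.write(self.encoder.encode(data))
        self.queue.truncate(0)

    def writerows(self, rows):
        for row in rows:
            self.writerow(row)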
Example #2
def convert_opml_to_csv(args):
    """Convert OPML file to Todoist CSV."""

    tree = ET.parse(args.file)
    opml = tree.getroot()
    body = opml.find('body')
    with codecs.open(target_name(args.file, 'csv'), 'w+') as target:
        writer = UnicodeWriter(target, FIELDNAMES)
        writer.writerow(FIELDNAMES)
        def make_row(type='', content='', indent=''):
            return [type, content, '', indent, '', '', '', '']

        def process_element(outline, level=1):
            # content
            row = make_row(TYPE_TASK, outline.get('text'), str(level))
            writer.writerow(row)
            # note
            note = outline.get(NOTE_ATTRIB)
            if note:
                row = make_row(TYPE_NOTE, note)
                writer.writerow(row)
            # separator
            writer.writerow(make_row())
            for subelement in outline.findall('outline'):
                process_element(subelement, level+1)

        for outline in body:
            process_element(outline)
Example #3
def dataset(query_arguments, type_output='csv', delimiter='|', output=stdout, lang=None):
    if set(['main', 'union', 'optional', 'filter']) <= set(query_arguments):
        query_arguments = format_query(query=query_arguments, lang=lang)
        query = factory(main=query_arguments['main'], union=query_arguments['union'],
                        optional=query_arguments['optional'], filter=query_arguments['filter'])

        results = retrieve_result(query)
        properties_set = set()
        formatted_result = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(list))))

        for result in results:
            quantity_of_interest = result['quantity_of_interest']['value']
            property_uri = result['property']['value']
            property_label = 'property_label' in result and result['property_label']['value'] or ''
            hasValue_uri = result['hasValue']['value']
            hasValue_label = 'hasValue_label' in result and result['hasValue_label']['value'] or ''
            properties_set.add((property_uri, property_label))

            formatted_result[quantity_of_interest][property_uri]['hasValue']['hasValue_label'].append(hasValue_label)
            formatted_result[quantity_of_interest][property_uri]['hasValue']['hasValue_uri'].append(hasValue_uri)

        keys = list(properties_set)
        if type_output == 'csv':
            out = UnicodeWriter(output, ';')
            out.writerow(filter(None, (chain.from_iterable(keys))))
            for qoi in formatted_result:
                cells = [['%s' % delimiter.join(formatted_result[qoi][key[0]]['hasValue']['hasValue_uri']),
                          '%s' % delimiter.join(formatted_result[qoi][key[0]]['hasValue']['hasValue_label'])]
                         if key[1] else ['%s' % delimiter.join(formatted_result[qoi][key[0]]['hasValue']['hasValue_uri'])] for key in keys]
                out.writerow(chain.from_iterable(cells))

        return formatted_result
Example #4
def write_json_to_csv(json_list, filename):

  from unicode_csv import UnicodeWriter

  translate = {
    "class_nbr":"course_num",
    "crn":"course_num",
    "num":"course_num",
    "number":"department_num",
    "dist":"distribution",
    "div":"division",
    "lim":"course_cap",
    "graded_sem":"seminar",
    "graded_seminar":"seminar",
    "double_graded_seminar":"seminar",
    "dept_name":"department",
  }

  scrap = {"req", "sign", "note", "prerequisite", "prereq"}

  # Get all the available headers.
  header_set = {header for obj in json_list for header in obj.keys()}
  headers = [h for h in header_set if not (h in translate or h in scrap)]
  headers.extend(translate.values())

  headers = map(unicode, headers)
  headers = list(set(headers))
  headers.sort()

  with open(filename, "w") as f:

    # Prepare the csv.
    writer = UnicodeWriter(f)

    # Write "cleaned" headers to the CSV
    cleaned_headers = [unicode(h.replace(" ","_").lower()) for h in headers]
    writer.writerow(cleaned_headers)

    for obj in json_list:
      for key in translate.keys():
        if key in obj:
          new_key = translate[key]
          obj[new_key] = obj[key]

      vals = []
      for header in headers:
        val = obj.get(header, "")

        if type(val) == list:
          val = map(str, val)

        vals.append(unicode(val))

      writer.writerow(vals)
Example #5
class SqlExportFileWriter(object):
    """Writes rows to a CSV file, optionally filtering on a predicate."""
    def __init__(self, dest, predicate=None, use_unicode=False):
        if use_unicode:
            self._writer = UnicodeWriter(dest, delimiter=DELIMITER)
        else:
            self._writer = csv.writer(dest, delimiter=DELIMITER)
        self._predicate = predicate

    def write_header(self, keys):
        self._writer.writerow(keys)

    def write_rows(self, results):
        if self._predicate:
            results = [result for result in results if self._predicate(result)]
        if results:
            self._writer.writerows(results)
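A short usage sketch for this writer follows; the file name, column names, rows, and predicate are purely illustrative, and DELIMITER is assumed to be defined elsewhere in the surrounding module:

with open('export.csv', 'wb') as dest:
    # Keep only rows whose second column is present.
    export_writer = SqlExportFileWriter(dest, predicate=lambda row: row[1] is not None)
    export_writer.write_header(['participant_id', 'email', 'created'])
    export_writer.write_rows([
        ('P1', 'a@example.com', '2020-01-01'),
        ('P2', None, '2020-02-02'),  # dropped by the predicate
    ])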
Example #6
def write_to_csv(file_name):
  documents = get_docs()
  print(documents.count())
  serialized_documents = json.loads(dumps(documents))
  csv_file = open(file_name, 'w')
  csv_writer = UnicodeWriter(csv_file, dialect='excel')
  count = 0
  for doc in serialized_documents:
    print(doc)
    del doc['_id']
    if count == 0:
      header = doc.keys()
      #header.sort()
      csv_writer.writerow(header)
      count = count + 1
    csv_writer.writerow(doc.values())

  csv_file.close()
Example #7
def marcanalyse(files, sample_length=5):
    """
    Returns a CSV of MARC keys and analysed values, showing, for example, how many records exist.
    
    =================   ==============================================================
    Column              Description
    =================   ==============================================================
    ``tag``             The 3-digit MARC tag.
    ``subfield``        The single-character subfield.
    ``tag_meaning``     The English meaning of the tag/subfield, if known.
    ``record_count``    The number of records that have at least one of these tags.
    ``min_valency``     The minimum number of this tag or subfield that each record has.
    ``max_valency``     The maximum number of this tag or subfield that each record has.
    ``samples``         Non-repeating sample values of the values of each tag or subfield.
    =================   ==============================================================

    """

    analysis = multifile_iter_records(files, sample_length=sample_length)

    csv_header = ("tag", "subfield", "tag_meaning", "record_count", "min_valency", "max_valency", "samples")

    
    writer = UnicodeWriter(sys.stdout)
    writer.writerow(csv_header)
    
    listanalysis = [x for x in analysis.iteritems()]
    listanalysis.sort()

    for key, value in listanalysis:
        v = []
        v.append(u'"%s"' % key) #tag
        v.append(u"") # subfield
        v.append(meaning(key)) #tag_meaning
        v.append(unicode(value['count'])) #record_count
        v.append(unicode(value['min_valency']))
        v.append(unicode(value['max_valency']))
        v.append(u"\r\r".join(value['samples']))
        writer.writerow(v)
        
        listanalysis = [x for x in value['subfields'].iteritems()]
        listanalysis.sort()
        for subfield, value in listanalysis:
            v = []
            v.append("") #tag
            v.append(subfield) # subfield
            v.append(meaning(key, subfield)) #tag_meaning
            v.append(unicode(value['count'])) #record_count
            v.append(unicode(value['min_valency']))
            v.append(unicode(value['max_valency']))
            v.append(u"\r\r".join(value['samples']))
            writer.writerow(v)
Example #8
def all_csv():
    pseudofile = StringIO()
    spamwriter = UnicodeWriter(pseudofile, encoding='utf-8') #, delimiter=' ', quotechar='|', quoting=csv.QUOTE_MINIMAL)
    header = ','.join([
        'company.name',
        'company.cvr',
        'context.year',
        'grossprofitloss',
    ])

    for company in all_companies:
        for context in company.contexts:
            if 'grossprofitloss' in context.fields:
                spamwriter.writerow([
                    company.name,
                    company.cvr,
                    context.year,
                    context.fields['grossprofitloss'],
                ])

    return Response(header + '\n' + pseudofile.getvalue(), mimetype='text/csv')
Example #9
def main():
    writer = UnicodeWriter(sys.stdout)
    writer.writerow(["gemeente", "stembureau", "postcode", "stemmen"])
    for file_path in get_file_paths():
        rows = parse_eml_file(file_path)
        writer.writerows(rows)
    return 0
Example #10
    def on_data(self, data):
        raw_data = json.loads(data)
        user = {'screen_name': raw_data['user']['screen_name']}
        tweet = {'text': processText(raw_data['text'])}

        # Strip newlines and carriage returns so the tweet stays on one CSV line.
        with open('dados.csv', 'a') as output:
            UnicodeWriter(output, delimiter='|', lineterminator='\n').writerow(
                (tweet['text'].replace('\n', ' ').replace('\r', ' '),
                 user['screen_name']))

        print('@' + user['screen_name'] + '\n' + tweet['text'] + '\n')
        return True
Example #11
def generate_csv(params, sort_by):
    class_dict = get_class_dict()
    primary_column_name = get_primary_column_name()
    all_classes = get_all_classes()
    keypairs = {}
    for param in params:
        name = param['name']
        class_type = class_dict[name]
        value = class_type.get_search_value(param)
        if name == primary_column_name:
            name = '_id'
        if value:
            keypairs.update({name: value})
    items = accounts_collection.find(keypairs)
    if sort_by:
        items = items.sort(sort_by)
    with TemporaryFile() as f:
        # csv_writer = csv.writer(f)
        csv_writer = UnicodeWriter(f)
        csv_writer.writerow(all_classes)
        for item in items:
            csv_columns = []
            primary_key = item['_id']
            for name in all_classes:
                class_type = class_dict[name]
                if name == primary_column_name:
                    name = '_id'
                if name in item:
                    csv_string = class_type.get_csv_string(item[name])
                else:
                    csv_string = default_csv_string
                csv_columns.append(csv_string)
            csv_writer.writerow(csv_columns)
        f.seek(0)
        lines = f.read()
    return lines
Example #12
def main():
    reader = UnicodeReader(sys.stdin)
    writer = UnicodeWriter(sys.stdout)
    writer.writerow([
        "gemeente", "stembureau", "postcode", "stemmen", "postcode_google",
        "lat", "lng"
    ])
    for row in reader:
        result = find_voting_place(row)
        writer.writerow(result)
        sleep(1)
    return 0
Example #13
class OpmlToCsvConverter(OpmlConverter):
    """Convert OPML file to Todoist CSV."""
    EXT = 'csv'

    def __init__(self, args):
        super(OpmlToCsvConverter, self).__init__(args)

    def convert(self):
        document_body = self._prepare_document()
        with codecs.open(self.target_name, 'w+') as target:
            self.writer = UnicodeWriter(target, FIELDNAMES)
            self._write_header_row()
            for outline in document_body:
                self.process_element(outline)

    def _prepare_document(self):
        tree = ET.parse(self.source_name)
        opml = tree.getroot()
        return opml.find('body')

    def _write_header_row(self):
        self.writer.writerow(FIELDNAMES)

    def _make_row(self, type='', content='', indent=''):
        return [type, content, '', indent, '', '', '', '', '']

    def process_element(self, outline, level=1):
        # content
        row = self._make_row(self.TYPE_TASK, outline.get('text'), str(level))
        self.writer.writerow(row)
        # note
        note = outline.get(self.NOTE_ATTRIB)
        if note:
            row = self._make_row(self.TYPE_NOTE, note)
            self.writer.writerow(row)
        # separator
        self.writer.writerow(self._make_row())
        for subelement in outline.findall('outline'):
            self.process_element(subelement, level + 1)
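How the converter is driven is not shown in this snippet. A hedged sketch of an invocation, assuming an argparse namespace with a file attribute (as in Example #2) and that the OpmlConverter base class derives source_name and target_name from it:

import argparse

# Illustrative only: the argument layout and base-class behaviour are assumptions.
parser = argparse.ArgumentParser(description='Convert an OPML outline to Todoist CSV')
parser.add_argument('file', help='path to the source OPML file')
args = parser.parse_args(['outline.opml'])

converter = OpmlToCsvConverter(args)
converter.convert()  # expected to write a .csv file derived from the source name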
Example #14
def main():
    shapes = get_shapes(sys.argv[1])
    writer = UnicodeWriter(sys.stdout)
    writer.writerow([
        'buurt_code', 'buurt_naam', 'wijk_code', 'gem_code',
        'gem_naam'])
    for geom, props in shapes:
        out_row = []
        for fld in [
            u'BU_CODE', u'BU_NAAM', u'WK_CODE',
            u'GM_CODE', u'GM_NAAM'
        ]:
            out_row.append(props[fld])
        writer.writerow(out_row)
    return 0
Example #15
def write_group_by_one_filed(todos, output_path, value_name, values_list):
    res = group_by_value(todos, value_name, values_list)
    print('write: %s' % output_path)

    with open(output_path, 'wb') as csv_file:
        writer = UnicodeWriter(csv_file)

        row = ['complete']
        row.extend(values_list)
        writer.writerow(row)

        for row in res:
            output_row = [row[0]]
            for _, v in row[1].items():
                output_row.append(str(v))
            writer.writerow(output_row)
Example #16
def main():
    if len(sys.argv) < 6:
        print >> sys.stderr, "Usage: merge.py <shape_file> <lat_field> <lon_field> <lat_fallback> <lon_fallback>"
        return 1

    reader = UnicodeReader(sys.stdin)
    writer = UnicodeWriter(sys.stdout)
    header = reader.next()
    shapes = get_shapes(sys.argv[1])

    out_header = deepcopy(header)
    out_header += [
        'buurt_code', 'buurt_naam', 'wijk_code', 'wijk_naam', 'gem_code',
        'gem_naam'
    ]
    writer.writerow(out_header)

    lat_field = sys.argv[2]
    lon_field = sys.argv[3]
    lat_fb_field = sys.argv[4]
    lon_fb_field = sys.argv[5]

    for row in reader:
        out_row = deepcopy(row)
        data = dict(zip(header, row))
        if (data[lon_field] != u'-') and (data[lat_field] != u''):
            lat = data[lat_field]
            lon = data[lon_field]
        else:
            lat = data[lat_fb_field]
            lon = data[lon_fb_field]
        if (lat != u'-') and (lon != u'-'):
            point = shapely.geometry.Point(float(lat), float(lon))
            for shape, props in shapes:
                if shape.contains(point):
                    for fld in [
                            u'BU_CODE', u'BU_NAAM', u'BU_CODE', u'BU_NAAM',
                            u'GM_CODE', u'GM_NAAM'
                    ]:
                        out_row.append(props[fld])
                    break
        if len(out_row) == len(row):  # if we did not find anything
            out_row += [u'-', u'-', u'-', u'-', u'-', u'-']
        writer.writerow(out_row)

    return 0
Example #17
def write_timer(todos, output_path):
    date_timer = {}
    print('write: %s' % output_path)

    for todo in todos:
        if not isinstance(todo.timer, int):
            continue
        if todo.completed not in date_timer:
            date_timer[todo.completed] = 0
        date_timer[todo.completed] += todo.timer

    with open(output_path, 'wb') as csv_file:
        writer = UnicodeWriter(csv_file)
        writer.writerow(['date', 'timer'])

        for date, minutes in date_timer.items():
            output_row = [
                date.strftime('%Y-%m-%d'), "{0:.2f}".format(minutes / 60.0)
            ]
            writer.writerow(output_row)
Example #18
def main():
    if len(sys.argv) < 3:
        print >> sys.stderr, "Usage: merge.py <file1> <file2>"
        return 1
    places = get_places(sys.argv[2])
    election_file = UnicodeReader(open(sys.argv[1]))
    headers = election_file.next()

    writer = UnicodeWriter(sys.stdout)
    writer.writerow([
        "gemeente", "stembureau", "postcode", "stemmen", "postcode_google",
        "lat", "lng", "stembureau2017", "lat2017", "lon2017"
    ])

    for row in election_file:
        result = dict(zip(headers, row))
        place = None
        if result[u'postcode'] != u'-':
            place = find_place_by_postcode(
                places, re.sub(r'\s+', u'', result[u'postcode']))
        elif result[u'postcode_google'] != u'':
            place = find_place_by_postcode(
                places, re.sub(r'\s+', u'', result[u'postcode_google']))
        if place is None:
            place = find_place_by_muni_and_name(places, result[u'gemeente'],
                                                result[u'stembureau'])
        result_row = deepcopy(row)
        if place is not None:
            result_row.append(place[u'stembureau'])
            result_row.append(place[u'Longitude'])
            result_row.append(place[u'Latitude'])
        else:
            result_row.append(u'-')
            result_row.append(u'-')
            result_row.append(u'-')
        # if result_row[-1] != u'-':
        #     pprint(result_row)
        writer.writerow(result_row)
    return 0
Example #19
import pickle
import random

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.proxy import *

file_player_link2 = open("clean_result_player_link2")
result_player_link = pickle.load(file_player_link2)

links = list()
for i in range(len(result_player_link)):
    links.append(result_player_link[i][1])
result = list()
result_file = open("result_file.csv", "a")
wr = UnicodeWriter(result_file)


for link in links[6234:]:
    proxy = ["173.9.233.186", "54.227.39.120", "194.141.96.1", "218.108.232.190", "80.193.214.233", "125.39.171.194"]
    port = [3128, 80, 8080, 843, 3128, 86]
    while True:
        chosen = random.randint(0, len(proxy)-1)
        profile = webdriver.FirefoxProfile()
        profile.set_preference("network.proxy.type", 1)
        profile.set_preference("network.proxy.http", proxy[chosen])
        profile.set_preference("network.proxy.http_port", port[chosen])
        profile.update_preferences()
        ff = webdriver.Firefox(firefox_profile=profile)
        ff.set_page_load_timeout(20)
Example #20
        try:
            viaf_code = line[1]
            sbn_code = line[2]
        except:
            pass

        if page:
            wikipedia[page] = {'viaf': viaf_code,
                               'sbn': sbn_code}
        if viaf_code:
            viaf2wiki[viaf_code] = page

        if sbn_code:
            sbn2wiki[sbn_code] = page

    outwikifile = open(OUTWIKIFILE, 'a+')
    wikiwriter = UnicodeWriter(outwikifile)

    wikipages_to_get = set(wikipages_with_authority_control) - set(wikipedia.keys())

    logger.debug('Wikipages with authority control: {no}'.format(
        no=len(set(wikipages_with_authority_control))))
    logger.debug('no. of keys already collected: {no}'.format(
        no=len(set(wikipedia.keys()))))
    logger.debug('no. of pages in it.wiki with authority control, still to get: {no}'.format(
        no=len(wikipages_to_get)))

    count = 0
    for page in wikipages_to_get:
        count += 1
        logger.debug(count)
Example #21
import os
import re
import sys
import unicodedata

from unicode_csv import UTF8Recoder, UnicodeReader, UnicodeWriter

rdr = UnicodeReader(open(sys.argv[1]))
tag_re = re.compile(r'\W', re.UNICODE)

flickr_tags = set()
tag_source = os.path.join(os.path.dirname(__file__), '../data/tags_from_flickr.csv')
with open(tag_source) as f:
    tagreader = UnicodeReader(f)
    for tagrow in tagreader:
        text, raw, author = tagrow[:]
        flickr_tags.add(text)

outfile = open('updated_tags.orig.csv', 'w+')

wrtr = UnicodeWriter(outfile)
head = [ 'Normalized', 'Raw', 'Model', 'field' ]
wrtr.writerow(head)

for row in rdr:
    new_row = []
    ugly, raw, mitch, laura, model, field, basis = row[:]
    u = 'MISSING'
    if ugly:
        u = ugly if ugly in flickr_tags else ('CURR:NO_MATCH:%s' % ugly)
    else:
        ugly = unicodedata.normalize('NFC', raw)
        ugly = tag_re.sub('', ugly.strip().lower())
        u = ugly if ugly in flickr_tags else ('NEW:NO_MATCH:%s' % ugly)
    
    new_row = row[:]
Example #22
import json
import os.path
import sys
import io
from unicode_csv import UnicodeWriter


def get_value_from_dict_or_return_NA(dict, key):
    if key in dict and dict[key] is not None:
        return dict[key]
    return "NA"


with io.open('acl_instagram.csv', 'ab') as csv_data:
    writer = UnicodeWriter(csv_data, delimiter='`')

    with open('acl_instagram.txt') as instagram_file:
        for insta_data in instagram_file:
            post = json.loads(insta_data)
            writer.writerow([
                get_value_from_dict_or_return_NA(post, 'user'),
                get_value_from_dict_or_return_NA(post, 'post_time'),
                get_value_from_dict_or_return_NA(post, 'post_location'),
                get_value_from_dict_or_return_NA(post, 'likes_count'),
                get_value_from_dict_or_return_NA(post, 'views_count'),
                json.dumps(get_value_from_dict_or_return_NA(post, 'comments'))
            ])
Example #23
    def __init__(self, dest, predicate=None, use_unicode=False):
        if use_unicode:
            self._writer = UnicodeWriter(dest, delimiter=DELIMITER)
        else:
            self._writer = csv.writer(dest, delimiter=DELIMITER)
        self._predicate = predicate