def update_csv_to_municipalities(csv_to_be_parsed,
                                 csv_parsed_successfully,
                                 csv_parsed_unsuccessfully,
                                 format="kml"):
    """

    :param csv_to_be_parsed:
    :param csv_parsed_successfully:
    :param csv_parsed_unsuccessfully:
    :param format:
    this tool uses a complete csv which has the osm_id numbers to search and reformat the csv to the complete format :


    this tool uses open_street_map


    """

    # TODO add a handler for other output formats
    # TODO add an option for using a more complex csv
    # TODO add an option for google search

    csv_to_be_parsed = open(csv_to_be_parsed, 'rb')
    csv_parsed_successfully = open(csv_parsed_successfully, 'w+')
    csv_parsed_unsuccessfully = open(csv_parsed_unsuccessfully, 'w+')

    reader = unicode_csv.UnicodeReader(csv_to_be_parsed)
    writer_successfully = unicode_csv.UnicodeWriter(csv_parsed_successfully)
    writer_unsuccessfully = unicode_csv.UnicodeWriter(
        csv_parsed_unsuccessfully)
    writer_successfully.writerow([
        'unique_id', 'name', 'city', 'state', 'country', 'osm_id', 'lat',
        'lng', 'polygon'
    ])
    writer_unsuccessfully.writerow([
        'unique_id', 'name', 'city', 'state', 'country', 'osm_id', 'lat',
        'lng', 'polygon'
    ])

    for row in reader:
        unique_id = row[0]
        search_string = row[1]
        osm_id = row[2]

        time.sleep(.2)
        entity_data = fetch.get_entity_data_from_osm_id(
            unique_id, search_string, osm_id)
        if entity_data:
            print entity_data
            writer_successfully.writerow(entity_data)
        # if for some reason this id was not found in the OpenStreetMap data
        else:
            print '*************     did not find osm_id: ' + osm_id + '  unique id: ' + unique_id + \
                  '   ******************'
            writer_unsuccessfully.writerow([unique_id, osm_id])
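
A minimal usage sketch, assuming the input csv rows are laid out as (unique_id, search string, osm_id); the file names below are placeholders:

# Hypothetical file names -- only the column layout above is required.
update_csv_to_municipalities('munis_with_osm_ids.csv',
                             'munis_found.csv',
                             'munis_not_found.csv',
                             format='kml')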
Example #2
File: rsstest.py  Project: fnets/craigslist
def main():
	rss = []
	update_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
	#cleaner = Cleaner(remove_unknown_tags=True, allow_tags=None);

	### Database Information ###
	csv_file = "search_result.csv"
	db_file = "search_result.db"

	# Database connection
	db = sqlite3.connect(db_file)
	db_cursor = db.cursor()
	
	with open('regex_out_top.txt') as f:
		iTunesLinks = f.readlines()  # read the output of TopPodasts.py
	g = open("rsstestout.txt", "w")  # output file, because there are too many links to be readable on the command line

	for link in iTunesLinks:  # go through the output of TopPodasts.py and convert each link with itunes_feed_extractor.py
		rss_link = itunes_feed_extractor.ConvertItunesLink(link)
		g.write(str(rss_link) + "\n")  # write the rss URL to the file
		rss.append(rss_link)  # collect the rss feeds into the array

	for link in rss: #go through rss array, link by link
		r = feedparser.parse(str(link)) #parse each link
		url = r['feed']['link']
		for items in r.entries: #Go through episodes of each podcast
			title = items["title"]
			text1 = items.content[0].value.encode('ascii','ignore') #this works in interpreter
			text = re.sub(ur'<.*?>',"", text1) #this works in interpreter too
			#text.decode() #this works in interpreter too
			#For some reason, this code is pulling everything in the entry titled <div class='itemcontent' name='decodeable'>
			enter_data(db_cursor, url, title, text, update_time)
			#Should I just create CSV here?  

	# Uncomment this line to clean existing entries from the database
	# clean_expired_entries(db_cursor, update_time)

	# Commit the change to the sqlite database
	db.commit()

	# pdb.set_trace()
	# Convert the sqlite3 database to csv
	db_cursor.execute('SELECT * FROM listings')
	with open("search_result.csv", "wb") as f:
		csv_writer = ucsv.UnicodeWriter(f)
		csv_writer.writerow([i[0] for i in db_cursor.description]) # Write the header
		csv_writer.writerows(db_cursor)

	# Close the database
	db.close()
		
	g.close()
Example #3
def main():
    # When this operation starts
    update_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    ### Search Parameters ###
    locations = ["washingtondc"]
    sublocations = ["doc", "nva", "mld"]
    search_terms = []
    listings = ["cta"]  # cta is the listing for cars and trucks
    price = [3000, 7500]  # min and max price
    year = [2004, 2010]  # min and max year
    makes = ["honda", "toyota", "hyundai"]
    title_status = 1  # Clean title
    has_pic = 1  # has pic

    ### Database Information ###
    csv_file = "search_result.csv"
    db_file = "search_result.db"

    # Database connection
    db = sqlite3.connect(db_file)
    db_cursor = db.cursor()

    # Retrieve and parse rss
    rss_links = rss_link_generator(locations, sublocations, search_terms, listings,
                                   price, year, makes, title_status, has_pic)
    retrieve_and_enter_data(db_cursor, rss_links, update_time)

    # Uncomment this line to clean existing entries from the database
    # clean_expired_entries(db_cursor, update_time)

    # Commit the change to sqlite database
    db.commit()

    # pdb.set_trace()
    # Convert the sqlite3 database to csv
    db_cursor.execute('SELECT * FROM listings')
    with open("search_result.csv", "wb") as f:
        csv_writer = ucsv.UnicodeWriter(f)
        csv_writer.writerow([i[0] for i in db_cursor.description])  # Write the header
        csv_writer.writerows(db_cursor)

    # Close the database
    db.close()
Example #4
def fix_csv_area(district_csv, fixed_csv):
    """ fix a csv to have an id and a open strat search name """
    reader_file = open(district_csv, 'rb')
    writer_file = open(fixed_csv, 'w+')
    reader = unicode_csv.UnicodeReader(reader_file)
    writer = unicode_csv.UnicodeWriter(writer_file)

    header = reader.next()
    print header
    for row in reader:
        open_street_search = row[3] + ' District' + ' Israel'
        open_street_search_english = row[4] + ' District' + ' Israel'
        if row[1] == '1111114':
            open_street_search_english = 'judea and samaria Area'
        print row[1], open_street_search, 'or : ', open_street_search_english
        writer.writerow(
            [row[1], open_street_search, open_street_search_english])

    reader_file.close()
    writer_file.close()
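
A minimal usage sketch with placeholder file names; the input csv is assumed to carry the district id in column 1 and district names in columns 3 and 4 (the latter in English), matching the indices read above:

# Hypothetical file names -- adjust to the actual district csv layout.
fix_csv_area('districts_raw.csv', 'districts_osm_search.csv')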
Example #5
def render_csv(request, addon, stats, fields):
    """Render a stats series in CSV."""
    # Start with a header from the template.
    ts = time.strftime('%c %z')
    response = jingo.render(request, 'stats/csv_header.txt', {
        'addon': addon,
        'timestamp': ts
    })

    # For remora compatibility, reverse the output so oldest data
    # is first.
    # XXX: The list() performance penalty here might be big enough to
    # consider changing the sort order at lower levels.
    writer = unicode_csv.UnicodeWriter(response)
    writer.writerow(fields)
    stats_list = list(stats)
    for row in reversed(stats_list):
        writer.writerow(row)

    fudge_headers(response, stats_list)
    response['Content-Type'] = 'text/plain; charset=utf-8'
    return response
Example #6
def fix_csv(data_csv, fixed_csv):
    """ fix a csv to have an id and a open street search name
    adds israel to the search item
    :param data_csv: orignal data csv
    :param fixed_csv: the fixed data csv
    """
    reader_file = open(data_csv, 'rb')
    writer_file = open(fixed_csv, 'w+')
    reader = unicode_csv.UnicodeReader(reader_file)
    writer = unicode_csv.UnicodeWriter(writer_file)

    header = reader.next()
    print header
    for row in reader:
        open_street_search = row[3] + ' Israel'
        open_street_search_english = row[4] + ' Israel'
        print row[1], open_street_search, 'or : ', open_street_search_english
        writer.writerow(
            [row[1], open_street_search, open_street_search_english])

    reader_file.close()
    writer_file.close()
def parse_csv_to_municipalities(csv_to_be_parsed,
                                csv_parsed_successfully,
                                csv_parsed_unsuccessfully,
                                format="kml",
                                search_column=0):
    """
    A tool to parse a csv file into a csv which contains the municipalities boundaries
    The tool will parse search_column and find the boundaries of a municipality.

    this tool uses open_street_map

    One column of csv to be parsed should contain the name of the muni to be search for.
    Another

    csv_to_be_parsed = file to be parsed
    csv_parsed_successfully = file name to write successfull search which found a muni boundry polygon
    csv_parsed_unsuccessfully =  file name write all search csv lines which didn't find a muni boundry match.
    format = the format of the column which will contain the polygon's of the muni boundry
    search_column = the column which contains the name of the muni search for.

    """

    # TODO add a handler for other output formats
    # TODO add an option for using a more complex csv
    # TODO add an option for google search

    csv_to_be_parsed = open(csv_to_be_parsed, 'rb')
    csv_parsed_successfully = open(csv_parsed_successfully, 'w+')
    csv_parsed_unsuccessfully = open(csv_parsed_unsuccessfully, 'w+')

    reader = unicode_csv.UnicodeReader(csv_to_be_parsed)
    writer_successfully = unicode_csv.UnicodeWriter(csv_parsed_successfully)
    writer_unsuccessfully = unicode_csv.UnicodeWriter(
        csv_parsed_unsuccessfully)
    writer_successfully.writerow([
        'unique_id', 'name', 'city', 'state', 'country', 'osm_id', 'lat',
        'lng', 'polygon'
    ])
    writer_unsuccessfully.writerow([
        'unique_id', 'name', 'city', 'state', 'country', 'osm_id', 'lat',
        'lng', 'polygon'
    ])
    search_string_1 = ''
    search_string_2 = ''
    unique_id = ''
    for row in reader:
        search_string_1 = row[1]
        search_string_2 = row[2]
        unique_id = row[0]

        time.sleep(.5)
        entity_data = fetch.get_entity_data(unique_id, search_string_1)
        if entity_data:
            print entity_data
            writer_successfully.writerow(entity_data)
        elif search_string_2:
            entity_data = fetch.get_entity_data(unique_id, search_string_2)

            if entity_data:
                print entity_data
                writer_successfully.writerow(entity_data)

            else:
                print '*************     did not find : ' + search_string_1 + ' or : ' + search_string_2 + \
                      ' unique id number : ' \
                      + unique_id + '   ******************'
                writer_unsuccessfully.writerow(
                    [unique_id, search_string_1, search_string_2])
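
A minimal end-to-end sketch under the assumptions above: fix_csv builds the id plus the two OpenStreetMap search names, and its output columns line up with the (unique_id, search string, fallback search string) layout that parse_csv_to_municipalities reads. All file names are placeholders.

# Hypothetical file names -- the fix_csv output feeds straight into the parser.
fix_csv('localities_raw.csv', 'localities_osm_search.csv')
parse_csv_to_municipalities('localities_osm_search.csv',
                            'municipalities_found.csv',
                            'municipalities_not_found.csv',
                            format='kml')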