config_file_name = "forecastio.config"

# warning: don't interrupt program using ctrl c, press enter instead to avoid file writing problems


def get_api_key():
    """Return the API key stored on the first line of the configuration file.

    The file named by ``config_file_name`` is read in binary mode and its
    first line is returned unchanged.

    Raises:
        Exception: if the file does not exist or contains no lines.
    """
    if os.path.isfile(config_file_name):
        # Read the file once and close the handle deterministically; always
        # use config_file_name so the existence check and the read agree.
        with open(config_file_name, "rb") as config_file:
            lines = config_file.read().splitlines()
        if lines:
            return lines[0]
    raise Exception(
        "The configuration file '{0}' containing your API key is invalid or does not exist.".format(
            config_file_name
        )
    )


def _to_unix_time(date_string):
    """Convert a 'YYYY-MM-DDT00:00:00.000Z' date string to an integer timestamp."""
    # NOTE(review): time.mktime interprets the parsed value as local time,
    # although the trailing "Z" in the format suggests UTC -- confirm intent.
    parsed = datetime.datetime.strptime(date_string, "%Y-%m-%dT00:00:00.000Z")
    return int(time.mktime(parsed.timetuple()))


d = DataUtility()
weather_config = d.read_json_file("weather-data-collection.json")

# Boundaries of the collection period, taken from the metadata of the
# weather data collection file.
date_range = weather_config["meta"]["dateRange"]
start_time = _to_unix_time(date_range["start"])
end_time = _to_unix_time(date_range["end"])
import json

from DataUtility import DataUtility

d = DataUtility()
weather_source = d.read_json_file('weather-data-collection.json')
weatherData = weather_source["data"]

# One output list per kind of weather measurement.
temperatureData = []
windData = []
cloudCoverData = []
precipitationData = []

# Go through all datapoints in the mixed weather-data-collection, split the
# data by weather source and write each kind into its own data array.
for index, entry in enumerate(weatherData):
    # Not every entry has every key included, so every key is checked first.
    keyTemperature = "temperature"
    if keyTemperature in entry:
        temperatureEntry = {
            'latitude': entry["latitude"],
            'longitude': entry["longitude"],
            'temperature': entry["temperature"],
            'timestamp': entry["time"],
            "children": [],
        }
        temperatureData.append(temperatureEntry)

    keyWindSpeed = "windSpeed"
    keyWindBearing = "windBearing"
    # Both keys must be present: `keyWindSpeed and keyWindBearing in entry`
    # would only test the bearing, because a non-empty string is always truthy.
    if keyWindSpeed in entry and keyWindBearing in entry:
        windEntry = {
            'latitude': entry["latitude"],
            'longitude': entry["longitude"],
            'speed': entry["windSpeed"],
            'direction': entry["windBearing"],
            'timestamp': entry["time"],
            "children": [],
        }
        windData.append(windEntry)

    keyCloudCover = "cloudCover"
    if keyCloudCover in entry:
        cloudCoverEntry = {
            'latitude': entry["latitude"],
            'longitude': entry["longitude"],
            'cloudCover': entry["cloudCover"],
            'timestamp': entry["time"],
            "children": [],
        }
text = text.replace('\n',' ') # text = cgi.escape(text).encode('ascii','xmlcharrefreplace') latitude = result["geo"]["coordinates"][0] longitude = result["geo"]["coordinates"][1] tweetid = result["id"] created_at = result["created_at"] #write in csv row = [tweetid, user, text, latitude, longitude, created_at] csvwriter.writerow(row) result_count += 1 last_id = result["id"] print "got %d results" % result_count csvfile.close() print "written to %s" % outfile d = DataUtility() twitterjsondata = d.read_csv_file(outfile) with open('data.json', 'w') as fp: json.dump(twitterjsondata, fp)
import json from DataUtility import DataUtility import datetime import time from random import randint d = DataUtility() twitterjsondata = d.read_tabbed_file("Rohdaten_Sandy/numeric_20121022_clean.csv") tweets = [] count = 0 for tweet in twitterjsondata: try: tweet["longitude"] = float(tweet["lon"]) tweet["latitude"] = float(tweet["lat"]) tweet["children"] = [] #tweet["timestamp"] = 0 tweet["numberOfRetweets"] = randint(0,101) timestamp = tweet["timestamp"] #print tweet tweet["timestamp"] = int(time.mktime(datetime.datetime.strptime(str(timestamp), "%Y%m%d%H%M%S").timetuple())) #print tweet if count < 10000:
d['children'] = [] for k, v in d.iteritems(): if isinstance(v, OrderedDict): if k == 'children': d[k] = [v] reformat_dictionary(v) elif isinstance(v, list): for e in v: reformat_dictionary(e) else: if k == 'longitude' or k == 'latitude': d[k] = float(v) if k == 'name': count[0] += 1 if count[0] % 100 == 0: print "Formatting city {0}".format(count[0]) d = DataUtility() print 'Reading the XML file...' xml_content = d.read_xml_file('cities.xml') reformat_dictionary(xml_content) xml_content = xml_content['root'] cities = {'meta': {'dataType': 'cities', 'temporal': False}, 'root': xml_content} print 'Writing the JSON file...' d.write_json_file(cities, 'cities.json', True)
a = sin(dlat/2)**2 + cos(latitude_1) * cos(latitude_2) * sin(dlon/2)**2 c = 2 * asin(sqrt(a)) km = 6367 * c return km def split_list(list, size): """ Split a list into around equally sized pieces """ newseq = [] splitsize = 1.0/size*len(list) for i in range(size): newseq.append(list[int(round(i*splitsize)):int(round((i+1)*splitsize))]) return newseq d = DataUtility() print 'Reading the csv file...' csv_content = d.read_csv_file('Flightroutes.csv') print 'Read {0} lines of data.'.format(len(csv_content)) all_flights = [] for index, item in enumerate(csv_content): all_flights.append( { 'startPosition': { 'longitude': float(item['srcLongitude']), 'latitude': float(item['srcLatitude']), 'airportCode': item['srcAirport']
from DataUtility import DataUtility

# Example usage of DataUtility: read CSV, XML and JSON files and write one of
# the results back out as JSON. The single-argument, parenthesised print form
# is used so the script produces the same output on Python 2 and Python 3.
d = DataUtility()

# Read an example CSV file
csv_file = d.read_csv_file('example-data/test.csv')
print(csv_file)

# Access a specific row and key
print(csv_file[0]['author'])

# Read an example XML file
xml_file = d.read_xml_file('example-data/test.xml')
print(xml_file)

# Access a specific entry from the XML tree
print(xml_file['cities']['stadt']['stadt'][1]['name'])

# Read an example JSON file
json_file = d.read_json_file('example-data/test.json')
print(json_file)

# Access a specific entry from the JSON tree
print(json_file['globe']['radius'])

# Write one of the dictionaries to a new JSON file
d.write_json_file(csv_file, 'example-data/output.json')