    def squeeze(self):
        if (len(self.spongeDatasourcePlugins) > 0):  # Check for existence of data source plugins
            fooPlugin = 0
            #
            # Here we define the concept of a publisher. A publisher is an abstraction for any
            # site, service, or component that handles the publication of data in a way that it
            # can be accessed, consumed, and understood by interested and authorized parties.
            # Our publisher model is only conceptual at the moment, so we define the concepts
            # now without prescribing how data is accessed, consumed, or understood. Each of
            # these action verbs is discussed below (see the hedged sketch after this method).
            #
            # Publishers make data accessible by exposing it over a network, typically on a
            # web page or via a programmatic web service interface.
            #
            # Publishers allow data to be consumed by making it possible for interested parties
            # to export the data as it has been published. In other words, if I publish a data
            # series, my publisher should not only let you view the data, but also let you pull
            # or export the data as I have published it, and use it for other purposes.
            #
            # Publishers present data in such a way that it can be understood. The best way to
            # do this is to give the data a graphical representation: a bar graph, line graph,
            # or some other form of graph that best represents it. For analytics data, we
            # probably care most about data over time. If graphics are not practical, present
            # summary data that has been analyzed, with highlights.
            #
            # Process data sources
            # 0. Set up HTTP Basic Authentication for the timetric web service.
            # 1. Loop through the existing data sources and find all of the .csv backing stores.
            # 2. Get the datasource metadata.
            # 3. Push the existing data series stored in the default backing store (currently
            #    .csv files) into a publisher.
            # 4. Email me when the complete set of data series has been uploaded.
            #    XXX: TODO: make this notification pluggable.
            #
            wsCaptain = wscaptain.WSCaptain()
            publisherURL = self.spongeProjectEnv['publisher.service.timetric.update.url']
            apitokenKey = self.spongeProjectEnv['publisher.service.timetric.apitoken.key']
            apitokenSecret = self.spongeProjectEnv['publisher.service.timetric.apitoken.secret']
            seriesDict = eval(self.spongeProjectEnv['publisher.service.timetric.series'])
            # XXX: Verify that authentication actually worked; skip publishing and report an
            # error if it did not.
            anOpener = wsCaptain.createHTTPBasicAuthenticationOpenerContext(apitokenKey, apitokenSecret, publisherURL)
            for datasourceKey in self.spongeDatasourcePlugins.keys():
                datasource = self.spongeDatasourcePlugins[datasourceKey]
                fooPlugin = new.instance(datasource)
                fooPlugin.__init__(self.spongeProjectEnv)
                # We only want the datasource metadata, not to soak data from the datasources.
                ds_col_labels = fooPlugin.get_datasource_metadata()
                dbname = datasourceKey
                os.chdir(self.spongeProjectEnv['project.db.dir'])
                for col in ds_col_labels.keys():  # XXX What is the best way to iterate over the values?
                    label = ds_col_labels[col][0]
                    dbcsv = dbname + "." + label + ".csv"
                    if (dbexists(dbcsv) is True):
                        db = dbopen(dbcsv, flag='c', format='csv')
                        if (db is not None):
                            seriesID = seriesDict[datasourceKey][col - 1]
                            seriesURL = publisherURL + "/" + seriesID + "/"
                            print "Publishing " + dbcsv + " to URL = " + seriesURL
                            headers = {'Content-Type': 'text/csv'}
                            data = ""
                            for key in db:
                                data = data + key + "," + db[key] + "\r\n"
                            request = wsCaptain.createRequest(seriesURL, data, headers)
                            try:
                                response = urllib2.urlopen(request)
                            except urllib2.HTTPError, e:
                                print e
                            page = wsCaptain.openPage(seriesURL)
                            db.close()  # Only close the handle we actually opened
                        else:
                            print "Couldn't open DB name = " + dbcsv
                    else:
                        # Do this if you can't find the actual .csv source file
                        print "Skipping publish of " + dbcsv
            os.chdir(self.baseDir)  # Do this to get back to our original working directory
    def soak(self):  # XXX What should this return?
        if (len(self.spongeDatasourcePlugins) > 0):  # Check for existence of data source plugins
            fooPlugin = 0
            #
            # Use the default behavior: loop through and process all of the data source
            # plugins, and only fail with an exit if there are no plugins available. Even if
            # plugins don't work, they should return error info to stderr/stdout, and no
            # results should be committed to the backing store if any plugin fails.
            # XXX: TODO Need to see if I actually honor this.
            #
            # Example plugin init: plugin = GithubDatasourcePlugin()
            #                      self.spongeDatasourcePlugins['GithubDatasourcePlugin'] = plugin
            #
            # Process data sources
            # 1. Get the data source instance.
            # 2. Get the results.
            # 3. Get the metadata.
            # 4. Persist the results into various formats.
            for datasourceKey in self.spongeDatasourcePlugins.keys():
                datasource = self.spongeDatasourcePlugins[datasourceKey]
                fooPlugin = new.instance(datasource)
                fooPlugin.__init__(self.spongeProjectEnv)
                metadata = fooPlugin.get_plugin_metadata()
                ds_col_labels = fooPlugin.get_datasource_metadata()
                print metadata
                rowResults = fooPlugin.fetch_data(self.spongeDatasourceEnv)
                dbname = datasourceKey
                os.chdir(self.spongeProjectEnv['project.db.dir'])
                #
                # XXX: TODO These persistence approaches should be handled by configurable
                # plugins (see the sketch after this method). For now, inline each approach.
                #
                # Persist Method 1
                # Persist to a .csv with results in row records, human readable.
                # This yields one .csv per plugin and is better for crunching data on a
                # single sheet.
                isNewDB = False
                if (dbexists(dbname + ".csv") is not True):
                    isNewDB = True
                fdb = open(dbname + ".csv", 'ab')
                rowdata = None
                if isNewDB:
                    rowdata = "Date"
                    for label in ds_col_labels.values():
                        rowdata = rowdata + ",%s" % (label)
                    fdb.write(rowdata + "\n")
                rowdata = datetime.datetime.now().ctime()  # This needs to be ISO
                print "time now is %s" % (rowdata)
                for data in rowResults.values():
                    rowdata = rowdata + ",%s" % (data)
                fdb.write(rowdata + "\n")
                fdb.close()
                #
                # Persist Method 2
                # For each series, put the data into a separate .csv file in the format:
                #   ISO datetime, row data
                # This yields one .csv per plugin-column combination. Naming follows the
                # convention <dbname>.<key>.csv.
                for key, value in rowResults.items():
                    db = dbopen(dbname + "." + key + ".csv", flag='c', format='csv')
                    if (db is not None):
                        # XXX May want to change how this is mapped so that each key/value pair is comma-separated
                        db[datetime.datetime.isoformat(datetime.datetime.now())] = value  # Warning: time is in ISO, need to convert when displaying
                        db.close()
                    else:
                        print "Couldn't create or open DB name = " + dbname + "." + key + ".csv"
                print rowResults  # XXX Debug
                print fooPlugin  # XXX Debug
            os.chdir(self.baseDir)  # Do this to get back to our original working directory
        else:
            print "Couldn't load any plugins for datasources, exiting"
            sys.exit(1)
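    # A hedged sketch of the configurable persistence plugins that the XXX: TODO in
    # soak() calls for. The PersistencePlugin interface and RowCSVPersistence are
    # assumed names, not existing sponge code; this just shows how Persist Method 1
    # could be lifted out of soak() behind a common persist() call, with the ISO
    # timestamp fix noted in the inline comment applied.
    #
    # import datetime, os
    #
    # class PersistencePlugin:
    #     def persist(self, dbname, ds_col_labels, rowResults):
    #         raise NotImplementedError
    #
    # class RowCSVPersistence(PersistencePlugin):
    #     """Persist Method 1 as a plugin: one human-readable .csv per datasource plugin."""
    #     def persist(self, dbname, ds_col_labels, rowResults):
    #         path = dbname + ".csv"
    #         isNewDB = not os.path.exists(path)
    #         fdb = open(path, 'ab')
    #         if isNewDB:
    #             # Write the header row once, when the file is first created.
    #             header = "Date"
    #             for label in ds_col_labels.values():
    #                 header = header + ",%s" % (label)
    #             fdb.write(header + "\n")
    #         # ISO 8601 timestamp instead of ctime(), per the comment in soak().
    #         rowdata = datetime.datetime.now().isoformat()
    #         for data in rowResults.values():
    #             rowdata = rowdata + ",%s" % (data)
    #         fdb.write(rowdata + "\n")
    #         fdb.close()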