class GoogleConn:
    """Authenticated connection to the request-form Google spreadsheet.

    Logging in resolves the spreadsheet named `form`, then reads its header
    row to rebuild the module-level FORM_COLS tuple of normalized column keys.
    """

    def __init__(self, usr, pwd, form):
        # NOTE: rebinds the module-level FORM_COLS from the sheet's header row.
        global FORM_COLS
        self.gc = SpreadsheetsService()
        self.gc.ClientLogin(username=usr, password=pwd, source=APP_NAME)
        query = DocumentQuery()
        query.title = form
        feed = self.gc.GetSpreadsheetsFeed(query=query)
        if not feed.entry:
            # was `raise Exception, "..."` -- Python-2-only statement syntax;
            # the callable form is valid in both Python 2 and 3
            raise Exception("empty spreadsheet list")
        # Spreadsheet key is the last path segment of the entry id URL.
        self.formid = feed.entry[0].id.text.rsplit('/', 1)[1]
        # self.update = feed.entry[0].updated.text
        # Fetch the header row: range "A1:<last column letter>1".
        query = CellQuery()
        query.range = '%c1:%c1' % tuple(i + ord('A') for i in (0, len(FORM_KEYS) - 1))
        feed = self.gc.GetCellsFeed(self.formid, query=query)
        # Normalize each header the same way the list feed does: lowercase,
        # keeping only alphanumerics plus '-' and '_'.
        FORM_COLS = tuple(''.join(c for c in cell.content.text.lower()
                                  if c.isalnum() or c in '-_')
                          for cell in feed.entry)
        self.statcol = FORM_COLS[FORM_KEYS.index('status')]

    def get_requests(self):
        """Return Request objects for every row whose status column is empty."""
        feed = self.gc.GetListFeed(self.formid)
        return [Request(row) for row in feed.entry
                if not row.custom[self.statcol].text]

    def update_request(self, request):
        """Push a request's data back to its spreadsheet row; keep the fresh entry."""
        request.entry = self.gc.UpdateRow(request.entry, request.data())
def get_cos_contributors():
    """Return the contributor names from the public COS Google spreadsheet.

    Reads worksheet '1' of the module-level DOC_KEY spreadsheet and collects
    the column identified by COL_ROW_NAME from every row of its list feed.
    """
    service = SpreadsheetsService()
    worksheets = service.GetWorksheetsFeed(DOC_KEY, visibility='public', wksht_id='1')
    # Worksheet key is the final path segment of the feed id URL.
    worksheet_key = worksheets.id.text.rsplit('/', 1)[1]
    rows = service.GetListFeed(DOC_KEY, worksheet_key, visibility='public').entry
    print('There are {} rows on the COS spreadsheet.'.format(len(rows)))
    return [row.custom[COL_ROW_NAME].text for row in rows]
class SheetQuerier:
    """Look up forced-subtitle types for movie titles from a public Google
    spreadsheet, caching the sheet contents locally with pickle."""

    # Public Google spreadsheet holding the data.
    key = "0AkGO8UqErL6idDhYYjg1ZXlORnRaM3ZhTks4Z3FrYlE"
    # Sheets to read, mapped to the list-feed key of their title column.
    worksheets = {"DVD": "movietitle", "Blu-ray": "movietitle"}
    cache = "spreadsheet_cache.p"
    cache_timeout = 60 * 60 * 24 * 7  # one week, in seconds
    # Leading articles that the sheet stores as trailing ", The" / ", A".
    prefixes = ["The", "A"]
    whitespace_pattern = re.compile('[^a-zA-Z0-9 ]+', re.UNICODE)

    def __init__(self, force_reload=False):
        """Load cached data, refreshing from Google when the cache is missing,
        unreadable, older than cache_timeout, or force_reload is set."""
        try:
            # `with` closes the cache file (the original leaked the handle).
            with open(self.cache, "rb") as fh:
                pickle_date, self.data = pickle.load(fh)
            if DEBUG:
                print("Loaded cache successfully")
            if force_reload or time.time() - pickle_date > self.cache_timeout:
                if DEBUG:
                    if force_reload:
                        print("Cache update forced.")
                    else:
                        print("Cache too old, reloading.")
                self.reload_data()
                with open(self.cache, "wb") as fh:
                    pickle.dump((time.time(), self.data), fh)
        except Exception:
            # Was a bare `except:`; Exception still covers unpickling/IO
            # failures without swallowing SystemExit/KeyboardInterrupt.
            if DEBUG:
                print("Problem loading cache, reloading.")
            self.reload_data()
            with open(self.cache, "wb") as fh:
                pickle.dump((time.time(), self.data), fh)

    def reload_data(self):
        """Fetch every configured worksheet into self.data."""
        self.client = SpreadsheetsService()
        feed = self.client.GetWorksheetsFeed(self.key, visibility='public',
                                             projection='basic')
        self.data = {}
        for entry in feed.entry:
            if entry.title.text in self.worksheets.keys():
                bad_rows, total_rows = self.process_sheet(
                    entry.id.text.split("/")[-1],
                    self.worksheets[entry.title.text])
                print("Skipped %d / %d rows in sheet \"%s\"" % (
                    bad_rows, total_rows, entry.title.text))
            elif DEBUG:
                print("Skipped sheet \"%s\"" % entry.title.text)

    def process_sheet(self, sheet_key, movie_row_key,
                      type_row_key="forcedsubtitletype"):
        """Read one worksheet, mapping cleaned titles to subtitle types.

        Returns (rows skipped, total rows).
        """
        if DEBUG:
            print("Document: %s" % self.key)
            print("Sheet: %s" % sheet_key)
        rows = self.client.GetListFeed(self.key, sheet_key, visibility='public',
                                       projection='values').entry
        bad_rows = 0
        for row in rows:
            try:
                title = row.custom[movie_row_key].text.strip()
                self.data[SheetQuerier.clean_title(title)] = \
                    row.custom[type_row_key].text.strip()
            except (AttributeError, KeyError):
                # Missing column (KeyError) or empty cell whose .text is None
                # (AttributeError on .strip()) -- skip the row and count it.
                bad_rows += 1
        return bad_rows, len(rows)

    def query_exact(self, title):
        """Return the subtitle type for an exact (cleaned) title, or False."""
        query = SheetQuerier.clean_title(title)
        if query in self.data:
            return self.data[query]
        else:
            return False

    @staticmethod
    def clean_title(title):
        """Normalize a title: move a trailing ', The'/', A' article to the
        front, strip non-alphanumerics, and lower-case the result."""
        # Move prefixes
        for prefix in SheetQuerier.prefixes:
            if title.endswith(prefix):
                # Drop the trailing prefix plus the ", " separator before it.
                title = "%s %s" % (prefix, title[:-1 * len(prefix) - 2])
                break
        # Strip all non alpha-numeric characters
        title = SheetQuerier.whitespace_pattern.sub('', title)
        # Return the lowercase version
        return title.lower()
class GDataClient(object):
    '''
    Thin wrapper around the gdata SpreadsheetsService that tracks one
    "current" spreadsheet document and one current worksheet within it.
    '''

    gd_client = None      # SpreadsheetsService instance after login
    gd_cur_ss_key = None  # key of the current spreadsheet document
    gd_cur_ws_id = None   # id of the current worksheet

    def __init__(self, uname, pswd, document_name=None, worksheet_name=None):
        '''
        Log in to Google; optionally select a document and worksheet by name.
        '''
        # Connect to Google
        self.gd_client = SpreadsheetsService(email=uname, password=pswd,
                                             source=GDATA_SOURCE)
        self.gd_client.ProgrammaticLogin()
        if document_name is not None:
            self.set_document(document_name)
        if worksheet_name is not None:
            self.set_worksheet(worksheet_name)

    def __ss_check(self):
        '''
        Make sure spreadsheet has been set before we try to do anything with
        worksheets.
        '''
        if self.gd_cur_ss_key is None:
            raise GDError('Must set spreadsheet before accessing worksheets!')

    def __header_to_key(self, hdr_string):
        '''
        Google sheets column headers are used as keys to row info
        dictionaries, but first most non alphanumeric characters are removed
        and the letters are lower-cased.
        '''
        # Raw string for the regex (same pattern as before).
        return ''.join(re.findall(r'[a-z\-0-9\.]+', hdr_string.lower()))

    def set_document(self, docname):
        '''
        Set current spreadsheet document given a title.
        '''
        q = DocumentQuery(params={'title': docname, 'title-exact': 'true'})
        ss_feed = self.gd_client.GetSpreadsheetsFeed(query=q)
        if len(ss_feed.entry) != 1:
            raise GDError('{} spreadsheets match the given name "{}" (expected exactly one)!'.format(len(ss_feed.entry), docname))
        # Document key is the last path segment of the entry id URL.
        self.gd_cur_ss_key = ss_feed.entry[0].id.text.rsplit('/', 1)[1]

    def list_documents(self):
        '''
        List all spreadsheet documents available.
        '''
        feed = self.gd_client.GetSpreadsheetsFeed(DocumentQuery())
        return [en.title.text for en in feed.entry]

    def list_worksheets(self):
        '''
        List all worksheets in the current spreadsheet document.
        '''
        self.__ss_check()
        ws_feed = self.gd_client.GetWorksheetsFeed(self.gd_cur_ss_key)
        return [en.title.text for en in ws_feed.entry]

    def set_worksheet(self, sheetname):
        '''
        Set current worksheet within the current spreadsheet document.
        '''
        self.__ss_check()
        q = DocumentQuery(params={'title': sheetname, 'title-exact': 'true'})
        ws_feed = self.gd_client.GetWorksheetsFeed(self.gd_cur_ss_key, query=q)
        if len(ws_feed.entry) != 1:
            raise GDError('{} worksheets match the given name "{}" (expected exactly one)!'.format(len(ws_feed.entry), sheetname))
        self.gd_cur_ws_id = ws_feed.entry[0].id.text.rsplit('/', 1)[1]

    def add_worksheet(self, title, rows, cols, overwrite=False):
        '''
        Add a worksheet to current spreadsheet document (if it does not
        exist). Switch current sheet to the (new) guy. Returns True if a new
        sheet was created, False when reusing an existing one.
        '''
        self.__ss_check()
        # First, check to see if the worksheet already exists
        q = DocumentQuery(params={'title': title, 'title-exact': 'true'})
        ws_feed = self.gd_client.GetWorksheetsFeed(self.gd_cur_ss_key, query=q)
        ws_found = None
        if len(ws_feed.entry) > 0:
            if overwrite:
                if not self.gd_client.DeleteWorksheet(ws_feed.entry[0]):
                    raise GDError('Failed to delete existing worksheet named {} to overwrite!'.format(title))
            else:
                ws_found = ws_feed.entry[0]
        is_new = ws_found is None
        if is_new:
            ws_found = self.gd_client.AddWorksheet(title, rows, cols, self.gd_cur_ss_key)
        self.gd_cur_ws_id = ws_found.id.text.rsplit('/', 1)[1]
        return is_new

    def set_headers(self, header_names):
        '''
        Set the current worksheet headers given a list.
        NOTE: header list length must match columns in sheet!
        '''
        self.__ss_check()
        ws = self.gd_client.GetWorksheetsFeed(self.gd_cur_ss_key, self.gd_cur_ws_id)
        if int(ws.col_count.text) != len(header_names):
            raise GDError('Number of headers ({}) does not match columns in spreadsheet ({})!'
                          .format(len(header_names), int(ws.col_count.text)))
        # Always ask for empty cells so entry order maps 1:1 onto columns.
        # (The old code requested 'return-empty' only when the row was fully
        # empty; a partially filled header row then misaligned entry[idx]
        # against column idx+1.)
        query = CellQuery(params={'min-row': '1', 'max-row': '1',
                                  'min-col': '1',
                                  'max-col': str(len(header_names)),
                                  'return-empty': 'true'})
        cells = self.gd_client.GetCellsFeed(self.gd_cur_ss_key,
                                            self.gd_cur_ws_id, query=query)
        batch_request = SpreadsheetsCellsFeed()
        for idx, val in enumerate(header_names):
            cells.entry[idx].cell.inputValue = val
            batch_request.AddUpdate(cells.entry[idx])
        self.gd_client.ExecuteBatch(batch_request, cells.GetBatchLink().href)

    def delete_rows(self, row_list):
        '''
        Given a list of row numbers, delete the corresponding rows from the
        current sheet.
        @param row_list List of integers in 0:num_rows
        '''
        if len(row_list) == 0:
            return
        self.__ss_check()
        list_feed = self.gd_client.GetListFeed(self.gd_cur_ss_key, self.gd_cur_ws_id)
        # Delete bottom-up so earlier deletions don't shift pending indices.
        ordered_list = sorted(row_list, reverse=True)
        # Rows are 0-indexed, so index == total is already out of range
        # (the old `>` check let it through to an IndexError below).
        if ordered_list[0] >= int(list_feed.total_results.text):
            raise GDError('Tried to delete row {} but highest row number in sheet is {}!'.format(ordered_list[0], list_feed.total_results.text))
        for row in ordered_list:
            if not self.gd_client.DeleteRow(list_feed.entry[row]):
                raise GDError('Failed to delete row {} (partway through list: {})'.format(row, ordered_list))

    def insert_rows(self, info_dict_list):
        '''
        Add a group of rows based on list of dictionaries.
        @param info_dict_list Dictionary with keys for any headers with
        non-blank info in the new row (all missing keys will be have blank
        data).
        '''
        self.__ss_check()
        # Check and make sure that none of the input dictionaries contains
        # keys not in the first row of our list.
        hdr_list = [self.__header_to_key(hdr) for hdr in self.row_as_list(1)]
        bad_row = next((dd for dd in info_dict_list
                        if len(set(dd.keys()) - set(hdr_list)) > 0), None)
        if bad_row is not None:
            raise GDError('Failed to insert row {} because it contains keys not in spreadsheet headers ({})!'
                          .format(bad_row, hdr_list))
        # (Removed an unused GetListFeed call that fetched the whole sheet
        # for no reason before inserting.)
        for inf in info_dict_list:
            self.gd_client.InsertRow(inf, self.gd_cur_ss_key, self.gd_cur_ws_id)

    def column_as_list(self, column, with_header=False):
        '''
        Read just a single column into a list of strings. Ignore the first
        row by default because it's the column header.
        '''
        self.__ss_check()
        if with_header:
            minrow = 1
        else:
            minrow = 2
        ws = self.gd_client.GetWorksheetsFeed(self.gd_cur_ss_key, self.gd_cur_ws_id)
        if int(ws.row_count.text) < minrow:
            return []
        q = CellQuery(params={'min-row': str(minrow),
                              'min-col': str(column),
                              'max-col': str(column)})
        cells = self.gd_client.GetCellsFeed(self.gd_cur_ss_key,
                                            self.gd_cur_ws_id, query=q)
        return [cellentry.cell.text for cellentry in cells.entry]

    def row_as_list(self, row):
        '''
        Read just a single row into a list of strings.
        NOTE: this is indexed by cell, so row 1 is the header row!
        '''
        self.__ss_check()
        mincol = 1
        ws = self.gd_client.GetWorksheetsFeed(self.gd_cur_ss_key, self.gd_cur_ws_id)
        if int(ws.col_count.text) < mincol:
            return []
        q = CellQuery(params={'min-row': str(row),
                              'min-col': str(mincol),
                              'max-row': str(row)})
        cells = self.gd_client.GetCellsFeed(self.gd_cur_ss_key,
                                            self.gd_cur_ws_id, query=q)
        return [cellentry.cell.text for cellentry in cells.entry]

    def read_to_list(self, num_lines=None):
        '''
        Read the sheet into a list of dictionaries (one per row, keyed by
        normalized header, plus 'rowname' holding the row title).
        '''
        self.__ss_check()
        if num_lines is not None:
            # Use ListQuery for a list feed, consistent with read_to_dict
            # (the old code built a DocumentQuery here).
            q = ListQuery(params={'max-results': '%d' % num_lines})
            list_feed = self.gd_client.GetListFeed(self.gd_cur_ss_key,
                                                   self.gd_cur_ws_id, query=q)
        else:
            list_feed = self.gd_client.GetListFeed(self.gd_cur_ss_key,
                                                   self.gd_cur_ws_id)
        string_list = []
        for entry in list_feed.entry:
            row = {key: entry.custom[key].text for key in entry.custom}
            row['rowname'] = entry.title.text
            string_list.append(row)
        return string_list

    def read_to_dict(self, key_column_name, row_start=None, row_num=None):
        '''
        Read the sheet into a dictionary with keys given by the named column.
        Returns (dict, errors) where errors is a message listing any key
        values that appeared on multiple rows, or None.
        '''
        self.__ss_check()
        # Use this for testing to limit number of results handled:
        params = {}
        if row_start is not None:
            params['start-index'] = '%d' % row_start
        if row_num is not None:
            params['max-results'] = '%d' % row_num
        if len(params) != 0:
            q = ListQuery(params=params)
            list_feed = self.gd_client.GetListFeed(self.gd_cur_ss_key,
                                                   self.gd_cur_ws_id, query=q)
        else:
            list_feed = self.gd_client.GetListFeed(self.gd_cur_ss_key,
                                                   self.gd_cur_ws_id)
        multi_rows = []
        string_dict = {}
        for entry in list_feed.entry:
            # If we get the title column (a stray duplicated header row),
            # ignore it.
            if entry.custom[key_column_name].text.replace(' ', '').lower() == key_column_name:
                continue
            # BUG FIX: the original compared `key is not key_column_name`
            # (object identity); string keys must use inequality.
            row = {key: entry.custom[key].text for key in entry.custom
                   if key != key_column_name}
            row['rowname'] = entry.title.text
            key_name = entry.custom[key_column_name].text
            if key_name in string_dict:
                multi_rows.append(key_name)
            string_dict[key_name] = row
        if len(multi_rows) > 0:
            errors = 'read_to_dict -- Column {} contains multiple rows with each of the following values: {}'.format(key_column_name, multi_rows)
            # raise GDError('read_to_dict -- Column {} contains multiple rows with each of the following values: {}'.format(key_column_name,multi_rows))
        else:
            errors = None
        return string_dict, errors