def execute(self, config):
    """Imports Google Sheets data into the pod.

    Either writes the configured gids to a single file (``config.path``)
    or writes one YAML file per sheet tab into ``config.collection``.
    """
    spreadsheet_id = config.id
    gids = config.gids or []
    if config.gid is not None:
        gids.append(config.gid)
    # Default to the first sheet (gid 0) when doing a single-file import.
    if not gids and not config.collection:
        gids.append(0)
    format_as = config.format
    # Collection imports require a map/grid format; fall back to 'map'.
    if (config.collection
            and format_as not in GoogleSheetsPreprocessor.MAP_TYPES
            and format_as not in GoogleSheetsPreprocessor.GRID_TYPES):
        format_as = 'map'
    gid_to_sheet, gid_to_data = GoogleSheetsPreprocessor.download(
        spreadsheet_id=spreadsheet_id, gids=gids, format_as=format_as,
        logger=self.pod.logger, generate_ids=config.generate_ids,
        header_row_count=config.header_row_count,
        header_row_index=config.header_row_index)
    if config.path:
        # Single sheet import.
        path, key_to_update = self.parse_path(config.path)
        for gid in gids:
            # Preserve existing data if necessary.
            gid_to_data[gid] = self._maybe_preserve_content(
                new_data=gid_to_data[gid], path=path,
                key_to_update=key_to_update)
            content = GoogleSheetsPreprocessor.serialize_content(
                formatted_data=gid_to_data[gid], path=path,
                output_style=self.config.output_style)
            self.pod.write_file(path, content)
            self.logger.info(
                'Downloaded {} ({}) -> {}'.format(
                    gid_to_sheet[gid]['title'], gid, path))
    else:
        # Multi sheet import based on collection.
        collection_path = config.collection
        if not gids:
            gids = gid_to_sheet.keys()
        for gid in gids:
            # Tabs whose title starts with IGNORE_INITIAL are skipped.
            if gid_to_sheet[gid]['title'].strip().startswith(IGNORE_INITIAL):
                continue
            file_name = '{}.yaml'.format(
                utils.slugify(gid_to_sheet[gid]['title']))
            output_path = os.path.join(collection_path, file_name)
            gid_to_data[gid] = self._maybe_preserve_content(
                new_data=gid_to_data[gid], path=output_path,
                key_to_update=None)
            # Use plain text dumper to preserve yaml constructors.
            output_content = utils.dump_plain_yaml(gid_to_data[gid])
            self.pod.write_file(output_path, output_content)
            self.logger.info(
                'Downloaded {} ({}) -> {}'.format(
                    gid_to_sheet[gid]['title'], gid, output_path))
def get_storage(key, username):
    """Returns the Storage class compatible with the current environment.

    Credentials live at ``~/.config/grow/<slugified key>_<username>``.
    """
    key = utils.slugify(key)
    file_name = os.path.expanduser('~/.config/grow/{}_{}'.format(
        key, username))
    dir_name = os.path.dirname(file_name)
    if not os.path.exists(dir_name):
        # Tolerate another process creating the directory between the
        # existence check and makedirs (TOCTOU race); re-raise real errors.
        try:
            os.makedirs(dir_name)
        except OSError:
            if not os.path.isdir(dir_name):
                raise
    return oauth_file.Storage(file_name)
def execute(self, config):
    """Imports Google Sheets data into the pod.

    Either writes the configured gids to a single file (``config.path``),
    optionally merging with existing YAML data, or writes one YAML file
    per sheet tab into ``config.collection``.
    """
    spreadsheet_id = config.id
    gids = config.gids or []
    if config.gid is not None:
        gids.append(config.gid)
    # Default to the first sheet (gid 0) when doing a single-file import.
    if not gids and not config.collection:
        gids.append(0)
    format_as = config.format
    # Collection imports require a map/grid format; fall back to 'map'.
    if (config.collection
            and format_as not in GoogleSheetsPreprocessor.MAP_TYPES
            and format_as not in GoogleSheetsPreprocessor.GRID_TYPES):
        format_as = 'map'
    gid_to_sheet, gid_to_data = GoogleSheetsPreprocessor.download(
        spreadsheet_id=spreadsheet_id, gids=gids, format_as=format_as,
        logger=self.pod.logger, generate_ids=config.generate_ids)
    if config.path:
        # Single sheet import.
        path, key_to_update = self.parse_path(config.path)
        for gid in gids:
            # Preserve existing yaml data.
            if (path.endswith(('.yaml', '.yml')) and self.config.preserve
                    and self.pod.file_exists(path)):
                existing_data = self.pod.read_yaml(path)
                # Skip trying to update lists, because there would be no
                # expectation of merging old and new list data.
                if isinstance(existing_data, dict):
                    gid_to_data[gid] = utils.format_existing_data(
                        old_data=existing_data,
                        new_data=gid_to_data[gid],
                        preserve=self.config.preserve,
                        key_to_update=key_to_update)
            content = GoogleSheetsPreprocessor.serialize_content(
                formatted_data=gid_to_data[gid], path=path,
                output_style=self.config.output_style)
            self.pod.write_file(path, content)
            self.logger.info(
                'Downloaded {} ({}) -> {}'.format(
                    gid_to_sheet[gid]['title'], gid, path))
    else:
        # Multi sheet import.
        collection_path = config.collection
        if not gids:
            gids = gid_to_sheet.keys()
        for gid in gids:
            # Tabs whose title starts with IGNORE_INITIAL are skipped.
            if gid_to_sheet[gid]['title'].strip().startswith(IGNORE_INITIAL):
                continue
            file_name = '{}.yaml'.format(
                utils.slugify(gid_to_sheet[gid]['title']))
            output_path = os.path.join(collection_path, file_name)
            self.pod.write_yaml(output_path, gid_to_data[gid])
            self.logger.info(
                'Downloaded {} ({}) -> {}'.format(
                    gid_to_sheet[gid]['title'], gid, output_path))
def slug(self):
    """Returns the slug for this document.

    An explicit ``$slug`` field wins; otherwise the slug is derived from
    the title (legacy or modern slugify depending on the pod feature
    flag), or ``None`` when there is no title.
    """
    override = self.fields.get('$slug')
    if override:
        return override
    title = self.title
    if not title:
        return None
    use_legacy = self.pod.is_enabled(self.pod.FEATURE_OLD_SLUGIFY)
    return utils.slugify(title) if use_legacy else slugify.slugify(title)
def get_storage(key, username):
    """Returns the Storage class compatible with the current environment.

    On App Engine, credentials are stored in the datastore; elsewhere
    they live at ``~/.config/grow/<slugified key>_<username>``.
    """
    if appengine and utils.is_appengine():
        return appengine.StorageByKeyName(
            appengine.CredentialsModel, username, 'credentials')
    key = utils.slugify(key)
    file_name = os.path.expanduser('~/.config/grow/{}_{}'.format(key, username))
    dir_name = os.path.dirname(file_name)
    if not os.path.exists(dir_name):
        # Tolerate another process creating the directory between the
        # existence check and makedirs (TOCTOU race); re-raise real errors.
        try:
            os.makedirs(dir_name)
        except OSError:
            if not os.path.isdir(dir_name):
                raise
    return oauth_file.Storage(file_name)
def get_storage(key, username):
    """Returns the Storage class compatible with the current environment.

    On App Engine, credentials are stored in the datastore; elsewhere
    they live at ``~/.config/grow/<slugified key>_<username>``.
    """
    if appengine and utils.is_appengine():
        return appengine.StorageByKeyName(appengine.CredentialsModel,
                                          username, 'credentials')
    key = utils.slugify(key)
    file_name = os.path.expanduser('~/.config/grow/{}_{}'.format(
        key, username))
    dir_name = os.path.dirname(file_name)
    if not os.path.exists(dir_name):
        # Tolerate another process creating the directory between the
        # existence check and makedirs (TOCTOU race); re-raise real errors.
        try:
            os.makedirs(dir_name)
        except OSError:
            if not os.path.isdir(dir_name):
                raise
    return oauth_file.Storage(file_name)
def _parse_articles_rss(root, options):
    """Yields Article objects parsed from an RSS ``<channel>`` tree.

    Args:
        root: ElementTree root element of the RSS document.
        options: Import options object providing ``get_aliases(tag)``.

    Yields:
        Article instances populated from each ``<item>`` element.
    """
    used_titles = set()
    for item in root.findall('./channel/item'):
        article = Article()
        for child in item:
            if child.tag == CONTENT_KEYS.title:
                article.title = child.text
            elif child.tag == CONTENT_KEYS.description:
                article.description = child.text
                article.content = child.text
            elif child.tag == CONTENT_KEYS.link:
                article.link = child.text
            elif child.tag == CONTENT_KEYS.published:
                raw_date = child.text
                article.published = parse(raw_date)
            elif child.tag == CONTENT_KEYS.content_encoded:
                article.content = child.text
            elif child.text:
                article.fields[child.tag] = child.text
            # Handle aliases, in addition to established defaults
            # Handled after defaults to allow for overrides
            for alias in options.get_aliases(child.tag):
                article.fields[alias] = child.text
        if article.title:
            slug = utils.slugify(article.title)
            if slug in used_titles:
                index = 1
                alt_slug = slug
                while alt_slug in used_titles:
                    alt_slug = '{}-{}'.format(slug, index)
                    index = index + 1
                slug = alt_slug
            # Record the slug so later items with the same title get a
            # "-N" suffix; previously the set was never populated and the
            # de-duplication could not trigger.
            used_titles.add(slug)
            article.slug = slug
        if article.content:
            soup_article_content = BS(article.content, "html.parser")
            soup_article_image = soup_article_content.find('img')
            if soup_article_image:
                article.image = soup_article_image['src']
        yield article
def _parse_articles_rss(root):
    """Yields Article objects parsed from an RSS ``<channel>`` tree.

    Args:
        root: ElementTree root element of the RSS document.

    Yields:
        Article instances populated from each ``<item>`` element.
    """
    used_titles = set()
    for item in root.findall('./channel/item'):
        article = Article()
        for child in item:
            # Empty elements have text None; calling .encode on None
            # raises AttributeError, so skip them entirely.
            if child.text is None:
                continue
            if child.tag == CONTENT_KEYS.title:
                article.title = child.text.encode('utf8')
            elif child.tag == CONTENT_KEYS.description:
                article.description = child.text.encode('utf8')
                article.content = child.text.encode('utf8')
            elif child.tag == CONTENT_KEYS.link:
                article.link = child.text.encode('utf8')
            elif child.tag == CONTENT_KEYS.published:
                raw_date = child.text.encode('utf8')
                article.published = parse(raw_date)
            elif child.tag == CONTENT_KEYS.content_encoded:
                article.content = child.text.encode('utf8')
            elif child.text:
                article.fields[child.tag] = child.text.encode('utf8')
        if article.title:
            slug = utils.slugify(article.title)
            if slug in used_titles:
                index = 1
                alt_slug = slug
                while alt_slug in used_titles:
                    alt_slug = '{}-{}'.format(slug, index)
                    index = index + 1
                slug = alt_slug
            # Record the slug so later items with the same title get a
            # "-N" suffix; previously the set was never populated and the
            # de-duplication could not trigger.
            used_titles.add(slug)
            article.slug = slug
        if article.content:
            soup_article_content = BS(article.content, "html.parser")
            soup_article_image = soup_article_content.find('img')
            if soup_article_image:
                article.image = soup_article_image['src']
        yield article
def execute(self, config):
    """Runs the Google Docs import described by ``config``.

    When ``config.folder`` is set, binds that Drive folder to a
    collection: downloads every Google Doc in it and deletes local files
    that no longer exist in Drive. Otherwise downloads the single
    document ``config.id`` to ``config.path``.
    """
    convert = config.convert is not False
    # Binds a Google Drive folder to a collection.
    if config.folder:
        service = BaseGooglePreprocessor.create_service()
        query = "'{}' in parents".format(config.folder)
        # pylint: disable=no-member
        resp = service.files().list(q=query).execute()
        docs_to_add = []
        existing_docs = self.pod.list_dir(config.collection)
        for item in resp['items']:
            doc_id = item['id']
            title = item['title']
            # Only native Google Docs are importable.
            if item['mimeType'] != 'application/vnd.google-apps.document':
                continue
            if title.startswith(IGNORE_INITIAL):
                self.pod.logger.info('Skipping -> {}'.format(title))
                continue
            if self.pod.is_enabled(self.pod.FEATURE_OLD_SLUGIFY):
                basename = '{}.md'.format(utils.slugify(title))
            else:
                basename = '{}.md'.format(slugify.slugify(title))
            docs_to_add.append(basename)
            path = os.path.join(config.collection, basename)
            self._execute_doc(path, doc_id, convert)
        # Clean up files that are no longer in Google Drive.
        for path in existing_docs:
            doc_path = path.lstrip(os.path.sep)
            if doc_path.startswith(IGNORE_INITIAL):
                continue
            if doc_path not in docs_to_add:
                path_to_delete = os.path.join(config.collection, doc_path)
                text = 'Deleting -> {}'.format(path_to_delete)
                self.pod.logger.info(text)
                self.pod.delete_file(path_to_delete)
        return
    # Downloads a single document. (Removed the unused local
    # `path = config.path`; config.path is passed directly.)
    doc_id = config.id
    self._execute_doc(config.path, doc_id, convert)
def execute(self, config):
    """Runs the Google Docs import described by ``config``.

    When ``config.folder`` is set, binds that Drive folder to a
    collection: downloads every Google Doc in it and deletes local files
    that no longer exist in Drive. Otherwise downloads the single
    document ``config.id`` to ``config.path``.
    """
    convert = config.convert is not False
    # Binds a Google Drive folder to a collection.
    if config.folder:
        service = BaseGooglePreprocessor.create_service()
        query = "'{}' in parents".format(config.folder)
        # pylint: disable=no-member
        resp = service.files().list(q=query).execute()
        docs_to_add = []
        existing_docs = self.pod.list_dir(config.collection)
        for item in resp['items']:
            doc_id = item['id']
            title = item['title']
            # Only native Google Docs are importable.
            if item['mimeType'] != 'application/vnd.google-apps.document':
                continue
            if title.startswith(IGNORE_INITIAL):
                self.pod.logger.info('Skipping -> {}'.format(title))
                continue
            basename = '{}.md'.format(utils.slugify(title))
            docs_to_add.append(basename)
            path = os.path.join(config.collection, basename)
            self._execute_doc(path, doc_id, convert)
        # Clean up files that are no longer in Google Drive.
        for path in existing_docs:
            doc_path = path.lstrip(os.path.sep)
            if doc_path.startswith(IGNORE_INITIAL):
                continue
            if doc_path not in docs_to_add:
                path_to_delete = os.path.join(config.collection, doc_path)
                text = 'Deleting -> {}'.format(path_to_delete)
                self.pod.logger.info(text)
                self.pod.delete_file(path_to_delete)
        return
    # Downloads a single document. (Removed the unused local
    # `path = config.path`; config.path is passed directly.)
    doc_id = config.id
    self._execute_doc(config.path, doc_id, convert)
def _slug_filter(value, delimiter=u'-'): if not value: return value if use_legacy_slugify: return utils.slugify(value, delimiter) return slugify.slugify(value, separator=delimiter)
def execute(self, config):
    """Imports Google Sheets data into the pod.

    Either writes the configured gids to a single file (``config.path``)
    or writes one YAML file per sheet tab into ``config.collection``.
    Tabs missing from the download (skipped tabs) are logged and skipped.
    """
    spreadsheet_id = config.id
    gids = config.gids or []
    if config.gid is not None:
        gids.append(config.gid)
    # Default to the first sheet (gid 0) when doing a single-file import.
    if not gids and not config.collection:
        gids.append(0)
    format_as = config.format
    keep_empty_values = config.keep_empty_values
    # Collection imports require a map/grid format; fall back to 'map'.
    if (config.collection
            and format_as not in GoogleSheetsPreprocessor.MAP_TYPES
            and format_as not in GoogleSheetsPreprocessor.GRID_TYPES):
        format_as = 'map'
    gid_to_sheet, gid_to_data = GoogleSheetsPreprocessor.download(
        spreadsheet_id=spreadsheet_id, gids=gids, format_as=format_as,
        logger=self.pod.logger, generate_ids=config.generate_ids,
        header_row_count=config.header_row_count,
        header_row_index=config.header_row_index,
        keep_empty_values=keep_empty_values)
    if config.path:
        # Single sheet import.
        path, key_to_update = self.parse_path(config.path)
        for gid in gids:
            if gid not in gid_to_data:
                # NOTE: this log message was split across lines by a
                # formatting error; reconstructed as a single literal.
                self.logger.info(
                    'Sheet not imported for gid {}. Skipped tab?'.format(
                        gid))
                continue
            gid_to_data[gid] = self._maybe_preserve_content(
                new_data=gid_to_data[gid], path=path,
                key_to_update=key_to_update,
                properties=gid_to_sheet[gid])
            content = GoogleSheetsPreprocessor.serialize_content(
                formatted_data=gid_to_data[gid], path=path,
                output_style=self.config.output_style)
            self.pod.write_file(path, content)
            self.logger.info('Downloaded {} ({}) -> {}'.format(
                gid_to_sheet[gid]['title'], gid, path))
    else:
        # Multi sheet import based on collection.
        collection_path = config.collection
        if not gids:
            gids = list(gid_to_sheet.keys())
        num_saved = 0
        for gid in gids:
            if gid not in gid_to_data:
                self.logger.info(
                    'Sheet not imported for gid {}. Skipped tab?'.format(
                        gid))
                continue
            title = gid_to_sheet[gid]['title']
            if title.strip().startswith(IGNORE_INITIAL):
                continue
            if self.pod.is_enabled(self.pod.FEATURE_OLD_SLUGIFY):
                slug = utils.slugify(title)
            else:
                slug = slugify.slugify(title)
            file_name = '{}.yaml'.format(slug)
            output_path = os.path.join(collection_path, file_name)
            gid_to_data[gid] = self._maybe_preserve_content(
                new_data=gid_to_data[gid], path=output_path,
                key_to_update=None, properties=gid_to_sheet[gid])
            # Use plain text dumper to preserve yaml constructors.
            output_content = utils.dump_plain_yaml(gid_to_data[gid])
            self.pod.write_file(output_path, output_content)
            if gid_to_data[gid].get(DRAFT_KEY):
                self.logger.info('Drafted tab -> {}'.format(title))
            num_saved += 1
        text = 'Saved {} tabs -> {}'
        self.logger.info(text.format(num_saved, collection_path))
def slug(self):
    """Returns the slug for this document.

    An explicit ``$slug`` field always wins (even when empty); otherwise
    the slug is derived from the title, or ``None`` with no title.
    """
    try:
        return self.fields['$slug']
    except KeyError:
        pass
    if self.title is None:
        return None
    return utils.slugify(self.title)
def slug(self):
    """Returns the slug for this document.

    An explicit ``$slug`` field always wins (even when empty); otherwise
    the slug is derived from the title, or ``None`` with no title.
    """
    fields = self.fields
    if '$slug' in fields:
        return fields['$slug']
    title = self.title
    return None if title is None else utils.slugify(title)
def slug(self):
    """Returns the slug for this document.

    A truthy ``$slug`` field wins; otherwise the slug is derived from
    the title, or ``None`` when there is no title.
    """
    override = self.fields.get('$slug')
    if override:
        return override
    title = self.title
    if title is None:
        return None
    return utils.slugify(title)
def slug_filter(value, delimiter=u'-'):
    """Filters string to remove url unfriendly characters.

    Args:
        value: String to slugify.
        delimiter: Separator placed between words; defaults to ``-``.

    Returns:
        The slugified string, as produced by ``utils.slugify``.
    """
    return utils.slugify(value, delimiter)
def slug(self):
    """Returns the slug for this document.

    An explicit ``$slug`` field always wins (even when empty); otherwise
    the slug is derived from the title, or ``None`` with no title.
    """
    has_override = "$slug" in self.fields
    if has_override:
        return self.fields["$slug"]
    if self.title is None:
        return None
    return utils.slugify(self.title)
def slug(self):
    """Returns the slug for this document.

    A truthy ``$slug`` field wins; otherwise the slug is derived from
    the title, or ``None`` when there is no title.
    """
    custom = self.fields.get('$slug')
    if custom:
        return custom
    return None if self.title is None else utils.slugify(self.title)
def execute(self, config):
    """Imports Google Sheets data into the pod.

    Either writes the configured gids to a single file (``config.path``)
    or writes one YAML file per sheet tab into ``config.collection``.
    Tabs missing from the download (skipped tabs) are logged and skipped.
    """
    spreadsheet_id = config.id
    gids = config.gids or []
    if config.gid is not None:
        gids.append(config.gid)
    # Default to the first sheet (gid 0) when doing a single-file import.
    if not gids and not config.collection:
        gids.append(0)
    format_as = config.format
    # Collection imports require a map/grid format; fall back to 'map'.
    if (config.collection
            and format_as not in GoogleSheetsPreprocessor.MAP_TYPES
            and format_as not in GoogleSheetsPreprocessor.GRID_TYPES):
        format_as = 'map'
    gid_to_sheet, gid_to_data = GoogleSheetsPreprocessor.download(
        spreadsheet_id=spreadsheet_id, gids=gids, format_as=format_as,
        logger=self.pod.logger, generate_ids=config.generate_ids,
        header_row_count=config.header_row_count,
        header_row_index=config.header_row_index)
    if config.path:
        # Single sheet import.
        path, key_to_update = self.parse_path(config.path)
        for gid in gids:
            if gid not in gid_to_data:
                self.logger.info(
                    'Sheet not imported for gid {}. Skipped tab?'.format(gid))
                continue
            gid_to_data[gid] = self._maybe_preserve_content(
                new_data=gid_to_data[gid], path=path,
                key_to_update=key_to_update,
                properties=gid_to_sheet[gid])
            content = GoogleSheetsPreprocessor.serialize_content(
                formatted_data=gid_to_data[gid], path=path,
                output_style=self.config.output_style)
            self.pod.write_file(path, content)
            self.logger.info(
                'Downloaded {} ({}) -> {}'.format(
                    gid_to_sheet[gid]['title'], gid, path))
    else:
        # Multi sheet import based on collection.
        collection_path = config.collection
        if not gids:
            gids = gid_to_sheet.keys()
        num_saved = 0
        for gid in gids:
            if gid not in gid_to_data:
                self.logger.info(
                    'Sheet not imported for gid {}. Skipped tab?'.format(gid))
                continue
            title = gid_to_sheet[gid]['title']
            # Tabs whose title starts with IGNORE_INITIAL are skipped.
            if title.strip().startswith(IGNORE_INITIAL):
                continue
            slug = utils.slugify(title)
            file_name = '{}.yaml'.format(slug)
            output_path = os.path.join(collection_path, file_name)
            gid_to_data[gid] = self._maybe_preserve_content(
                new_data=gid_to_data[gid], path=output_path,
                key_to_update=None, properties=gid_to_sheet[gid])
            # Use plain text dumper to preserve yaml constructors.
            output_content = utils.dump_plain_yaml(gid_to_data[gid])
            self.pod.write_file(output_path, output_content)
            if gid_to_data[gid].get(DRAFT_KEY):
                self.logger.info('Drafted tab -> {}'.format(title))
            num_saved += 1
        text = 'Saved {} tabs -> {}'
        self.logger.info(text.format(num_saved, collection_path))