def fetch_pypi_package_list():
    """Scrape the PyPI package index and record every package name found.

    Requests the PyPI index page, parses the first HTML table in the
    response, and calls ``PyPIPackage.get_or_create`` once for each table
    row that contains a link.  Progress is logged after every 10 packages
    and a summary is logged at the end.

    If the response contains no ``<table>`` element, an error is logged and
    the function returns without recording anything.
    """
    # NOTE(review): assumes make_request always hands back a response object
    # with a ``content`` attribute -- confirm how it reports failed requests.
    res = make_request(
        default_requests_session.get,
        "https://pypi.python.org/pypi?%3Aaction=index",
    )

    # Use BeautifulSoup to parse the HTML and find the package names.
    page = BeautifulSoup(res.content, 'html.parser')
    package_table = page.find('table')
    if package_table is None:
        # find() yields None when nothing matches; bail out with a clear
        # message instead of failing on ``None.findAll`` below.
        logging.error("====Could not find the package table on the PyPI index page.")
        return

    all_rows = package_table.findAll('tr')
    # One row is subtracted from the count (presumably the table's header
    # row -- confirm against the page layout).
    logging.info("====There are currently %d packages on PyPI.", len(all_rows) - 1)
    num_fetched = 0

    # Each row represents a single PyPI package.
    for row in all_rows:
        # Each row has 2 columns, a name (hyperlinked) and a description.
        link = row.find('a')

        # Only fetch package if it has a link to its own page.
        if link is None:
            continue

        # Reformat spacing in extracted name: non-breaking spaces become
        # ordinary spaces.
        package_name = link.text.replace(u'\xa0', ' ')
        PyPIPackage.get_or_create(name=package_name)
        num_fetched += 1

        # Report progress only right after a package was recorded, so the
        # same count is never logged twice for link-less rows.
        if num_fetched % 10 == 0:
            logging.info("%d packages fetched.", num_fetched)

    logging.info("====Done fetching package list. There were %d packages.", num_fetched)
fetch_npm_package_list() if args.data: if args.update: packages = NPMPackage.select().where( NPMPackage.description != '') else: packages = NPMPackage.select().where( NPMPackage.readme >> None).order_by(fn.Random()) fetch_npm_data(packages) if args.lib_packages: create_tables() fetch_packagenames_from_libraryio(args.lib_package_count) if args.github_readmes: fetch_github_readmes(NPMPackage.select()) if args.github_stats: fetch_github_stats(NPMPackage.select()) elif args.db == 'pypi': if args.package_list: create_pypi_tables() fetch_pypi_package_list() if args.data: if args.update: packages = PyPIPackage.select().where( PyPIPackage.description != '') else: packages = PyPIPackage.select().where( PyPIPackage.readme >> None).order_by(fn.Random()) fetch_pypi_data(packages) else: print "Please provide a valid argument to package-list: 'npm' or 'pypi'"
# Run the steps requested through ``args`` for the selected package index.
# Every flag is tested on its own, so a single invocation may run several
# steps one after another.
if args.db == 'npm':

    if args.package_list:
        create_npm_tables()
        fetch_npm_package_list()

    if args.data:
        # With ``args.update`` set, select packages whose description is
        # non-empty; otherwise select packages matching ``readme >> None``
        # (presumably an IS NULL test -- confirm) in random order.
        packages = (
            NPMPackage.select().where(NPMPackage.description != '')
            if args.update else
            NPMPackage.select().where(NPMPackage.readme >> None).order_by(fn.Random())
        )
        fetch_npm_data(packages)

    if args.lib_packages:
        create_tables()
        fetch_packagenames_from_libraryio(args.lib_package_count)

    # The GitHub steps are handed a query over every NPMPackage row.
    if args.github_readmes:
        fetch_github_readmes(NPMPackage.select())

    if args.github_stats:
        fetch_github_stats(NPMPackage.select())

elif args.db == 'pypi':

    if args.package_list:
        create_pypi_tables()
        fetch_pypi_package_list()

    if args.data:
        # Same selection rule as the npm branch, applied to PyPIPackage.
        packages = (
            PyPIPackage.select().where(PyPIPackage.description != '')
            if args.update else
            PyPIPackage.select().where(PyPIPackage.readme >> None).order_by(fn.Random())
        )
        fetch_pypi_data(packages)

else:
    # Any other value of ``args.db`` is rejected with a usage hint.
    print("Please provide a valid argument to package-list: 'npm' or 'pypi'")