Пример #1
0
class Web(object):
    """Crawl PyPI ownership links: package -> role holders -> their packages.

    Attributes:
        cache: names of users whose package lists have already been fetched.
        store: maps a user name to the set of package names fetched for it.
        extra: names of every package passed to ``get_users``.
        server: XML-RPC proxy through which all remote calls are made.
    """

    def __init__(self):
        self.cache = set()
        self.store = dict()
        self.extra = set()
        self.server = ServerProxy("https://pypi.python.org/pypi")

    def get_users(self, package="pip"):
        """Fetch and record the packages of each role holder of ``package``.

        Every user not yet in ``self.cache`` is looked up once; the result is
        stored in ``self.store`` and the user and its packages are printed.
        A package without role holders, or a user without packages, is handled
        without error (the user is stored with an empty set).

        Args:
            package: name of the package whose role holders are looked up.

        Returns:
            None. Results are accumulated on the instance.
        """
        roles = self.server.package_roles(package)
        self.extra.add(package)
        # Each entry is a pair whose second item is the user name. Iterating
        # the pairs directly (instead of ``zip(*roles)``) keeps an empty
        # answer from raising ValueError on unpacking.
        for _title, user in roles:
            if user in self.cache:
                continue
            user_result = self.server.user_packages(user)
            # Second item of each pair is the package name.
            packages = tuple(entry[1] for entry in user_result)
            self.store[user] = set(packages)
            self.cache.add(user)
            print(user, packages)
Пример #2
0
class PyPiPackage(object):
    """Metadata snapshot of a single PyPI project.

    The constructor performs network I/O: it asks the XML-RPC endpoint for the
    project's role holders, downloads the project's JSON document and copies
    selected fields onto the instance as plain attributes.
    """

    # Keys of the ``info`` mapping exposed through ``compact_info``.
    _COMPACT_KEYS = (
        'downloads', 'release_url', 'keywords', 'author', 'classifiers',
        'maintainer_email', 'home_page', 'license', 'name', 'platform',
        '_pypi_ordering', 'download_url', 'docs_url', 'author_email',
        'summary', 'maintainer', 'description', 'version', 'package_url',
        'bugtrack_url',
    )

    def __init__(self, name, json_link=None):
        """Fetch everything known about ``name``.

        Args:
            name: project name.
            json_link: optional URL of the project's JSON document; when
                omitted it is derived from ``name``.
        """
        self.name = self.package = name
        if not json_link:
            self.json_link = "http://pypi.python.org/pypi/{}/json".format(self.package)
        else:
            self.json_link = json_link
        self.proxy = ServerProxy('https://pypi.python.org/pypi')
        roles = [role[1] for role in self.proxy.package_roles(self.name)]
        # NOTE: a single role holder is stored as a bare value, anything else
        # as a list -- callers have to cope with both shapes.
        if len(roles) == 1:
            self.roles = roles[0]
        else:
            self.roles = roles
        # Fetch once; every accessor below reads this cached document.
        self.data = self._data

        info = self.info
        self.keywords = info.get('keywords')
        self.author = info.get('author')
        self.classifiers = info.get('classifiers')
        self.maintainer_email = info.get('maintainer_email')
        self.home_page = info.get('home_page')
        self.platform = info.get('platform')
        self._pypi_ordering = info.get('_pypi_ordering')
        self.downloads_url = info.get('download_url')
        self.docs_url = info.get('docs_url')
        self.author_email = info.get('author_email')
        self.summary = info.get('summary', ".")
        self.maintainer = info.get('maintainer')
        self.description = info.get('description')
        self.package_url = info.get('package_url')
        self.bugtrack_url = info.get('bugtrack_url')

        urls = self.urls
        self.packagetype = urls.get('packagetype')
        self.size = urls.get('size')
        # ``"{:,}".format(None)`` raises TypeError; an empty string makes
        # ``__repr__`` leave the size out instead.
        if self.size is None:
            self.size_fmt = ''
        else:
            self.size_fmt = "{:,}".format(self.size)
        self.max_downloads = self._max_downloads

        self._compacted = self.compact_info
        for key, value in self._compacted.items():
            if value:
                setattr(self, key, value)

    @staticmethod
    def _version_key(version):
        """Return a sort key comparing the numeric parts of ``version``.

        Plain string comparison ranks "1.9" above "1.10"; comparing the
        integer components does not. Pre-release suffixes are not treated
        specially, so this is an approximation of real version ordering.
        """
        import re
        return [int(part) for part in re.findall(r'\d+', str(version))]

    def from_dict(self, d):
        """Return a shallow copy of this object with the items of ``d`` set as attributes."""
        alias = self.copy()
        for k, v in d.items():
            setattr(alias, k, v)
        return alias

    def clean_Nones(self):
        """Return a shallow copy of this object without attributes equal to None."""
        alias = self.copy()
        # Collect the names first: removing entries while iterating the
        # instance dictionary would raise RuntimeError.
        empty = [k for k, v in vars(alias).items() if v is None]
        for k in empty:
            delattr(alias, k)
        return alias

    def __repr__(self):
        name = self.name
        if self.max_downloads:
            dls = "{:,} downloads, ".format(self.max_downloads)
        else:
            dls = ''
        if self.size_fmt:
            size = "size: {}, ".format(self.size_fmt)
        else:
            size = ''
        if self.summary:
            if len(self.summary) > 40:
                # Long summaries are shortened by dropping stop words.
                summary = " ".join(w for w in self.summary.lower().split() if w not in STOPWORDS)
            else:
                summary = self.summary
        else:
            summary = ''
        return "<Package {}: {}{};{}>".format(name, dls, size, summary)

    @property
    def _data(self):
        """Download the project's document (performs network I/O on every access).

        Returns:
            The decoded JSON document. If the response body is not valid JSON,
            the newest release is fetched over XML-RPC instead and wrapped as
            ``{'info': ..., 'releases': {}, 'urls': []}`` so that the other
            accessors keep working.

        Raises:
            ValueError: if the fallback finds no release at all.
        """
        try:
            data = requests.get(self.json_link).json()
        except ValueError:
            # JSON decoding errors are ValueError subclasses.
            version = max(self.proxy.package_releases(self.name), key=self._version_key)
            release = self.proxy.release_data(self.name, version)
            data = {'info': release, 'releases': {}, 'urls': []}
        return data

    @property
    def _max_downloads(self):
        """Sum the ``downloads`` value of the first file of every release."""
        releases = self.releases or {}
        return sum(files[0].get('downloads', 0) for files in releases.values() if files)

    @property
    def info(self):
        """The ``info`` mapping of the cached document (empty dict when absent)."""
        return self.data.get('info') or {}

    @property
    def compact_info(self):
        """A dict restricted to ``_COMPACT_KEYS``; keys missing from ``info`` map to None."""
        info = self.info
        return dict((key, info.get(key)) for key in self._COMPACT_KEYS)

    @property
    def releases(self):
        """The ``releases`` entry of the cached document."""
        return self.data.get('releases')

    @property
    def urls(self):
        """First entry of the document's ``urls`` list, or an empty dict when there is none."""
        urls = self.data.get('urls')
        if isinstance(urls, list):
            return urls[0] if urls else {}
        if urls is None:
            return {}
        return urls

    def copy(self):
        """Return a shallow copy of this object."""
        from copy import copy
        return copy(self)

    def __pickle_safe__(self):
        """Build a plain dict describing the newest release plus summary fields.

        The dict starts as a copy of the first file entry of the newest
        release, so the cached document itself is never modified. Where a key
        also exists in ``info`` with a different value, both values are kept
        as a ``(info_value, release_value)`` tuple.
        """
        from copy import copy

        # just do 1 release, most recent
        releases = self.releases or {}
        pickled = {}
        if releases:
            fresh_release = releases[max(releases, key=self._version_key)]
            if isinstance(fresh_release, (list, tuple)):
                fresh_release = fresh_release[0] if fresh_release else {}
            pickled = dict(fresh_release)

        for k, v in self.info.items():
            if k in pickled:
                v2 = copy(pickled[k])
                if v != v2:
                    pickled[k] = (v, v2)
                else:
                    pickled[k] = v

        pickled['max_downloads'] = str(copy(self.max_downloads))
        pickled['size_fmt'] = copy(self.size_fmt)
        pickled['roles'] = copy(self.roles)

        return pickled
Пример #3
0
class JSON_PYPI(object):
    """Access to one PyPI project's JSON document.

    The document is downloaded on first use and then kept on the instance, so
    building an object and reading its properties costs a single request
    instead of one request per attribute access.
    """

    def __init__(self, name=None, json_link=None):
        """Fetch the document for ``name`` and copy selected fields to attributes.

        Args:
            name: project name.
            json_link: optional URL of the JSON document; derived from
                ``name`` when omitted.
        """
        self.name = name
        self.package = name
        if not json_link:
            self.json_link = "http://pypi.python.org/pypi/{}/json".format(self.package)
        else:
            self.json_link = json_link
        self.proxy = ServerProxy('https://pypi.python.org/pypi')
        self._data_cache = None

        info = self.info
        self.keywords = info.get('keywords')
        self.author = info.get('author')
        self.classifiers = info.get('classifiers')
        self.maintainer_email = info.get('maintainer_email')
        self.home_page = info.get('home_page')
        self.platform = info.get('platform')
        self._pypi_ordering = info.get('_pypi_ordering')
        self.downloads_url = info.get('download_url')
        self.docs_url = info.get('docs_url')
        self.author_email = info.get('author_email')
        self.summary = info.get('summary', ".")
        self.maintainer = info.get('maintainer')
        self.description = info.get('description')
        self.package_url = info.get('package_url')
        self.bugtrack_url = info.get('bugtrack_url')

        urls = self.urls
        self.packagetype = urls.get('packagetype')
        self.size = urls.get('size')
        # ``"{:,}".format(None)`` raises TypeError, so a missing size is
        # rendered as an empty string.
        if self.size is None:
            self.size_fmt = ''
        else:
            self.size_fmt = "{:,}".format(self.size)

    @staticmethod
    def _version_key(version):
        """Sort key comparing the numeric parts of ``version`` ("1.10" > "1.9").

        Pre-release suffixes are not treated specially; this only approximates
        real version ordering.
        """
        import re
        return [int(part) for part in re.findall(r'\d+', str(version))]

    @property
    def data(self):
        """The project document, downloaded once and cached on the instance.

        If the response cannot be decoded as JSON, the newest release is
        fetched over XML-RPC and wrapped as
        ``{'info': ..., 'releases': {}, 'urls': []}``.
        """
        cached = getattr(self, '_data_cache', None)
        if cached is None:
            try:
                cached = get_json(self.json_link)
            except ValueError:
                # JSON decoding errors (stdlib json and simplejson alike)
                # are ValueError subclasses.
                version = max(self.proxy.package_releases(self.name), key=self._version_key)
                release = self.proxy.release_data(self.name, version)
                cached = {'info': release, 'releases': {}, 'urls': []}
            self._data_cache = cached
        return cached

    @property
    def info(self):
        """The ``info`` mapping of the document (empty dict when absent)."""
        return self.data.get('info') or {}

    @property
    def releases(self):
        """The ``releases`` entry of the document."""
        return self.data.get('releases')

    @property
    def urls(self):
        """First entry of the document's ``urls`` list, or an empty dict when there is none."""
        urls = self.data.get('urls')
        if isinstance(urls, list):
            return urls[0] if urls else {}
        if urls is None:
            return {}
        return urls

    @property
    def upload_time(self):
        """``upload_time`` of the first file as a ``datetime``; None when not recorded."""
        stamp = self.urls.get('upload_time')
        if not stamp:
            return None
        try:
            upload_time = datetime.strptime(stamp.replace('T', ' '), "%Y-%m-%d %H:%M:%S")
        except ValueError:
            # Fall back to splitting the stamp by hand.
            day_part, clock_part = stamp.split('T')
            year, month, day = day_part.split('-')
            hour, minute, second = clock_part.split(':')
            upload_time = datetime(int(year), int(month), int(day), int(hour), int(minute), int(second))
        return upload_time

    @property
    def get_max_downloads(self):
        """Sum the ``downloads`` value of the first file of every release.

        ``releases`` may be a URL string (the document is then downloaded from
        it), a mapping of releases, or a mapping that nests the releases under
        a ``'releases'`` key. Any other shape counts as zero downloads.
        """
        releases = self.releases
        if isinstance(releases, str):
            releases = requests.get(releases).json()['releases']
        elif isinstance(releases, dict) and 'releases' in releases:
            releases = releases['releases']
        if not isinstance(releases, dict):
            return 0
        return sum(files[0].get('downloads', 0) for files in releases.values() if files)

    def roles(self):
        """Return the raw role list reported by the XML-RPC endpoint."""
        return self.proxy.package_roles(self.name)

    def __repr__(self):
        name = self.name
        # A missing summary, or a placeholder of up to two characters such as
        # the "." default, is shown as a single blank.
        if self.summary is not None and len(self.summary) > 2:
            summary = self.summary
        else:
            summary = " "
        size = self.size_fmt
        dls = "{:,}".format(self.get_max_downloads)
        if isinstance(summary, str) and len(summary) > 30:
            summary = " ".join(w for w in summary.split() if w not in STOPWORDS)

        return "<Package {}: {}, {}, {}>".format(name, dls, size, summary)
Пример #4
0
class Client(object):
    """Thin convenience wrapper around the PyPI XML-RPC endpoint.

    Attributes:
        uri: address of the XML-RPC endpoint.
        proxy: ``ServerProxy`` bound to ``uri``; every method delegates to it.
        cache: set that ``deep_search`` replaces when it is given a cache.
    """

    def __init__(self):
        self.uri = 'https://pypi.python.org/pypi'
        self.proxy = ServerProxy(self.uri)
        self.cache = set()

    @staticmethod
    def _newest(versions):
        """Pick the highest entry of ``versions`` by its numeric components.

        Plain ``max`` on version strings is lexicographic and ranks "1.9"
        above "1.10". Pre-release suffixes are not treated specially. If the
        chosen entry is itself a list, its first item is returned.

        Raises:
            ValueError: if ``versions`` is empty.
        """
        import re
        newest = max(versions, key=lambda v: [int(d) for d in re.findall(r'\d+', str(v))])
        if isinstance(newest, list):
            newest = newest[0]
        return newest

    def all_distributions(self):
        """Return whatever the endpoint's ``list_packages`` call returns."""
        return self.proxy.list_packages()

    def roles(self, package):
        """Return the second item of every role entry of ``package``."""
        return [cl[1] for cl in self.proxy.package_roles(package)]

    def user_packages(self, user):
        """Return the second item of every package entry of ``user``."""
        return [cl[1] for cl in self.proxy.user_packages(user)]

    def release_downloads(self, package, version=None):
        """Return the endpoint's download data for one release.

        Args:
            package: project name.
            version: release to query; the newest known release when None.
        """
        if version is None:
            version = self._newest(self.proxy.package_releases(package))
        return self.proxy.release_downloads(package, version)

    def package_releases(self, package):
        """Return the releases the endpoint reports for ``package``."""
        return self.proxy.package_releases(package)

    def release_data(self, package, version=None):
        """Return the endpoint's metadata for one release.

        Args:
            package: project name.
            version: release to query; the newest known release when None.
                An explicit version is honoured.
        """
        if version is None:
            version = self._newest(self.proxy.package_releases(package))
        return self.proxy.release_data(package, version)

    def simple_search(self, spec, operator='or', getter = None, omit = ('django')):
        """Search project names and summaries for ``spec``.

        Args:
            spec: text to look for in the ``name`` and ``summary`` fields.
            operator: how the two field conditions are combined, passed
                through to the endpoint ('or' / 'and').
            getter: accepted for compatibility; currently unused.
            omit: accepted for compatibility; currently unused. NOTE: the
                default ``('django')`` is a plain string, not a tuple.

        Returns:
            The list produced by pip's ``transform_hits``, with a ``version``
            key (highest of ``versions``) added to every entry and any
            ``score`` key removed.
        """
        try:
            from pip.commands.search import transform_hits, highest_version
        except ImportError:
            # NOTE(review): newer pip releases keep these helpers under
            # pip._internal -- confirm against the installed pip version.
            from pip._internal.commands.search import transform_hits, highest_version

        # The search call takes ONE spec mapping followed by the operator.
        hits = self.proxy.search({'name': spec, 'summary': spec}, operator)
        results = transform_hits(hits)
        for result in results:
            result['version'] = highest_version(result['versions'])
            # Not every pip version adds a score, so do not insist on it.
            result.pop('score', None)
        return results

    def deep_search(self, spec, operator='and', cache = None, limit=20):
        """Search, load full package objects and return the most downloaded ones.

        Args:
            spec: text passed to ``simple_search``.
            operator: passed to ``simple_search``.
            cache: optional set; when given, ``self.cache`` becomes the union
                of it and the packages loaded by this call.
            limit: maximum number of packages returned.

        Returns:
            At most ``limit`` ``PyPiPackage`` objects, ordered by
            ``max_downloads`` from highest to lowest.
        """
        hits = self.simple_search(spec, operator=operator, getter=None, omit=['django'])
        # Each hit is a mapping; the package constructor wants the name only.
        results = [PyPiPackage(hit['name']) for hit in hits]
        if cache is not None:
            self.cache = cache | set(results)
        ranked = sorted(results, key=lambda pkg: pkg.max_downloads, reverse=True)
        return ranked[:limit]

    def browse(self, classifiers):
        """Return whatever the endpoint's ``browse`` call returns for ``classifiers``."""
        return self.proxy.browse(classifiers)

    def get_json_url(self, package):
        """Return the JSON document URL for ``package``."""
        return "http://pypi.python.org/pypi/{}/json".format(package)
Пример #5
0
class Package(object):
    """Metadata snapshot of a single PyPI project.

    The constructor performs network I/O: it asks the XML-RPC endpoint for the
    project's role holders, downloads the project's JSON document once and
    copies selected fields onto the instance as plain attributes.
    """

    # Keys used to build a placeholder ``info`` mapping when none is available.
    _INFO_KEYS = (
        "docs_url", "keywords", "_pypi_ordering", "requires_python",
        "version", "license", "maintainer_email",
        "cheesecake_documentation_id", "release_url", "author", "home_page",
        "classifiers", "cheesecake_installability_id", "platform", "summary",
        "name", "download_url", "author_email", "downloads",
        "cheesecake_code_kwalitee_id", "bugtrack_url", "package_url",
        "description", "maintainer", "_pypi_hidden",
    )

    # Keys of the ``info`` mapping exposed through ``compact_info``.
    _COMPACT_KEYS = (
        "downloads", "release_url", "keywords", "author", "classifiers",
        "maintainer_email", "home_page", "license", "name", "platform",
        "_pypi_ordering", "download_url", "docs_url", "author_email",
        "summary", "maintainer", "description", "version", "package_url",
        "bugtrack_url",
    )

    def __init__(self, name, json_link=None):
        """Fetch everything known about a project.

        Args:
            name: project name, or a mapping of project name to a sequence
                whose first item is kept as ``self.dist``. With several keys
                the last one iterated wins.
            json_link: optional URL of the JSON document; derived from the
                name when omitted.
        """
        self.dist = None
        if isinstance(name, dict):
            for key, value in name.items():
                self.name = self.package = key
                # An empty sequence has no first item to take.
                self.dist = value[0] if len(value) > 0 else None
        else:
            self.name = self.package = name

        if json_link:
            self.json_link = json_link
        else:
            self.json_link = "http://pypi.python.org/pypi/{}/json".format(self.name)

        self.proxy = ServerProxy("https://pypi.python.org/pypi")

        roles = [role[1] for role in self.proxy.package_roles(self.name)]
        # NOTE: a single role holder is stored as a bare value, anything else
        # as a list -- callers have to cope with both shapes.
        if len(roles) == 1:
            self.roles = roles[0]
        else:
            self.roles = roles

        # Fetch once; every accessor below reads this cached document.
        self.data = self._data

        info = self.info
        self.keywords = self._keywords
        self.author = info.get("author", "")
        self.classifiers = info.get("classifiers", "")
        self.maintainer_email = info.get("maintainer_email", "")
        self.home_page = info.get("home_page", "")
        self.platform = info.get("platform", "")
        self._pypi_ordering = info.get("_pypi_ordering", "")
        self.downloads_url = info.get("download_url", "")
        self.docs_url = info.get("docs_url", "")
        self.author_email = info.get("author_email", "")
        self.summary = info.get("summary", "")
        self.maintainer = info.get("maintainer", "")
        self.description = info.get("description", "")
        self.package_url = info.get("package_url", "")
        self.bugtrack_url = info.get("bugtrack_url", "")

        self._compacted = self.compact_info
        for key, value in self._compacted.items():
            if key == "keywords":
                # Keep the parsed keyword list assigned above rather than
                # replacing it with the raw string.
                continue
            if value:
                setattr(self, key, value)
            elif key != "name":
                # Empty values become None, but the project name must survive.
                setattr(self, key, None)

        urls = self.urls
        self.packagetype = urls.get("packagetype", None)
        try:
            size = int(urls.get("size"))
        except (TypeError, ValueError):
            # No usable size recorded.
            size = 0
        self.size = str(size)
        self.size_fmt = "{:,}".format(size)
        self.max_downloads = str(self._max_downloads)

    def from_dict(self, d):
        """Return a shallow copy of this object with the items of ``d`` set as attributes."""
        alias = self.copy()
        for k, v in d.items():
            setattr(alias, k, v)
        return alias

    def __repr__(self):
        name = self.name
        if self.max_downloads:
            # ``max_downloads`` is stored as a string; the thousands separator
            # needs a number.
            try:
                downloads = int(self.max_downloads)
            except (TypeError, ValueError):
                downloads = 0
            dls = "{:,} downloads, ".format(downloads)
        else:
            dls = ""
        if self.size_fmt:
            size = "size: {}, ".format(self.size_fmt)
        else:
            size = ""
        if self.summary:
            if len(self.summary) > 40:
                # Long summaries are shortened by dropping stop words.
                summary = " ".join(w for w in self.summary.lower().split() if w not in STOPWORDS)
            else:
                summary = self.summary
        else:
            summary = ""
        return "<Package {}: {}{};{}>".format(name, dls, size, summary)

    def highest_version(self, versions):
        """Return the highest of ``versions`` as ordered by ``pkg_resources.parse_version``.

        Raises:
            ValueError: if ``versions`` is empty.
        """
        return max(versions, key=pkg_resources.parse_version)

    @property
    def _data(self):
        """Download the project's document (performs network I/O on every access).

        Returns:
            The decoded JSON document. If the response body is not valid JSON,
            the highest release is fetched over XML-RPC instead and wrapped as
            ``{"info": ..., "releases": {}, "urls": []}``; when no release is
            known either, the same structure is returned with an empty
            ``info``.
        """
        try:
            data = requests.get(self.json_link).json()
        except ValueError:
            # JSON decoding errors (stdlib json and simplejson alike) are
            # ValueError subclasses.
            versions = self.proxy.package_releases(self.name)
            if versions:
                release = self.proxy.release_data(self.name, self.highest_version(versions))
            else:
                release = {}
            data = {"info": release, "releases": {}, "urls": []}
        return data

    @property
    def _max_downloads(self):
        """Sum the ``downloads`` value of the first file of every release."""
        releases = self.releases
        if not isinstance(releases, dict):
            return 0
        return sum(files[0].get("downloads", 0) for files in releases.values() if files)

    @property
    def _keywords(self):
        """Lower-cased keyword list parsed from ``info['keywords']``.

        The string is split on commas when it contains one, on whitespace
        otherwise. A missing or empty value is returned unchanged.
        """
        kws = self.info.get("keywords", None)
        if not kws:
            return kws
        parts = kws.split(",") if "," in kws else kws.split()
        return [kw.strip().lower() for kw in parts]

    def extract_metadata(self):
        """Read core fields from ``self.dist``.

        Returns:
            A dict with the keys ``name``, ``version``, ``summary``,
            ``homepage``, ``author``, ``author_email``, ``platform`` and
            ``classifiers``, or None when ``self.dist`` is not a
            ``DistInfoDistribution`` exposing ``_parsed_pkg_info``.
        """
        dist = getattr(self, "dist", None)
        if dist is None or not isinstance(dist, DistInfoDistribution):
            return None
        if not hasattr(dist, "_parsed_pkg_info"):
            return None
        meta = dist._parsed_pkg_info
        return {
            "name": meta.get("name"),
            "version": meta.get("version"),
            "summary": meta.get("summary"),
            # NOTE(review): the metadata header is spelled "Home-page";
            # "homepage" is presumably never present -- confirm.
            "homepage": meta.get("Home-page"),
            "author": meta.get("author"),
            "author_email": meta.get("Author-email"),
            "platform": meta.get("platform"),
            "classifiers": meta.get_all("classifier"),
        }

    @property
    def info(self):
        """The ``info`` mapping of the cached document.

        Falls back to a placeholder mapping with every key of ``_INFO_KEYS``
        set to an empty string when the document holds no such mapping.
        """
        data = self.data
        info = data.get("info") if isinstance(data, dict) else None
        if isinstance(info, dict):
            return info
        return dict.fromkeys(self._INFO_KEYS, "")

    @property
    def compact_info(self):
        """A dict restricted to ``_COMPACT_KEYS``; keys missing from ``info`` map to None."""
        info = self.info
        return dict((key, info.get(key)) for key in self._COMPACT_KEYS)

    @property
    def releases(self):
        """The ``releases`` entry of the cached document."""
        return self.data.get("releases")

    @property
    def urls(self):
        """First entry of the document's ``urls`` list, or an empty dict when there is none."""
        urls = self.data.get("urls")
        if isinstance(urls, list):
            return urls[0] if urls else {}
        if isinstance(urls, dict):
            return urls
        return {}

    def copy(self):
        """Return a shallow copy of this object."""
        from copy import copy

        return copy(self)

    def __pickle_safe__(self):
        """Build a plain dict describing the highest release plus summary fields.

        The dict starts as a copy of the first file entry of the highest
        release, so the cached document itself is never modified. Where a key
        also exists in ``info`` with a different value, both values are kept
        as a ``(info_value, release_value)`` tuple.
        """
        from copy import copy

        # just do 1 release, most recent
        releases = self.releases
        pickled = {}
        if isinstance(releases, dict) and releases:
            fresh_release = releases[self.highest_version(releases.keys())]
            if isinstance(fresh_release, (list, tuple)):
                fresh_release = fresh_release[0] if fresh_release else {}
            pickled = dict(fresh_release)

        for k, v in self.info.items():
            if k in pickled:
                v2 = copy(pickled[k])
                if v != v2:
                    pickled[k] = (v, v2)
                else:
                    pickled[k] = v

        pickled["max_downloads"] = str(copy(self.max_downloads))
        pickled["size_fmt"] = copy(self.size_fmt)
        pickled["roles"] = copy(self.roles)
        pickled["summary"] = copy(self.summary)
        pickled["keywords"] = copy(self.keywords)
        pickled["classifiers"] = copy(self.classifiers)
        pickled["name"] = copy(self.name)

        return pickled

    @property
    def _pickle(self):
        """Shortcut for ``__pickle_safe__()``."""
        return self.__pickle_safe__()