def get_result(self):
    """Return a download URL for the requested library version.

    Looks up the explicitly requested version (``self.version``) or falls
    back to the library's latest version, records the download for
    statistics, and returns ``dict(url=..., version=...)``.

    Raises:
        APINotFound: if the library ID or the requested version is unknown.
    """
    if self.version:
        # Outer join so the library row is still found when the requested
        # version does not exist; the NULL version id is detected below.
        query = db_session.query(
            models.Libs.id, models.LibVersions.id,
            models.LibVersions.name).outerjoin(
                models.LibVersions,
                and_(models.LibVersions.lib_id == models.Libs.id,
                     models.LibVersions.name == self.version)).filter(
                         models.Libs.id == self.id_)
    else:
        query = db_session.query(
            models.Libs.id, models.LibVersions.id,
            models.LibVersions.name).join(
                models.LibVersions,
                models.LibVersions.id == models.Libs.latest_version_id).filter(
                    models.Libs.id == self.id_)
    try:
        data = query.one()
    except NoResultFound:
        # Use %s (not %d) so the message renders even if id_ is not an int,
        # consistently with the other API handlers in this module.
        raise APINotFound("Unknown library with ID '%s'" % self.id_)
    lib_id, version_id, version_name = data
    if not version_id:
        # Library exists but the requested version name did not match
        raise APINotFound("Unknown version '%s'" % self.version)
    # Bump download counters / write the DL log before returning the URL
    self._logdlinfo(lib_id)
    result = dict(
        url=util.get_libarch_url(lib_id, version_id), version=version_name)
    return result
def get_result(self):
    """Collect aggregate registry counters (libs, examples, boards, ...)."""
    boards = BoardsAPI.get_result()
    unique_mcus = {b['mcu'] for b in boards}
    return {
        'libs': db_session.query(func.count(models.Libs.id)).scalar(),
        'libexamples':
        db_session.query(func.count(models.LibExamples.id)).scalar(),
        'boards': len(boards),
        'mcus': len(unique_mcus),
        'frameworks': len(FrameworksAPI.get_result()),
        'platforms': len(PlatformsAPI.get_result()),
    }
def sync_lib_by_id(lib_id):
    """Synchronize a single library identified by its primary key.

    Prints a diagnostic and returns None when the ID is unknown.
    """
    item = db_session.query(models.Libs).get(lib_id)
    if not item:
        # Parenthesized print works identically under Python 2 and 3
        print("Library with id={} not found.".format(lib_id))
        return
    sync_lib(item)
def get_result(self):
    """Register a new pending library from ``self.conf_url``.

    Returns ``dict(successed=..., message=...)``; errors are reported in
    the message instead of being raised to the caller.
    """
    result = dict(successed=False, message=None)
    try:
        # Pick a syncer class by the manifest file-name convention:
        # *.properties -> Arduino, module.json -> Yotta, else PlatformIO
        manifest_name = basename(self.conf_url)
        if manifest_name.endswith(".properties"):
            cls = crawler.ArduinoLibSyncer
        elif manifest_name == "module.json":
            cls = crawler.YottaLibSyncer
        else:
            cls = crawler.PlatformIOLibSyncer
        config = cls.load_config(self.conf_url)
        assert cls.validate_config(config)

        # check for pending duplicates
        query = db_session.query(func.count(1)).filter(
            models.PendingLibs.conf_url == self.conf_url)
        if query.scalar():
            raise InvalidLibConf("The library is already registered")

        db_session.add(models.PendingLibs(conf_url=self.conf_url))
        db_session.commit()
        result['successed'] = True
        result['message'] = ("The library has been successfully "
                             "registered and is waiting for moderation")
    except InvalidLibConf as e:
        result['message'] = str(e)
    except Exception as e:
        # Any other failure (bad URL, broken manifest, failed validation)
        # is logged and reported back as a generic validation error
        logger.exception(e)
        result['message'] = (
            "Invalid URL or broken manifest. "
            "Please validate it with http://jsonlint.com")
    return result
def _get_last_keywords(self, limit=5):
    """Return names of the *limit* most recently added keywords."""
    query = db_session.query(models.Keywords.name).order_by(
        models.Keywords.id.desc()).limit(limit)
    return [name for (name, ) in query.all()]
def run(self):
    """Crawl GitHub for new library manifests, skipping known repos."""
    # Seed the known-repository set from already registered pending libs
    for row in db_session.query(models.PendingLibs.conf_url).all():
        self.add_known_repo(row[0])
    gh_client = Github(self.gh_login, self.gh_pass, per_page=1000)
    for found in gh_client.search_code(self.query):
        if not self.is_known_manifest(found.html_url):
            self._process_repository(found.repository)
def optimise_sync_period():
    """Spread library sync timestamps evenly across a 24-hour window.

    Staggers each library's ``synced`` time so the periodic sync job
    processes roughly the same number of libraries per run instead of
    hitting them all at once.
    """
    libs = db_session.query(models.Libs)
    libs_count = libs.count()
    if not libs_count:
        return  # nothing to schedule; also avoids division by zero
    # True division is required: with Python 2 integer division the step
    # is silently floored (and becomes 0 for > 86400 libs). 24h == 86400s
    dt = timedelta(seconds=ceil(86400.0 / libs_count))
    new_sync_datetime = datetime.utcnow() - timedelta(hours=24)
    for lib in libs.all():
        lib.synced = new_sync_datetime
        new_sync_datetime += dt
    db_session.commit()
def _prepare_sql_query(self, is_count=False):
    """Build the base library-search query.

    Args:
        is_count: when True, return a query that only counts distinct
            matching libraries (used for pagination totals).
    """
    if is_count:
        query = db_session.query(
            func.count(distinct(models.LibFTS.lib_id)))
    else:
        query = db_session.query(
            models.LibFTS.lib_id, models.LibFTS.name,
            models.LibFTS.description, models.LibFTS.keywords,
            models.LibFTS.authornames, models.LibDLStats.lifetime,
            models.Libs.example_nums, models.Libs.updated,
            models.LibFTS.frameworkslist, models.LibFTS.platformslist)
    query = query.join(models.Libs, models.LibDLStats)
    query = self._apply_filters_to_query(query, is_count)
    if not is_count:
        # Default ordering: most downloaded (lifetime) first
        query = query.order_by(models.LibDLStats.lifetime.desc())
    return query
def _get_top_keywords(self, limit=50):
    """Return names of the *limit* keywords used by the most libraries."""
    usage_total = func.count(models.Keywords.id).label("total")
    query = db_session.query(models.Keywords.name, usage_total).join(
        models.LibsKeywords).group_by(models.Keywords.id).order_by(
            desc("total")).limit(limit)
    return [row[0] for row in query.all()]
def sync_libs():
    """Re-sync every active library not synced within the last day."""
    stale_cutoff = datetime.utcnow() - timedelta(days=1)
    query = db_session.query(models.Libs).filter(
        models.Libs.synced < stale_cutoff, models.Libs.active)
    for lib in query.all():
        previously_updated = lib.updated
        sync_lib(lib)
        # Invalidate cached API responses only when something changed
        if previously_updated != lib.updated:
            purge_cache()
def _prepare_sql_query(self, is_count=False):
    """Build the base search query for library examples.

    Args:
        is_count: when True, return a query that only counts matching
            examples (used for pagination totals).
    """
    # NOTE: the former `_params, _words = self.search_query` unpacking was
    # dead code (both names unused); removed.
    if is_count:
        query = db_session.query(func.count(models.LibExamples.id))
    else:
        query = db_session.query(
            models.LibExamples, models.LibFTS.name,
            models.LibFTS.description, models.LibFTS.keywords,
            models.LibFTS.authornames, models.LibFTS.frameworkslist,
            models.LibFTS.platformslist)
    query = query.join(models.Libs, models.LibFTS)
    query = self._apply_filters_to_query(query, is_count)
    # Without FTS search words there is no relevance ranking, so fall
    # back to newest-first ordering
    if not self.search_query['words'] and not is_count:
        query = query.order_by(models.LibExamples.id.desc())
    return query
def sync_version(self, version):
    """Return the DB id for *version* (a dict), creating the row if new."""
    try:
        found = db_session.query(models.LibVersions).filter(
            models.LibVersions.lib_id == self.lib.id,
            models.LibVersions.name == version['name']).one()
    except NoResultFound:
        # Unknown version: create it and flush so its id is assigned
        found = models.LibVersions(**version)
        self.lib.versions.append(found)
        db_session.flush()
    return found.id
def get_result(self):
    """Map each requested library ID to its latest version name.

    IDs without a known library (or without a latest version) map to
    ``None`` so the client always sees every requested key.
    """
    query = db_session.query(
        models.Libs.id, models.LibVersions.name).join(
            models.LibVersions,
            models.LibVersions.id == models.Libs.latest_version_id).filter(
                models.Libs.id.in_(self.ids))
    # (removed a dead `result = dict()` that was immediately overwritten)
    result = {i[0]: i[1] for i in query.all()}
    for id_ in self.ids:
        if id_ not in result:
            result[id_] = None
    return result
def get_free_lib_id():
    """Return the smallest positive integer not used as a library ID."""
    last_seen = 0
    candidate = 0
    id_rows = db_session.query(models.Libs.id).order_by(
        models.Libs.id.asc()).all()
    for row in id_rows:
        last_seen = row[0]
        candidate += 1
        if last_seen > candidate:
            # Found a gap in the sequence; reuse it
            break
    if last_seen == candidate:
        # No gap found: next ID after the highest sequential one
        candidate += 1
    return candidate
def _get_last_added(self, limit=5):
    """Return id/name/date dicts for the most recently added libraries."""
    query = db_session.query(
        models.Libs.id, models.Libs.added, models.LibFTS.name).join(
            models.LibFTS).order_by(models.Libs.added.desc()).limit(limit)
    return [
        dict(
            id=lib_id,
            name=name,
            date=added.strftime("%Y-%m-%dT%H:%M:%SZ"))
        for (lib_id, added, name) in query.all()
    ]
def sync_attributes(self):
    """Build LibsAttributes links for attributes present in the config."""
    conf_attrs = {}
    self._fetch_conf_attrs(conf_attrs, self.config)
    result = []
    for db_attr in db_session.query(models.Attributes).all():
        if db_attr.name in conf_attrs:
            link = models.LibsAttributes(value=conf_attrs[db_attr.name])
            link.attribute = db_attr
            result.append(link)
    return result
def rotate_libs_dlstats(): today = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0) # delete obsolete logs db_session.query(models.LibDLLog.lib_id).filter( models.LibDLLog.date < today - timedelta(days=60)).delete() db_session.query(models.LibDLStats).update(dict( day=select([func.count(1)]).where( and_(models.LibDLLog.lib_id == models.LibDLStats.lib_id, models.LibDLLog.date >= today)).as_scalar(), week=select([func.count(1)]).where( and_(models.LibDLLog.lib_id == models.LibDLStats.lib_id, models.LibDLLog.date >= today - timedelta(days=7))).as_scalar(), month=select([func.count(1)]).where( and_(models.LibDLLog.lib_id == models.LibDLStats.lib_id, models.LibDLLog.date >= today - timedelta(days=30))).as_scalar(), day_prev=select([func.count(1)]).where( and_(models.LibDLLog.lib_id == models.LibDLStats.lib_id, models.LibDLLog.date < today, models.LibDLLog.date >= today - timedelta(days=1))).as_scalar(), week_prev=select([func.count(1)]).where( and_(models.LibDLLog.lib_id == models.LibDLStats.lib_id, models.LibDLLog.date < today - timedelta(days=7), models.LibDLLog.date >= today - timedelta(days=14))).as_scalar(), month_prev=select([func.count(1)]).where( and_(models.LibDLLog.lib_id == models.LibDLStats.lib_id, models.LibDLLog.date < today - timedelta(days=30))).as_scalar()), synchronize_session=False) db_session.commit() purge_cache()
def get_result(self):
    """List all published versions (name + release date) of a library.

    Raises:
        APINotFound: if the library has no versions / does not exist.
    """
    query = db_session.query(models.LibVersions).filter(
        models.LibVersions.lib_id == self.id_).order_by(
            models.LibVersions.released.asc(), models.LibVersions.id.asc())
    versions = [
        dict(
            version=v.name,
            date=v.released.strftime("%Y-%m-%dT%H:%M:%SZ"))
        for v in query.all()
    ]
    if not versions:
        raise APINotFound("Unknown library with ID '%s'" % self.id_)
    return versions
def _logdlinfo(self, lib_id):
    """Record a library download for statistics.

    Downloads from the same IP within one hour are de-duplicated (only
    the existing log timestamp is refreshed); CI downloads and requests
    without a client IP are ignored entirely.
    """
    if not self.ip or self.ci:
        return
    ip_int = util.ip2int(self.ip)
    try:
        # Was this IP already counted for this library in the last hour?
        query = db_session.query(models.LibDLLog).filter(
            models.LibDLLog.lib_id == lib_id,
            models.LibDLLog.date > datetime.utcnow() - timedelta(hours=1),
            models.LibDLLog.ip == ip_int)
        item = query.one()
        item.date = datetime.utcnow()
    except NoResultFound:
        # First download within the window: bump all counters and log it
        db_session.query(models.LibDLStats).filter(
            models.LibDLStats.lib_id == lib_id).update({
                models.LibDLStats.lifetime: models.LibDLStats.lifetime + 1,
                models.LibDLStats.day: models.LibDLStats.day + 1,
                models.LibDLStats.week: models.LibDLStats.week + 1,
                models.LibDLStats.month: models.LibDLStats.month + 1
            })
        db_session.add(models.LibDLLog(lib_id=lib_id, ip=ip_int))
    db_session.commit()
def cleanup_lib_versions(keep_versions):
    """Delete old version archives, keeping the *keep_versions* newest."""
    counted = db_session.query(
        models.Libs, func.count(models.Libs.versions)).join(
            models.Libs.versions).group_by(models.Libs)
    for lib, total in counted.all():
        if total <= keep_versions:
            continue
        ordered = db_session.query(models.LibVersions).with_parent(
            lib).order_by(models.LibVersions.released.desc())
        # Everything past the newest *keep_versions* entries is removed
        for old_version in ordered.all()[keep_versions:]:
            remove_library_version_archive(lib.id, old_version.id)
            db_session.delete(old_version)
    db_session.commit()
    purge_cache()
def sync_authors(self, confauthors):
    """Synchronize the library's author links with the manifest.

    Args:
        confauthors: author entries from the manifest (a dict or a list
            of dicts); may be falsy, in which case the repository owner
            is used for GitHub-hosted libraries.

    Returns:
        The normalized list of author dicts.

    Raises:
        NotImplementedError: no authors in the manifest and the VCS is
            not GitHub, so no author can be derived.
    """
    authors = []
    # Defaults applied to every author entry
    itemtpl = dict(email=None, url=None, maintainer=False)
    if confauthors:
        if not isinstance(confauthors, list):
            confauthors = [confauthors]
        for item in confauthors:
            tmp = itemtpl.copy()
            tmp.update(item)
            authors.append(tmp)
    elif self.vcsclient and self.vcsclient.get_type() == "github":
        # Fall back to the GitHub repository owner as the sole author
        tmp = itemtpl.copy()
        tmp.update(self.vcsclient.get_owner())
        authors.append(tmp)
    else:
        raise NotImplementedError()
    authornames = [item['name'] for item in authors]
    # delete obsolete authors
    self.lib.authors = []
    query = db_session.query(models.Authors).filter(
        models.Authors.name.in_(authornames))
    existing = set()
    # Re-link authors that already have DB rows
    for _author in query.all():
        for item in authors:
            if item['name'] != _author.name:
                continue
            existing.add(_author.name)
            _la = models.LibsAuthors(maintainer=item['maintainer'])
            _la.author = _author
            self.lib.authors.append(_la)
    # Create DB rows for authors seen for the first time
    for name in (set(authornames) - existing):
        for item in authors:
            if item['name'] != name:
                continue
            _la = models.LibsAuthors(maintainer=item['maintainer'])
            _la.author = models.Authors(name=item['name'],
                                        email=item['email'],
                                        url=item['url'])
            self.lib.authors.append(_la)
    # save in string format for FTS
    self.lib.fts.authornames = ",".join(authornames)
    return authors
def _get_most_downloaded(self, period, limit=10):
    """Return top libraries by download growth over the given period."""
    prev_column = getattr(models.LibDLStats, "%s_prev" % period.key)
    growth = label("diff", period - prev_column)
    query = (db_session.query(
        period, growth, models.LibFTS.lib_id, models.LibFTS.name).join(
            models.LibFTS,
            models.LibDLStats.lib_id == models.LibFTS.lib_id).filter(
                period >= prev_column).order_by(desc("diff")).limit(limit))
    return [
        dict(id=row[2], name=row[3], total=row[0], diff=row[1])
        for row in query.all()
    ]
def sync_authors(self, confauthors):
    """Synchronize the library's author links with the manifest data.

    Args:
        confauthors: author entries from the manifest (a dict or a list
            of dicts); for GitHub repositories with no declared authors,
            the repository owner is used instead.

    Returns:
        The normalized list of author dicts.

    Raises:
        NotImplementedError: no manifest authors and the VCS is not
            GitHub, so no author can be derived.
    """
    authors = []
    # Defaults merged into every author entry
    itemtpl = dict(email=None, url=None, maintainer=False)
    if confauthors:
        if not isinstance(confauthors, list):
            confauthors = [confauthors]
        for item in confauthors:
            tmp = itemtpl.copy()
            tmp.update(item)
            authors.append(tmp)
    elif self.vcsclient and self.vcsclient.get_type() == "github":
        # Use the GitHub repository owner as the sole author
        tmp = itemtpl.copy()
        tmp.update(self.vcsclient.get_owner())
        authors.append(tmp)
    else:
        raise NotImplementedError()
    authornames = [item['name'] for item in authors]
    # delete obsolete authors
    self.lib.authors = []
    query = db_session.query(models.Authors).filter(
        models.Authors.name.in_(authornames))
    existing = set()
    # Re-link authors that already exist in the database
    for _author in query.all():
        for item in authors:
            if item['name'] != _author.name:
                continue
            existing.add(_author.name)
            _la = models.LibsAuthors(maintainer=item['maintainer'])
            _la.author = _author
            self.lib.authors.append(_la)
    # Create rows for authors not yet in the database
    for name in (set(authornames) - existing):
        for item in authors:
            if item['name'] != name:
                continue
            _la = models.LibsAuthors(maintainer=item['maintainer'])
            _la.author = models.Authors(
                name=item['name'], email=item['email'], url=item['url'])
            self.lib.authors.append(_la)
    # save in string format for FTS
    self.lib.fts.authornames = ",".join(authornames)
    return authors
def delete_library(lib_id):
    """Remove a library: example files, version archives and DB records."""
    lib = db_session.query(models.Libs).get(lib_id)
    if not lib:
        # Guard against unknown IDs; previously this crashed with an
        # AttributeError on `lib.versions` below
        logger.warning("Library with id=%s not found." % lib_id)
        return
    # remove whole examples dir (including all examples files)
    try:
        rmtree(util.get_libexample_dir(lib_id))
    except OSError:
        logger.warning("Unable to remove lib #%s examples directory. "
                       "Probably it was removed earlier." % lib_id)
    # remove all versions archives
    for version in lib.versions:
        remove_library_version_archive(lib_id, version.id)
    # remove information about library from database
    db_session.delete(lib)
    db_session.commit()
    purge_cache()
def sync_keywords(self, keywords):
    """Replace the library's keyword links with the cleaned-up set."""
    keywords = self._cleanup_keywords(keywords)
    # delete obsolete keywords
    self.lib.keywords = []
    known_rows = db_session.query(models.Keywords).filter(
        models.Keywords.name.in_(keywords)).all()
    known_names = set()
    for row in known_rows:
        known_names.add(row.name)
        self.lib.keywords.append(row)
    # create rows for keywords not present in the DB yet
    for new_name in set(keywords) - known_names:
        self.lib.keywords.append(models.Keywords(name=new_name))
    # save in string format for FTS
    self.lib.fts.keywords = ",".join(keywords)
    return keywords
def sync_frameworks_or_platforms(self, what, items):
    """Synchronize the library's frameworks or platforms relations.

    Args:
        what: either "frameworks" or "platforms".
        items: manifest value — a list or comma-separated string; a
            leading "*" entry means "all known items" (for platforms,
            restricted to the manifest's frameworks when declared).

    Returns:
        The normalized (lower-cased, de-duplicated) item names.
    """
    def _process_items(items_):
        # Accept both a list and a comma-separated string
        if not isinstance(items_, list):
            items_ = [i for i in items_.split(",")]
        return list(set([i.lower().strip() for i in items_]))

    assert what in ("frameworks", "platforms")
    assert any([isinstance(items, t) for t in (list, basestring)])
    items = _process_items(items)
    dbitems = []
    if items:
        _model = getattr(models, what.title())
        dbitems = db_session.query(_model).order_by(_model.name.asc())
        if items[0] == "*":
            # Wildcard: take every known item...
            if what == "platforms" and self.config.get("frameworks"):
                # ...but for platforms, only those compatible with the
                # frameworks declared in the manifest
                dbitems = dbitems.join(models.PlatformsFrameworks).join(
                    models.Frameworks,
                    and_(
                        models.Frameworks.id ==
                        models.PlatformsFrameworks.framework_id,
                        models.Frameworks.name.in_(
                            _process_items(self.config['frameworks']))))
            dbitems = dbitems.all()
            items = [getattr(i, "name") for i in dbitems]
        else:
            dbitems = dbitems.filter(_model.name.in_(items)).all()
            # check for invalid items
            # assert len(items) == len(dbitems)
    # update items in DB
    setattr(self.lib, what, dbitems)
    # save in string format for FTS
    setattr(
        self.lib.fts, what + "list",
        ",".join(["%s:%s" % (item.name, item.title) for item in dbitems]))
    return items
def sync_frameworks_or_platforms(self, what, items):
    """Synchronize the library's frameworks or platforms relations.

    Args:
        what: "frameworks" or "platforms".
        items: manifest value — a list or comma-separated string; a
            leading "*" means "all known items" (for platforms, limited
            to the manifest's declared frameworks when present).

    Returns:
        The normalized (lower-cased, de-duplicated) item names.
    """
    def _process_items(items_):
        # Accept both a list and a comma-separated string
        if not isinstance(items_, list):
            items_ = [i for i in items_.split(",")]
        return list(set([i.lower().strip() for i in items_]))

    assert what in ("frameworks", "platforms")
    assert any([isinstance(items, t) for t in (list, basestring)])
    items = _process_items(items)
    dbitems = []
    if items:
        _model = getattr(models, what.title())
        dbitems = db_session.query(_model).order_by(_model.name.asc())
        if items[0] == "*":
            # Wildcard entry: select every known item...
            if what == "platforms" and self.config.get("frameworks"):
                # ...restricted, for platforms, to those compatible with
                # the frameworks declared in the manifest
                dbitems = dbitems.join(models.PlatformsFrameworks).join(
                    models.Frameworks,
                    and_(models.Frameworks.id ==
                         models.PlatformsFrameworks.framework_id,
                         models.Frameworks.name.in_(
                             _process_items(self.config['frameworks']))))
            dbitems = dbitems.all()
            items = [getattr(i, "name") for i in dbitems]
        else:
            dbitems = dbitems.filter(_model.name.in_(items)).all()
            # check for invalid items
            # assert len(items) == len(dbitems)
    # update items in DB
    setattr(self.lib, what, dbitems)
    # save in string format for FTS
    setattr(self.lib.fts, what + "list", ",".join(
        ["%s:%s" % (item.name, item.title) for item in dbitems]))
    return items
def process_pending_libs():
    """Create Libs records for approved pending registrations.

    Every approved, not-yet-processed PendingLibs entry without a
    matching Libs row gets a fresh library ID and a full initial sync;
    the sync schedule is re-balanced afterwards if anything was added.
    """
    def get_free_lib_id():
        # Find the smallest gap in the sequence of existing library IDs
        lib_id = 0
        free_id = 0
        query = db_session.query(models.Libs.id).order_by(
            models.Libs.id.asc())
        for (lib_id, ) in query.all():
            free_id += 1
            if lib_id > free_id:
                break
        if lib_id == free_id:
            free_id += 1
        return free_id

    query = db_session.query(models.PendingLibs, models.Libs.id).filter(
        ~models.PendingLibs.processed, models.PendingLibs.approved).outerjoin(
            models.Libs, models.PendingLibs.conf_url == models.Libs.conf_url)
    were_synced = False
    for (item, lib_id) in query.all():
        if lib_id:
            # Already materialized as a library; skip
            continue
        logger.info("Processing pending library: %s", item.conf_url)
        # Roll back this library's changes on failure without aborting
        # the rest of the batch
        with util.rollback_on_exception(db_session, logger):
            lib = models.Libs(id=get_free_lib_id(), conf_url=item.conf_url)
            lib.dlstats = models.LibDLStats()
            db_session.add(lib)
            ls = LibSyncerFactory.new(lib)
            ls.sync()
            item.processed = True
            db_session.commit()
            were_synced = True
            purge_cache()
    if were_synced:
        optimise_sync_period()
def sync_arduino_libs():
    """Discover Arduino libraries from the official library index.

    Downloads Arduino's library_index.json, derives each library's
    GitHub repository URL, and registers unknown ones as PendingLibs.
    Entries with a fetchable ``library.properties`` manifest and a name
    not already in the registry are pre-approved; the rest are left for
    manual moderation.
    """
    def _cleanup_url(url):
        # Strip a trailing ".git" and/or "/" so URL comparison is stable
        for text in (".git", "/"):
            if url.endswith(text):
                url = url[:-len(text)]
        return url

    # Collect URLs already known to the registry (pending + attributes)
    used_urls = set()
    for item in db_session.query(models.PendingLibs).all():
        used_urls.add(item.conf_url.lower())
    query = db_session\
        .query(models.LibsAttributes.value)\
        .join(models.Attributes)\
        .filter(models.Attributes.name.in_(["homepage", "repository.url"]))
    for (url, ) in query.all():
        url = _cleanup_url(url)
        used_urls.add(url.lower())

    libs_index = requests.get(
        "http://downloads.arduino.cc/libraries/library_index.json").json()
    # Keep only the newest version of each library from the index
    libs = {}
    for lib in libs_index['libraries']:
        if lib['name'] not in libs or \
                parse_version(lib['version']) > parse_version(
                    libs[lib['name']]['version']):
            libs[lib['name']] = lib
    del libs_index

    for lib in libs.values():
        github_url = "https://github.com/{}/{}"
        if "github.com" in lib['website'] and lib['website'].count("/") >= 4:
            # Website already points at GitHub: user/repo from its path
            github_url = github_url.format(
                *urlparse(lib['website']).path[1:].split("/")[:2])
        else:
            # Derive user/repo from the archive download URL
            _username, _filename = lib['url'].rsplit("/", 2)[1:]
            github_url = github_url.format(_username,
                                           _filename.rsplit("-", 1)[0])
        github_url = _cleanup_url(github_url)
        if github_url.lower() in used_urls:
            continue
        logger.debug(
            "SyncArduinoLibs: Processing {name}, {website}".format(**lib))
        approved = False
        try:
            vcs = VCSClientFactory.newClient("git", github_url)
            default_branch = vcs.default_branch
            assert default_branch
            conf_url = ("https://raw.githubusercontent.com{user_and_repo}/"
                        "{branch}/library.properties".format(
                            user_and_repo=urlparse(github_url).path,
                            branch=default_branch))
            if conf_url.lower() in used_urls:
                continue
            r = requests.get(conf_url)
            r.raise_for_status()
            approved = True
        except Exception:
            # No fetchable manifest: fall back to the bare repository URL
            conf_url = github_url
            if conf_url.lower() in used_urls:
                continue
            else:
                used_urls.add(conf_url)
        # leave for moderation library with existing name
        if approved:
            query = db_session.query(
                func.count(1)).filter(models.LibFTS.name == lib['name'])
            approved = not query.scalar()
        db_session.add(
            models.PendingLibs(conf_url=conf_url, approved=approved))
        db_session.commit()
        logger.info(
            "SyncArduinoLibs: Registered new library {name}, {website}".format(
                **lib))
def get_result(self):
    """Return the full public record of a single library.

    Raises:
        APINotFound: if no library with ``self.id_`` exists (a library
            without a latest version is also treated as unknown due to
            the inner join).
    """
    result = dict(
        authors=[],
        dlstats=dict(),
        version=dict(),
        examples=[],
        frameworks={},
        platforms={})
    query = db_session.query(models.Libs, models.LibVersions).join(
        models.LibVersions,
        models.LibVersions.id == models.Libs.latest_version_id).filter(
            models.Libs.id == self.id_)
    try:
        lib, libversion = query.one()
    except NoResultFound:
        raise APINotFound("Unknown library with ID '%s'" % str(self.id_))
    result['id'] = lib.id
    result['confurl'] = lib.conf_url
    for k in ("name", "description"):
        result[k] = getattr(lib.fts, k)
    result['keywords'] = lib.fts.keywords.split(",")
    for k in ("lifetime", "day", "week", "month"):
        result['dlstats'][k] = getattr(lib.dlstats, k)

    # examples
    if lib.example_nums:
        for item in lib.examples:
            result['examples'].append(
                util.get_libexample_url(lib.id, item.name))

    # latest version
    result['version'] = dict(
        name=libversion.name,
        released=libversion.released.strftime("%Y-%m-%dT%H:%M:%SZ"))

    # previous versions
    result['versions'] = [
        dict(
            name=l.name,
            released=l.released.strftime("%Y-%m-%dT%H:%M:%SZ"))
        for l in lib.versions
    ]

    # authors
    for item in lib.authors:
        _author = {"maintainer": item.maintainer}
        for k in ("name", "email", "url"):
            _author[k] = getattr(item.author, k)
        result['authors'].append(_author)

    # frameworks & platforms
    for what in ("frameworks", "platforms"):
        result[what] = util.parse_namedtitled_list(
            getattr(lib.fts, what + "list"))

    # headers
    result['headers'] = lib.fts.headerslist.split(
        ",") if lib.fts.headerslist else []

    # attributes
    attributes = {}
    for item in lib.attributes:
        attributes[item.attribute.name] = item.value
    result['homepage'] = attributes.get("homepage")
    result['repository'] = attributes.get("repository.url")
    return result