def test_relationship(ssn: sa.orm.Session): """ Test getting historical relationship values through an InstanceHistoryProxy """ # Prepare ssn.add(User(id=1, name='John', age=18)) ssn.add(User(id=2, name='Jack', age=18)) ssn.add(Article(id=1, title='Python', author_id=1)) ssn.commit() # Users john = ssn.query(User).get(1) jack = ssn.query(User).get(2) # Article article: Article = ssn.query(Article).get(1) old_article: Article = InstanceHistoryProxy(article) # noqa assert article.author == john # load it assert old_article.author == john # works # Modify article.author = jack assert old_article.author == john # still works # Flush ssn.flush() assert old_article.author == john # still works
async def upload_avatar(file: UploadFile = File(...), current_user: User = Depends(get_current_user), db: sa.orm.Session = Depends(get_db)):
    """
    Uploads an avatar file and records its path on the current user.

    :param file: File object
    :param current_user: Current authenticated user
    :param db: Session instance
    :return: Upload info (sanitized filename and storage path)
    """
    # SECURITY FIX: `file.filename` is client-controlled; joining it directly
    # would allow path traversal (e.g. "../../app.py"). Keep only the basename.
    filename = os.path.basename(file.filename)
    path = os.path.join('static/user_avatars', filename)
    with open(path, 'wb+') as buffer:
        shutil.copyfileobj(file.file, buffer)
    # Drop the previous avatar file (if any) before pointing at the new one.
    if current_user.avatar:
        if os.path.exists(current_user.avatar):
            os.remove(current_user.avatar)
    try:
        current_user.avatar = path
        db.add(current_user)
        db.commit()
    except Exception as e:
        # Best-effort persistence: keep the endpoint responding even if the
        # commit fails; the session is rolled back to a clean state.
        print(e)
        db.rollback()
    return {'filename': filename, 'path': path}
def _get_instances(task, config: ConfigHolder, session: sqlalchemy.orm.Session):
    """Collect the Graph/TaskJobs instances selected by *config*.

    Either drops into an interactive IPython shell for a manual query
    (``config.manual_query``), or builds a filtered query from the size /
    type bounds in *config*. Returns a list of instances, truncated to
    ``config.max_amount`` when set.
    """
    if config.manual_query:
        query = None
        print(
            "Manual query chosen. Please fill a query. After finishing the query just end ipython.\n\
Query result must be of type Graph or TaskJobs!")
        embed()
        assert query is not None, "query must be filled!"
        # BUG FIX: `Query.statement` is a property, not a method; calling it
        # raised TypeError. Persist its SQL text representation instead.
        session.add(
            Config(task=task, value=str(query.statement), param="statement"))
        return query.all()
    if config.task_id is None:
        query = session.query(Graph)
    else:
        query = session.query(TaskJobs).join(Graph).filter(
            TaskJobs.task_id == config.task_id)
    # Apply optional vertex/edge-count bounds and instance-type filter.
    if config.min_n is not None:
        query = query.filter(Graph.vert_amount >= config.min_n)
    if config.max_n is not None:
        query = query.filter(Graph.vert_amount <= config.max_n)
    if config.min_m is not None:
        query = query.filter(Graph.edge_amount >= config.min_m)
    if config.max_m is not None:
        query = query.filter(Graph.edge_amount <= config.max_m)
    if config.instance_types:
        query = query.filter(Graph.i_type.in_(config.instance_types))
    if config.max_amount is not None:
        # Slicing a Query executes it with LIMIT and yields a list.
        query = query[:config.max_amount]
    return query[:]
def index(c: config.Config, db: sa.orm.Session, name: str):
    """Reindex every source matching *name*: record a Reindex run, upsert the
    source's buckets, then walk all their nodes with a progress bar."""
    for src in get_sources(c, name):
        if src is None:
            click.echo(
                message=f'Source name={name} not found in config.',
                err=True,
            )
            exit(1)
        source_model, _ = crud.get_or_create(db, models.Source, name=src.name)
        reindex = models.Reindex(source=source_model)
        db.add(reindex)
        db.add(source_model)
        db.commit()
        source_module = sources.get_module(src.type)
        buckets = [
            crud.upsert_object(db, bucket, {}, reindex, source_model)
            for bucket in source_module.scan_for_buckets(src)
        ]
        bucketnames = ' '.join(bucket.name for bucket in buckets)
        click.echo(
            f'reindexing {len(buckets)} bucket(s) from source={src.name}: {bucketnames}'
        )
        nodes = []
        for bucket in buckets:
            nodes.extend(crud.get_nodes(db, bucket))
        # Drain the indexing generator; the progress bar length is the node count.
        with click.progressbar(crud.index_source(db, src, reindex),
                               length=len(nodes)) as bar:
            for _ in bar:
                pass
def _insert_default_coa(session: sa.orm.Session):  # noqa: C901
    # the testing chart of accounts
    # NOTE(review): indentation inside this string is semantic — each 4-space
    # step nests an account under the previous shallower one. The layout below
    # is reconstructed to match the depth-first id numbering (#1..#18);
    # confirm Broker is intended as a child of Fees.
    coa = """
    root                      # 1
        Assets                # 2
            Cash              # 3
            Receivables       # 4
            Inventory         # 5
        Liabilities           # 6
            Payables          # 7
        Shares Issued         # 8
        Retained Earnings     # 9
        Income                # 10
            Trade             # 11
            Interest          # 12
        Expenses              # 13
            Fees              # 14
                Broker        # 15
            Administration    # 16
            Tax               # 17
            Other             # 18
    """
    # Drop blank lines, then strip the trailing "# n" id comments.
    coa = [line for line in coa.splitlines() if line.strip() != ""]
    coa = [line.split("#")[0].rstrip() for line in coa]

    def _get_level(coa, line):
        # Depth = (leading spaces - 4) // 4; the root line sits at 4 spaces.
        line_str = coa[line]
        level = len(line_str) - len(line_str.lstrip()) - 4
        level = level // 4
        return level

    def _insert_next(coa, line, parent_id, curr_level, last_id):
        # Recursive descent over the indented outline. Sibling lines insert
        # accounts under parent_id; a deeper line recurses with the previous
        # account as parent; a shallower line returns control to the caller.
        # Returns (last line consumed, last id assigned) so ids stay sequential.
        while line < len(coa) - 1:
            line += 1
            now_level = _get_level(coa, line)
            name = coa[line].strip()
            if now_level == curr_level:
                # sibling account
                last_id += 1
                acc = models.Account(
                    id=last_id, name=name, parent_id=parent_id
                )
                session.add(acc)
            elif now_level == curr_level + 1:
                # child: re-read this line one level deeper
                line -= 1
                line, last_id = _insert_next(
                    coa=coa,
                    line=line,
                    parent_id=last_id,
                    curr_level=now_level,
                    last_id=last_id,
                )
            elif now_level < curr_level:
                # go back one level
                return line - 1, last_id
        return line, last_id

    # Insert the root explicitly, then walk its subtree.
    root = models.Account(id=1, name=coa[0].strip(), parent_id=None)
    session.add(root)
    _insert_next(coa=coa, line=0, parent_id=1, curr_level=1, last_id=1)
    session.commit()
def _insert_default_assets(session: sa.orm.Session):
    """Seed the asset table with the default set of currencies."""
    columns = ("name", "description", "is_active", "type")
    assets = [
        # currencies
        ("USD", "US Dolar", True, "currency"),
        ("EUR", "Euros", True, "currency"),
        ("JPY", "Japanese Yen", True, "currency"),
        ("CNY", "Chinese Yuan", True, "currency"),
        ("CHF", "Swiss Franc", True, "currency"),
        ("BRL", "Brazilian Real", True, "currency"),
        ("BTC", "Bitcoin", True, "currency"),
        ("ETH", "Ethereum", True, "currency"),
        ("XMR", "Monero", True, "currency"),
        ("ADA", "Cardano", True, "currency"),
        ("USDT", "Tether", True, "currency"),
    ]
    for asset_item in assets:
        # Tuples are positional shorthand; expand them into keyword form.
        if isinstance(asset_item, tuple):
            asset_item = dict(zip(columns, asset_item))
        session.add(models.Asset(**asset_item))
    session.commit()
def create_transaction(
    transaction: TransactionCreate,
    session: sa.orm.Session = sess,
):
    """Persist a transaction and its entries; return the refreshed row.

    Every entry inherits the transaction's datetime and fund. The stored
    transaction value is the sum of the positive entry values only
    (presumably the debit side of the double entry — confirm with the
    accounting rules of this project).
    """
    entries = [
        db.models.Entry(
            **entry.dict(),
            datetime=transaction.datetime,
            fund_id=transaction.fund_id,
        )
        for entry in transaction.entries
    ]
    # Generator instead of an intermediate list inside sum().
    entries_value = sum(entry.value for entry in entries if entry.value > 0)
    transaction_db = db.models.Transaction(
        datetime=transaction.datetime,
        timestamp=dt.datetime.utcnow(),
        value=entries_value,
        description=transaction.description,
        entries=entries,
        fund_id=transaction.fund_id,
    )
    session.add(transaction_db)
    db.main.try_to_commit(session)
    session.refresh(transaction_db)
    return transaction_db
def create_fund(
    fund: FundCreate,
    session: sa.orm.Session = sess,
):
    """Create a new fund and return the persisted row.

    Fields of *fund*:
        name: the fund name.
        temporary: temporary funds can be deleted along with all their
            transactions.
        asset_id: identifier of the fund's currency; used to
            mark-to-market the fund's assets.

    Raises HTTP 422 when the referenced asset is not a currency.
    """
    if not _asset_is_currency(fund.asset_id, session):
        raise HTTPException(
            status_code=422, detail="Fund asset must be a currency"
        )
    new_fund = db.models.Fund(**fund.dict())
    session.add(new_fund)
    db.main.try_to_commit(session)
    session.refresh(new_fund)
    return new_fund
def create_asset(
    asset_create: asset_create_class,
    session: sa.orm.Session = sess,
):
    """Persist a new asset, forcing the endpoint's fixed asset type."""
    payload = asset_create.dict()
    # The type is dictated by the surrounding endpoint, not the client.
    payload["type"] = asset_type.value
    new_asset = db.models.Asset(**payload)
    session.add(new_asset)
    db.main.try_to_commit(session)
    session.refresh(new_asset)
    return new_asset
def cancel_transaction(
    transaction_id: int,
    session: sa.orm.Session = sess,
):
    """Create and return a reversing transaction for *transaction_id*.

    Marks the original transaction and its entries as canceled, then
    inserts a mirror transaction whose entries negate the original
    values and quantities.

    Raises HTTP 404 when the original transaction does not exist.
    """
    # find the original transaction
    transaction = session.query(db.models.Transaction).get(transaction_id)
    if transaction is None:
        msg = f"Transaction {transaction_id=} not found"
        raise HTTPException(status_code=404, detail=msg)

    # cancel it and create reverse entries
    transaction.cancel = True
    entries = []
    # Iterate entries directly instead of index bookkeeping over range(len()).
    for original_entry in transaction.entries:
        original_entry.cancel = True
        new_entry = EntryCreate(
            account_id=original_entry.account_id,
            value=-original_entry.value,
            asset_id=original_entry.asset_id,
            quantity=-original_entry.quantity,
        )
        new_entry_db = db.models.Entry(
            **new_entry.dict(),
            datetime=transaction.datetime,
            cancel=True,
            fund_id=transaction.fund_id,
        )
        entries.append(new_entry_db)

    # create the reverse transaction
    canceling = db.models.Transaction(
        timestamp=dt.datetime.utcnow(),
        datetime=transaction.datetime,
        value=transaction.value,
        description=f"Cancel: {transaction_id}",
        entries=entries,
        cancel=True,
        fund_id=transaction.fund_id,
    )

    # persist and return
    session.add(canceling)
    db.main.try_to_commit(session)
    return canceling
def test_property(ssn: sa.orm.Session): """ Test getting historical @property values through an InstanceHistoryProxy """ # Prepare ssn.add(User(id=1, name='John', age=18)) ssn.commit() # Load user: User = ssn.query(User).get(1) old_user: User = InstanceHistoryProxy(user) # noqa # @property access assert user.age_in_100_years == 118 assert old_user.age_in_100_years == 118 # Modify user.age = 20 assert old_user.age_in_100_years == 118 # still good
def add_new_package_version(session: sqlalchemy.orm.Session, pkg: Dict) -> None:
    """Stage a PackageVersion row for *pkg* unless one already exists.

    Does not commit; the caller controls transaction boundaries.

    :param session: SQLAlchemy session used for the lookup and the add
    :param pkg: parsed npm/yarn package dict with name/version/resolved keys
    """
    # Explicit branch instead of `query(...).one_or_none() or session.add(...)`,
    # which abused `or` short-circuiting purely for its side effect.
    if get_package_version_id_query(session, pkg).one_or_none() is None:
        session.add(
            PackageVersion(
                name=pkg.get("name", None),
                version=pkg.get("version", None),
                language="node",
                url=pkg.get(
                    "resolved", None
                ),  # is null for the root for npm list and yarn list output
            ))
def create_user(
    user: CreateUserSchema,
    db: sa.orm.Session = get_db,
) -> UserSchema:
    """Create a new user with a hashed password.

    Responds with HTTP 400 when the email is already taken.
    """
    payload = user.dict(exclude_unset=True)
    # Never store the plaintext password.
    payload["password"] = hash_password(payload["password"])
    new_user = User(**payload)
    db.add(new_user)
    try:
        db.commit()
    except sa.exc.IntegrityError:
        # Unique constraint on the email column.
        db.rollback()
        raise HTTPException(
            status_code=400,
            detail="A user with this email already exists.",
        )
    return new_user
def update_or_create_user_from_tweet(tweet: Tweet, session: sqlalchemy.orm.Session):
    """Extracts the user data from the tweet's other_data json field and saves
    a user object. If there already is a user with that id, it updates their
    properties. Otherwise, it creates a new user object.

    :type session: sqlalchemy.orm.Session
    :param session: session object to perform queries with
    :type tweet: Tweet
    :returns: the User, or None when the tweet carries no user data
    """
    # ROBUSTNESS: bind `user` up front so the function returns None (instead of
    # raising UnboundLocalError) when the tweet has no embedded user data.
    user = None
    data = extract_user_dict_from_tweet(tweet)
    if data is not None:
        # Try to load existing user
        user_id = tweet.userID
        user = get_user_by_id(user_id, session)
        if user is None:
            # make user from the json
            user = UserFactory(data)
            user.audit_data = {'created_from_tweet': tweet.tweetID}
            # the new object isn't yet tracked, so we need to add it to the session
            session.add(user)
        else:
            # Shallow-copy the json field: sqlalchemy isn't watching in-place
            # mutations of the dict, and reassigning a detached copy below is
            # what marks the attribute dirty.
            j = dict(user.audit_data)
            # update relevant fields
            audit = {'event': 'update_from_tweet', 'tweetId': tweet.tweetID}
            for f in USER_FIELDS_TO_UPDATE:
                setattr(user, f, data[f])
                audit[f] = data[f]
            j[tweet.tweetID] = audit
            user.audit_data = j
    # return the object in case needed
    return user
def insert_package_graph(session: sqlalchemy.orm.Session, task_data: Dict) -> None:
    """Persist the dependency graph from one npm/yarn list task.

    For each direct dependency in *task_data*, ensures a PackageVersion row
    exists, links it to each of its own dependencies via PackageLink rows,
    and finally records a PackageGraph tying all links to the root package.
    Commits incrementally so the id-lookup queries see the new rows.
    """
    link_ids = []
    for task_dep in task_data.get("dependencies", []):
        add_new_package_version(session, task_dep)
        # Commit so the id query below can find the row just added.
        session.commit()
        parent_package_id = get_package_version_id_query(session,
                                                        task_dep).first()

        for dep in task_dep.get("dependencies", []):
            # is fully qualified semver for npm (or file: or github: url), semver for yarn
            # rsplit keeps scoped names like "@babel/core@7.0.0" intact.
            name, version = dep.rsplit("@", 1)
            child_package_id = get_package_version_id_query(
                session, dict(name=name, version=version)).first()

            # Create the parent→child link only if it doesn't exist yet.
            link_id = get_package_version_link_id_query(
                session, (parent_package_id, child_package_id)).one_or_none()
            if not link_id:
                session.add(
                    PackageLink(
                        child_package_id=child_package_id,
                        parent_package_id=parent_package_id,
                    ))
                session.commit()
                link_id = get_package_version_link_id_query(
                    session, (parent_package_id, child_package_id)).first()
            link_ids.append(link_id)

    # One graph row referencing every link collected above.
    session.add(
        PackageGraph(
            root_package_version_id=get_package_version_id_query(
                session, task_data["root"]).first()
            if task_data["root"] else None,
            link_ids=link_ids,
            package_manager="yarn" if "yarn" in task_data["command"] else "npm",
            package_manager_version=None,
        ))
    session.commit()
def create_product(
    product: CreateProductSchema,
    db: sa.orm.Session = get_db,
) -> ProductSchema:
    """Create a new product, decoding the optional base64 image payload.

    Responds with HTTP 400 when the product name is already taken.
    """
    payload = product.dict(exclude_unset=True)
    # The image arrives base64-encoded over JSON; store raw bytes.
    if "image" in payload:
        payload["image"] = b64decode(payload["image"])
    new_product = Product(**payload)
    db.add(new_product)
    try:
        db.commit()
    except sa.exc.IntegrityError:
        # Unique constraint on the product name.
        db.rollback()
        raise HTTPException(
            status_code=400,
            detail="A product with that name already exists.",
        )
    return new_product
def test_columns(ssn: sa.orm.Session): """ Simple test of InstanceHistoryProxy with columns """ # Prepare ssn.add(User(id=1, name='John', age=18)) ssn.commit() # Check initial state user: User = ssn.query(User).get(1) old_user: User = InstanceHistoryProxy(user) # noqa def old_user_is_correct(): assert old_user.id == 1 assert old_user.name == 'John' assert old_user.age == 18 # Modify user.id = 1000 user.name = 'CHANGED' user.age = 1800 old_user_is_correct() # still good # Flush ssn.flush() old_user_is_correct() # still good
def test_does_not_lose_history(ssn: sa.orm.Session):
    """ Extensive test of InstanceHistoryProxy with query counters and lazy loads """
    assert ssn.autoflush == False, 'this test relies on Session.autoflush=False'
    engine = ssn.get_bind()

    # Prepare
    ssn.add(User(id=1, name='John', age=18))
    ssn.add(Article(id=1, title='Python', author_id=1))
    ssn.commit()

    # === Test 1: ModelHistoryProxy does not lose history when flushing a session
    ssn.expunge_all(
    )  # got to reset; otherwise, the session might reuse loaded objects
    user = ssn.query(User).get(1)
    with ExpectedQueryCounter(engine, 0, 'Expected no queries here'):
        old_user_hist = InstanceHistoryProxy(user)  # issues no queries

    # Modify
    user.name = 'CHANGED'
    # History works
    assert old_user_hist.name == 'John'

    # Flush
    ssn.flush()
    # History is NOT broken!
    assert old_user_hist.name == 'John'

    # Change another column after flush; history is still NOT broken!
    user.age = 1800
    assert old_user_hist.age == 18  # correct

    # Undo
    ssn.rollback()

    # === Test 2: ModelHistoryProxy does not lose history when lazyloading a column
    # (was mislabeled "Test 1" twice)
    ssn.expunge_all(
    )  # got to reset; otherwise, the session might reuse loaded objects
    user = ssn.query(User).options(load_only('name')).get(1)
    with ExpectedQueryCounter(engine, 0, 'Expected no queries here'):
        old_user_hist = InstanceHistoryProxy(user)  # issues no queries
    user.name = 'CHANGED'
    assert old_user_hist.name == 'John'

    # Load a column
    with ExpectedQueryCounter(engine, 1, 'Expected 1 lazyload query'):
        user.age  # get an unloaded column

    # History is NOT broken!
    assert old_user_hist.name == 'John'

    # === Test 3: ModelHistoryProxy does not lose history when lazyloading a one-to-many relationship
    ssn.expunge_all(
    )  # got to reset; otherwise, the session might reuse loaded objects
    user = ssn.query(User).get(1)
    with ExpectedQueryCounter(engine, 0, 'Expected no queries here'):
        old_user_hist = InstanceHistoryProxy(user)
    user.name = 'CHANGED'
    assert old_user_hist.name == 'John'  # History works

    # Load a relationship
    with ExpectedQueryCounter(engine, 1, 'Expected 1 lazyload query'):
        list(user.articles)

    # History is NOT broken!
    assert old_user_hist.name == 'John'

    # === Test 4: ModelHistoryProxy does not lose history when lazyloading a one-to-one relationship
    ssn.expunge_all(
    )  # got to reset; otherwise, the session might reuse loaded objects
    article = ssn.query(Article).get(1)
    with ExpectedQueryCounter(engine, 0, 'Expected no queries here'):
        old_article_hist = InstanceHistoryProxy(article)
    article.title = 'CHANGED'
    assert old_article_hist.title == 'Python'  # works

    # Load a relationship
    with ExpectedQueryCounter(engine, 1, 'Expected 1 lazyload query'):
        article.author

    # History is NOT broken!
    assert old_article_hist.title == 'Python'  # works
async def handle_push(connection: Connection,
                      session: sqlalchemy.orm.Session) -> Optional[Dict[str, int]]:
    """Process push messages arriving on *connection* and apply them.

    For each PushMessage: validates the version counter and signature,
    resolves unique-constraint conflicts, performs the contained operations,
    records them under a new Version row, and reports the new version id
    back to the client.

    NOTE(review): the declared return type was ``Optional[int]`` but the
    function returns a ``{'new_version_id': ...}`` dict (or None) —
    annotation corrected; confirm against callers.

    :raises PushRejected: version mismatch, bad signature, or failed operation
    :raises PullSuggested: the client is behind the server and should pull
    """
    msgs_got = 0
    version: Optional[Version] = None
    async for msg in connection.socket:
        msgs_got += 1
        msg_json = json.loads(msg)
        pushmsg = PushMessage(msg_json)
        # print(f"pushmsg: {msg}")
        if not pushmsg.operations:
            logger.warn("empty operations list in client PushMessage")
        for op in pushmsg.operations:
            logger.info(f"operation: {op}")
        # await connection.socket.send(f"answer is:{msg}")
        logger.info(f"message key={pushmsg.key}")

        # Reject/redirect when the client's view of the latest version is stale.
        latest_version_id = core.get_latest_version_id(session=session)
        logger.info(
            f"** version on server:{latest_version_id}, version in pushmsg:{pushmsg.latest_version_id}"
        )
        if latest_version_id != pushmsg.latest_version_id:
            exc = f"version identifier isn't the latest one; " \
                  f"incoming: {pushmsg.latest_version_id}, on server:{latest_version_id}"
            if latest_version_id is None:
                logger.warn(exc)
                raise PushRejected(exc)
            if pushmsg.latest_version_id is None:
                logger.warn(exc)
                raise PullSuggested(exc)
            if pushmsg.latest_version_id < latest_version_id:
                logger.warn(exc)
                raise PullSuggested(exc)
            raise PushRejected(exc)
        if not pushmsg.islegit(session):
            raise PushRejected("message isn't properly signed")

        for listener in before_push:
            listener(session, pushmsg)

        # I) detect unique constraint conflicts and resolve them if possible
        unique_conflicts = find_unique_conflicts(pushmsg, session)
        conflicting_objects = set()
        for uc in unique_conflicts:
            obj = uc['object']
            conflicting_objects.add(obj)
            for key, value in zip(uc['columns'], uc['new_values']):
                setattr(obj, key, value)
        for obj in conflicting_objects:
            make_transient(obj)  # remove from session
        for model in set(type(obj) for obj in conflicting_objects):
            pk_name = get_pk(model)
            pks = [
                getattr(obj, pk_name) for obj in conflicting_objects
                if type(obj) is model
            ]
            # Bulk-delete the conflicting rows so they can be reinserted below.
            session.query(model).filter(getattr(model, pk_name).in_(pks)). \
                delete(synchronize_session=False)  # remove from the database
        session.add_all(conflicting_objects)  # reinsert
        session.flush()

        # II) perform the operations
        operations = [
            o for o in pushmsg.operations if o.tracked_model is not None
        ]
        post_operations: List[Tuple[Operation, SQLClass,
                                    Optional[SQLClass]]] = []
        try:
            op: Operation
            for op in operations:
                (obj, old_obj) = await op.perform_async(
                    pushmsg, session, pushmsg.node_id, connection.socket)
                if obj is not None:
                    # if the op has been skipped, it wont be appended for post_operation handling
                    post_operations.append((op, obj, old_obj))

                    resp = dict(type="info",
                                op=dict(
                                    row_id=op.row_id,
                                    version=op.version,
                                    command=op.command,
                                    content_type_id=op.content_type_id,
                                ))
                    call_after_tracking_fn(session, op, obj)
                    await connection.socket.send(json.dumps(resp))

        except OperationError as e:
            logger.exception(
                "Couldn't perform operation in push from node %s.",
                pushmsg.node_id)
            raise PushRejected("at least one operation couldn't be performed",
                               *e.args)

        # III) insert a new version
        if post_operations:  # only if operations have been done -> create the new version
            version = Version(created=datetime.datetime.now(),
                              node_id=pushmsg.node_id)
            session.add(version)

        # IV) insert the operations, discarding the 'order' column
        accomplished_operations = [
            op for (op, obj, old_obj) in post_operations
        ]
        for op in sorted(accomplished_operations, key=attr('order')):
            new_op = Operation()
            for k in [k for k in properties_dict(op) if k != 'order']:
                setattr(new_op, k, getattr(op, k))
            session.add(new_op)
            new_op.version = version
            session.flush()

        for op, obj, old_obj in post_operations:
            op.call_after_operation_fn(session, obj, old_obj)
            # from woodmaster.model.sql.model import WoodPile, Measurement
            # orphans = session.query(Measurement).filter(Measurement.woodpile_id == None).all()
            # print(f"orphans:{orphans}")

        for listener in after_push:
            listener(session, pushmsg)

        # return the new version id back to the client
        logger.info(f"version is: {version}")
        if version:
            await connection.socket.send(
                json.dumps(
                    dict(type="result", new_version_id=version.version_id)))
            return {'new_version_id': version.version_id}
        else:
            await connection.socket.send(
                json.dumps(dict(type="result", new_version_id=None)))
            logger.info("sent nothing message")
            await connection.socket.close()
    logger.info("push ready")
def insert_package_audit(session: sqlalchemy.orm.Session, task_data: Dict) -> None:
    """Persist npm/yarn audit advisories and link them to package versions.

    Normalizes the differing npm vs. yarn audit shapes, upserts an Advisory
    row per advisory, then appends the ids of the impacted PackageVersion
    rows to the advisory's ``vulnerable_package_version_ids`` list.
    """
    is_yarn_cmd = bool("yarn" in task_data["command"])
    # NB: yarn has .advisory and .resolution
    # the same advisory JSON (from the npm DB) is
    # at .advisories{k, v} for npm and .advisories[].advisory for yarn
    advisories = ((item.get("advisory", None)
                   for item in task_data.get("advisories", []))
                  if is_yarn_cmd else task_data.get("advisories",
                                                    dict()).values())
    non_null_advisories = (adv for adv in advisories if adv)

    for advisory in non_null_advisories:
        advisory_fields = extract_nested_fields(
            advisory,
            {
                "package_name": ["module_name"],
                "npm_advisory_id": ["id"],
                "vulnerable_versions": ["vulnerable_versions"],
                "patched_versions": ["patched_versions"],
                "created": ["created"],
                "updated": ["updated"],
                "url": ["url"],
                "severity": ["severity"],
                "cves": ["cves"],
                "cwe": ["cwe"],
                "exploitability": ["metadata", "exploitability"],
                "title": ["title"],
            },
        )
        # Store the CWE as its bare number, e.g. "CWE-79" -> 79.
        # NOTE(review): assumes .cwe is a single string — confirm it is never a list.
        advisory_fields["cwe"] = int(advisory_fields["cwe"].lower().replace(
            "cwe-", ""))
        advisory_fields["language"] = "node"
        advisory_fields["vulnerable_package_version_ids"] = []

        # Insert only when no matching advisory row exists yet.
        get_node_advisory_id_query(
            session, advisory_fields).one_or_none() or session.add(
                Advisory(**advisory_fields))
        session.commit()

        # TODO: update other advisory fields too

        # Versions reported as affected by this advisory's findings.
        impacted_versions = set(
            finding.get("version", None)
            for finding in advisory.get("findings", [])
            if finding.get("version", None))
        db_advisory = (session.query(
            Advisory.id, Advisory.vulnerable_package_version_ids).filter_by(
                language="node", url=advisory["url"]).first())
        impacted_version_package_ids = list(
            vid for result in session.query(PackageVersion.id).filter(
                PackageVersion.name == advisory_fields["package_name"],
                PackageVersion.version.in_(impacted_versions),
            ).all() for vid in result)
        if len(impacted_versions) != len(impacted_version_package_ids):
            log.warning(
                f"missing package versions for {advisory_fields['package_name']!r}"
                f" in the db or misparsed audit output version:"
                f" {impacted_versions} {impacted_version_package_ids}")

        # Initialize the id list on first touch so the merge below can read it.
        if db_advisory.vulnerable_package_version_ids is None:
            session.query(Advisory.id).filter_by(id=db_advisory.id).update(
                dict(vulnerable_package_version_ids=list()))

        # TODO: lock the row?
        # Merge existing + new impacted version ids, deduplicated and sorted.
        vpvids = set(
            list(
                session.query(Advisory).filter_by(
                    id=db_advisory.id).first().vulnerable_package_version_ids))
        vpvids.update(set(impacted_version_package_ids))
        session.query(Advisory.id).filter_by(id=db_advisory.id).update(
            dict(vulnerable_package_version_ids=sorted(vpvids)))
        session.commit()
def insert_npmsio_data(
        session: sqlalchemy.orm.Session,
        source: Generator[Dict[str, Any], None, None]) -> None:
    """Insert npms.io score documents from *source* as NPMSIOScore rows.

    Flattens each nested npms.io API document into scalar columns and skips
    documents whose (package, version, analyzed_at) triple is already stored.
    Commits once per inserted row.
    """
    for line in source:
        fields = extract_nested_fields(
            line,
            {
                "package_name": ["collected", "metadata", "name"],
                "package_version": ["collected", "metadata", "version"],
                "analyzed_at": ["analyzedAt"],  # e.g. "2019-11-27T19:31:42.541Z"
                # overall score from .score.final on the interval [0, 1]
                "score": ["score", "final"],
                # score components on the interval [0, 1]
                "quality": ["score", "detail", "quality"],
                "popularity": ["score", "detail", "popularity"],
                "maintenance": ["score", "detail", "maintenance"],
                # score subcomponent/detail fields from .evaluation.<component>.<subcomponent>
                # generally frequencies and subscores are decimals between [0, 1]
                # or counts of downloads, stars, etc.
                # acceleration is signed (+/-)
                "branding": ["evaluation", "quality", "branding"],
                "carefulness": ["evaluation", "quality", "carefulness"],
                "health": ["evaluation", "quality", "health"],
                "tests": ["evaluation", "quality", "tests"],
                "community_interest":
                ["evaluation", "popularity", "communityInterest"],
                "dependents_count":
                ["evaluation", "popularity", "dependentsCount"],
                "downloads_acceleration": [
                    "evaluation",
                    "popularity",
                    "downloadsAcceleration",
                ],
                "downloads_count":
                ["evaluation", "popularity", "downloadsCount"],
                "commits_frequency":
                ["evaluation", "maintenance", "commitsFrequency"],
                "issues_distribution": [
                    "evaluation",
                    "maintenance",
                    "issuesDistribution",
                ],
                "open_issues": ["evaluation", "maintenance", "openIssues"],
                "releases_frequency": [
                    "evaluation",
                    "maintenance",
                    "releasesFrequency",
                ],
            },
        )
        fields[
            "source_url"] = f"https://api.npms.io/v2/package/{fields['package_name']}"

        # only insert new rows
        if (session.query(NPMSIOScore.id).filter_by(
                package_name=fields["package_name"],
                package_version=fields["package_version"],
                analyzed_at=fields["analyzed_at"],
        ).one_or_none()):
            log.debug(
                f"skipping inserting npms.io score for {fields['package_name']}@{fields['package_version']}"
                f" analyzed at {fields['analyzed_at']}")
        else:
            session.add(NPMSIOScore(**fields))
            session.commit()
            log.info(
                f"added npms.io score for {fields['package_name']}@{fields['package_version']}"
                f" analyzed at {fields['analyzed_at']}")
def insert_npm_registry_data(
        session: sqlalchemy.orm.Session,
        source: Generator[Dict[str, Any], None, None]) -> None:
    """Insert npm registry documents from *source* as NPMRegistryEntry rows.

    Walks every version of each registry document, flattens it into scalar
    columns, normalizes the license field, and skips versions that are
    already stored (matched by name, version, shasum and tarball).
    Commits once per inserted row.
    """
    for line in source:
        # save version specific data
        for version, version_data in line["versions"].items():
            fields = extract_nested_fields(
                version_data,
                {
                    "package_name": ["name"],
                    "package_version": ["version"],
                    "shasum": ["dist", "shasum"],
                    "tarball": ["dist", "tarball"],
                    "git_head": ["gitHead"],
                    "repository_type": ["repository", "type"],
                    "repository_url": ["repository", "url"],
                    "description": ["description"],
                    "url": ["url"],
                    "license_type": ["license"],
                    "keywords": ["keywords"],
                    "has_shrinkwrap": ["_hasShrinkwrap"],
                    "bugs_url": ["bugs", "url"],
                    "bugs_email": ["bugs", "email"],
                    "author_name": ["author", "name"],
                    "author_email": ["author", "email"],
                    "author_url": ["author", "url"],
                    "maintainers": ["maintainers"],
                    "contributors": ["contributors"],
                    "publisher_name": ["_npmUser", "name"],
                    "publisher_email": ["_npmUser", "email"],
                    "publisher_node_version": ["_nodeVersion"],
                    "publisher_npm_version": ["_npmVersion"],
                },
            )
            # license can be a string e.g. 'MIT'
            # or dict e.g. {'type': 'MIT', 'url': 'https://github.com/jonschlinkert/micromatch/blob/master/LICENSE'}
            fields["license_url"] = None
            if isinstance(fields["license_type"], dict):
                fields["license_url"] = fields["license_type"].get("url", None)
                fields["license_type"] = fields["license_type"].get(
                    "type", None)

            # looking at you [email protected].{3,4} with:
            # [{"name": "StrongLoop", "url": "http://strongloop.com/license/"}, "MIT"],
            if not ((isinstance(fields["license_type"], str)
                     or fields["license_type"] is None) and
                    (isinstance(fields["license_url"], str)
                     or fields["license_url"] is None)):
                log.warning(
                    f"skipping weird license format {fields['license_type']}")
                fields["license_url"] = None
                fields["license_type"] = None

            # published_at .time[<version>] e.g. '2014-05-23T21:21:04.170Z'
            # (not from the version info object)
            # where time: an object mapping versions to the time published,
            # along with created and modified timestamps
            fields["published_at"] = get_in(line, ["time", version])
            fields["package_modified_at"] = get_in(line, ["time", "modified"])

            fields[
                "source_url"] = f"https://registry.npmjs.org/{fields['package_name']}"

            # only insert new rows
            if (session.query(NPMRegistryEntry.id).filter_by(
                    package_name=fields["package_name"],
                    package_version=fields["package_version"],
                    shasum=fields["shasum"],
                    tarball=fields["tarball"],
            ).one_or_none()):
                log.debug(
                    f"skipping inserting npm registry entry for {fields['package_name']}@{fields['package_version']}"
                    f" from {fields['tarball']} with sha {fields['shasum']}")
            else:
                session.add(NPMRegistryEntry(**fields))
                session.commit()
                log.info(
                    f"added npm registry entry for {fields['package_name']}@{fields['package_version']}"
                    f" from {fields['tarball']} with sha {fields['shasum']}")