Exemplo n.º 1
0
def process_file(session: Session, file: File,
                 callback: Callable[[], None]) -> bool:
    """Claim `file`, run `callback` for it and record the outcome.

    Args:
        session: Database session used for the claim and for the commits.
        file: The file record to process.
        callback: Zero-argument callable that performs the actual work.

    Returns:
        False when the file was already claimed (its
        `processing_started_at` is set, or the claiming UPDATE matched no
        row). True when this call claimed the file and either finished the
        callback or recorded a `UserError`.

    Raises:
        Exception: Any failure that is not a `UserError` is re-raised after
            it has been stored in `file.processing_error` and committed.
    """
    if file.processing_started_at:
        return False

    # The UPDATE only matches while `processing_started_at` is still NULL, so
    # a zero row count means the file was claimed in the meantime.
    claimed_at = datetime.datetime.utcnow()
    claim_stmt = update(File.__table__)  # pylint: disable=no-member
    claim_stmt = claim_stmt.where(File.id == file.id)
    claim_stmt = claim_stmt.where(File.processing_started_at.is_(None))
    claim_stmt = claim_stmt.values(processing_started_at=claimed_at)
    if session.execute(claim_stmt).rowcount == 0:
        return False

    # From here on the file belongs to this call.
    try:
        callback()
        file.processing_started_at = claimed_at
        file.processing_completed_at = datetime.datetime.utcnow()
        session.add(file)
        session.commit()
    except Exception as failure:
        # The rollback also discards the claim, so the start timestamp is
        # written again together with the error text.
        session.rollback()
        file.processing_started_at = claimed_at
        file.processing_completed_at = datetime.datetime.utcnow()
        file.processing_error = str(failure)
        session.add(file)
        session.commit()
        if isinstance(failure, UserError):
            return True
        raise failure
    return True
Exemplo n.º 2
0
    def create(session: Session, repo_dir: str, issue_id: int, comment_id: int,
               author: str, body: str) -> Comment:
        """ Creates a new Comment record.

        Args:
            session (Session): The database session.
            repo_dir (str): The issue repository.
            issue_id (int): The issue identifier.
            comment_id (int): The comment identifier.
            author (str): The comment author.
            body (str): The comment body.

        Raises:
            ValueError: Thrown when missing repo_dir, issue_id, comment_id or author.
            CommentAlreadyExistsError:
                Thrown when the commit fails with an IntegrityError, i.e. a
                comment with the same repo_dir, issue_id and comment_id
                already exists. The session is rolled back first.

        Returns:
            Comment: The comment.
        """
        if not repo_dir or issue_id is None or comment_id is None or not author:
            # This factory builds comments, so the message must say so.
            raise ValueError("You cannot create a comment without a repo_dir, "
                             "an issue_id, a comment_id and an author.")
        try:
            comment: Comment = Comment(repo_dir, issue_id, comment_id, author,
                                       body)
            session.add(comment)
            session.commit()
            return comment
        except IntegrityError as err:
            session.rollback()
            raise CommentAlreadyExistsError from err
Exemplo n.º 3
0
    def deleteKit(self,
                  session: Session,
                  name,
                  version,
                  iteration,
                  force=False):
        """
        Remove a kit from the database and commit the change.

        The deletion is delegated to the kits DB handler; ``force`` is
        passed through to it unchanged. On any failure the session is
        rolled back and the exception is re-raised.

        Raises:
            KitNotFound
            KitInUse
            DbError
        """

        try:
            kits_handler = self._kitsDbHandler
            kits_handler.deleteKit(
                session, name, version, iteration, force=force)
            session.commit()
        except TortugaException:
            # Application-level errors propagate without being logged here.
            session.rollback()
            raise
        except Exception as err:
            # Anything else is logged together with its traceback.
            session.rollback()
            self.getLogger().exception(str(err))
            raise
Exemplo n.º 4
0
    def deleteUsableHardwareProfileFromSoftwareProfile(
            self, session: Session, hardwareProfileName: str,
            softwareProfileName: str) -> None:
        """
        Remove the hardware profile from the software profile's usable
        hardware profiles and commit.

        The work is delegated to the software profiles DB handler. On any
        failure the session is rolled back and the exception is re-raised.

            Returns:
                None
            Throws:
                HardwareProfileNotFound
                SoftwareProfileNotFound
                SoftwareUsesHardwareNotFound
                DbError
        """

        try:
            db_handler = self._softwareProfilesDbHandler
            db_handler.deleteUsableHardwareProfileFromSoftwareProfile(
                session, hardwareProfileName, softwareProfileName)
            session.commit()
        except TortugaException:
            # Application-level errors propagate without being logged here.
            session.rollback()
            raise
        except Exception as err:
            # Anything else is logged together with its traceback.
            session.rollback()
            self._logger.exception(str(err))
            raise
    def create(session: Session, repo_dir: str, title: str,
               description: str, labels: list) -> Repository:
        """ Creates a new Repository record.

        Args:
            session (Session): The database session.
            repo_dir (str): The repository directory.
            title (str): The repository title.
            description (str): The repository description.
            labels (list): The repository labels; stored JSON-encoded.

        Raises:
            ValueError: Thrown when repo_dir or title is missing.
            RepositoryAlreadyExistError:
                Thrown when the commit fails with an IntegrityError, i.e. a
                repository with the same repo_dir already exists. The
                session is rolled back first.

        Returns:
            Repository: The repository.
        """
        if not (repo_dir and title):
            raise ValueError(
                "You cannot create a repository without a repo_dir and a title.")
        try:
            encoded_labels: str = json.dumps(labels)
            record: Repository = Repository(
                repo_dir, title, description, encoded_labels)
            session.add(record)
            session.commit()
        except IntegrityError as err:
            session.rollback()
            raise RepositoryAlreadyExistError from err
        return record
Exemplo n.º 6
0
    def _send_stalled_tis_back_to_scheduler(
            self,
            keys: List[TaskInstanceKey],
            session: Session = NEW_SESSION) -> None:
        """Reset the given queued task instances to SCHEDULED and revoke them.

        Only task instances that match ``keys``, are in the QUEUED state and
        were queued by this job are updated. If the database update fails,
        the error is logged, the session is rolled back and nothing else is
        done. Otherwise each key is dropped from the local bookkeeping and
        its celery task, if one is tracked, is revoked. A failing revoke is
        logged and does not stop the loop.
        """
        try:
            stalled_query = session.query(TaskInstance).filter(
                TaskInstance.filter_for_tis(keys),
                TaskInstance.state == State.QUEUED,
                TaskInstance.queued_by_job_id == self.job_id,
            )
            reset_values = {
                TaskInstance.state: State.SCHEDULED,
                TaskInstance.queued_dttm: None,
                TaskInstance.queued_by_job_id: None,
                TaskInstance.external_executor_id: None,
            }
            stalled_query.update(reset_values, synchronize_session=False)
            session.commit()
        except Exception:
            self.log.exception("Error sending tasks back to scheduler")
            session.rollback()
            return

        for key in keys:
            self._set_celery_pending_task_timeout(key, None)
            self.running.discard(key)
            async_result = self.tasks.pop(key, None)
            if not async_result:
                continue
            try:
                app.control.revoke(async_result.task_id)
            except Exception as revoke_error:
                self.log.error(
                    "Error revoking task instance %s from celery: %s", key,
                    revoke_error)
Exemplo n.º 7
0
    def deletePartition(self, session: Session, partitionName: str,
                        softwareProfileName: str) -> None:
        """
        Delete a partition from a software profile and commit.

        The work is delegated to the software profiles DB handler. On any
        failure the session is rolled back and the exception is re-raised.

            Returns:
                None
            Throws:
                PartitionNotFound
                SoftwareProfileNotFound
                DbError
        """

        try:
            db_handler = self._softwareProfilesDbHandler
            db_handler.deletePartitionFromSoftwareProfile(
                session, partitionName, softwareProfileName)
            session.commit()
        except TortugaException:
            # Application-level errors propagate without being logged here.
            session.rollback()
            raise
        except Exception as err:
            # Anything else is logged together with its traceback.
            session.rollback()
            self._logger.exception(str(err))
            raise
Exemplo n.º 8
0
def execute_sql(session: Session, sql: str) -> ResultProxy:
    """
    Run a single SQL statement through the session and commit it.

    Parameters
    ----------
    session
        sqlalchemy.orm.session.Session
        session object to be used
    sql
        str
        the sql statement

    Return
    ------
    sqlalchemy.engine.ResultProxy
        DB-API cursor wrapper for results of the query

    Raises
    ------
    TypeError
        when ``sql`` is not a str
    Exception
        any error raised while executing or committing; the session is
        rolled back and flushed before the error is re-raised
    """
    if not isinstance(sql, str):
        raise TypeError('sql must be of type str')
    try:
        outcome = session.execute(sql)
        session.commit()
        session.flush()
    except Exception as failure:
        session.rollback()
        session.flush()
        raise failure
    return outcome
Exemplo n.º 9
0
    def deleteAdmin(self, session: Session, softwareProfileName: str,
                    adminUsername: str) -> None:
        """
        Detach an admin from a software profile and commit.

        Raises:
            AdminNotFound: the admin is not in the profile's admin list.

        On any failure the session is rolled back and the exception is
        re-raised.
        """

        try:
            dbAdmin = self._adminsDbHandler.getAdmin(session, adminUsername)

            profile_handler = self._softwareProfilesDbHandler
            dbSoftwareProfile = profile_handler.getSoftwareProfile(
                session, softwareProfileName)

            if dbAdmin not in dbSoftwareProfile.admins:
                raise AdminNotFound(
                    'Admin [%s] not associated with software profile'
                    ' [%s]' % (adminUsername, softwareProfileName))

            dbSoftwareProfile.admins.remove(dbAdmin)

            session.commit()
        except TortugaException:
            # Application-level errors propagate without being logged here.
            session.rollback()
            raise
        except Exception as err:
            # Anything else is logged together with its traceback.
            session.rollback()
            self._logger.exception(str(err))
            raise
Exemplo n.º 10
0
    def addAdmin(self, session: Session, softwareProfileName: str,
                 adminUsername: str) -> None:
        """
        Attach an admin to a software profile and commit.

        Raises:
            AdminAlreadyExists: the admin is already in the profile's
                admin list.

        On any failure the session is rolled back and the exception is
        re-raised.
        """

        try:
            dbAdmin = self._adminsDbHandler.getAdmin(session, adminUsername)

            profile_handler = self._softwareProfilesDbHandler
            dbSoftwareProfile = profile_handler.getSoftwareProfile(
                session, softwareProfileName)

            if dbAdmin in dbSoftwareProfile.admins:
                raise AdminAlreadyExists(
                    'Admin [%s] already associated with %s' %
                    (adminUsername, softwareProfileName))

            dbSoftwareProfile.admins.append(dbAdmin)

            session.commit()
        except TortugaException:
            # Application-level errors propagate without being logged here.
            session.rollback()
            raise
        except Exception as err:
            # Anything else is logged together with its traceback.
            session.rollback()
            self._logger.exception(str(err))
            raise
Exemplo n.º 11
0
def create_queue_tokens(session: Session, queue: Queue) -> None:
    """Refill the token bucket of `queue` for the time since its last update.

    The number of new tokens is the elapsed time since
    `queue.bucket_updated` divided by `queue.bucket_rate`, limited to the
    free capacity of the bucket (`queue.bucket_size` minus the tokens that
    already exist for the queue). The unused remainder of the elapsed time
    is kept by setting `bucket_updated` that many seconds before now,
    unless the bucket was filled up, in which case it is set to now.

    When the bucket is already full, only `bucket_updated` is refreshed and
    no token is created.

    An `OperationalError` during a commit is swallowed after rolling the
    session back, so a failed refill creates no tokens.

    Args:
        session: Database session used for querying and committing.
        queue: The queue whose bucket is refilled.
    """
    refresh_object(session, queue)
    now = datetime.datetime.utcnow()
    token_count = session.query(Token).filter(Token.queue == queue).count()

    if token_count >= queue.bucket_size:
        # Full bucket: only refresh the timestamp, then stop. Without the
        # return, a failed commit here would restore the old timestamp on
        # rollback and the code below would add tokens to a full bucket.
        try:
            queue.bucket_updated = now
            session.add(queue)
            session.commit()
        except OperationalError:
            session.rollback()
        return

    # How many tokens would have been produced since the last bucket update.
    token_num, leftover_seconds = divmod(
        (now - queue.bucket_updated).total_seconds(), queue.bucket_rate)

    # Never create more tokens than there is room for; existing tokens count
    # against the bucket size as well.
    free_slots = queue.bucket_size - token_count
    if token_num >= free_slots:
        token_num = free_slots
        leftover_seconds = 0

    try:
        queue.bucket_updated = now - datetime.timedelta(
            seconds=leftover_seconds)
        session.add(queue)
        for _ in range(int(token_num)):
            session.add(Token(queue))
        session.commit()
    except OperationalError:
        session.rollback()
Exemplo n.º 12
0
    def deleteComponentFromSoftwareProfile(self, session: Session, componentId,
                                           softwareProfileId):
        """
        Delete a component from a software profile and commit.

        The work is delegated to the software profiles DB handler. On any
        failure the session is rolled back and the exception is re-raised.

            Returns:
                None
            Throws:
                SoftwareProfileNotFound
                ComponentNotFound
                SoftwareProfileComponentNotFound
                DbError
        """

        try:
            db_handler = self._softwareProfilesDbHandler
            db_handler.deleteComponentFromSoftwareProfile(
                session, componentId, softwareProfileId)
            session.commit()
        except TortugaException:
            # Application-level errors propagate without being logged here.
            session.rollback()
            raise
        except Exception as err:
            # Anything else is logged together with its traceback.
            session.rollback()
            self.getLogger().exception(str(err))
            raise
Exemplo n.º 13
0
    def addAdmin(
            self, session: Session, hardwareProfileName,
            adminUsername: str) -> None:
        """
        Attach an admin to a hardware profile and commit.

        Raises:
            AdminAlreadyExists: the admin is already in the profile's
                admin list.

        On any failure the session is rolled back and the exception is
        re-raised.
        """

        try:
            dbAdmin = self._adminsDbHandler.getAdmin(session, adminUsername)

            profile_handler = self._hardwareProfilesDbHandler
            dbHardwareProfile = profile_handler.getHardwareProfile(
                session, hardwareProfileName)

            if dbAdmin not in dbHardwareProfile.admins:
                dbHardwareProfile.admins.append(dbAdmin)
            else:
                raise AdminAlreadyExists(
                    'The admin %s is already associated with %s.' % (
                        adminUsername, hardwareProfileName))

            session.commit()
        except TortugaException:
            # Application-level errors propagate without being logged here.
            session.rollback()
            raise
        except Exception as err:
            # Anything else is logged together with its traceback.
            session.rollback()
            self._logger.exception(str(err))
            raise
Exemplo n.º 14
0
    def deleteAdmin(
            self, session: Session, hardwareProfileName: str,
            adminUsername: str) -> None:
        """
        Detach an admin from a hardware profile and commit.

        Raises:
            AdminNotFound: the admin is not in the profile's admin list.

        On any failure the session is rolled back and the exception is
        re-raised.
        """

        try:
            dbAdmin = self._adminsDbHandler.getAdmin(session, adminUsername)

            profile_handler = self._hardwareProfilesDbHandler
            dbHardwareProfile = profile_handler.getHardwareProfile(
                session, hardwareProfileName)

            if dbAdmin in dbHardwareProfile.admins:
                dbHardwareProfile.admins.remove(dbAdmin)
            else:
                raise AdminNotFound(
                    'Admin user [%s] not associated with %s.' % (
                        adminUsername, hardwareProfileName))

            session.commit()
        except TortugaException:
            # Application-level errors propagate without being logged here.
            session.rollback()
            raise
        except Exception as err:
            # Anything else is logged together with its traceback.
            session.rollback()
            self._logger.exception(str(err))
            raise
Exemplo n.º 15
0
    def deleteParameter(self, session: Session, name: str) -> None:
        """
        Look up the named global parameter, delete it and commit.

        A debug message is logged before the delete and an info message
        after the commit. On any failure the session is rolled back and the
        exception is re-raised.

            Returns:
                None
            Throws:
                ParameterNotFound
                DbError
        """

        try:
            parameter = self._globalParametersDbHandler.getParameter(
                session, name)

            self._logger.debug('Deleting parameter [%s]' % name)
            session.delete(parameter)
            session.commit()
            self._logger.info('Deleted parameter [%s]' % name)
        except TortugaException:
            # Application-level errors propagate without being logged here.
            session.rollback()
            raise
        except Exception as err:
            # Anything else is logged together with its traceback.
            session.rollback()
            self._logger.exception(str(err))
            raise
Exemplo n.º 16
0
    def session_scope(self, query_func, need_commit=False):
        '''
        Run ``query_func`` inside a session that is always closed afterwards.

        :param function query_func: function that performs the queries
        :param bool need_commit: whether the work has to be committed

        The SQLAlchemy session object is passed to ``query_func`` as its
        first argument. The engine is created on first use. After
        ``query_func`` returns, the session is committed when
        ``need_commit`` is true and rolled back otherwise; on any error it
        is rolled back and the error is re-raised.

        :return: whatever ``query_func`` returned
        '''
        if not self.engine:
            self.engine = create_engine(self.connection_string)
        session = Session(bind=self.engine)

        try:
            result = query_func(session)
            finish = session.commit if need_commit else session.rollback
            finish()
        except BaseException:
            session.rollback()
            raise
        finally:
            session.close()

        return result
Exemplo n.º 17
0
    def addUsableHardwareProfileToSoftwareProfile(
            self, session: Session, hardwareProfileName: str,
            softwareProfileName: str) -> None:
        """
        Add the hardware profile to the software profile's usable hardware
        profiles and commit.

        The work is delegated to the software profiles DB handler. On any
        failure the session is rolled back and the exception is re-raised.

            Returns:
                None
            Throws:
                HardwareProfileNotFound
                SoftwareProfileNotFound
                SoftwareUsesHardwareAlreadyExists
                DbError
        """

        try:
            db_handler = self._softwareProfilesDbHandler
            db_handler.addUsableHardwareProfileToSoftwareProfile(
                session, hardwareProfileName, softwareProfileName)
            session.commit()
        except TortugaException:
            # Application-level errors propagate without being logged here.
            session.rollback()
            raise
        except Exception as err:
            # Anything else is logged together with its traceback.
            session.rollback()
            self._logger.exception(str(err))
            raise
Exemplo n.º 18
0
 def create(session: Session, username: str) -> UserSession:
     """ Creates a new user session record.
     ---
     Note:
         Any existing transaction will be committed.
     Parameters:
         - session: The session object.
         - username: The user name string.
     Returns:
         The created UserSession result. It is created active, with a fresh
         UUID token and identical `created` and `updated` timestamps.
     Raises:
         ValueError: When username is empty.
         Exception: Any error raised while adding or committing; the session
             is rolled back before the error is re-raised.
     """
     if not username:
         # Only a username is taken here, so the message mentions only that.
         raise ValueError('A username is required.')
     try:
         # One clock reading so that `created` and `updated` start out equal.
         now = datetime.now()
         new_user_session = UserSession(username,
                                        token=str(uuid.uuid4()),
                                        active=True,
                                        created=now,
                                        updated=now)
         session.add(new_user_session)
         session.commit()
         return new_user_session
     except Exception:
         session.rollback()
         raise
Exemplo n.º 19
0
    def addKit(self, session: Session, kit):
        """
        Insert a kit into the database, commit, and log the installation.

        For an OS kit the descriptor in the log message uses the
        architecture of the first OS component instead of the kit
        iteration. On any failure, including one raised while building the
        log message after the commit, the session is rolled back and the
        exception is re-raised.

        Raises:
            KitAlreadyExists
            DbError
        """

        try:
            dbKit = self._kitsDbHandler.addKit(session, kit)

            session.commit()

            if dbKit.isOs:
                iteration = dbKit.components[0].os_components[0].os.arch
            else:
                iteration = dbKit.iteration

            kit_descr = format_kit_descriptor(
                dbKit.name, dbKit.version, iteration)

            if dbKit.isOs:
                logmsg = 'Installed OS kit [{0}] successfully'
            else:
                logmsg = 'Installed kit [{0}] successfully'

            self._logger.info(logmsg.format(kit_descr))
        except TortugaException:
            # Application-level errors propagate without being logged here.
            session.rollback()
            raise
        except Exception as err:
            # Anything else is logged together with its traceback.
            session.rollback()
            self._logger.exception(str(err))
            raise
Exemplo n.º 20
0
def chowdown_migrate(session: Session, zip_file: Path):
    """Import recipes and their images from a Chowdown zip archive.

    Every ``*.md`` file in the archive's ``_recipes`` folder is parsed and
    stored. A recipe that fails is rolled back, logged and listed as failed.
    Images are then copied to the image directory, except those whose stem
    matches a failed recipe. Image copy errors are logged but are not part
    of the report.

    Returns:
        dict with the keys "successful" (recipe names) and "failed" (recipe
        file stems).
    """
    with unpack_zip(zip_file) as unpacked:
        chow_dir = next(Path(unpacked).iterdir())
        image_dir = app_dirs.TEMP_DIR.joinpath(chow_dir, "images")
        recipe_dir = app_dirs.TEMP_DIR.joinpath(chow_dir, "_recipes")

        successful_recipes = []
        failed_recipes = []
        for recipe_file in recipe_dir.glob("*.md"):
            try:
                parsed = read_chowdown_file(recipe_file)
                db.recipes.create(session, parsed.dict())
                successful_recipes.append(parsed.name)
            except Exception as recipe_error:
                session.rollback()
                logger.error(recipe_error)
                failed_recipes.append(recipe_file.stem)

        failed_images = []
        for image in image_dir.iterdir():
            try:
                if image.stem in failed_recipes:
                    continue
                shutil.copy(image, app_dirs.IMG_DIR.joinpath(image.name))
            except Exception as image_error:
                logger.error(image_error)
                failed_images.append(image.name)

        report = {"successful": successful_recipes, "failed": failed_recipes}

    migrate_images()
    return report
Exemplo n.º 21
0
    def updateHardwareProfile(
            self, session: Session,
            hardwareProfileObject: HardwareProfile) -> None:
        """
        Update the stored hardware profile from ``hardwareProfileObject``.

        The database record is looked up by the object's id, populated from
        the object, given the object's tags, and committed. On any failure
        the session is rolled back and the exception is re-raised.
        """

        try:
            find_by_id = self._hardwareProfilesDbHandler.getHardwareProfileById
            dbHardwareProfile = find_by_id(
                session, hardwareProfileObject.getId())

            self.__populateHardwareProfile(
                session, hardwareProfileObject, dbHardwareProfile)
            self._set_tags(dbHardwareProfile, hardwareProfileObject.getTags())
            session.commit()
        except TortugaException:
            # Application-level errors propagate without being logged here.
            session.rollback()
            raise
        except Exception as err:
            # Anything else is logged together with its traceback.
            session.rollback()
            self._logger.exception(str(err))
            raise
Exemplo n.º 22
0
    def verify_integrity(self, session: Session = None):
        """
        Verifies the DagRun by checking for removed tasks or tasks that are not in the
        database yet. It will set state to removed or add the task if required.

        Existing task instances whose task is no longer in the DAG are marked
        REMOVED, but only while this run is not RUNNING and the DAG is not
        partial. Instances previously marked REMOVED whose task is back in
        the DAG are reset to NONE. Tasks without a task instance get one,
        unless the task starts after this run's execution date and the run
        is not a backfill.

        An IntegrityError on commit is logged and the session is rolled back;
        it is not re-raised.

        :param session: Sqlalchemy ORM Session
        :type session: Session
        """
        dag = self.get_dag()
        tis = self.get_task_instances(session=session)

        # check for removed or restored tasks
        task_ids = set()
        for ti in tis:
            task_instance_mutation_hook(ti)
            task_ids.add(ti.task_id)
            task = None
            try:
                task = dag.get_task(ti.task_id)
            except AirflowException:
                if ti.state == State.REMOVED:
                    pass  # ti has already been removed, just ignore it
                # Compare by value: an identity test (`is not`) is true for
                # any equal state held in a different object, which would
                # mark tasks of a running DagRun as removed.
                elif self.state != State.RUNNING and not dag.partial:
                    self.log.warning(
                        "Failed to get task '%s' for dag '%s'. "
                        "Marking it as removed.", ti, dag)
                    Stats.incr(f"task_removed_from_dag.{dag.dag_id}", 1, 1)
                    ti.state = State.REMOVED

            should_restore_task = (task
                                   is not None) and ti.state == State.REMOVED
            if should_restore_task:
                self.log.info(
                    "Restoring task '%s' which was previously "
                    "removed from DAG '%s'", ti, dag)
                Stats.incr(f"task_restored_to_dag.{dag.dag_id}", 1, 1)
                ti.state = State.NONE
            session.merge(ti)

        # check for missing tasks
        for task in dag.task_dict.values():
            if task.start_date > self.execution_date and not self.is_backfill:
                continue

            if task.task_id not in task_ids:
                Stats.incr(f"task_instance_created-{task.task_type}", 1, 1)
                ti = TI(task, self.execution_date)
                task_instance_mutation_hook(ti)
                session.add(ti)

        try:
            session.commit()
        except IntegrityError as err:
            self.log.info(str(err))
            self.log.info('Hit IntegrityError while creating the TIs for '
                          f'{dag.dag_id} - {self.execution_date}.')
            self.log.info('Doing session rollback.')
            session.rollback()
Exemplo n.º 23
0
    def addComponentToSoftwareProfile(self, session: Session, componentId,
                                      softwareProfileId):
        """
        Add a component to a software profile and commit.

        The work is delegated to the software profiles DB handler. On any
        failure the session is rolled back and the exception is re-raised.

            Returns:
                None
            Throws:
                SoftwareProfileNotFound
                ComponentNotFound
                SoftwareProfileComponentAlreadyExists
                DbError
        """

        try:
            db_handler = self._softwareProfilesDbHandler
            db_handler.addComponentToSoftwareProfile(
                session, componentId, softwareProfileId)
            session.commit()
        except TortugaException:
            # Application-level errors propagate without being logged here.
            session.rollback()
            raise
        except Exception as err:
            # Anything else is logged together with its traceback.
            session.rollback()
            self.getLogger().exception(str(err))
            raise
Exemplo n.º 24
0
def check_table_for_duplicates(table_name: str, uniqueness: List[str],
                               session: Session) -> Iterable[str]:
    """
    Yield an error message when a table holds rows that share the same
    values in the given uniqueness columns.

    Nothing is yielded when no duplicates are found, or when the query
    fails with an OperationalError or ProgrammingError; in that case the
    session is rolled back.

    Call from ``run_duplicates_checks``.

    :param table_name: table name to check
    :param uniqueness: uniqueness constraint to evaluate against
    :param session:  session of the sqlalchemy
    :rtype: str
    """
    key_columns = [column(name) for name in uniqueness]
    table_obj = table(table_name, *key_columns)
    dupe_count = 0
    try:
        grouped = session.query(table_obj, func.count().label('dupe_count'))
        grouped = grouped.group_by(*[text(name) for name in uniqueness])
        grouped = grouped.having(func.count() > literal(1))
        subquery = grouped.subquery()
        dupe_count = session.query(func.sum(subquery.c.dupe_count)).scalar()
    except (exc.OperationalError, exc.ProgrammingError):
        # fallback if tables hasn't been created yet
        session.rollback()
    if not dupe_count:
        return
    yield (
        f"Found {dupe_count} duplicate records in table {table_name}. You must de-dupe these "
        f"records before upgrading.  The uniqueness constraint for this table is {uniqueness!r}"
    )
Exemplo n.º 25
0
 def session_rollback(self, session: Session):
     """
     Roll back the given session.

     :param session: the session whose current transaction is rolled back
     :return: None
     """
     session.rollback()
Exemplo n.º 26
0
def update_rows(session: Session,
                model: DeclarativeMeta,
                updates: dict,
                filters: dict = None):
    """
    Bulk update the rows of ``model`` that match ``filters`` with the values
    in ``updates`` and commit.

    Parameters
    ----------
    session
        sqlalchemy.orm.session.Session
        session object to be used
    model
        sqlalchemy.ext.declarative.api.DeclarativeMeta
        the sqlalchemy model to use
    updates
        dict
        the fields to update as the keys and their new values as the
        dictionary values
    filters
        passed unchanged to ``read_rows``; the expected structure is
        [  {
                'column': {
                    'comparitor': '>=' OR '==' OR '<=' OR '>' OR '<' OR !=
                    'data': str OR int OR float
                },
                join = "and" OR "or"
            },
            ...Other Columns
        ]

    Raises
    ------
    TypeError
        when ``updates`` is not a dict
    NoResultFound
        when no row matches ``filters``
    ValueError
        when the update matched no rows
    Exception
        any error raised by the commit; the session is rolled back and
        flushed before the error is re-raised
    """
    if not isinstance(updates, dict):
        raise TypeError('updates must be of type dict')

    results = read_rows(session, model, filters)

    check_res = results.first()

    # Identity test: `== None` would go through the row object's `__eq__`.
    if check_res is None:
        raise NoResultFound(
            f"no rows can be updated because no rows can be found with the following filters: {json.dumps(filters)}"
        )

    matched = results.update(updates)

    if matched == 0:
        raise ValueError(
            f"bad update request, no columns could be matched updates requested: {json.dumps(updates)}"
        )

    try:
        session.commit()
        session.flush()
    except Exception:
        # TODO Logging.log.exception()
        session.rollback()
        session.flush()
        raise
Exemplo n.º 27
0
def return_queue_token(session: Session, token: Token) -> None:
    """Set `token` to the ISSUED state and commit.

    The token is refreshed from the session first. An `OperationalError`
    raised while saving is swallowed after rolling the session back.
    """
    refresh_object(session, token)
    issued_state = Token.State.ISSUED
    try:
        token.state = issued_state
        session.add(token)
        session.commit()
    except OperationalError:
        # Best effort: undo and carry on without reporting the failure.
        session.rollback()
Exemplo n.º 28
0
    def updateNode(self, session: Session, nodeName: str,
                   updateNodeRequest: dict) -> None:
        """
        Calls updateNode() method of resource adapter

        Applies ``updateNodeRequest`` to the node named ``nodeName``,
        commits, and then fires change events.

        Keys of ``updateNodeRequest`` read directly by this method:
            'nics': only the first entry is looked at; when it has an 'ip'
                key, that address is stored on the node's first nic and
                that nic's ``boot`` flag is set.
            'state': the new node state.

        The whole request is also passed to the resource adapter of the
        node's hardware profile.

        Any exception rolls the session back and is re-raised. This
        includes exceptions raised after the commit, while firing events.
        """

        self._logger.debug('updateNode(): name=[{0}]'.format(nodeName))

        try:
            #
            # Get the old version for comparison later
            #
            node_old: Node = self.getNode(session, nodeName)

            db_node = self._nodesDbHandler.getNode(session, nodeName)
            if 'nics' in updateNodeRequest:
                # Only the first nic of the request is applied.
                nic = updateNodeRequest['nics'][0]
                if 'ip' in nic:
                    db_node.nics[0].ip = nic['ip']
                    db_node.nics[0].boot = True

            adapter = self.__getResourceAdapter(session,
                                                db_node.hardwareprofile)
            adapter.updateNode(session, db_node, updateNodeRequest)
            run_post_install = False

            if 'state' in updateNodeRequest:
                # Decide on the post-install run before the state is
                # overwritten: it applies only to the transition from
                # allocated to provisioned.
                run_post_install = \
                    db_node.state == state.NODE_STATE_ALLOCATED and \
                    updateNodeRequest['state'] == state.NODE_STATE_PROVISIONED
                db_node.state = updateNodeRequest['state']

            session.commit()

            #
            # Fire events as required
            #
            # Get the current/new state of the node from the DB
            #
            node: Node = self.getNode(session, nodeName)
            if node.getState() != node_old.getState():
                NodeStateChanged.fire(node=node.getCleanDict(),
                                      previous_state=node_old.getState())
            if node.getTags() != node_old.getTags():
                NodeTagsChanged.fire(node=node.getCleanDict(),
                                     previous_tags=node_old.getTags())

            if run_post_install:
                self._logger.debug(
                    'updateNode(): run-post-install for node [{0}]'.format(
                        db_node.name))
                self.__scheduleUpdate()

        except Exception:
            # Also reached for failures after the commit above.
            session.rollback()
            raise
Exemplo n.º 29
0
 def create_session(self):
     """Yield a new session bound to `self.connection`.

     The session is committed when the consumer finishes without an
     exception, rolled back and the exception re-raised otherwise, and
     closed in every case.

     NOTE(review): presumably wrapped by a context-manager decorator that
     is outside the visible source; confirm.
     """
     db_session = Session(self.connection)
     try:
         yield db_session
         db_session.commit()
     except Exception as failure:
         db_session.rollback()
         raise failure
     finally:
         db_session.close()
Exemplo n.º 30
0
def task_nack(session: Session, task: Task) -> bool:
    """Put `task` back into the WAITING state.

    The task is refreshed from the session before its state is changed.

    Returns:
        True when the change was committed, False when the commit raised an
        `OperationalError` (the session is rolled back in that case).
    """
    refresh_object(session, task)
    task.state = Task.State.WAITING
    session.add(task)
    try:
        session.commit()
    except OperationalError:
        session.rollback()
        committed = False
    else:
        committed = True
    return committed
Exemplo n.º 31
0
def session_scope():
    """Yield a session on the engine returned by `get_engine()`.

    The session is committed when the consumer finishes without an error,
    rolled back and the error re-raised otherwise, and closed in every
    case.

    NOTE(review): presumably wrapped by a context-manager decorator that is
    outside the visible source; confirm.
    """
    scoped = Session(get_engine())
    try:
        yield scoped
        scoped.commit()
    except BaseException:
        # Same reach as a bare `except:`; the error is always re-raised.
        scoped.rollback()
        raise
    finally:
        scoped.close()
Exemplo n.º 32
0
class VMDatabase(object):
    """Convenience wrapper around one SQLAlchemy session on a SQLite file."""

    def __init__(self, path=os.path.expanduser("~/vm.db")):
        """Create the engine for ``path``, create the tables and open a session.

        Note that the default path is computed once, when the class is
        defined.
        """
        engine = create_engine('sqlite:///%s' % path, echo=False)
        metadata.create_all(engine)
        Session = sessionmaker(bind=engine, autoflush=True, autocommit=False)
        self.session = Session()

    def print_state(self):
        """Print every provider with its base images, snapshots and machines."""
        # Single-argument print(...) produces the same text whether ``print``
        # is the Python 2 statement or the Python 3 function.
        for provider in self.getProviders():
            print('Provider: %s' % (provider.name,))
            for base_image in provider.base_images:
                print('  Base image: %s' % (base_image.name,))
                for snapshot_image in base_image.snapshot_images:
                    print('    Snapshot: %s %s' % (snapshot_image.name,
                                                   snapshot_image.state))
                for machine in base_image.machines:
                    print('    Machine: %s %s %s %s %s' % (
                        machine.id, machine.name, machine.state,
                        machine.state_time, machine.ip))

    def abort(self):
        """Roll back the current transaction."""
        self.session.rollback()

    def commit(self):
        """Commit the current transaction."""
        self.session.commit()

    def delete(self, obj):
        """Mark ``obj`` for deletion in the session."""
        self.session.delete(obj)

    def getProviders(self):
        """Return all Provider rows as a list."""
        return self.session.query(Provider).all()

    def getProvider(self, name):
        """Return the first Provider named ``name`` (IndexError if none)."""
        return self.session.query(Provider).filter_by(name=name)[0]

    def getResult(self, id):
        """Return the first Result with the given id (IndexError if none)."""
        return self.session.query(Result).filter_by(id=id)[0]

    def getMachine(self, id):
        """Return the first Machine with the given id (IndexError if none)."""
        return self.session.query(Machine).filter_by(id=id)[0]

    def getMachineByJenkinsName(self, name):
        """Return the first Machine with that jenkins_name (IndexError if none)."""
        return self.session.query(Machine).filter_by(jenkins_name=name)[0]

    def getMachineForUse(self, image_name):
        """Find a READY machine for ``image_name``, mark it USED and commit.

        READY machines are examined in ``state_time`` order and the first one
        whose base image has the requested name is claimed.

        NOTE(review): no locking is visible here, so whether this is atomic
        across concurrent callers depends on the database -- confirm.

        Raises:
            Exception: when no READY machine exists for that image.
        """
        ready_machines = self.session.query(Machine).filter(
            machine_table.c.state == READY).order_by(
            machine_table.c.state_time)
        for machine in ready_machines:
            if machine.base_image.name == image_name:
                machine.state = USED
                self.commit()
                return machine
        raise Exception("No machine found for image %s" % image_name)
Exemplo n.º 33
0
 def wrapper(*args, **kwargs):
     # Run the wrapped callable first, then commit a freshly obtained
     # session; the callable's return value is handed back unchanged.
     outcome = func(*args, **kwargs)
     db = Session()
     # tmp
     db._model_changes = {}
     try:
         db.commit()
     except SQLAlchemyError as db_error:
         # Undo the failed commit and report it through raise_server_exc.
         db.rollback()
         raise_server_exc(DATABASE_UNKNOWN_ERROR, exc=db_error)
     finally:
         db.close()
     return outcome
Exemplo n.º 34
0
    def create_session(self):
        """
        Yield a database session created from ``self.connection``.

        Pending changes are committed when control returns after the ``yield``
        without error. Any ``Exception`` (a failing commit included) rolls the
        session back and is re-raised. The session is closed in all cases.
        """
        db = Session(self.connection)
        try:
            yield db
            # Kept inside the try so that a failing commit is rolled back too.
            db.commit()
        except Exception:
            db.rollback()
            raise
        finally:
            db.close()
Exemplo n.º 35
0
    def transaction(cls,
                    session: db_session.Session) -> Iterator[db_session.Session]:
        """
        Wrap an existing session in commit-or-rollback handling.

        The session is committed when control returns after the ``yield``
        without error. On any exception (every exception type is intercepted)
        it is rolled back and the exception is re-raised. The session is
        closed in all cases.

        Args:
            session: The database session instance to wrap

        Yields:
            The same session instance that was passed in
        """
        try:
            yield session
            session.commit()
        except BaseException:
            session.rollback()
            raise
        finally:
            session.close()
Exemplo n.º 36
0
    def create_session(self):
        """
        Yield a database session created from ``self.connection``, with logging.

        Pending changes are committed when control returns after the ``yield``
        without error. On any ``Exception`` the session is rolled back and the
        error is logged with its traceback. The session is closed in all cases.

        NOTE(review): the exception is logged but NOT re-raised, so the code
        that received the session never sees the failure -- confirm this is
        intended.
        """
        session = Session(self.connection)
        opened_message = "Create session {0} with {1}".format(
            id(session), self._public_db_uri(str(self.engine.url)))
        logger.info(opened_message)
        try:
            yield session
            session.commit()
            logger.info("Commit transactions to database")
        except Exception:
            session.rollback()
            logger.exception("Database transactions rolled back")
        finally:
            closed_message = "Session {0} with {1} closed".format(
                id(session), self._public_db_uri(str(self.engine.url)))
            logger.info(closed_message)
            session.close()
Exemplo n.º 37
0
# Excerpt of a Python 2 script: `s`, `connection`, `session`, `Article`,
# `articles` and `association` are defined earlier, outside this excerpt.
# Run the previously built statement and dump every row before committing.
result = connection.execute(s)
print "Fetched from Article table before commit"
for row in result:
	print row
#"""

# Show the attribute names available on the Article class.
print dir(Article)
# Ask whether the pending work should be committed.
yes_or_no = raw_input("Commit Database Transaction?(y/n):")
yes_or_no = yes_or_no.lower()
# Rollback is the default: any answer other than 'y' / 'yes' (so also
# 'n' or 'no') takes the else branch.
if yes_or_no=='y' or yes_or_no=='yes':
	session.commit()
	#transaction.commit()	#it will also execute session.flush() if it's not executed.
else:
	session.rollback()
	#transaction.rollback()

# Select body, headline and the association's article_id, restricted to rows
# where the aliased articles table and the association table share an
# article_id, ordered by that id.
aa = articles.alias()
s = sqlalchemy.sql.select([aa.c.body, aa.c.headline, association.c.article_id], aa.c.article_id==association.c.article_id, order_by=[association.c.article_id])
#connection = eng.connect()
# Only the first three rows are fetched.
result = connection.execute(s).fetchmany(3)
print "Fetched from Article table"
for row in result:
	# The headline is read by attribute and by key, then the whole row is shown.
	print row.headline
	print row['headline']
	print row

# Ask whether to drop all tables created by this program; the answer is
# acted on after this excerpt.
yes_or_no = raw_input("Drop All Relevant Tables?(y/n):")
yes_or_no = yes_or_no.lower()
Exemplo n.º 38
0
class CrawlProcessor(object):

    __VERSION__ = "CrawlProcessor-0.2.1"

    def __init__(self, engine, redis_server, stop_list="keyword_filter.txt"):
        """Set up the session, controllers, classifier and Redis clients.

        Args:
            engine: Either a connection string, from which a new engine is
                created, or an already constructed engine that is used as-is.
            redis_server: Host of the Redis server. Database 1 is handed to
                the keyword resolution worker and database 2 to the domain
                resolution worker.
            stop_list: Path of a stop-word file, or an iterable of lines
                (for example an already open file).
        """
        if isinstance(engine, str):
            logging.info("Using connection string '%s'" % (engine,))
            new_engine = create_engine(engine, encoding='utf-8', isolation_level="READ COMMITTED")
            if "sqlite:" in engine:
                logging.debug("Setting text factory for unicode compat.")
                new_engine.raw_connection().connection.text_factory = str
            self._engine = new_engine
        else:
            logging.info("Using existing engine...")
            self._engine = engine
        logging.info("Binding session...")
        self._session = Session(bind=self._engine, autocommit=False)

        # One stripped entry per line. A file opened here is closed again
        # here; an iterable supplied by the caller is left for the caller to
        # manage.
        if isinstance(stop_list, str):
            with open(stop_list) as stop_list_fp:
                self.stop_list = set(line.strip() for line in stop_list_fp)
        else:
            self.stop_list = set(line.strip() for line in stop_list)

        self.cls = DocumentClassifier()
        self.dc = DomainController(self._engine, self._session)
        self.ac = ArticleController(self._engine, self._session)
        self.ex = extract.TermExtractor()
        self.kwc = KeywordController(self._engine, self._session)
        self.swc = SoftwareVersionsController(self._engine, self._session)
        self.redis_kw = redis.Redis(host=redis_server, port=6379, db=1)
        self.redis_dm = redis.Redis(host=redis_server, port=6379, db=2)
        # The domain resolution worker gets a session of its own.
        dm_session = Session(bind=self._engine, autocommit=False)
        self.drw = DomainResolutionWorker(dm_session, self.redis_dm)


    def _check_processed(self, item):
        """Tell whether ``item`` still needs processing.

        ``item`` is a ``(crawl_id, record)`` pair whose record unpacks to
        ``(headers, content, url, date_crawled, content_type)``.

        Returns True when no Article exists for this crawl id, domain and
        path; False when one (or more than one) already exists.
        """
        crawl_id, record = item
        # Only the URL is needed; unpacking all five fields keeps the check
        # on the record's shape.
        _headers, _content, url, _date_crawled, _content_type = record

        path = self.ac.get_path_fromurl(url)
        logging.info("_check_processed: retrieving domain...")
        domain_key = self.dc.get_Domain_key(url)
        # Poll the resolution worker until it hands back an identifier.
        domain_identifier = None
        while domain_identifier is None:
            domain_identifier = self.drw.get_domain(domain_key)

        lookup = self._session.query(Article)
        lookup = lookup.filter_by(crawl_id=crawl_id)
        lookup = lookup.filter_by(domain_id=domain_identifier)
        lookup = lookup.filter_by(path=path)
        try:
            lookup.one()
        except sqlalchemy.orm.exc.NoResultFound:
            logging.info("%s: hasn't been processed yet", url)
            return True
        except sqlalchemy.orm.exc.MultipleResultsFound:
            logging.error("%s: appears to have been already processed multiple times", url)
            return False
        logging.error("%s: already processed", url)
        return False

    def process_record(self, item):
        if len(item) != 2:
            raise ValueError(item)
        if not self._check_processed(item):
            return None
        ret, retries = None, 2
        while ret == None and retries > 0:
            try:
                retries -= 1
                ret = self._process_record(item)
            except Exception as ex:
                import traceback
                print >> sys.stderr, ex
                traceback.print_exc()
                raise ex 
        if ret == False:
            return None 
        return ret 


    def _process_record(self, item_arg):
        """Analyse one crawled page and store the resulting database objects.

        Args:
            item_arg: A ``(crawl_id, record)`` pair whose record unpacks to
                ``(headers, content, url, date_crawled, content_type)``.

        Returns:
            ``article.id`` after a successful commit; False when the page is
            skipped (see the early returns below); None when the commit
            raises ``OperationalError`` and is rolled back.
        """
        crawl_id, record = item_arg
        headers, content, url, date_crawled, content_type = record

        assert headers is not None
        assert content is not None
        assert url is not None
        assert date_crawled is not None
        assert content_type is not None

        status = "Processed"

        # Fix for a seg-fault
        if "nasa.gov" in url:
            return False

        # Sort out the domain: poll the resolution worker until it answers.
        domain_identifier = None
        logging.info("Retrieving domain...")
        domain_key = self.dc.get_Domain_key(url)
        while domain_identifier is None:
            domain_identifier = self.drw.get_domain(domain_key)

        domain = self._session.query(Domain).get(domain_identifier)
        assert domain is not None

        # Build database objects
        path = self.ac.get_path_fromurl(url)
        article = Article(path, date_crawled, crawl_id, domain, status)
        self._session.add(article)
        classified_by = self.swc.get_SoftwareVersion_fromstr(pysen.__VERSION__)
        assert classified_by is not None

        if content_type != 'text/html':
            logging.error("Unsupported content type: %s", str(content_type))
            article.status = "UnsupportedType"
            return False

        # Start the async transaction to get the plain text
        worker_req_thread = BoilerPipeWorker(content)
        worker_req_thread.start()

        # Whilst that's executing, parse the document
        logging.info("Parsing HTML...")
        html = BeautifulSoup(content)

        if html is None or html.body is None:
            article.status = "NoContent"
            return False

        # Extract the dates
        date_dict = pydate.get_dates(html)

        if len(date_dict) == 0:
            status = "NoDates"

        # Detect the language
        lang, lang_certainty = langid.classify(content)

        # Wait for the BoilerPipe thread to complete
        worker_req_thread.join()
        logging.debug(worker_req_thread.result)
        logging.debug(worker_req_thread.version)

        if worker_req_thread.result is None:
            article.status = "NoContent"
            return False

        # If the language isn't English, skip it
        if lang != "en":
            logging.info("language: %s with certainty %.2f - skipping...", lang, lang_certainty)
            article.status = "LanguageError" # Replace with something appropriate
            return False

        # From here on ``content`` is the extracted body text, not the HTML.
        content = worker_req_thread.result.encode('ascii', 'ignore')

        # Headline extraction: walk from h6 up to h1 and keep the first
        # heading whose text also appears in the extracted body text.
        headline = None
        for level in range(6, 0, -1):
            for node in html.findAll("h%d" % (level,)):
                if node.text in content:
                    headline = node.text
                    break
            if headline is not None:
                break

        # Run keyword extraction
        keywords = self.ex(content)
        kset = KeywordSet(self.stop_list)
        nnp_sets_scored = set((word, freq) for word, freq, amnt in keywords)

        # Collect proper nouns (POS tag NNP) sentence by sentence, plus the
        # pairs of proper nouns that directly follow one another.
        nnp_adj = set([])
        nnp_set = set([])
        nnp_vector = []
        for sentence in sent_tokenize(content):
            text = nltk.word_tokenize(sentence)
            pos = nltk.pos_tag(text)
            for pos_tag, group in itertools.groupby(pos, lambda x: x[1]):
                if pos_tag != 'NNP':
                    continue
                nnp_buf = [word for word, speech in group]
                nnp_set.update(nnp_buf)
                nnp_vector.extend(nnp_buf)
                nnp_adj.update(zip(nnp_buf[0:-1], nnp_buf[1:]))

        nnp_vector = [w for w in nnp_vector if w.lower() not in self.stop_list]
        nnp_counter = Counter(nnp_vector)
        for word in nnp_set:
            # Each proper noun is scored under its own name. (This used to
            # add a stale loop variable in place of ``word``.)
            nnp_sets_scored.add((word, nnp_counter[word]))

        # Feed candidates to the keyword set, best score first, until it
        # refuses further entries by raising ValueError.
        for item, score in sorted(nnp_sets_scored, key=lambda x: x[1], reverse=True):
            try:
                if isinstance(item, (list, tuple)):
                    kset.add(' '.join(item))
                else:
                    kset.add(item)
            except ValueError:
                break

        scored_nnp_adj = []
        for item1, item2 in nnp_adj:
            score = nnp_counter[item1] + nnp_counter[item2]
            scored_nnp_adj.append((item1, item2, score))

        # Keep the KEYWORD_LIMIT best pairs, ranked by their combined score
        # (index 2 of the triple; index 1 is the second word).
        nnp_adj = []
        for item1, item2, score in sorted(scored_nnp_adj, key=lambda x: x[2], reverse=True):
            if len(nnp_adj) >= KEYWORD_LIMIT:
                break
            nnp_adj.append((item1, item2))

        # Generate list of all keywords
        keywords = set([])
        for keyword in kset:
            try:
                keywords.add(Keyword(keyword))
            except ValueError as ex:
                logging.error(ex)
        for item1, item2 in nnp_adj:
            for term in (item1, item2):
                try:
                    keywords.add(Keyword(term))
                except ValueError as ex:
                    logging.error(ex)

        # Resolve keyword identifiers
        keyword_resolution_worker = KeywordResolutionWorker(set([k.word for k in keywords]), self.redis_kw)
        keyword_resolution_worker.start()

        # Run sentiment analysis
        trace = []
        features = self.cls.classify(worker_req_thread.result, trace)
        label, length, classified, pos_sentences, neg_sentences,\
        pos_phrases, neg_phrases = features[0:7]

        # Convert Pysen's model into database models
        # NOTE(review): the article has been add()-ed but no flush is visible
        # before this point -- confirm article.id is populated here.
        try:
            doc = Document(article.id, label, length, pos_sentences, neg_sentences, pos_phrases, neg_phrases, headline)
        except ValueError as ex:
            logging.error(ex)
            logging.error("Skipping this document...")
            article.status = "ClassificationError"
            return False

        self._session.add(doc)
        extracted_phrases = set([])
        for sentence, sentence_score, phrase_trace in trace:
            # Name the sentence after the tag of the first text node that
            # contains it; anything outside the known set becomes "Other".
            sentence_type = "Unknown"
            for node in html.findAll(text=True):
                if sentence.text in node.strip():
                    sentence_type = node.parent.name.upper()
                    break

            if sentence_type not in ["H1", "H2", "H3", "H4", "H5", "H6", "P", "Unknown"]:
                sentence_type = "Other"

            sent_label, average, sent_prob, pos, neg, probs, _scores = sentence_score

            s = Sentence(doc, sent_label, average, sent_prob, sentence_type)
            self._session.add(s)
            for phrase, phrase_prob, phrase_score, phrase_label in phrase_trace:
                p = Phrase(s, phrase_score, phrase_prob, phrase_label)
                self._session.add(p)
                extracted_phrases.add((phrase, p))

        # Wait for keyword resolution to finish
        keyword_resolution_worker.join()
        keyword_mapping = keyword_resolution_worker.out_keywords

        # Associate extracted keywords with phrases
        keyword_objects, short_keywords = kset.convert(keyword_mapping, self.kwc)
        for k in keyword_objects:
            self._session.merge(k)
        for p, p_obj in extracted_phrases:
            for k in keyword_objects:
                if k.word in p.get_text():
                    # NOTE(review): never add()-ed explicitly; presumably it
                    # reaches the session through its relationships -- confirm.
                    nk = KeywordIncidence(k, p_obj)

        # Save the keyword adjacency list
        for i, j in kset.convert_adj_tuples(nnp_adj, keyword_mapping, self.kwc):
            self._session.merge(i)
            self._session.merge(j)
            kwa = KeywordAdjacency(i, j, doc)
            self._session.add(kwa)

        # Build date objects
        for key in date_dict:
            rec = date_dict[key]
            if "dates" not in rec:
                logging.error("OK: 'dates' is not in a pydate result record.")
                continue
            dlen = len(rec["dates"])
            if rec["text"] not in content:
                logging.debug("'%s' is not in %s", rec["text"], content)
                continue
            if dlen > 1:
                # Several candidate readings: store each as ambiguous.
                for date, day_first, year_first in rec["dates"]:
                    try:
                        dobj = AmbiguousDate(date, doc, day_first, year_first, rec["prep"], key)
                    except ValueError as ex:
                        logging.error(ex)
                        continue
                    self._session.add(dobj)
            elif dlen == 1:
                for date, day_first, year_first in rec["dates"]:
                    dobj = CertainDate(date, doc, key)
                    self._session.add(dobj)
            else:
                logging.error("'dates' in a pydate result set contains no records.")

        # Process links
        for link in html.findAll('a'):
            if not link.has_attr("href"):
                logging.debug("skipping %s: no href", link)
                continue

            # A link counts only if all of its text is part of the body text.
            process = True
            for node in link.findAll(text=True):
                if node not in worker_req_thread.result:
                    process = False
                    break

            if not process:
                logging.debug("skipping %s because it's not in the body text", link)
                # Skip just this link and keep going with the remaining ones.
                continue

            # Drop any fragment part of the target.
            href = link["href"].partition("#")[0]
            if "http://" in href:
                try:

                    domain_id = None
                    domain_key = self.dc.get_Domain_key(href)
                    while domain_id is None:
                        domain_id = self.drw.get_domain(domain_key)

                    assert domain_id is not None
                    href_domain = self._session.query(Domain).get(domain_id)
                except ValueError as ex:
                    logging.error(ex)
                    logging.error("Skipping this link")
                    continue
                href_path = self.ac.get_path_fromurl(href)
                lnk = AbsoluteLink(doc, href_domain, href_path)
                self._session.add(lnk)
                logging.debug("Adding: %s", lnk)
            else:
                href_path = href
                try:
                    lnk = RelativeLink(doc, href_path)
                except ValueError as ex:
                    logging.error(ex)
                    logging.error("Skipping link")
                    continue
                self._session.add(lnk)
                logging.debug("Adding: %s", lnk)

        # Construct software involvment records
        self_sir = SoftwareInvolvementRecord(self.swc.get_SoftwareVersion_fromstr(self.__VERSION__), "Processed", doc)
        date_sir = SoftwareInvolvementRecord(self.swc.get_SoftwareVersion_fromstr(pydate.__VERSION__), "Dated", doc)
        clas_sir = SoftwareInvolvementRecord(self.swc.get_SoftwareVersion_fromstr(pysen.__VERSION__), "Classified", doc)
        extr_sir = SoftwareInvolvementRecord(self.swc.get_SoftwareVersion_fromstr(worker_req_thread.version), "Extracted", doc)

        for sw in [self_sir, date_sir, clas_sir, extr_sir]:
            self._session.merge(sw, load=True)

        logging.debug("Domain: %s", domain)
        logging.debug("Path: %s", path)
        article.status = status

        # Commit to the database; hand back the article id on success
        try:
            self._session.commit()
        except OperationalError as ex:
            logging.error(ex)
            self._session.rollback()
            return None

        return article.id

    def finalize(self):
        """Commit whatever is still pending on the processor's session."""
        self._session.commit()
Exemplo n.º 39
0
class InboxSession(object):
    """ Inbox custom ORM (with SQLAlchemy compatible API).

    Parameters
    ----------
    engine : <sqlalchemy.engine.Engine>
        A configured database engine to use for this session
    versioned : bool
        Do you want to enable the transaction log?
    ignore_soft_deletes : bool
        Whether or not to ignore soft-deleted objects in query results.
    namespace_id : int
        Namespace to limit query results with.
    """
    def __init__(self, engine, versioned=True, ignore_soft_deletes=True,
                 namespace_id=None):
        # TODO: support limiting on namespaces
        assert engine, "Must set the database engine"

        args = dict(bind=engine, autoflush=True, autocommit=False)
        self.ignore_soft_deletes = ignore_soft_deletes
        if ignore_soft_deletes:
            args['query_cls'] = InboxQuery
        self._session = Session(**args)

        if versioned:
            # Imported here rather than at module level.
            from inbox.models.transaction import create_revisions

            @event.listens_for(self._session, 'after_flush')
            def after_flush(session, flush_context):
                """
                Hook to log revision snapshots. Must be post-flush in order to
                grab object IDs on new objects.
                """
                create_revisions(session)

    def query(self, *args, **kwargs):
        """Build a query; IgnoreSoftDeletesOption is applied when soft
        deletes are being ignored."""
        q = self._session.query(*args, **kwargs)
        if self.ignore_soft_deletes:
            return q.options(IgnoreSoftDeletesOption())
        else:
            return q

    def add(self, instance):
        """Add ``instance`` to the session.

        Raises Exception when soft deletes are being ignored and the
        instance is marked deleted.
        """
        if not self.ignore_soft_deletes or not instance.is_deleted:
            self._session.add(instance)
        else:
            raise Exception("Why are you adding a deleted object?")

    def add_all(self, instances):
        """Add every object in ``instances`` to the session.

        ``instances`` may be any iterable, including a one-shot iterator.
        Raises Exception (and adds nothing) when soft deletes are being
        ignored and at least one instance is marked deleted.
        """
        # Materialise once: the objects are inspected and then handed to the
        # session, and a one-shot iterator would be used up by the check.
        instances = list(instances)
        # Like add(), only look at is_deleted when soft deletes are ignored.
        if self.ignore_soft_deletes and any(i.is_deleted for i in instances):
            raise Exception("Why are you adding a deleted object?")
        self._session.add_all(instances)

    def delete(self, instance):
        """Delete ``instance``.

        When soft deletes are being ignored the instance is only marked via
        ``mark_deleted()`` and re-added; otherwise it is deleted through the
        underlying session.
        """
        if self.ignore_soft_deletes:
            instance.mark_deleted()
            # just to make sure
            self._session.add(instance)
        else:
            self._session.delete(instance)

    def begin(self):
        """Delegate to the underlying session's ``begin()``."""
        self._session.begin()

    def commit(self):
        """Delegate to the underlying session's ``commit()``."""
        self._session.commit()

    def rollback(self):
        """Delegate to the underlying session's ``rollback()``."""
        self._session.rollback()

    def flush(self):
        """Delegate to the underlying session's ``flush()``."""
        self._session.flush()

    def close(self):
        """Delegate to the underlying session's ``close()``."""
        self._session.close()

    def expunge(self, obj):
        """Delegate to the underlying session's ``expunge()``."""
        self._session.expunge(obj)

    def merge(self, obj):
        """Delegate to the underlying session's ``merge()`` and return its result."""
        return self._session.merge(obj)

    @property
    def no_autoflush(self):
        """The underlying session's ``no_autoflush`` attribute."""
        return self._session.no_autoflush