示例#1
0
    def select(self):
        """
        Load every ``localStorage`` row together with its crawl and task.

        Runs one joined query and maps each result row onto a
        ``LocalStorageModel`` whose ``crawl_id`` attribute holds a populated
        ``CrawlModel``; that crawl model's ``task_id`` attribute in turn
        holds a populated ``TaskModel``.

        The cursor is closed and ``self.disconnect()`` is called even when
        the query fails.

        Returns:
            list: one ``LocalStorageModel`` per result row, in cursor order
            (empty when the query yields no rows).
        """
        query = """
                SELECT         
                localStorage.id,
                crawl.crawl_id,
                task.task_id,
                task.start_time,
                task.manager_params,
                task.openwpm_version,
                task.browser_version,
                crawl.browser_params,
                crawl.screen_res,
                crawl.ua_string,
                crawl.finished,
                crawl.start_time,
                localStorage.page_url,
                localStorage.scope,
                localStorage.KEY,
                localStorage.value
                FROM localStorage
                INNER JOIN crawl ON localStorage.crawl_id = crawl.crawl_id
                INNER JOIN task ON crawl.task_id = task.task_id                        
                """
        self.connect()
        try:
            cursor = self.connection.cursor()
            try:
                cursor.execute(query)
                rows = cursor.fetchall()
            finally:
                cursor.close()
        finally:
            # Always release the connection, even if the query raised.
            self.disconnect()

        model_list = []
        for row in rows:
            # task model (columns 2-6)
            task_model = TaskModel()
            task_model.task_id = row[2]
            task_model.start_time = row[3]
            task_model.manager_params = row[4]
            task_model.openwpm_version = row[5]
            task_model.browser_version = row[6]

            # crawl model (columns 1, 7-11); task_id carries the TaskModel
            crawl_model = CrawlModel()
            crawl_model.crawl_id = row[1]
            crawl_model.task_id = task_model
            crawl_model.browser_params = row[7]
            crawl_model.screen_res = row[8]
            crawl_model.ua_string = row[9]
            crawl_model.finished = row[10]
            crawl_model.start_time = row[11]

            # local storage model (columns 0, 12-15)
            local_storage_model = LocalStorageModel()
            local_storage_model.id = row[0]
            local_storage_model.crawl_id = crawl_model
            local_storage_model.page_url = row[12]
            local_storage_model.scope = row[13]
            local_storage_model.KEY = row[14]
            local_storage_model.value = row[15]

            model_list.append(local_storage_model)
        return model_list
示例#2
0
    def select(self):
        """
        Load every ``CrawlHistory`` row together with its crawl and task.

        Runs one joined query and maps each result row onto a
        ``CrawlHistoryModel`` whose ``crawl_id`` attribute holds a populated
        ``CrawlModel``; that crawl model's ``task_id`` attribute in turn
        holds a populated ``TaskModel``. The ``dtg`` column is stored on the
        model as ``timestamp``.

        The cursor is closed and ``self.disconnect()`` is called even when
        the query fails.

        Returns:
            list: one ``CrawlHistoryModel`` per result row, in cursor order
            (empty when the query yields no rows).
        """
        query = """
        SELECT         
        crawl.crawl_id,
        task.task_id,
        task.start_time,
        task.manager_params,
        task.openwpm_version,
        task.browser_version,
        crawl.browser_params,
        crawl.screen_res,
        crawl.ua_string,
        crawl.finished,
        crawl.start_time,
        CrawlHistory.command,
        CrawlHistory.arguments,
        CrawlHistory.bool_success,
        CrawlHistory.dtg
        FROM CrawlHistory
        INNER JOIN crawl ON CrawlHistory.crawl_id = crawl.crawl_id
        INNER JOIN task ON crawl.task_id = task.task_id        
        """
        self.connect()
        try:
            cursor = self.connection.cursor()
            try:
                cursor.execute(query)
                rows = cursor.fetchall()
            finally:
                cursor.close()
        finally:
            # Always release the connection, even if the query raised.
            self.disconnect()

        model_list = []
        for row in rows:
            # task model (columns 1-5)
            task_model = TaskModel()
            task_model.task_id = row[1]
            task_model.start_time = row[2]
            task_model.manager_params = row[3]
            task_model.openwpm_version = row[4]
            task_model.browser_version = row[5]

            # crawl model (columns 0, 6-10); task_id carries the TaskModel
            crawl_model = CrawlModel()
            crawl_model.crawl_id = row[0]
            crawl_model.task_id = task_model
            crawl_model.browser_params = row[6]
            crawl_model.screen_res = row[7]
            crawl_model.ua_string = row[8]
            crawl_model.finished = row[9]
            crawl_model.start_time = row[10]

            # crawl history model (columns 11-14)
            crawl_history_model = CrawlHistoryModel()
            crawl_history_model.crawl_id = crawl_model
            crawl_history_model.command = row[11]
            crawl_history_model.arguments = row[12]
            crawl_history_model.bool_success = row[13]
            crawl_history_model.timestamp = row[14]
            model_list.append(crawl_history_model)

        return model_list
示例#3
0
    def select(self):
        """
        Load every ``site_visits`` row together with its crawl and task.

        Runs one joined query and maps each result row onto a
        ``SiteVisitsModel`` whose ``crawl_id`` attribute holds a populated
        ``CrawlModel``; that crawl model's ``task_id`` attribute in turn
        holds a populated ``TaskModel``.

        The cursor is closed and ``self.disconnect()`` is called even when
        the query fails.

        Returns:
            list: one ``SiteVisitsModel`` per result row, in cursor order
            (empty when the query yields no rows).
        """
        query = """
        SELECT         
        site_visits.visit_id,
        crawl.crawl_id,
        task.task_id,
        task.start_time,
        task.manager_params,
        task.openwpm_version,
        task.browser_version,
        crawl.browser_params,
        crawl.screen_res,
        crawl.ua_string,
        crawl.finished,
        crawl.start_time,
        site_visits.site_url
        FROM site_visits
        INNER JOIN crawl ON site_visits.crawl_id = crawl.crawl_id
        INNER JOIN task ON crawl.task_id = task.task_id        
        """
        self.connect()
        try:
            cursor = self.connection.cursor()
            try:
                cursor.execute(query)
                rows = cursor.fetchall()
            finally:
                cursor.close()
        finally:
            # Always release the connection, even if the query raised.
            self.disconnect()

        model_list = []
        for row in rows:
            # task model (columns 2-6)
            task_model = TaskModel()
            task_model.task_id = row[2]
            task_model.start_time = row[3]
            task_model.manager_params = row[4]
            task_model.openwpm_version = row[5]
            task_model.browser_version = row[6]

            # crawl model (columns 1, 7-11); task_id carries the TaskModel
            crawl_model = CrawlModel()
            crawl_model.crawl_id = row[1]
            crawl_model.task_id = task_model
            crawl_model.browser_params = row[7]
            crawl_model.screen_res = row[8]
            crawl_model.ua_string = row[9]
            crawl_model.finished = row[10]
            crawl_model.start_time = row[11]

            # site visits model (columns 0, 12)
            site_visits_model = SiteVisitsModel()
            site_visits_model.visit_id = row[0]
            site_visits_model.crawl_id = crawl_model
            site_visits_model.site_url = row[12]
            model_list.append(site_visits_model)

        return model_list
示例#4
0
def users(event, context):
    """
    Return the list of users that belong to a task.

    The task id is read from ``event['pathParameters']['id']``.

    Args:
        event: request event; ``pathParameters`` is read from it.
        context: unused.

    Returns:
        On success, a response dict with status 200 whose JSON body holds
        ``statusCode``, ``taskId`` and ``users``. On a handled error, the
        result of ``build_response`` with status 400, 404 or 500.
    """
    try:
        logger.info(event)
        path_params = event['pathParameters']
        if not path_params:
            raise errors.BadRequest('Bad request')
        task_id = path_params['id']

        # Fetch the task.
        try:
            task = TaskModel.get(task_id)
        except TaskModel.DoesNotExist:
            raise errors.NotFound('The task does not exist')
        if not task.userIds:
            task.userIds = []

        # Fetch the users.
        try:
            members = task.get_users()
        except UserModel.DoesNotExist as e:
            logger.exception(e)
            raise errors.InternalError('Internal server error')

        payload = {
            'statusCode': 200,
            'taskId': task_id,
            'users': [dict(member) for member in members]
        }
        response_headers = {
            'Access-Control-Allow-Origin': '*',
            'Content-Type': 'application/json'
        }
        return {
            'statusCode': 200,
            'headers': response_headers,
            'body': json.dumps(payload)
        }

    except (errors.BadRequest, errors.NotFound, errors.InternalError) as e:
        logger.exception(e)
        # Same precedence as checking the three error types in sequence.
        if isinstance(e, errors.BadRequest):
            status = 400
        elif isinstance(e, errors.NotFound):
            status = 404
        else:
            status = 500
        return build_response(e, status)
示例#5
0
def done_undone(event, context):
    """
    Mark a task as done or undone depending on the invoked resource.

    The flag passed to ``task.status_update`` is ``True`` when
    ``event['resource']`` matches ``.*/done$`` and ``False`` otherwise.

    Args:
        event: request event; ``pathParameters`` and ``resource`` are read.
        context: unused.

    Returns:
        On success, a response dict with status 200 whose JSON body holds
        ``statusCode`` and the task as a dict. On a handled error, the
        result of ``build_response`` with status 400, 404 or 500.
    """
    try:
        logger.info(event)
        path_params = event['pathParameters']
        if not path_params:
            raise errors.BadRequest('Bad request')
        task_id = path_params['id']

        # "done" resource -> True, anything else -> False.
        flag = bool(re.match('.*/done$', event['resource']))

        # Fetch the task.
        try:
            task = TaskModel.get(task_id)
        except TaskModel.DoesNotExist:
            raise errors.NotFound('The task does not exist')

        # Update the task.
        try:
            task.status_update(flag)
        except UpdateError as e:
            logger.exception(e)
            raise errors.InternalError('Internal server error')

        payload = {
            'statusCode': 200,
            'task': dict(task)
        }
        response_headers = {
            'Access-Control-Allow-Origin': '*',
            'Content-Type': 'application/json'
        }
        return {
            'statusCode': 200,
            'headers': response_headers,
            'body': json.dumps(payload)
        }

    except (errors.BadRequest, errors.NotFound, errors.InternalError) as e:
        logger.exception(e)
        # Same precedence as checking the three error types in sequence.
        if isinstance(e, errors.BadRequest):
            status = 400
        elif isinstance(e, errors.NotFound):
            status = 404
        else:
            status = 500
        return build_response(e, status)
示例#6
0
def delete(event, context):
    """
    Logically delete a task by calling ``task.logic_delete()``.

    NOTE(review): the original note says this changes the task's delete
    flag to false; confirm against the model's ``logic_delete``.

    Args:
        event: request event; ``pathParameters`` is read from it.
        context: unused.

    Returns:
        On success, a response dict with status 200 whose JSON body holds
        only ``statusCode``. On a handled error, the result of
        ``build_response`` with status 400, 404 or 500.
    """
    try:
        logger.info(event)
        path_params = event['pathParameters']
        if not path_params:
            raise errors.BadRequest('Bad request')
        task_id = path_params['id']

        # Fetch the task.
        try:
            task = TaskModel.get(task_id)
        except TaskModel.DoesNotExist:
            raise errors.NotFound('The task does not exist')

        # Delete the task.
        try:
            task.logic_delete()
        except UpdateError as e:
            logger.exception(e)
            raise errors.InternalError('Internal server error')

        response_headers = {
            'Access-Control-Allow-Origin': '*',
            'Content-Type': 'application/json'
        }
        return {
            'statusCode': 200,
            'headers': response_headers,
            'body': json.dumps({'statusCode': 200})
        }

    except (errors.BadRequest, errors.NotFound, errors.InternalError) as e:
        logger.exception(e)
        # Same precedence as checking the three error types in sequence.
        if isinstance(e, errors.BadRequest):
            status = 400
        elif isinstance(e, errors.NotFound):
            status = 404
        else:
            status = 500
        return build_response(e, status)
示例#7
0
 def select(self):
     """
     Load every row of the ``task`` table as a ``TaskModel``.

     Columns are read positionally from ``SELECT *``, so the mapping
     relies on the table's column order being task_id, start_time,
     manager_params, openwpm_version, browser_version.

     The cursor is closed and ``self.disconnect()`` is called even when
     the query fails.

     Returns:
         list: one ``TaskModel`` per result row, in cursor order (empty
         when the table has no rows).
     """
     query = "SELECT * FROM task"
     self.connect()
     try:
         cursor = self.connection.cursor()
         try:
             cursor.execute(query)
             rows = cursor.fetchall()
         finally:
             cursor.close()
     finally:
         # Always release the connection, even if the query raised.
         self.disconnect()

     model_list = []
     for row in rows:
         model = TaskModel()
         model.task_id = row[0]
         model.start_time = row[1]
         model.manager_params = row[2]
         model.openwpm_version = row[3]
         model.browser_version = row[4]
         model_list.append(model)
     return model_list
示例#8
0
    def select(self):
        """
        Load every ``crawl`` row together with its task.

        Runs one joined query and maps each result row onto a ``CrawlModel``
        whose ``task_id`` attribute holds a populated ``TaskModel``.

        The cursor is closed and ``self.disconnect()`` is called even when
        the query fails.

        Returns:
            list: one ``CrawlModel`` per result row, in cursor order (empty
            when the query yields no rows).
        """
        query = """
        SELECT 
        crawl.crawl_id,
        task.task_id,
        task.start_time,
        task.manager_params,
        task.openwpm_version,
        task.browser_version,
        crawl.browser_params,
        crawl.screen_res,
        crawl.ua_string,
        crawl.finished,
        crawl.start_time
        FROM crawl
        INNER JOIN task on crawl.task_id = task.task_id
        """
        self.connect()
        try:
            cursor = self.connection.cursor()
            try:
                cursor.execute(query)
                rows = cursor.fetchall()
            finally:
                cursor.close()
        finally:
            # Always release the connection, even if the query raised.
            self.disconnect()

        model_list = []
        for row in rows:
            # task model (columns 1-5)
            task_model = TaskModel()
            task_model.task_id = row[1]
            task_model.start_time = row[2]
            task_model.manager_params = row[3]
            task_model.openwpm_version = row[4]
            task_model.browser_version = row[5]

            # crawl model (columns 0, 6-10); task_id carries the TaskModel
            crawl_model = CrawlModel()
            crawl_model.crawl_id = row[0]
            crawl_model.task_id = task_model
            crawl_model.browser_params = row[6]
            crawl_model.screen_res = row[7]
            crawl_model.ua_string = row[8]
            crawl_model.finished = row[9]
            crawl_model.start_time = row[10]
            model_list.append(crawl_model)
        return model_list
示例#9
0
    def select(self):
        """
        Load every ``flash_cookies`` row with its crawl, task and site visit.

        Runs one joined query and maps each result row onto a
        ``FlashCookiesModel`` whose ``crawl_id`` attribute holds a populated
        ``CrawlModel`` (itself carrying a ``TaskModel`` in ``task_id``) and
        whose ``visit_id`` attribute holds a populated ``SiteVisitsModel``.

        The cursor is closed and ``self.disconnect()`` is called even when
        the query fails.

        Returns:
            list: one ``FlashCookiesModel`` per result row, in cursor order
            (empty when the query yields no rows).
        """
        query = """
                SELECT         
                flash_cookies.id,
                crawl.crawl_id,
                task.task_id,
                task.start_time,
                task.manager_params,
                task.openwpm_version,
                task.browser_version,
                crawl.browser_params,
                crawl.screen_res,
                crawl.ua_string,
                crawl.finished,
                crawl.start_time,
                site_visits.visit_id,
                site_visits.site_url,
                flash_cookies.domain,
                flash_cookies.filename,
                flash_cookies.local_path,
                flash_cookies.key,
                flash_cookies.content
                FROM flash_cookies
                INNER JOIN crawl ON flash_cookies.crawl_id = crawl.crawl_id
                INNER JOIN task ON crawl.task_id = task.task_id
                INNER JOIN site_visits ON flash_cookies.visit_id = site_visits.visit_id        
                """
        self.connect()
        try:
            cursor = self.connection.cursor()
            try:
                cursor.execute(query)
                rows = cursor.fetchall()
            finally:
                cursor.close()
        finally:
            # Always release the connection, even if the query raised.
            self.disconnect()

        model_list = []
        for row in rows:
            # task model (columns 2-6)
            task_model = TaskModel()
            task_model.task_id = row[2]
            task_model.start_time = row[3]
            task_model.manager_params = row[4]
            task_model.openwpm_version = row[5]
            task_model.browser_version = row[6]

            # crawl model (columns 1, 7-11); task_id carries the TaskModel
            crawl_model = CrawlModel()
            crawl_model.crawl_id = row[1]
            crawl_model.task_id = task_model
            crawl_model.browser_params = row[7]
            crawl_model.screen_res = row[8]
            crawl_model.ua_string = row[9]
            crawl_model.finished = row[10]
            crawl_model.start_time = row[11]

            # site visits model (columns 12-13)
            site_visits_model = SiteVisitsModel()
            site_visits_model.visit_id = row[12]
            site_visits_model.crawl_id = crawl_model
            site_visits_model.site_url = row[13]

            # flash cookie model (columns 0, 14-18)
            flash_cookie_model = FlashCookiesModel()
            flash_cookie_model.id = row[0]
            flash_cookie_model.crawl_id = crawl_model
            flash_cookie_model.visit_id = site_visits_model
            flash_cookie_model.domain = row[14]
            flash_cookie_model.filename = row[15]
            flash_cookie_model.local_path = row[16]
            flash_cookie_model.key = row[17]
            flash_cookie_model.content = row[18]

            model_list.append(flash_cookie_model)
        return model_list
示例#10
0
    def select(self):
        """
        Load every ``profile_cookies`` row with its crawl, task and site visit.

        Runs one joined query and maps each result row onto a
        ``ProfileCookiesModel`` whose ``crawl_id`` attribute holds a
        populated ``CrawlModel`` (itself carrying a ``TaskModel`` in
        ``task_id``) and whose ``visit_id`` attribute holds a populated
        ``SiteVisitsModel``.

        The cursor is closed and ``self.disconnect()`` is called even when
        the query fails.

        Returns:
            list: one ``ProfileCookiesModel`` per result row, in cursor
            order (empty when the query yields no rows).
        """
        query = """
                SELECT         
                profile_cookies.id,
                crawl.crawl_id,
                task.task_id,
                task.start_time,
                task.manager_params,
                task.openwpm_version,
                task.browser_version,
                crawl.browser_params,
                crawl.screen_res,
                crawl.ua_string,
                crawl.finished,
                crawl.start_time,
                site_visits.visit_id,
                site_visits.site_url,
                profile_cookies.baseDomain,
                profile_cookies.name,
                profile_cookies.value,
                profile_cookies.host,
                profile_cookies.path,
                profile_cookies.expiry,
                profile_cookies.accessed,
                profile_cookies.creationTime,
                profile_cookies.isSecure,
                profile_cookies.isHttpOnly
                FROM profile_cookies
                INNER JOIN crawl ON profile_cookies.crawl_id = crawl.crawl_id
                INNER JOIN task ON crawl.task_id = task.task_id
                INNER JOIN site_visits ON profile_cookies.visit_id = site_visits.visit_id        
                """
        self.connect()
        try:
            cursor = self.connection.cursor()
            try:
                cursor.execute(query)
                rows = cursor.fetchall()
            finally:
                cursor.close()
        finally:
            # Always release the connection, even if the query raised.
            self.disconnect()

        model_list = []
        for row in rows:
            # task model (columns 2-6)
            task_model = TaskModel()
            task_model.task_id = row[2]
            task_model.start_time = row[3]
            task_model.manager_params = row[4]
            task_model.openwpm_version = row[5]
            task_model.browser_version = row[6]

            # crawl model (columns 1, 7-11); task_id carries the TaskModel
            crawl_model = CrawlModel()
            crawl_model.crawl_id = row[1]
            crawl_model.task_id = task_model
            crawl_model.browser_params = row[7]
            crawl_model.screen_res = row[8]
            crawl_model.ua_string = row[9]
            crawl_model.finished = row[10]
            crawl_model.start_time = row[11]

            # site visits model (columns 12-13)
            site_visits_model = SiteVisitsModel()
            site_visits_model.visit_id = row[12]
            site_visits_model.crawl_id = crawl_model
            site_visits_model.site_url = row[13]

            # profile cookies model (columns 0, 14-23)
            profile_cookies_model = ProfileCookiesModel()
            profile_cookies_model.id = row[0]
            profile_cookies_model.crawl_id = crawl_model
            profile_cookies_model.visit_id = site_visits_model
            profile_cookies_model.baseDomain = row[14]
            profile_cookies_model.name = row[15]
            profile_cookies_model.value = row[16]
            profile_cookies_model.host = row[17]
            profile_cookies_model.path = row[18]
            profile_cookies_model.expiry = row[19]
            profile_cookies_model.accessed = row[20]
            profile_cookies_model.creationTime = row[21]
            profile_cookies_model.isSecure = row[22]
            profile_cookies_model.isHttpOnly = row[23]

            model_list.append(profile_cookies_model)
        return model_list
示例#11
0
    def select(self):
        """
        Load every ``javascript`` row with its crawl, task and site visit.

        Runs one joined query and maps each result row onto a
        ``JavascriptModel`` whose ``crawl_id`` attribute holds a populated
        ``CrawlModel`` (itself carrying a ``TaskModel`` in ``task_id``) and
        whose ``visit_id`` attribute holds a populated ``SiteVisitsModel``.

        The cursor is closed and ``self.disconnect()`` is called even when
        the query fails.

        Returns:
            list: one ``JavascriptModel`` per result row, in cursor order
            (empty when the query yields no rows).
        """
        query = """
                SELECT         
                javascript.id,
                crawl.crawl_id,
                task.task_id,
                task.start_time,
                task.manager_params,
                task.openwpm_version,
                task.browser_version,
                crawl.browser_params,
                crawl.screen_res,
                crawl.ua_string,
                crawl.finished,
                crawl.start_time,
                site_visits.visit_id,
                site_visits.site_url,
                javascript.script_url,
                javascript.script_line,
                javascript.script_col,
                javascript.func_name,
                javascript.script_loc_eval,
                javascript.call_stack,
                javascript.symbol,
                javascript.operation,
                javascript.value,
                javascript.arguments,
                javascript.time_stamp
                FROM javascript
                INNER JOIN crawl ON javascript.crawl_id = crawl.crawl_id
                INNER JOIN task ON crawl.task_id = task.task_id
                INNER JOIN site_visits ON javascript.visit_id = site_visits.visit_id        
                """
        self.connect()
        try:
            cursor = self.connection.cursor()
            try:
                cursor.execute(query)
                rows = cursor.fetchall()
            finally:
                cursor.close()
        finally:
            # Always release the connection, even if the query raised.
            self.disconnect()

        model_list = []
        for row in rows:
            # task model (columns 2-6)
            task_model = TaskModel()
            task_model.task_id = row[2]
            task_model.start_time = row[3]
            task_model.manager_params = row[4]
            task_model.openwpm_version = row[5]
            task_model.browser_version = row[6]

            # crawl model (columns 1, 7-11); task_id carries the TaskModel
            crawl_model = CrawlModel()
            crawl_model.crawl_id = row[1]
            crawl_model.task_id = task_model
            crawl_model.browser_params = row[7]
            crawl_model.screen_res = row[8]
            crawl_model.ua_string = row[9]
            crawl_model.finished = row[10]
            crawl_model.start_time = row[11]

            # site visits model (columns 12-13)
            site_visits_model = SiteVisitsModel()
            site_visits_model.visit_id = row[12]
            site_visits_model.crawl_id = crawl_model
            site_visits_model.site_url = row[13]

            # javascript model (columns 0, 14-24)
            javascript_model = JavascriptModel()
            javascript_model.id = row[0]
            javascript_model.crawl_id = crawl_model
            javascript_model.visit_id = site_visits_model
            javascript_model.script_url = row[14]
            javascript_model.script_line = row[15]
            javascript_model.script_col = row[16]
            javascript_model.func_name = row[17]
            javascript_model.script_loc_eval = row[18]
            javascript_model.call_stack = row[19]
            javascript_model.symbol = row[20]
            javascript_model.operation = row[21]
            javascript_model.value = row[22]
            javascript_model.arguments = row[23]
            javascript_model.time_stamp = row[24]

            model_list.append(javascript_model)
        return model_list
示例#12
0
def update(event, context):
    """
    Update a task's ``name`` and/or ``description``.

    The JSON request body is filtered down to the ``name`` and
    ``description`` keys; any other keys are ignored. The task id is read
    from ``event['pathParameters']['id']``.

    A body that is not valid JSON, is not a JSON object, or contains
    neither updatable key is answered with 400 rather than escaping the
    handler as an unhandled exception.

    Args:
        event: request event; ``body`` and ``pathParameters`` are read.
        context: unused.

    Returns:
        On success, a response dict with status 200 whose JSON body holds
        ``statusCode`` and the saved task as a dict. On a handled error,
        the result of ``build_response`` with status 400, 404 or 500.
    """
    try:
        logger.info(event)
        if not (event['body'] and event['pathParameters']):
            raise errors.BadRequest('Bad request')

        # Malformed JSON is a client error (json.JSONDecodeError is a
        # ValueError subclass).
        try:
            data = json.loads(event['body'])
        except ValueError:
            raise errors.BadRequest('Bad request')
        # Valid JSON that is not an object (list, string, number, ...)
        # has no attributes to update.
        if not isinstance(data, dict):
            raise errors.BadRequest('Bad request')

        # Drop attributes that this endpoint does not update.
        data = {k: v for k, v in data.items() if k in ('name', 'description')}
        if not data:
            raise errors.BadRequest('Bad request')
        task_id = event['pathParameters']['id']

        # Fetch the task.
        try:
            task = TaskModel.get(task_id)
        except TaskModel.DoesNotExist:
            raise errors.NotFound('The task does not exist')
        if 'name' in data:
            task.name = data['name']
        if 'description' in data:
            task.description = data['description']
        if not task.userIds:
            task.userIds = []

        try:
            task.save()
        except (InvalidNameError, InvalidDescriptionError) as e:
            logger.exception(e)
            # The validation message is passed on to the client.
            raise errors.BadRequest(str(e))
        except PutError as e:
            logger.exception(e)
            raise errors.InternalError('Internal server error')

        return {
            'statusCode': 200,
            'headers': {
                'Access-Control-Allow-Origin': '*',
                'Content-Type': 'application/json'
            },
            'body': json.dumps({
                'statusCode': 200,
                'task': dict(task)
            })
        }

    except errors.BadRequest as e:
        logger.exception(e)
        return build_response(e, 400)

    except errors.NotFound as e:
        logger.exception(e)
        return build_response(e, 404)

    except errors.InternalError as e:
        logger.exception(e)
        return build_response(e, 500)
示例#13
0
def add_remove(event, context):
    """
    Add users to, or remove users from, a task.

    The JSON request body must be an object with a ``userIds`` array. Users
    are added when ``event['resource']`` matches ``.*/add$`` and removed
    otherwise. The task id is read from ``event['pathParameters']['id']``.

    A body that is not valid JSON or not a JSON object is answered with
    400 instead of falling through to the generic 500 handler.

    Args:
        event: request event; ``body``, ``pathParameters`` and ``resource``
            are read.
        context: unused.

    Returns:
        On success, a response dict with status 200 whose JSON body holds
        ``statusCode`` and the re-fetched task as a dict. On a handled
        error, the result of ``build_response`` with status 400, 404 or
        500; any other exception yields a generic 500 response dict.
    """
    try:
        logger.info(event)
        if not (event['pathParameters'] and event['body']):
            raise errors.BadRequest('Bad request')

        # Malformed JSON is a client error (json.JSONDecodeError is a
        # ValueError subclass).
        try:
            data = json.loads(event['body'])
        except ValueError:
            raise errors.BadRequest('Bad request')
        # Only a JSON object can carry the "userIds" attribute.
        if not isinstance(data, dict) or 'userIds' not in data:
            raise errors.BadRequest('Bad request')
        user_ids = data['userIds']
        if not isinstance(user_ids, list):
            raise errors.BadRequest('"userIds" attribute must be array')
        task_id = event['pathParameters']['id']

        # Fetch the task.
        try:
            task = TaskModel.get(task_id)
        except TaskModel.DoesNotExist:
            raise errors.NotFound('The task does not exist')

        # add -> True, anything else (remove) -> False.
        flag = bool(re.match('.*/add$', event['resource']))

        # Update the task's userIds.
        try:
            task.user_ids_update(user_ids, flag)
        except InvalidUserError as e:
            logger.exception(e)
            # The validation message is passed on to the client.
            raise errors.NotFound(str(e))
        except UpdateError as e:
            logger.exception(e)
            raise errors.InternalError('Internal server error')
        # Re-fetch the task for the response.
        task = TaskModel.get(task_id)

        return {
            'statusCode': 200,
            'headers': {
                'Access-Control-Allow-Origin': '*',
                'Content-Type': 'application/json'
            },
            'body': json.dumps(
                {
                    'statusCode': 200,
                    'task': dict(task)
                }
            )
        }

    except errors.BadRequest as e:
        logger.exception(e)
        return build_response(e, 400)

    except errors.NotFound as e:
        logger.exception(e)
        return build_response(e, 404)

    except errors.InternalError as e:
        logger.exception(e)
        return build_response(e, 500)

    except Exception as e:
        # Last-resort boundary: log and answer with a generic 500.
        logger.exception(e)
        return {
            'statusCode': 500,
            'headers': {
                'Access-Control-Allow-Origin': '*',
                'Content-Type': 'application/json'
            },
            'body': json.dumps(
                {
                    'statusCode': 500,
                    'errorMessage': 'Internal server error'
                }
            )
        }
示例#14
0
def create(event, context):
    """
    Create a task from the JSON request body.

    Per the original note, ``name`` and ``description`` are required and
    ``userIds`` is optional (it defaults to an empty list). ``taskListId``
    is also read from the body. The body is checked by
    ``validate_attributes`` before the task is built.

    A body that is not valid JSON is answered with 400 rather than
    escaping the handler as an unhandled exception.

    Args:
        event: request event; ``body`` is read from it.
        context: unused.

    Returns:
        On success, a response dict with status 200 whose JSON body holds
        ``statusCode`` and the saved task as a dict. On a handled error,
        the result of ``build_response`` with status 400, 404 or 500.
    """
    try:
        logger.info(event)
        if not event['body']:
            raise errors.BadRequest('Bad request')
        # Malformed JSON is a client error (json.JSONDecodeError is a
        # ValueError subclass).
        try:
            body = json.loads(event['body'])
        except ValueError:
            raise errors.BadRequest('Bad request')
        validate_attributes(body)
        if 'userIds' not in body:
            body['userIds'] = []

        task = TaskModel(id=str(uuid.uuid1()),
                         name=body['name'],
                         description=body['description'],
                         taskListId=body['taskListId'],
                         userIds=body['userIds'])

        # Save the task.
        try:
            task.save()
        except (InvalidNameError, InvalidDescriptionError) as e:
            logger.exception(e)
            # The validation message is passed on to the client.
            raise errors.BadRequest(str(e))
        except InvalidTaskListError as e:
            logger.exception(e)
            message = str(e)
            # A missing task list is a 404; any other task-list problem
            # is a 400.
            if message == 'The taskList does not exist':
                raise errors.NotFound(message)
            raise errors.BadRequest(message)
        except InvalidUserError as e:
            logger.exception(e)
            message = str(e)
            # An unknown user id is a 404; any other user problem is a 400.
            if message == 'The userIds contains a invalid userId does not exist':
                raise errors.NotFound(message)
            raise errors.BadRequest(message)
        except PutError as e:
            logger.exception(e)
            raise errors.InternalError('Internal server error')

        return {
            'statusCode': 200,
            'headers': {
                'Access-Control-Allow-Origin': '*',
                'Content-Type': 'application/json'
            },
            'body': json.dumps({
                'statusCode': 200,
                'task': dict(task)
            })
        }

    except errors.BadRequest as e:
        logger.exception(e)
        return build_response(e, 400)

    except errors.NotFound as e:
        logger.exception(e)
        return build_response(e, 404)

    except errors.InternalError as e:
        logger.exception(e)
        return build_response(e, 500)
示例#15
0
    def select(self):
        """
        Load every ``http_requests`` row with its crawl, task and site visit.

        Runs one joined query and maps each result row onto an
        ``HttpRequestsModel`` whose ``crawl_id`` attribute holds a populated
        ``CrawlModel`` (itself carrying a ``TaskModel`` in ``task_id``) and
        whose ``visit_id`` attribute holds a populated ``SiteVisitsModel``.

        The cursor is closed and ``self.disconnect()`` is called even when
        the query fails.

        Returns:
            list: one ``HttpRequestsModel`` per result row, in cursor order
            (empty when the query yields no rows).
        """
        query = """
                SELECT         
                http_requests.id,
                crawl.crawl_id,
                task.task_id,
                task.start_time,
                task.manager_params,
                task.openwpm_version,
                task.browser_version,
                crawl.browser_params,
                crawl.screen_res,
                crawl.ua_string,
                crawl.finished,
                crawl.start_time,
                site_visits.visit_id,
                site_visits.site_url,
                http_requests.url,
                http_requests.top_level_url,
                http_requests.method,
                http_requests.referrer,
                http_requests.headers,
                http_requests.channel_id,
                http_requests.is_XHR,
                http_requests.is_frame_load,
                http_requests.is_full_page,
                http_requests.is_third_party_channel,
                http_requests.is_third_party_window,
                http_requests.triggering_origin,
                http_requests.loading_origin,
                http_requests.loading_href,
                http_requests.req_call_stack,
                http_requests.content_policy_type,
                http_requests.post_body,
                http_requests.time_stamp
                FROM http_requests
                INNER JOIN crawl ON http_requests.crawl_id = crawl.crawl_id
                INNER JOIN task ON crawl.task_id = task.task_id
                INNER JOIN site_visits ON http_requests.visit_id = site_visits.visit_id        
                """
        self.connect()
        try:
            cursor = self.connection.cursor()
            try:
                cursor.execute(query)
                rows = cursor.fetchall()
            finally:
                cursor.close()
        finally:
            # Always release the connection, even if the query raised.
            self.disconnect()

        model_list = []
        for row in rows:
            # task model (columns 2-6)
            task_model = TaskModel()
            task_model.task_id = row[2]
            task_model.start_time = row[3]
            task_model.manager_params = row[4]
            task_model.openwpm_version = row[5]
            task_model.browser_version = row[6]

            # crawl model (columns 1, 7-11); task_id carries the TaskModel
            crawl_model = CrawlModel()
            crawl_model.crawl_id = row[1]
            crawl_model.task_id = task_model
            crawl_model.browser_params = row[7]
            crawl_model.screen_res = row[8]
            crawl_model.ua_string = row[9]
            crawl_model.finished = row[10]
            crawl_model.start_time = row[11]

            # site visits model (columns 12-13)
            site_visits_model = SiteVisitsModel()
            site_visits_model.visit_id = row[12]
            site_visits_model.crawl_id = crawl_model
            site_visits_model.site_url = row[13]

            # http requests model (columns 0, 14-31)
            http_requests_model = HttpRequestsModel()
            http_requests_model.id = row[0]
            http_requests_model.crawl_id = crawl_model
            http_requests_model.visit_id = site_visits_model
            http_requests_model.url = row[14]
            http_requests_model.top_level_url = row[15]
            http_requests_model.method = row[16]
            http_requests_model.referrer = row[17]
            http_requests_model.headers = row[18]
            http_requests_model.channel_id = row[19]
            http_requests_model.is_XHR = row[20]
            http_requests_model.is_frame_load = row[21]
            http_requests_model.is_full_page = row[22]
            http_requests_model.is_third_party_channel = row[23]
            http_requests_model.is_third_party_window = row[24]
            http_requests_model.triggering_origin = row[25]
            http_requests_model.loading_origin = row[26]
            http_requests_model.loading_href = row[27]
            http_requests_model.req_call_stack = row[28]
            http_requests_model.content_policy_type = row[29]
            http_requests_model.post_body = row[30]
            http_requests_model.time_stamp = row[31]

            model_list.append(http_requests_model)
        return model_list
示例#16
0
    def select(self):
        """
        Load every ``http_responses`` row with its crawl, task and site visit.

        Runs one joined query and maps each result row onto an
        ``HttpResponsesModel`` whose ``crawl_id`` attribute holds a
        populated ``CrawlModel`` (itself carrying a ``TaskModel`` in
        ``task_id``) and whose ``visit_id`` attribute holds a populated
        ``SiteVisitsModel``.

        The cursor is closed and ``self.disconnect()`` is called even when
        the query fails.

        Returns:
            list: one ``HttpResponsesModel`` per result row, in cursor order
            (empty when the query yields no rows).
        """
        query = """
                SELECT         
                http_responses.id,
                crawl.crawl_id,
                task.task_id,
                task.start_time,
                task.manager_params,
                task.openwpm_version,
                task.browser_version,
                crawl.browser_params,
                crawl.screen_res,
                crawl.ua_string,
                crawl.finished,
                crawl.start_time,
                site_visits.visit_id,
                site_visits.site_url,
                http_responses.url,
                http_responses.method,
                http_responses.referrer,
                http_responses.response_status,
                http_responses.response_status_text,
                http_responses.is_cached,
                http_responses.headers,
                http_responses.channel_id,
                http_responses.location,
                http_responses.time_stamp,
                http_responses.content_hash
                FROM http_responses
                INNER JOIN crawl ON http_responses.crawl_id = crawl.crawl_id
                INNER JOIN task ON crawl.task_id = task.task_id
                INNER JOIN site_visits ON http_responses.visit_id = site_visits.visit_id        
                """
        self.connect()
        try:
            cursor = self.connection.cursor()
            try:
                cursor.execute(query)
                rows = cursor.fetchall()
            finally:
                cursor.close()
        finally:
            # Always release the connection, even if the query raised.
            self.disconnect()

        model_list = []
        for row in rows:
            # task model (columns 2-6)
            task_model = TaskModel()
            task_model.task_id = row[2]
            task_model.start_time = row[3]
            task_model.manager_params = row[4]
            task_model.openwpm_version = row[5]
            task_model.browser_version = row[6]

            # crawl model (columns 1, 7-11); task_id carries the TaskModel
            crawl_model = CrawlModel()
            crawl_model.crawl_id = row[1]
            crawl_model.task_id = task_model
            crawl_model.browser_params = row[7]
            crawl_model.screen_res = row[8]
            crawl_model.ua_string = row[9]
            crawl_model.finished = row[10]
            crawl_model.start_time = row[11]

            # site visits model (columns 12-13)
            site_visits_model = SiteVisitsModel()
            site_visits_model.visit_id = row[12]
            site_visits_model.crawl_id = crawl_model
            site_visits_model.site_url = row[13]

            # http responses model (columns 0, 14-24)
            http_responses_model = HttpResponsesModel()
            http_responses_model.id = row[0]
            http_responses_model.crawl_id = crawl_model
            http_responses_model.visit_id = site_visits_model
            http_responses_model.url = row[14]
            http_responses_model.method = row[15]
            http_responses_model.referrer = row[16]
            http_responses_model.response_status = row[17]
            http_responses_model.response_status_text = row[18]
            http_responses_model.is_cached = row[19]
            http_responses_model.headers = row[20]
            http_responses_model.channel_id = row[21]
            http_responses_model.location = row[22]
            http_responses_model.time_stamp = row[23]
            http_responses_model.content_hash = row[24]

            model_list.append(http_responses_model)
        return model_list
示例#17
0
 def add_task(self, task_title: str, the_list: ListModel):
     """
     Create a task in the given list and persist it.

     Builds a ``TaskModel`` with a freshly created id, increments the
     ``tasks:index`` counter, appends the task id to the list's
     ``list:<id>:tasks`` Redis list, and returns whatever
     ``self.save_task`` returns for the new task.
     """
     new_task = TaskModel(self.__create_id(), task_title, the_list.id_)
     self.redis_connection.incr("tasks:index")
     # Key of the Redis list holding the task ids of this list.
     tasks_key = "list:%s:tasks" % the_list.id_
     self.redis_connection.rpush(tasks_key, new_task.id_)
     saved = self.save_task(new_task)
     return saved