Example #1
def filterEvents(query,
                 enames=None,
                 lvls=None,
                 lvlsets=None,
                 workers=None,
                 assignments=None,
                 typ=None):
    if enames is not None:
        query = query.filter(Event.experiment.in_(enames))
    if lvls is not None:
        query = query.filter(
            func.json_extract(Event.payload, "$.lvlid").in_(lvls))
    if lvlsets is not None:
        query = query.filter(
            func.json_extract(Event.payload, "$.lvlset").in_(lvlsets))
    if workers is not None:
        query = query.filter(
            func.json_extract(Event.payload, "$.workerId").in_(workers))
    if assignments is not None:
        query = query.filter(
            func.json_extract(Event.payload,
                              "$.assignmentId").in_(assignments))

    if typ is not None:
        query = query.filter(Event.type == typ)

    return query
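A hedged usage sketch for this helper (assuming the Event model and a SQLAlchemy session as in the example; the experiment name, level id and event type below are hypothetical placeholders):

base = session.query(Event)
narrowed = filterEvents(base,
                        enames=["pilot-study-1"],   # hypothetical experiment name
                        lvls=["lvl-03"],            # hypothetical level id
                        typ="FoundInvariant")
for event in narrowed.all():
    print(event.type, event.payload)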
Example #2
    def get_balance(self):
        if self.owner not in Wallet.ledger:
            balance = 0
            q = '%%"%s"%%' % self.owner
            blocks = self.session.query(
                func.json_extract(Block.data, '$.transactions[*].to'),
                func.json_extract(Block.data, '$.transactions[*].from'),
                func.json_extract(Block.data, '$.transactions[*].amount'),
            ).filter(
                or_(
                    func.json_extract(Block.data,
                                      '$.transactions[*].to').like(q),
                    func.json_extract(
                        Block.data, '$.transactions[*].from').like(q))).all()

            for block in blocks:
                _t, _f, _a = block
                _t = json.loads(_t)
                _f = json.loads(_f)
                _a = json.loads(_a)
                data = [{
                    't': t,
                    'f': f,
                    'a': a
                } for t, f, a in zip(_t, _f, _a)]

                for transaction in data:
                    if transaction['f'] == self.owner:
                        balance -= transaction['a']
                    elif transaction['t'] == self.owner:
                        balance += transaction['a']

            Wallet.ledger[self.owner] = balance

        return Wallet.ledger[self.owner]
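Note: the LIKE filters above only match if Block.data is a JSON document with a top-level transactions array. A sketch of the shape the JSON paths in this query appear to assume (field names taken from the paths, values hypothetical):

example_block_data = {
    "transactions": [
        {"from": "alice", "to": "bob", "amount": 5},
        {"from": "bob", "to": "carol", "amount": 2},
    ]
}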
Example #3
def get_crawlingtask_info(objectid: str, PIC: str, actionid: str):

    if actionid == V4CrawlingTaskActionMaster.ARTIST_ALBUM_IMAGE:
        url = "url"
    else:
        url = "youtube_url"

    get_crawlingtask_info = (db_session.query(
        Crawlingtask.id,
        Crawlingtask.objectid,
        func.json_extract(Crawlingtask.taskdetail, f"$.{url}").label(
            "url"),
        func.json_extract(Crawlingtask.taskdetail, "$.when_exists").label(
            "when_exists"),
        Crawlingtask.status

    )
        .select_from(Crawlingtask)
        .filter(Crawlingtask.objectid == objectid,
                Crawlingtask.actionid == actionid,
                func.json_extract(Crawlingtask.taskdetail, "$.PIC") == PIC,
                # to delete
                # func.json_extract(Crawlingtask.taskdetail, "$.data_source_format_id") == "1A67A5F1E0D84FB9B48234AE65086375",
                )
        .order_by(
        Crawlingtask.created_at.desc())
    ).first()
    return get_crawlingtask_info
Example #4
def get_one_youtube_url_and_youtube_uploader_by_youtube_url(youtube_url: str):
    return db_session.query(DataSource).filter((DataSource.valid == 1),
                                               func.json_extract(DataSource.info, "$.source.uploader") != None,
                                               func.json_extract(DataSource.info, "$.source.title") != None,
                                               DataSource.source_uri == youtube_url,
                                               func.json_extract(DataSource.info, "$.source.title") != "_",
                                               ).order_by(
        DataSource.created_at.desc()).limit(1).all()
Example #5
def events(session, **kwargs):
    query = session.query(
        Event,
        func.json_extract(Event.payload, "$.lvlid").label("lvlid"),
        func.json_extract(Event.payload, "$.lvlset").label("lvlset"),
        func.json_extract(Event.payload, "$.workerId").label("workerId"),
        func.json_extract(Event.payload,
                          "$.assignmentId").label("assignmentId"))
    return filterEvents(query, **kwargs).all()
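A hedged usage sketch: each returned row is a tuple of the Event plus the four labeled JSON extracts, and the keyword arguments are forwarded to filterEvents from Example #1 (the worker id below is a hypothetical placeholder):

rows = events(session, typ="FoundInvariant", workers=["A1B2C3"])
for event, lvlid, lvlset, worker_id, assignment_id in rows:
    print(lvlid, worker_id, event.type)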
Example #6
def get_track_wiki(trackid: tuple):
    track_wiki = (db_session.query(Track.id,
                                   Track.title,
                                   Track.artist,
                                   func.json_extract(Track.info, "$.wiki_url").label("wiki_url"),
                                   func.json_extract(Track.info, "$.wiki.brief").label("wiki_content")
                                   )
                  .select_from(Track)
                  .filter(Track.id.in_(trackid))
                  )
    return track_wiki
Example #7
def get_album_wiki(albumuuid: tuple):
    album_wiki = (db_session.query(Album.uuid,
                                   Album.title,
                                   Album.artist,
                                   func.json_extract(Album.info, "$.wiki_url").label("wiki_url"),
                                   func.json_extract(Album.info, "$.wiki.brief").label("wiki_content")
                                   )
                  .select_from(Album)
                  .filter(Album.uuid.in_(albumuuid))
                  )
    return album_wiki
Example #8
def generate_json_expansion_values(properties):
    property_query_cols = []
    for prop_i, prop in enumerate(properties):
        prop_col = func.json_extract(metric_properties_j.properties,
                                     f"$[{prop_i}]").label(f"prop_{prop_i}")
        agg_col = func.json_unquote(
            func.json_extract(metric_aggregations_j.aggregations,
                              f"$[{prop_i}]")).label(f"agg_{prop_i}")
        property_query_cols.append(prop_col)
        property_query_cols.append(agg_col)

    return property_query_cols
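A minimal sketch of how the returned columns might be consumed (assuming metric_properties_j and metric_aggregations_j are mapped and joined as in the surrounding code; the property names only determine how many column pairs are built):

cols = generate_json_expansion_values(["country", "device"])
# cols is [prop_0, agg_0, prop_1, agg_1]: labeled json_extract expressions
query = session.query(*cols)   # hypothetical session with the needed joins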
Example #9
def get_youtube_title_and_youtube_uploader_from_youtube_url(youtube_url: str):
    datasourceid = (db_session.query(
        DataSource.source_uri.label('youtube_url'),
        func.json_extract(DataSource.info, "$.source.title").label(
            "youtube_title"),
        func.json_extract(DataSource.info, "$.source.uploader").label(
            "youtube_uploader"))
                    .select_from(DataSource)
                    .filter(DataSource.source_uri == youtube_url,
                            DataSource.valid == 1
                            )
                    ).group_by(DataSource.track_id).order_by(DataSource.created_at.desc())
    return datasourceid
Example #10
def postpositional_case(conn, joined, *ands):
    return conn.execute(
        select([func.count(tables["ud_mwe"].c.id)]).where(
            and_(
                tables["ud_mwe"].c.typ == MweType.inflection,
                func.json_extract(tables["ud_mwe_token"].c.feats,
                                  "$.Case").isnot(None),
                func.json_extract(tables["ud_mwe_token"].c.feats,
                                  "$.Case") != "Par",
                func.json_extract(tables["ud_mwe_token"].c.feats,
                                  "$.Case") != "Gen",
                func.json_extract(tables["ud_mwe_token"].c.feats, "$.Case") !=
                "Nom", *ands)).select_from(joined_token(joined))).scalar()
Example #11
    def get_lifecycle_state_details(self, session, resource_type_input):
        """Count of lifecycle states of the specified resources.

        Generate/return the count of lifecycle states (ACTIVE, DELETE_PENDING)
        of the specific resource type input (project, folder) for this inventory
        index.

        Args:
            session (object) : session object to work on.
            resource_type_input (str) : resource type to get lifecycle states.

        Returns:
            dict: a (lifecycle state -> count) dictionary
        """
        resource_data = Inventory.resource_data

        details = dict(
            session.query(
                func.json_extract(resource_data, '$.lifecycleState'),
                func.count()).filter(Inventory.inventory_index_id == self.id).
            filter(Inventory.category == 'resource').filter(
                Inventory.resource_type == resource_type_input).group_by(
                    func.json_extract(resource_data,
                                      '$.lifecycleState')).all())

        LOGGER.debug('Lifecycle details for %s:\n%s', resource_type_input,
                     details)

        # Lifecycle can be None if Forseti is installed to a non-org level.
        for key in list(details.keys()):
            if key is None:
                continue
            new_key = key.replace('\"', '').replace('_', ' ')
            new_key = ' - '.join([resource_type_input, new_key])
            details[new_key] = details.pop(key)

        if len(details) == 1 and list(details.keys())[0] is None:
            return {}

        if len(details) == 1:
            # If the lifecycle state is DELETE PENDING or
            # LIFECYCLE STATE UNSPECIFIED the added_key_string
            # will be RESOURCE_TYPE - ACTIVE, which is then set
            # to 0.
            added_key_str = 'ACTIVE'
            if 'ACTIVE' in list(details.keys())[0]:
                added_key_str = 'DELETE PENDING'
            added_key = ' - '.join([resource_type_input, added_key_str])
            details[added_key] = 0

        return details
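A worked example of the key rewriting above, for resource_type_input = 'project' (the counts are hypothetical; json_extract returns the lifecycle state as a quoted JSON string, which is why the quotes are stripped):

raw = {'"ACTIVE"': 12, '"DELETE_PENDING"': 3}      # hypothetical counts
rewritten = {
    'project - ' + key.replace('"', '').replace('_', ' '): count
    for key, count in raw.items()
}
# rewritten == {'project - ACTIVE': 12, 'project - DELETE PENDING': 3}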
Example #12
def get_youtube_info_from_trackid(track_ids: list, format_id):
    datasourceid = (db_session.query(DataSource.track_id.label('track_id'),
                                     DataSource.id.label('datasource_id'),
                                     DataSource.source_uri.label('youtube_url'),
                                     func.json_extract(DataSource.info, "$.source.title").label(
                                         "youtube_title"),
                                     func.json_extract(DataSource.info, "$.source.uploader").label(
                                         "youtube_uploader"))
                    .select_from(DataSource)
                    .filter(DataSource.track_id.in_(track_ids),
                            DataSource.valid == 1,
                            DataSource.format_id == format_id)
                    ).group_by(DataSource.track_id).order_by(DataSource.created_at.desc())
    return datasourceid
Example #13
def verified_by_worker(lvl, worker, exp):
    s = session.query(VerifyData)\
      .filter(VerifyData.lvl == lvl)\
      .filter(func.json_extract(VerifyData.config, "$.mode") == "individual")\
      .filter(func.json_extract(VerifyData.config, "$.enames[0]") == exp)\
      .filter(func.json_extract(VerifyData.payload, "$.workers[0]") == worker)
    vs = s.all()
    if (len(vs) == 0):
        assert len(
            events(session, typ='InvariantFound', lvls=[lvl],
                   workers=[worker])) == 0
        return False
    assert (len(vs) == 1), "Not 1 VerifyData entry for {}, {}, {}".format(
        lvl, worker, exp)
    return vs[0].provedflag
Example #14
    def find(cls, publish=None, key_value=None, has_key=None):
        '''
        Return PublishMetadata instances by query arguments

            Args:
                publish  (Publish) : PublishMetadata parent Publish instance.
                key_value  (Tuple) : PublishMetadata match {'key' : 'value'}.
                has_key      (str) : PublishMetadata has 'key'.

            Returns:
                A list of PublishMetadata instances matching find arguments.
        '''
        query = cls.query()

        if publish:
            cls.assert_isinstance(publish, 'Publish')
            query = query.filter(cls.publish_id == publish.id)

        if key_value:
            assert isinstance(
                key_value,
                tuple), ('key_value must be a tuple. Given {}'.format(
                    type(key_value)))
            query = query.filter(PublishMetadata.metadata[key_value[0]] ==
                                 cast(key_value[1], JSON))

        if has_key:
            query = query.filter(
                func.json_extract(PublishMetadata.metadata, '$."{}"'.format(
                    has_key)) != None)

        return query.all()
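A hedged usage sketch of the three lookup modes (find appears to be exposed as a classmethod, judging from the cls parameter; my_publish and the key names below are hypothetical):

all_for_publish = PublishMetadata.find(publish=my_publish)
exact_match = PublishMetadata.find(key_value=("department", "lighting"))
has_frame_range = PublishMetadata.find(has_key="frame_range")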
Example #15
def get_crawl_E5_06_status(ituneid: list):
    crawlingtask_alias = aliased(Crawlingtask, name='crawlingtask_alias')

    crawl_E5_06_status = (
        db_session.query(func.json_unquote(func.json_extract(Crawlingtask.taskdetail, "$.album_id")).label("ituneid"),
                         Crawlingtask.status.label('06_status'),
                         crawlingtask_alias.status.label('E5_status')
                         )
        .select_from(Crawlingtask)
        .outerjoin(crawlingtask_alias,
                   text("crawlingtask_alias.id = Crawlingtasks.ext ->> '$.itunes_track_task_id'"))
        .filter(func.DATE(Crawlingtask.created_at) == func.current_date(),
                Crawlingtask.actionid == '9C8473C36E57472281A1C7936108FC06',
                func.json_extract(Crawlingtask.taskdetail, "$.album_id").in_(ituneid)
                )
        )
    return crawl_E5_06_status
Example #16
def get_crawl_image_status(gsheet_name: str, sheet_name: str):
    crawl_artist_image_status = (db_session.query(Crawlingtask.id,
                                                  Crawlingtask.actionid,
                                                  Crawlingtask.objectid,
                                                  Crawlingtask.taskdetail,
                                                  Crawlingtask.status)
                                 .select_from(Crawlingtask)
                                 .filter(func.json_extract(Crawlingtask.taskdetail, "$.PIC") == f"{gsheet_name}_{sheet_name}",
                                         Crawlingtask.actionid == 'OA9CPKSUT6PBGI1ZHPLQUPQCGVYQ71S9')
                                 .order_by(Crawlingtask.objectid, Crawlingtask.created_at.desc())
                                 )
    return crawl_artist_image_status
Example #17
def _json_path_clause(kv_pair):
    path_expr, value = kv_pair
    key, path = _parse_path_expr(path_expr)

    json_match = sa_func.json_contains(
        sa_func.json_extract(models.Variable.value, path), value)

    # NOTE(thomasem): Order is important here. MySQL will short-circuit and
    # not properly validate the JSON Path expression when the key doesn't exist
    # if the key match comes first in the and_(...) expression. So, let's put
    # the json_match first.
    return and_(json_match, models.Variable.key == key)
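A hedged sketch of how the clause composes into a query (the exact key/path format accepted by _parse_path_expr is not shown here, so path_expr is only a placeholder, and session is assumed to be a SQLAlchemy session):

path_expr = "some_key.nested.path"   # placeholder; real format depends on _parse_path_expr
clause = _json_path_clause((path_expr, "expected-value"))
matching = session.query(models.Variable).filter(clause).all()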
Example #18
def get_youtube_crawlingtask_info(track_id: str, PIC: str, format_id: str):
    get_crawlingtask_info = (db_session.query(
        Crawlingtask.id,
        Crawlingtask.objectid,
        func.json_extract(Crawlingtask.taskdetail, "$.youtube_url").label(
            "youtube_url"),
        func.json_extract(Crawlingtask.taskdetail, "$.when_exists").label(
            "when_exists"),
        Crawlingtask.status
    )
        .select_from(Crawlingtask)
        .filter(Crawlingtask.objectid == track_id,
                Crawlingtask.actionid == V4CrawlingTaskActionMaster.DOWNLOAD_VIDEO_YOUTUBE,
                func.json_extract(Crawlingtask.taskdetail, "$.PIC") == PIC,
                func.json_extract(Crawlingtask.taskdetail, "$.data_source_format_id") == format_id,
                Crawlingtask.priority != 10000,
                )
        .order_by(
        Crawlingtask.created_at.desc())
    ).first()
    return get_crawlingtask_info
Example #19
def prog_exp(worker):
    """
    Return the average self-reported prog experience rounded to the nearest int
    """
    scores = session.query(
        func.json_extract(
            SurveyData.payload,
            '$.prog_experience')).filter(SurveyData.worker == worker).all()
    if (len(scores) == 0):
        return None
    ave_math_exp = sum(int(score[0]) for score in scores) * 1.0 / len(scores)
    return int(round(ave_math_exp))
Example #20
def get_crawlingtask_youtube_info(objectid: str, PIC: str, actionid: str):
    get_crawlingtask_info = (db_session.query(
        Crawlingtask.id,
        Crawlingtask.objectid,
        func.json_extract(Crawlingtask.taskdetail, "$.youtube_url").label(
            "youtube_url"),
        func.json_extract(Crawlingtask.taskdetail, "$.when_exists").label(
            "when_exists"),
        func.json_extract(Crawlingtask.taskdetail, "$.data_source_format_id").label(
            "data_source_format_id"),
        Crawlingtask.status

    )
        .select_from(Crawlingtask)
        .filter(Crawlingtask.objectid == objectid,
                Crawlingtask.actionid == actionid,
                func.json_extract(Crawlingtask.taskdetail, "$.PIC") == PIC,
                )
        .order_by(
        Crawlingtask.created_at.desc())
    ).first()
    return get_crawlingtask_info
Example #21
        def __getitem__(self, index):
            if isinstance(index, tuple):
                index = "$%s" % ("".join([
                    "[%s]" % elem if isinstance(elem, int) else '."%s"' % elem
                    for elem in index
                ]))
            elif isinstance(index, int):
                index = "$[%s]" % index
            else:
                index = '$."%s"' % index

            # json_extract does not appear to return JSON sub-elements
            # which is weird.
            return func.json_extract(self.expr, index, type_=NullType)
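For reference, the three branches above produce the following JSON path strings (the assertions below just replay the string formatting from the method):

assert '$."%s"' % "key" == '$."key"'
assert "$[%s]" % 2 == "$[2]"
assert "$%s" % "".join(
    "[%s]" % e if isinstance(e, int) else '."%s"' % e for e in ("a", 0, "b")
) == '$."a"[0]."b"'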
Example #22
def get_s11_crawlingtask_info(pic: str):
    # JOIN same table with aliases on SQLAlchemy
    crawlingtasks_06 = aliased(Crawlingtask, name='crawlingtasks_06')
    crawlingtasks_E5 = aliased(Crawlingtask, name='crawlingtasks_e5')

    s11_crawlingtask_info = (db_session.query(
        func.json_extract(crawlingtasks_06.taskdetail, "$.album_id").label("itune_album_id"),
        crawlingtasks_06.id.label("06_id"),
        crawlingtasks_06.status.label("06_status"),
        crawlingtasks_E5.id.label("e5_id"),
        crawlingtasks_E5.status.label("e5_status")
    )
                                 .select_from(crawlingtasks_06)
                                 .outerjoin(crawlingtasks_E5,
                                            # crawlingtasks_E5.id == func.json_extract(crawlingtasks_06.ext, "$.itunes_track_task_id"), #performance query problem
                                            text("crawlingtasks_E5.id = crawlingtasks_06.ext ->> '$.itunes_track_task_id'")
                                            )).filter(
        crawlingtasks_06.actionid == "9C8473C36E57472281A1C7936108FC06",
        func.json_extract(crawlingtasks_06.taskdetail, "$.PIC") == pic,
        # func.json_extract(crawlingtasks_06.taskdetail, "$.PIC") == 'Contribution_Apr_2021_Youtube collect_experiment_2021-06-07',
    ).order_by(
        crawlingtasks_06.created_at.desc())
    return s11_crawlingtask_info
Example #23
def get_datasourceId_from_crawlingtask():
    conditions2 = "datasources.TrackId = crawlingtasks.ObjectId and datasources.SourceURI = crawlingtasks.TaskDetail ->> '$.youtube_url' and datasources.FormatID = crawlingtasks.TaskDetail ->> '$.data_source_format_id' and datasources.Valid = 1"
    record = (db_session.query(Crawlingtask.objectid.label("track_id"),
                               DataSource.format_id,
                               func.json_extract(Crawlingtask.taskdetail, "$.youtube_url").label("youtube_url"),
                               DataSource.id.label("datasource_id"),
                               Crawlingtask.status.label("crawler_status"),
                               Crawlingtask.id.label("crawlingtask_id"),
                               func.json_extract(Crawlingtask.ext, "$.message").label("message")
                               )
              .select_from(Crawlingtask)
              .outerjoin(DataSource,
                         text(conditions2)
                          )
              .filter(
                      func.DATE(Crawlingtask.created_at) == func.current_date(),
                      # func.current_date(),
                      Crawlingtask.actionid == 'F91244676ACD47BD9A9048CF2BA3FFC1',
                      Crawlingtask.priority.in_([999, 10000])
                      )
              .group_by(Crawlingtask.objectid,
                        func.json_extract(Crawlingtask.taskdetail, "$.youtube_url"),
                        func.json_extract(Crawlingtask.taskdetail, "$.data_source_format_id"))
              )
    return record
Example #24
def verified_up_to_exp(lvl, exp, exp_typ, experiment):
    """
    Return whether a lvl was solved by adding all of the invariants up to a
    math/prog experience level exp
    """
    assert exp_typ in ['math', 'prog']
    tag = '{}-exp-le-{}'.format(exp_typ, exp)
    exp_f = math_exp if exp_typ == 'math' else prog_exp

    entries = session.query(VerifyData.provedflag)\
            .filter(VerifyData.lvl == lvl)\
            .filter(func.json_extract(VerifyData.config, '$.mode') == 'combined')\
            .filter(func.json_extract(VerifyData.config, '$.tag') == tag)\
            .order_by(VerifyData.time.desc())\
            .all()

    if (len(entries) == 0):
        workers = workers_played(lvl, experiment)
        workers = [
            w for w in workers if exp_f(w) is not None and exp_f(w) <= exp
        ]
        assert len(workers) == 0, "{}, {}, {}".format(lvl, exp, workers)
        return False
    return entries[0][0] == 1
Example #25
def get_latest_evaluate_by_doc_type_id(nlp_task_id, doc_type_id):
    evaluate_result_path = '$.result.overall."f1-score"' if nlp_task_id == int(NlpTaskEnum.classify) else '$.scores.f1_score.overall.f1'
    q = session.query(EvaluateTask)\
        .join(TrainTask, TrainTask.train_task_id == EvaluateTask.train_task_id)\
        .join(TrainJob, TrainJob.train_job_id == TrainTask.train_job_id)\
        .filter(
            EvaluateTask.evaluate_task_status == int(StatusEnum.success),
            TrainJob.doc_type_id == doc_type_id,
            ~EvaluateTask.is_deleted,
            ~TrainTask.is_deleted,
            ~TrainJob.is_deleted
        ).order_by(
            sa_func.json_extract(EvaluateTask.evaluate_task_result, evaluate_result_path).desc())
    return q.first()
Example #26
def get_crawlingtask_status(gsheet_name: str, sheet_name: str, actionid: str):
    if actionid == V4CrawlingTaskActionMaster.ARTIST_ALBUM_IMAGE:
        url = "url"
    else:
        url = "youtube_url"

    crawl_artist_image_status = (db_session.query(
        Crawlingtask.id,
        Crawlingtask.objectid,
        func.json_extract(Crawlingtask.taskdetail, f"$.{url}").label(
            f"{url}"),
        func.json_extract(Crawlingtask.taskdetail, "$.when_exists").label(
            "when_exists"),
        Crawlingtask.status,
        func.json_extract(Crawlingtask.ext, "$.message").label(
            "message")
    )
                                 .select_from(Crawlingtask)
                                 .filter(
        func.json_extract(Crawlingtask.taskdetail, "$.PIC") == f"{gsheet_name}_{sheet_name}",
        Crawlingtask.actionid == actionid)
                                 .order_by(Crawlingtask.objectid, Crawlingtask.created_at.desc())
                                 )
    return crawl_artist_image_status
Example #27
def get_album_image_cant_crawl(artistuuid: list):
    album_image_cant_crawl = (db_session.query(Album.title,
                                               Album.uuid,
                                               func.json_extract(Crawlingtask.taskdetail, "$.url").label(
                                                   "image_url"),
                                               Crawlingtask.status
                                               )
                              .select_from(Crawlingtask)
                              .join(Album,
                                    Album.uuid == Crawlingtask.objectid)
                              .filter(func.DATE(Crawlingtask.created_at) == func.current_date(),
                                      Crawlingtask.actionid == 'OA9CPKSUT6PBGI1ZHPLQUPQCGVYQ71S9',
                                      Crawlingtask.objectid.in_(artistuuid)
                                      )
                              .order_by(Crawlingtask.objectid, Crawlingtask.created_at.desc())
                              )
    return album_image_cant_crawl
Example #28
def allInvs(session,
            enames=None,
            lvls=None,
            lvlsets=None,
            workers=None,
            assignments=None,
            enameSet=None,
            lvlSet=None,
            lvlsetSet=None,
            workerSet=None,
            assignmentSet=None,
            colSwaps=None):
    q = session.query(
        Event.experiment,
        func.json_extract(Event.payload, "$.lvlid"),
        func.json_extract(Event.payload, "$.lvlset"),
        func.json_extract(Event.payload, "$.workerId"),
        func.json_extract(Event.payload, "$.assignmentId"),
        func.json_extract(Event.payload, "$.raw"),
        func.json_extract(Event.payload, "$.canonical"),
        func.ifnull(func.json_extract(Event.payload, "$.colSwap"), 0)
      ) \
      .filter(Event.type == "FoundInvariant")

    q = filterEvents(q, enames, lvls, lvlsets, workers, assignments)

    def gen():
        for row in q.all():
            if enameSet is not None:
                enameSet.add(row[0])
            if lvlSet is not None:
                lvlSet.add(row[1])
            if lvlsetSet is not None:
                lvlsetSet.add(row[2])
            if workerSet is not None:
                workerSet.add(row[3])
            if assignmentSet is not None:
                assignmentSet.add(row[4])
            if colSwaps is not None:
                try:
                    colSwaps[row[7]] += 1
                except KeyError:
                    colSwaps[row[7]] = 1

            yield (row[5], row[6])

    return set(dict(gen()).items())
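A hedged usage sketch: the optional set/dict arguments are filled in as a side effect while the (raw, canonical) invariant pairs are collected (the experiment name below is hypothetical):

workers = set()
col_swaps = {}
invs = allInvs(session, enames=["pilot-study-1"],
               workerSet=workers, colSwaps=col_swaps)
print(len(invs), len(workers), col_swaps)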
Example #29
def collect_from_youtube_query():
    collect_from_youtube_query = (db_session.query(PointLog.created_at,
                                                   PointLog.valid,
                                                   PointLog.id,
                                                   func.json_extract(PointLog.info, "$.email").label("email"),
                                                   func.json_extract(PointLog.info, "$.youtube_url").label(
                                                       "contribution_url"),
                                                   Album_Track.track_id,
                                                   Track.title,
                                                   Track.artist.label("track_artist"),
                                                   Album.title.label("album_title"),
                                                   Album.valid.label("album_valid"),
                                                   Track.valid.label("track_valid"),
                                                   func.json_extract(PointLog.info, "$.content_type").label(
                                                       "contribution_type"),
                                                   func.json_extract(PointLog.info, "$.comment").label(
                                                       "contribution_comment"),
                                                   PointLog.crawler_status,
                                                   func.json_extract(PointLog.ext, "$.crawler_id").label("crawler_id"),
                                                   Crawlingtask.status,
                                                   PointLog.action_type,
                                                   func.json_extract(DataSource.info, "$.source.title").label(
                                                       "youtube_title"),
                                                   func.json_extract(DataSource.info, "$.source.uploader").label(
                                                       "youtube_uploader")
                                                   )
                                  .select_from(PointLog)
                                  .outerjoin(Crawlingtask,
                                             text("crawlingtasks.id = pointlogs.ext ->> '$.crawler_id'"))
                                  .outerjoin(Album_Track,
                                             (Album_Track.track_id == PointLog.target_id))
                                  .outerjoin(Album,
                                             (Album.uuid == Album_Track.album_uuid) & (Album.valid == 1))
                                  .outerjoin(Track,
                                             (Track.id == Album_Track.track_id) & (Track.id != "") & (Track.valid == 1))
                                  .outerjoin(DataSource,
                                             text("datasources.id = crawlingtasks.ext ->> '$.data_source_id'"))
                                  .filter(PointLog.action_type == "CY",
                                          PointLog.created_at > '2020-10-01',
                                          PointLog.valid == 0
                                          )
                                  .group_by(PointLog.id)
                                  .order_by(PointLog.created_at.desc())
                                  # .all()
                                  )
    return collect_from_youtube_query
Example #30
        def __getitem__(self, index):
            if isinstance(index, tuple):
                index = "$%s" % (
                    "".join(
                        [
                            "[%s]" % elem
                            if isinstance(elem, int)
                            else '."%s"' % elem
                            for elem in index
                        ]
                    )
                )
            elif isinstance(index, int):
                index = "$[%s]" % index
            else:
                index = '$."%s"' % index

            # json_extract does not appear to return JSON sub-elements
            # which is weird.
            return func.json_extract(self.expr, index, type_=NullType)
Example #31
def get_crawlingtask_download_video_youtube_status_from_df(gsheet_name: str, sheet_name: str):
    '''
    MP3_SHEET_NAME = {"sheet_name": "MP_3", "fomatid": DataSourceFormatMaster.FORMAT_ID_MP3_FULL,
                      "column_name": ["track_id", "Memo", "Mp3_link", "url_to_add"]}
    MP4_SHEET_NAME = {"sheet_name": "MP_4", "fomatid": DataSourceFormatMaster.FORMAT_ID_MP4_FULL,
                      "column_name": ["track_id", "Memo", "MP4_link", "url_to_add"]}
    '''

    crawl_artist_image_status = (db_session.query(Crawlingtask.id,
                                                  Crawlingtask.actionid,
                                                  Crawlingtask.objectid,
                                                  Crawlingtask.taskdetail,
                                                  Crawlingtask.status,
                                                  Crawlingtask.ext
                                                  )
                                 .select_from(Crawlingtask)
                                 .filter(
        func.json_extract(Crawlingtask.taskdetail, "$.PIC") == f"{gsheet_name}_{sheet_name}",
        Crawlingtask.actionid == V4CrawlingTaskActionMaster.DOWNLOAD_VIDEO_YOUTUBE)
                                 .order_by(Crawlingtask.objectid, Crawlingtask.created_at.desc())
                                 )
    return crawl_artist_image_status