Example #1
    def get_update_requests(self, member_stats, member=None):
        if member:
            m = member
        else:
            for gid in member_stats.guild_ids:
                g = self.bot.get_guild(gid)
                m = g.get_member(member_stats.id)
                if m:
                    break
            else:
                return []

        items = member_stats.process_status(m.status.value, update=True)
        last_mark = items[-1]["mark"]
        reqs = [
            pymongo.UpdateOne(
                {"user_id": m.id},
                {"$pull": {"status": {"mark": {"$lt": last_mark-720}}}}
            ),
            pymongo.UpdateOne(
                {"user_id": m.id},
                {"$push": {"status": {"$each": items}}, "$setOnInsert": {"user_id": m.id, "timezone": 0}},
                upsert=True
            )
        ]
        return reqs
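The two requests are deliberately ordered: the $pull trimming entries older than 720 marks must run before the $push of new items. A minimal sketch of how a caller might flush them (the tracker and collection names are assumptions):

requests = tracker.get_update_requests(member_stats)
if requests:
    stats_collection.bulk_write(requests, ordered=True)  # ordered keeps the $pull ahead of the $push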
Example #2
def update_mongo(db,
                 collection,
                 df,
                 host='localhost',
                 port=27017,
                 username=None,
                 password=None,
                 no_id=False):
    """ Write the DataFrame's 'filtered' and 'applied' flags back into Mongo """

    # Connect to MongoDB
    conn = _connect_mongo(host=host,
                          port=port,
                          username=username,
                          password=password,
                          db=db)
    db = conn[db]
    # Build one upsert per field for each DataFrame row
    updates = []

    for _, row in df.iterrows():
        updates.append(
            pymongo.UpdateOne({'_id': row.get('_id')},
                              {'$set': {
                                  'filtered': row.get('filtered')
                              }},
                              upsert=True))
        updates.append(
            pymongo.UpdateOne({'_id': row.get('_id')},
                              {'$set': {
                                  'applied': row.get('applied')
                              }},
                              upsert=True))

    # bulk_write raises InvalidOperation on an empty request list
    if updates:
        db[collection].bulk_write(updates)
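The example relies on a _connect_mongo helper that is not shown. A minimal sketch of one plausible definition, assuming it just wraps pymongo.MongoClient and authenticates against the target database when credentials are given:

import pymongo

def _connect_mongo(host, port, username, password, db):
    # Open a MongoClient; authSource=db is an assumption about where the user is defined
    if username and password:
        return pymongo.MongoClient(host=host, port=port,
                                   username=username, password=password,
                                   authSource=db)
    return pymongo.MongoClient(host=host, port=port)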
Example #3
def find_discoverer(maxbsur):
    col_author = connectTable("qiuzh", "researchers0810_trainingset")
    cursor = col_author.find(no_cursor_timeout=True)
    count = 0
    operation = []
    for author in cursor:
        count += 1
        sur = author["sur"]
        author_id = author["_id"]
        if sur >= 0 and sur < maxbsur:
            operation.append(
                pymongo.UpdateOne({"_id": author_id}, {"$set": {
                    "ifdis": 0
                }}))
        else:
            operation.append(
                pymongo.UpdateOne({"_id": author_id}, {"$set": {
                    "ifdis": 1
                }}))

        if count % 10000 == 0:
            print("processed (x10k):", count / 10000, flush=True)
            col_author.bulk_write(operation, ordered=False)
            print("written (x10k):", count / 10000, flush=True)
            operation = []
            print(time(), flush=True)
    if operation:
        col_author.bulk_write(operation, ordered=False)
        print("final batch written:", len(operation))
    cursor.close()
Example #4
    def UpdateCountrySummary(self,countryData,d):
        countryArr = ["Taiwan", "Korea"]
        for country in countryArr:
            if not self.CheckCountryBound(country,d):
                continue
            countryCode = ""
            if country == "Taiwan":
                countryCode = "s"
            elif country == "Korea":
                countryCode = "_krs"

            inc = {}
            area = self.LatToArea(country, d["gridY"]/self.gridPerUnit)
            inc[area+"Sum"] = d["pm25"]
            inc[area+"Num"] = d["weight"]
            tday = d["time"].replace(hour=0,minute=0,second=0)
            t10min = d["time"].replace(minute=(d["time"].minute-d["time"].minute%10),second=0)
            
            tableDaily = "sensordailysum"+countryCode
            if tableDaily not in countryData:
                countryData[tableDaily] = []
            countryData[tableDaily].append(pymongo.UpdateOne({"_id":tday}, {"$inc": inc}, upsert=True))
            
            table10min = "sensor10minsum"+countryCode
            if table10min not in countryData:
                countryData[table10min] = []
            countryData[table10min].append(pymongo.UpdateOne({"_id":t10min}, {"$inc": inc}, upsert=True))
        return countryData
Example #5
def update_array(path, change_list):
    """Update an array of embedded documents; return a list of write operations (empty if there is nothing to change)."""
    write_operations = []
    # Return blank list if there are no changes at this path
    if not change_list:
        return write_operations
    # Remove documents to be updated
    write_operations.append(
        pymongo.UpdateOne({'tba_event_key': utils.TBA_EVENT_KEY},
                          {'$pull': {
                              path: {
                                  '$or': change_list
                              }
                          }}))
    # Select documents to add
    filter_change_list = []
    for change in change_list:
        equals = []
        for key, value in change.items():
            equals.append({'$eq': [f'$$item.{key}', value]})
        filter_change_list.append({'$and': equals})

    to_add = local_database_communicator.DB.competitions.aggregate([{
        '$match': {
            'tba_event_key': utils.TBA_EVENT_KEY
        }
    }, {
        '$project': {
            path: {
                '$filter': {
                    'input': f'${path}',
                    'as': 'item',
                    'cond': {
                        '$or': filter_change_list
                    }
                }
            }
        }
    }])
    # Aggregate returns a cursor object, so it must be converted to a list. `tba_event_key` is
    # guaranteed to be unique, so there will always be one and only one result.
    to_add = list(to_add)[0]
    # Remove `_id` so the only item is the array nested in the directory structure
    to_add.pop('_id')
    # Remove nesting, making `to_add` only a list of changed documents
    while isinstance(to_add, dict):
        to_add = to_add[[*to_add.keys()][0]]
    # No data matched or dataset does not exist, so warn & return blank list
    if to_add is None:
        utils.log_warning(f'No data matched, or the dataset at {path} does not exist.')
        return []
    write_operations.append(
        pymongo.UpdateOne({'tba_event_key': utils.TBA_EVENT_KEY},
                          {'$push': {
                              path: {
                                  '$each': to_add
                              }
                          }}))
    return write_operations
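For a concrete feel of the two shapes involved: change_list holds raw key/value documents that $pull matches directly, while filter_change_list rewrites each entry into aggregation syntax for $filter. A minimal illustration with hypothetical values:

change_list = [{'team_number': 254, 'match_number': 3}]
# $pull filter built above:
#   {'$pull': {path: {'$or': [{'team_number': 254, 'match_number': 3}]}}}
# $filter condition built by the loop above:
#   [{'$and': [{'$eq': ['$$item.team_number', 254]},
#              {'$eq': ['$$item.match_number', 3]}]}]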
Example #6
    def _do_write(self, collection, version, symbol, item, previous_version, segment_offset=0):

        sze = int(item.dtype.itemsize * np.prod(item.shape[1:]))

        # chunk and store the data by (uncompressed) size
        chunk_size = int(_CHUNK_SIZE / sze)

        previous_shas = []
        if previous_version:
            previous_shas = set([Binary(x['sha']) for x in
                                 collection.find({'symbol': symbol},
                                                 projection={'sha': 1, '_id': 0},
                                                 )
                                 ])

        length = len(item)

        if segment_offset > 0 and 'segment_index' in previous_version:
            existing_index = previous_version['segment_index']
        else:
            existing_index = None

        segment_index = []
        i = -1

        # Compress
        idxs = range(int(np.ceil(float(length) / chunk_size)))
        chunks = [(item[i * chunk_size: (i + 1) * chunk_size]).tobytes() for i in idxs]
        compressed_chunks = compress_array(chunks)

        # Write
        bulk = []
        for i, chunk in zip(idxs, compressed_chunks):
            segment = {'data': Binary(chunk), 'compressed': True}
            segment['segment'] = min((i + 1) * chunk_size - 1, length - 1) + segment_offset
            segment_index.append(segment['segment'])
            sha = checksum(symbol, segment)
            if sha not in previous_shas:
                segment['sha'] = sha
                bulk.append(pymongo.UpdateOne({'symbol': symbol, 'sha': sha, 'segment': segment['segment']},
                                              {'$set': segment, '$addToSet': {'parent': version['_id']}},
                                              upsert=True))
            else:
                bulk.append(pymongo.UpdateOne({'symbol': symbol, 'sha': sha, 'segment': segment['segment']},
                                              {'$addToSet': {'parent': version['_id']}}))
        if i != -1:
            collection.bulk_write(bulk, ordered=False)

        segment_index = self._segment_index(item, existing_index=existing_index, start=segment_offset,
                                            new_segments=segment_index)
        if segment_index:
            version['segment_index'] = segment_index
        version['segment_count'] = i + 1
        version['append_size'] = 0
        version['append_count'] = 0

        self.check_written(collection, symbol, version)
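Segments are content-addressed: the checksum ties a segment's bytes to its symbol, so unchanged chunks are detected via previous_shas and only re-parented with $addToSet instead of rewritten. A minimal sketch of what such a checksum could look like (an assumption, not necessarily the library's exact implementation):

import hashlib
from bson.binary import Binary

def checksum(symbol, segment):
    # Hash the symbol name together with the compressed payload
    sha = hashlib.sha1()
    sha.update(symbol.encode('ascii'))
    sha.update(segment['data'])
    return Binary(sha.digest())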
Example #7
def fmt(x):
    if x['op'] == 'i':
        op = x['o']
        return pymongo.UpdateOne({'_id': op['_id']}, {'$set': op},
                                 upsert=True)
    elif x['op'] == 'u':
        return pymongo.UpdateOne(x['o2'], x['o'], upsert=True)
    elif x['op'] == 'd':
        return pymongo.DeleteMany(x['o'])
def toptrend(thistime):
    tweetTrends = []
    updateTrend = []
    removeTrend = []
    dataB = db.retweet_permin_data.aggregate([
        {
            "$match": {
                "timeUpdate": {
                    "$gte": thistime - dt.timedelta(minutes=180)
                }
            }
        },
        {
            "$group": {
                "_id": "$id_str",
                "retweetNow": {
                    "$sum": {
                        "$cond": [{
                            "$gte": [
                                "$timeUpdate",
                                thistime - dt.timedelta(minutes=180)
                            ]
                        }, "$retweet", 0]
                    }
                },
            }
        },
    ])
    for item in dataB:
        tweetTrends.append({'id': item['_id'], 'retweet': item['retweetNow']})
    tweetTrends.sort(key=sort_by_retweet, reverse=True)
    dataA = db.master_data.find({'trend': {"$gte": 1}})
    for i in range(min(10, len(tweetTrends))):
        print(tweetTrends[i]['id'])
        print(tweetTrends[i]['retweet'])
    for i in dataA:
        removeTrend.append((pymongo.UpdateOne({'id_str': i['id_str']},
                                              {'$set': {
                                                  "trend": 0
                                              }},
                                              upsert=True)))
    if (len(removeTrend) > 0):
        db.master_data.bulk_write(removeTrend, ordered=False)
    for i in range(min(10, len(tweetTrends))):
        updateTrend.append(
            (pymongo.UpdateOne({'id_str': tweetTrends[i]['id']}, {
                '$set': {
                    "trend": i + 1,
                    "retweet_30min": tweetTrends[i]['retweet']
                }
            },
                               upsert=True)))
    if (len(updateTrend) > 0):
        db.master_data.bulk_write(updateTrend, ordered=False)
def getTopTrendInOneDay(thistime):
    tweetTrends = []
    updateTrend = []
    removeTrend = []
    dataB = db.retweet_permin_data.aggregate([
        {
            "$match": {
                "timeUpdate": {
                    "$gte":
                    dt.datetime(thistime.year, thistime.month, thistime.day, 0,
                                0, 0, 0)
                }
            }
        },
        {
            "$group": {
                "_id": "$id_str",
                "retweetNow": {
                    "$sum": {
                        "$cond": [{
                            "$gte": [
                                "$timeUpdate",
                                dt.datetime(thistime.year, thistime.month,
                                            thistime.day, 0, 0, 0, 0)
                            ]
                        }, "$retweet", 0]
                    }
                },
            }
        },
    ])
    for item in dataB:
        tweetTrends.append({'id': item['_id'], 'retweet': item['retweetNow']})
    tweetTrends.sort(key=sort_by_retweet, reverse=True)
    dataA = db.master_data.find({'trendInDay': {"$gte": 1}})
    for i in dataA:
        removeTrend.append((pymongo.UpdateOne({'id_str': i['id_str']},
                                              {'$set': {
                                                  "trendInDay": 0
                                              }},
                                              upsert=True)))
    if (len(removeTrend) > 0):
        db.master_data.bulk_write(removeTrend, ordered=False)
    for i in range(len(tweetTrends)):
        updateTrend.append(
            (pymongo.UpdateOne({'id_str': tweetTrends[i]['id']}, {
                '$set': {
                    "trendInDay": i + 1,
                    "retweet_1Day": tweetTrends[i]['retweet']
                }
            },
                               upsert=True)))
    if (len(updateTrend) > 0):
        db.master_data.bulk_write(updateTrend, ordered=False)
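Both functions sort with a sort_by_retweet key that is not shown; presumably it is just the retweet count, something like:

def sort_by_retweet(item):
    # hypothetical helper assumed by the sorts above
    return item['retweet']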
Example #10
    def save(self, data):
        """
        Save the Markov chain to the database

        :param data: the Markov chain model
        :type data: dict
        """
        # TODO: end_symbol is not enough add some start ones
        # Creating indexes
        self.model.create_index([('key', pymongo.ASCENDING)],
                                name='keys',
                                unique=True)

        res = list()
        items = data.items()
        ln = len(items)
        batch_size = 100_000
        for i, (key, value) in enumerate(data.items()):
            print('saving: {}/{}'.format(i + 1, ln))
            start = key[0] == self.tokenizer.end_symbol
            key = ' '.join(map(str, key))
            value = {
                str(value_key): value_value
                for value_key, value_value in value.items()
            }
            increments = {
                'value.{}'.format(k): val
                for k, val in value.items()
            }
            if start:
                res.append(
                    pymongo.UpdateOne({'key': key}, {
                        '$inc': increments,
                        '$set': {
                            'start': start
                        }
                    },
                                      upsert=True))
            else:
                res.append(
                    pymongo.UpdateOne({'key': key}, {
                        '$inc': increments,
                        '$setOnInsert': {
                            'start': start
                        }
                    },
                                      upsert=True))
            if (i + 1) % batch_size == 0:
                self.model.bulk_write(res, ordered=False)
                res = list()
        if len(res) > 0:
            self.model.bulk_write(res, ordered=False)
Example #11
    def update(self, data):
        """Store data in storage.

        Args:
            data (dict): mapping of id to row dicts with 'id',
                'chapter' and 'is_read' keys

        Returns:
            bool: True if the bulk write succeeded, False otherwise
        """
        operations = [
            pymongo.UpdateOne({'id': row['id']}, {
                '$set': {
                    'chapter': row['chapter'],
                    'is_read': row['is_read']
                }
            },
                              upsert=True) for _, row in data.items()
        ]
        try:
            self.collection.bulk_write(operations)
            return True
        except pymongo.errors.BulkWriteError as e:
            print(e)
            print(traceback.format_exc())
            return False
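A minimal usage sketch, assuming storage is an instance of this class and data maps ids to row dicts (the values are hypothetical):

data = {
    '42': {'id': '42', 'chapter': 10, 'is_read': True},
    '43': {'id': '43', 'chapter': 2, 'is_read': False},
}
storage.update(data)  # upserts each row by its 'id' field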
def migrate_tasks(collection, requests):
    query = {
        "$or": [{
            "time.completed": {
                "$ne": 0
            }
        }, {
            "time.cancelled": {
                "$ne": 0
            }
        }, {
            "time.failed": {
                "$ne": 0
            }
        }]
    }
    ttl = CONF["cron"]["clean_finished_tasks_after_seconds"]
    ttl = datetime.timedelta(seconds=ttl)

    for item in collection.find(query, projection=["_id", "time"]):
        expired_at = max(item["time"].values())
        expired_at = datetime.datetime.utcfromtimestamp(expired_at) + ttl
        request = pymongo.UpdateOne({"_id": item["_id"]},
                                    {"$set": {
                                        task.TTL_FIELDNAME: expired_at
                                    }})
        requests.append(request)
Example #13
def process_run_batch(collection, records):
    updates = []

    for record in records:
        run_name = record["_id"]
        if ".run" in run_name:
            # obsolete format: experiment.run_name
            _, run_name = run_name.split(".", 1)

        if not run_name or not run_name.startswith("run"):
            # unrecognized format, just group all of these as -1
            run_num = -1
        else:
            base = run_name[3:]

            if "." in base:
                parent, child = base.split(".")
                # allow for up to 1 million child runs per parent
                run_num = 1000*1000*int(parent) + int(child)
            else:
                run_num = 1000*1000*int(base)

        fd = {"_id": record["_id"]}
        ud = {"$set": {"run_num": run_num}}

        update = pymongo.UpdateOne(fd, ud)
        updates.append(update)

    # write batch
    if len(updates):
        collection.bulk_write(updates)
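The encoding packs parent and child run numbers into a single integer that sorts in run order; worked examples:

# "run12.3" -> base "12.3" -> 12 * 1_000_000 + 3 = 12_000_003
# "run7"    -> base "7"    ->  7 * 1_000_000     =  7_000_000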
Example #14
def boot_strap(P_d):
    col_author = connectTable("qiuzh", "researchers0810_trainingset")
    cursor = col_author.find(no_cursor_timeout=True)
    count = 0
    operation = []
    for author in cursor:
        count += 1
        coauthor_times = author["new_con"]
        author_id = author["_id"]
        d_i_list = np.random.binomial(coauthor_times, P_d, 20)
        surprisal_list = []
        for di in d_i_list:
            P0 = stats.binom.sf(di - 1, coauthor_times, P_d)
            surprisal_list.append(-math.log(P0))
        S = np.mean(surprisal_list)
        operation.append(
            pymongo.UpdateOne({"_id": author_id}, {"$set": {
                "bsur": S
            }}))

        if count % 10000 == 0:
            print("processed (x10k):", count / 10000, flush=True)
            col_author.bulk_write(operation, ordered=False)
            print("written (x10k):", count / 10000, flush=True)
            operation = []
            print(time(), flush=True)
    if operation:
        col_author.bulk_write(operation, ordered=False)
        print("final batch written:", len(operation))
    cursor.close()
    print(col_author.count_documents({"sur": -6}))
    print(col_author.count_documents({"dn": -1}))
    print(col_author.count_documents({"bsur": -6}))
Example #15
def initialize_surprisal():
    col_author = connectTable("qiuzh", "researchers0810_trainingset")

    cursor = col_author.find(no_cursor_timeout=True)
    # researcher_number = cursor.count()
    # print(researcher_number)
    count = 0
    operation = []
    for author in cursor:
        count += 1
        operation.append(
            pymongo.UpdateOne({"_id": author["_id"]},
                              {"$set": {
                                  "sur": -6,
                                  "bsur": -6
                              }}))

        if count % 10000 == 0:
            print("processed (x10k):", count / 10000, flush=True)
            col_author.bulk_write(operation, ordered=False)
            print("written (x10k):", count / 10000, flush=True)
            operation = []
    if operation:
        col_author.bulk_write(operation, ordered=False)
    print("finished")
    cursor.close()
    print(count)
    print(col_author.count_documents({"dn": -1}))
Example #16
def update_ink_embeddings(db, ink_embeddings):
    bulk_ops = list()
    for data in ink_embeddings:
        bulk_ops.append(pymongo.UpdateOne({"_id": data['_id']}, {'$set': {'sif_embeddings': data['sif_embeddings']}}))

    n_modified = db.inktalks.bulk_write(bulk_ops, ordered=False).bulk_api_result['nModified']
    return n_modified
def researchers_con():
    '''
    copy each researcher's coauthor count ("con") from mag_authors0510
    :return:
    '''
    col1 = connectTable('qiuzh', "mag_authors0510")
    col2 = connectTable('qiuzh', "mag_researchers0707")
    count = 0
    operation = []
    cursor = col2.find(no_cursor_timeout=True)
    for author in cursor:
        count += 1
        author_id = author["_id"]
        coauthor_number = col1.find_one({"_id": author_id})["con"]

        operation.append(
            pymongo.UpdateOne({"_id": author_id},
                              {"$set": {
                                  "con": coauthor_number
                              }}))

        if count % 10000 == 0:
            print("processed (x10k):", count / 10000, flush=True)
            col2.bulk_write(operation, ordered=False)
            print("written (x10k):", count / 10000, flush=True)
            operation = []
    if operation:
        col2.bulk_write(operation, ordered=False)
        print("final batch written:", len(operation))
    cursor.close()
Example #18
    def _mongo_main_add_batch(_mongo, source_id, batch, max_attempts=3):

        unique_accounts = 0
        attempts_left = int(max_attempts)
        mongo_batch = []
        error_details = ''

        for account_doc in batch:
            _id = account_doc.pop('_id')
            mongo_batch.append(
                pymongo.UpdateOne({'_id': _id}, {'$setOnInsert': account_doc},
                                  upsert=True))

        while attempts_left > 0:
            try:

                result = _mongo.accounts.bulk_write(mongo_batch, ordered=False)
                unique_accounts = result.upserted_count
                return unique_accounts

            # sleep for a bit and try again if there's an error
            except (pymongo.errors.OperationFailure,
                    pymongo.errors.InvalidOperation) as e:
                try:
                    error_details = str(e.details)[:128]
                except AttributeError:
                    pass
                attempts_left -= 1
                sleep(5)
                continue

        raise pymongo.errors.PyMongoError(
            'Failed to add batch to main DB after {} tries'.format(
                max_attempts))
Example #19
File: base.py Project: nbashev/noc
    def _update_object(cls,
                       data,
                       meta=None,
                       fmt=None,
                       state=None,
                       bulk=None) -> bool:
        def is_changed(d, h):
            return not d or d.get(cls.F_HASH) != h

        obj_id = cls.clean_id(data["id"])
        if meta is None and "$meta" in data:
            meta = data.pop("$meta")
        m_name = "%s_%s" % (cls.name, fmt) if fmt else cls.name
        l_name = "%s|%s|%s" % (cls.name, obj_id,
                               fmt) if fmt else "%s|%s" % (cls.name, obj_id)
        metrics["ds_%s_updated" % m_name] += 1
        # Calculate hash
        hash = cls.get_hash(data)
        # Get existing object state
        if state:
            doc = state.get(obj_id)
        else:
            doc = cls.get_collection(fmt).find_one({cls.F_ID: obj_id}, {
                cls.F_ID: 0,
                cls.F_HASH: 1
            })
        if not is_changed(doc, hash):
            logger.info("[%s] Object hasn't been changed", l_name)
            return False  # Not changed
        if not fmt and cls.on_change(data):
            hash = cls.get_hash(data)
            if not is_changed(doc, hash):
                logger.info("[%s] Object hasn't been changed", l_name)
                return False  # Not changed after altering
        metrics["ds_%s_changed" % m_name] += 1
        change_id = bson.ObjectId()
        data["change_id"] = str(change_id)
        op = {
            "$set": {
                cls.F_CHANGEID: change_id,
                cls.F_HASH: hash,
                cls.F_DATA: smart_text(orjson.dumps(data)),
            }
        }
        if meta:
            op["$set"][cls.F_META] = meta
        elif "$deleted" not in data:
            op["$unset"] = {cls.F_META: ""}
        if bulk is None:
            cls.get_collection(fmt).update_one({cls.F_ID: obj_id},
                                               op,
                                               upsert=True)
        else:
            bulk += [pymongo.UpdateOne({cls.F_ID: obj_id}, op, upsert=True)]
        logger.info("[%s] Object has been changed", l_name)
        if cls.enable_message:
            # Build MX message
            logger.info("[%s] Sending message", l_name)
            cls.send_message(data, change_id)
        return True
Example #20
def get_messages(client, token):
    updates = []
    i = 0
    for issue in client.codereview.issues.find({
            "messages": {
                "$exists": False
            }
    }).sort([("modified", pymongo.DESCENDING)]):
        print(issue["issue"])
        params = {"messages": True, "format": "json"}
        r = requests.get("https://mongodbcr.appspot.com/api/" +
                         str(issue["issue"]),
                         params=params,
                         headers=dict(Authorization="OAuth " + token))
        print(r.status_code)
        if (r.status_code == 404):
            # The patch set was probably deleted somehow?
            print("Skipping patch set with a 404")
            continue
        if (r.status_code != 200):
            print(r.text)
            exit(1)
        updates.append(
            pymongo.UpdateOne({"_id": issue["_id"]},
                              {"$set": {
                                  "messages": r.json()["messages"]
                              }}))
        i += 1
        if (i % 1000 == 0):
            client.codereview.issues.bulk_write(updates)
            updates = []
    if len(updates) > 0:
        client.codereview.issues.bulk_write(updates)
Example #21
    def check_new_messages(self):
        """Check for new messages."""
        current_user = self._get_user_data()
        query = {
            'receiver_id': current_user._id,
            'seen': False,
        }
        messages = list(self._db.messages.find(query))
        msg_ids = []
        messages_list = []
        if len(messages) == 0:
            print(_('You do not have any new messages'))
            return
        for msg in messages:
            sender = self._db.users.find_one(
                {
                    '_id': msg['sender_id']
                }
            )
            sender_login = sender['login']
            text = msg['text']
            ts = msg['ts']
            print(f'{sender_login} at {ts}: {text}')
            messages_list.append(f'{sender_login} at @@@: {text}')
            msg_ids.append(msg['_id'])
        bulk_query = [
            pymongo.UpdateOne({'_id': msg_id}, {'$set': {'seen': True}})
            for msg_id in msg_ids
        ]
        self._db.messages.bulk_write(bulk_query)
        return messages_list
Example #22
    def write(self, to_sink, data, index=None):
        coll_curr = self.bili_db[to_sink]
        # Convert the data to the format [{'a': 1}, {'a': 2}]
        if type(data) == pd.DataFrame:
            data = data.to_dict(orient='records')
        elif type(data) == dict:
            data = [data]
        elif data is None:
            print("data is empty, nothing to write")
            return
        # json_data = json.loads(data.to_json(orient='records', lines=False))
        # check for an empty dataset before any write path
        if len(data) == 0:
            print("sink:{}, input dataset is empty".format(to_sink))
            return
        if index is None or index == "":
            coll_curr.insert_many(data)
            return
        def_filter = None
        if type(index) == str:
            def_filter = lambda item: {index: item[index]}
        elif type(index) == list:
            def_filter = lambda item: {i: item[i] for i in index}
        bulkWriteResult = coll_curr.bulk_write([
            pymongo.UpdateOne(def_filter(item), {"$set": item}, upsert=True)
            for item in data
        ])
        print("sink:{}, matched {} documents".format(to_sink,
                                                     bulkWriteResult.matched_count))
        print("sink:{}, upserted {} documents".format(to_sink,
                                                      bulkWriteResult.upserted_count))
        print("sink:{}, modified {} documents".format(to_sink,
                                                      bulkWriteResult.modified_count))
Example #23
    def update_mongo_compound_variants(self, bulk):
        """Update the compound information for a bulk of variants in the database

            Args:
                bulk(dict): {'_id': scout.models.Variant}

        """
        requests = []
        for var_id in bulk:
            var_obj = bulk[var_id]
            if not var_obj.get("compounds"):
                continue
            # Add a request to update compounds
            operation = pymongo.UpdateOne(
                {"_id": var_obj["_id"]},
                {"$set": {
                    "compounds": var_obj["compounds"]
                }})
            requests.append(operation)

        if not requests:
            return

        try:
            self.variant_collection.bulk_write(requests, ordered=False)
        except BulkWriteError as err:
            LOG.warning("Updating compounds failed")
            raise err
Example #24
def initialize_discover_number():
    '''
    this function was used on 2021-08-12 on mag_researchers0810;
    on 2021-09-01 we used it on researchers0810_trainingset
    :return:
    '''
    col_author = connectTable("qiuzh", "researchers0810_trainingset")

    cursor = col_author.find(no_cursor_timeout=True)
    # researcher_number = cursor.count()
    # print(researcher_number)
    count = 0
    operation = []
    for author in cursor:
        count += 1
        operation.append(
            pymongo.UpdateOne({"_id": author["_id"]}, {"$set": {
                "dn": -1
            }}))

        if count % 10000 == 0:
            print("processed (x10k):", count / 10000, flush=True)
            col_author.bulk_write(operation, ordered=False)
            print("written (x10k):", count / 10000, flush=True)
            operation = []
    if operation:
        col_author.bulk_write(operation, ordered=False)
    print("finished")
    cursor.close()
    print(count)
    print(col_author.count_documents({"dn": -1}))
Example #25
def _getBulkUpsertOperations(arrays, simple):
    if simple:
        return list(
            map(
                lambda item: pymongo.UpdateOne(item, {'$set': item},
                                               upsert=True), arrays))
    return list(map(_transformUpdateOne, arrays))
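The non-simple branch maps rows through a _transformUpdateOne helper that is not shown; one plausible (hypothetical) definition splits each item into an _id filter and a $set update:

def _transformUpdateOne(item):
    # hypothetical: upsert by _id, setting the remaining fields
    key = {'_id': item.pop('_id')}
    return pymongo.UpdateOne(key, {'$set': item}, upsert=True)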
Example #26
    def _mongo_meta_add_batch(_mongo, source_id, batch, max_attempts=3):

        attempts_left = int(max_attempts)
        mongo_tags_batch = []

        for account_doc in batch:
            _id = account_doc['_id']
            mongo_tags_batch.append(
                pymongo.UpdateOne({'_id': _id},
                                  {'$addToSet': {
                                      's': source_id
                                  }},
                                  upsert=True))

        while attempts_left > 0:
            try:

                _mongo.account_tags.bulk_write(mongo_tags_batch, ordered=False)
                return

            # sleep for a bit and try again if there's an error
            except (pymongo.errors.OperationFailure,
                    pymongo.errors.InvalidOperation) as e:
                #errprint('\n[!] Error adding account batch to meta DB.  Attempting to continue.\n{}'.format(str(e)[:64]))
                #try:
                #    errprint(str(e.details)[:64])
                #except AttributeError:
                #    pass
                attempts_left -= 1
                sleep(5)
                continue
Example #27
    def set_string_field_to_datetime(self,
                                     self_col: Collection = None,
                                     field_tag='TimeStamp'):

        if (self_col is not None) and (self_col != self._collection):
            self.collection = self_col
            logger.info("MongoInterface now using Collection: '{%s}'",
                        self.collection.name)

        # find docs with field tags that are not of datetime format
        query = {field_tag: {'$not': {'$type': "date"}}}
        proj = {field_tag: 1}

        with self.collection.find(query, proj) as cursor:
            bulk_requests = []
            for doc in cursor:
                timestamp = parse_datetime(doc[field_tag])
                bulk_requests.append(
                    pym.UpdateOne({'_id': doc['_id']},
                                  {'$set': {
                                      field_tag: timestamp
                                  }}))

        if bulk_requests:
            result = self.collection.bulk_write(bulk_requests).modified_count
        else:
            result = 0

        logger.info("Modified TimeStamp type for %s documents", result)
        return result
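parse_datetime is not shown; one plausible definition (an assumption) is simply dateutil's parser:

from dateutil.parser import parse as parse_datetime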
Example #28
def paper_citation_number(begin, end, msg):
    '''
    this function is appropriate for citation_network0515 and mag_papers0510
    :return: adds each paper's total citation count to mag_papers0510
    '''
    colpaper = connectTable("qiuzh", "mag_papers0510")
    col_citation_network = connectTable("qiuzh",
                                        "citation_network0810_trainingset")

    count = 0
    operation = []
    cursor = colpaper.find(no_cursor_timeout=True)[begin:end]
    for paper in cursor:
        count += 1
        paper_id = paper["_id"]
        citation_number = 0
        paper_citation_relations = col_citation_network.find(
            {"id": paper_id}, no_cursor_timeout=True)
        # a PyMongo cursor is always truthy, so iterate directly
        for paper_citation_relation in paper_citation_relations:
            citation_number += len(paper_citation_relation["citation"])
        operation.append(
            pymongo.UpdateOne({"_id": paper_id},
                              {"$set": {
                                  "cn_before1996": citation_number
                              }}))
        if count % 10000 == 0:
            print(msg, "processed (x10k):", count / 10000, flush=True)
            colpaper.bulk_write(operation, ordered=False)
            print(msg, "written (x10k):", count / 10000, flush=True)
            operation = []
            print(time(), flush=True)
    if operation:
        colpaper.bulk_write(operation, ordered=False)
    cursor.close()
Example #29
    def put_bulk(self, payload_list, selector_key, priority=0):
        """Put a list of tasks into the profiles queue

        :param payload_list: payloads to save into the queue
        :param selector_key: payload key used to check whether an item
        is already in the queue
        :param priority: the bigger the better
        :returns: `BulkWriteResult`
        """
        ops = []
        for payload in payload_list:
            payload_normalized = self._payload_validator.normalized(payload)

            payload_key = 'payload.{}'.format(selector_key)
            op = pymongo.UpdateOne(
                {
                    payload_key: payload_normalized[selector_key]},
                {'$set': payload_normalized},
                upsert=True,
            )
            v = self._payload_validator.validate(payload)
            if v is False:
                raise PayloadValidationError(
                    "Validation errors: {}".format(
                        self._payload_validator.errors))
            else:
                ops.append(op)

        res = self.col.bulk_write(ops)
        return res
Example #30
def new_pub_count(begin, end, msg):
    col_author = connectTable("qiuzh", "researchers0810_trainingset")
    count = 0
    operation = []
    cursor = col_author.find(no_cursor_timeout=True)[begin:end]
    for author in cursor:
        count += 1
        author_id = author["_id"]
        pub_count = 0
        for paper in author["new_pubs"]:
            if paper["year"] <= 1996:
                pub_count += 1
        operation.append(
            pymongo.UpdateOne({"_id": author_id},
                              {"$set": {
                                  "pub_count": pub_count
                              }}))

        if count % 10000 == 0:
            print(msg, "processed (x10k):", count / 10000, flush=True)
            col_author.bulk_write(operation, ordered=False)
            print(msg, "written (x10k):", count / 10000, flush=True)
            operation = []
            print(time(), flush=True)
    if operation:
        col_author.bulk_write(operation, ordered=False)
    cursor.close()