示例#1
0
    def query(self,
              codes: list,
              fields: list,
              startdate: datetime,
              enddate: datetime,
              force_update=False,
              update_only=False,
              skip_update=False) -> defaultdict:
        '''Return stored data for ``codes`` and ``fields`` within a date range.

        Args:
            codes: Code names to fetch; a single string is treated as a
                one-element list.
            fields: Field names to fetch; a single string is treated as a
                one-element list.
            startdate: Lower bound of the date filter (``$gte``).
            enddate: Upper bound of the date filter (``$lte``).
            force_update: When true, call ``self.remove`` for the requested
                range before anything else.
            update_only: When true, skip reading and return an empty mapping.
            skip_update: When true, do not call ``self._auto_update``.

        Returns:
            A ``defaultdict(DataFrame)`` keyed by the name-compliant field
            name. Each frame holds the requested code columns plus the date
            column, with ``'~'`` in column names turned back into ``'.'``.
        '''
        # Convert string code to list. Without this a bare string would be
        # iterated character by character downstream and would fail on the
        # ``codes + [...]`` concatenation below.
        codes = [codes] if isinstance(codes, str) else codes
        fields = [fields] if isinstance(fields, str) else fields

        if force_update:
            # Remove targeted data from database if deemed outdated
            self.remove(codes, startdate, enddate, fields)

        if not skip_update:
            self._auto_update(codes, startdate, enddate, fields)

        res: defaultdict = defaultdict(DataFrame)
        if update_only:
            return res

        codes = mongodb_name_compliance(codes)
        fields = mongodb_name_compliance(fields)
        # The filter and projection are the same for every field, so build
        # them once instead of once per sub-collection.
        q_doc = {self.date_name: {'$gte': startdate, '$lte': enddate}}
        projection = codes + [self.date_name]
        for field in fields:
            subcol = self.col[field]
            v = DataFrame(subcol.find(q_doc, projection))
            # NOTE(review): del_id presumably drops Mongo's `_id` column;
            # it is defined outside this view.
            df = del_id(v)
            df.columns = [c.replace('~', '.') for c in df.columns]
            res[field] = df
        return res
示例#2
0
 def solve_remove_params(self, codes: list, fields: list, start: datetime,
                         end: datetime):
     '''Solve params for stored data that falls inside the target range.

     Yields ``(code, field, bubbles, overlap)`` for every code/field pair
     whose recorded status intersects ``[start, end + 1 day]``. ``code`` and
     ``field`` are yielded as passed in; the status lookup uses their
     name-compliant forms.
     '''
     window = [start, end + timedelta(1)]
     coverage = self.status[codes, fields]
     pairs = ((code, field) for code in codes for field in fields)
     for code, field in pairs:
         key = mongodb_name_compliance(code), mongodb_name_compliance(field)
         bubbles = coverage[key]
         overlap: Bubbles = bubbles.intersect(window)
         if overlap.isempty:
             # Nothing stored in the window for this pair.
             continue
         yield code, field, bubbles, overlap
示例#3
0
 def solve_update_params(self, codes: list, fields: list, start: datetime,
                         end: datetime):
     '''Solve params for data that need to be downloaded.

     Yields ``(code, field, bubbles, missing)`` for every code/field pair
     whose recorded status leaves a gap inside ``[start, end + 1 day]``.
     ``code`` and ``field`` are yielded as passed in; the status lookup uses
     their name-compliant forms.
     '''
     window = [start, end + timedelta(1)]
     coverage = self.status[codes, fields]
     pairs = ((code, field) for code in codes for field in fields)
     for code, field in pairs:
         key = mongodb_name_compliance(code), mongodb_name_compliance(field)
         bubbles = coverage[key]
         missing: Bubbles = bubbles.gaps(window)
         if missing.isempty:
             # The window is already fully covered for this pair.
             continue
         yield code, field, bubbles, missing
示例#4
0
    def __getitem__(self, key):
        '''Look up recorded status by a ``(codes, fields)`` key.

        Args:
            key: A 2-tuple ``(codes, fields)``. Each element is either a
                single string or a list of strings.

        Returns:
            When both elements are strings: the ``Bubbles`` built from that
            code's document for that field, or an empty ``Bubbles()`` when
            the document or the field is missing.
            Otherwise: a ``defaultdict(Bubbles)`` keyed by ``(code, field)``
            as stored in the documents, holding every requested field found
            on every matching document.
        '''
        codes, fields = key
        # Decide the return shape before the names are wrapped into lists.
        single_pair = isinstance(codes, str) and isinstance(fields, str)
        codes = [codes] if isinstance(codes, str) else codes
        fields = [fields] if isinstance(fields, str) else fields

        codes = mongodb_name_compliance(codes)
        fields = mongodb_name_compliance(fields)
        if single_pair:
            r = self.col.find_one({'code': codes[0]}, fields)
            try:
                # `fields` is a one-element list here; index the document
                # with the field name itself. Indexing with the list raised
                # TypeError (unhashable), which the handler below swallowed,
                # so this branch always returned an empty Bubbles.
                return Bubbles(r[fields[0]])
            except (TypeError, KeyError):
                # TypeError: find_one returned None (no such code).
                # KeyError: the document has no entry for this field.
                return Bubbles()

        docs = self.col.find({'code': {'$in': codes}}, fields + ['code'])
        res = defaultdict(Bubbles)
        for doc in docs:
            # Everything except the lookup key and Mongo's id is a field.
            q_fields = set(doc.keys()) - {'code', '_id'}
            for f in q_fields:
                res[doc['code'], f] = Bubbles(doc[f])
        return res
示例#5
0
        def write_batch_to_db(batches):
            '''Upsert every frame in ``batches`` into its field sub-collection.

            ``batches`` maps a field name to a DataFrame. Each row without
            missing values becomes one upsert matched on the date key taken
            from the row's index. The frame's columns are renamed in place
            to their name-compliant form before writing.
            '''
            for field, frame in batches.items():
                frame.columns = mongodb_name_compliance(frame.columns)
                # dropna() skips any row that has at least one missing value.
                requests = [
                    UpdateOne({self.date_name: stamp.to_pydatetime()},
                              {'$set': row.to_dict()},
                              upsert=True)
                    for stamp, row in frame.dropna().iterrows()
                ]
                target = self.col[field.upper().replace('.', '~')]
                if not requests:
                    # Nothing to write for this field.
                    continue
                target.bulk_write(requests, ordered=False)
示例#6
0
 def create_index():
     '''Create a unique index on the date key of every field sub-collection.'''
     for name in mongodb_name_compliance(fields):
         self.col[name].create_index(self.date_name, unique=True)