def infer_collection_ordered_helper(l, cands):
    """Collect candidate record lists found inside an ordered container.

    Walks every element of ``l``. A non-empty list whose first element is a
    dict is appended to ``cands`` as a candidate collection; any other
    ordered element is searched recursively, and dict elements are handed to
    ``infer_collection_dict_helper``.

    :param l: ordered container (as judged by ``is_ordered``) to search.
    :param cands: list that is extended in place with the candidates found.
    :returns: None; results are accumulated in ``cands``.
    """
    for vo in l:
        tv = type(vo)
        if is_ordered(tv):
            # The type predicates are called with a *type* at every other
            # call site, so test ``tv`` rather than the value itself, and
            # inspect ``vo`` (the loop variable) instead of an unbound name.
            # The length guard keeps an empty list from raising IndexError.
            if is_list(tv) and len(vo) > 0 and is_dict(type(vo[0])):
                cands.append(vo)
            else:
                infer_collection_ordered_helper(vo, cands)
        elif is_dict(tv):
            infer_collection_dict_helper(vo, cands)
def infer_collection_dict_helper(d, cands):
    """Collect candidate record lists found among the values of a dict.

    Walks every value of ``d``. A non-empty list whose first element is a
    dict is appended to ``cands`` as a candidate collection; any other
    ordered value is searched with ``infer_collection_ordered_helper`` and
    dict values are searched recursively.

    :param d: dict whose values are searched (keys are ignored).
    :param cands: list that is extended in place with the candidates found.
    :returns: None; results are accumulated in ``cands``.
    """
    for vd in d.itervalues():
        tv = type(vd)
        if is_ordered(tv):
            # Guard on length first: indexing an empty list would raise
            # IndexError. An empty list simply falls through to the ordered
            # helper, which has nothing to iterate over.
            if is_list(tv) and len(vd) > 0 and is_dict(type(vd[0])):
                cands.append(vd)
            else:
                infer_collection_ordered_helper(vd, cands)
        elif is_dict(tv):
            infer_collection_dict_helper(vd, cands)
def infer_collection(d):
    """Pick the list of records that best represents the data in ``d``.

    If ``d`` is already a list it is returned unchanged. Otherwise the
    top-level values of ``d`` are scanned: a non-empty list whose first
    element is a dict becomes a candidate directly, and dict values are
    searched recursively through ``infer_collection_dict_helper``. Other
    ordered top-level values are not descended into.

    :param d: a list, or a dict to search for a collection of dict records.
    :returns: ``d`` itself when it is a list; otherwise the candidate with
        the largest ``relsize`` score, or ``[]`` when no candidate exists.
    """
    if is_list(type(d)):
        return d

    cands = []
    for v in d.itervalues():
        tv = type(v)
        if is_ordered(tv):
            if is_list(tv) and len(v) > 0 and is_dict(type(v[0])):
                cands.append(v)
        elif is_dict(tv):
            infer_collection_dict_helper(v, cands)

    if len(cands) == 0:
        return []
    # max() yields the first candidate with the highest score, which is the
    # same element a stable descending sort would place at index 0, without
    # building and sorting a list of (candidate, score) tuples.
    return max(cands, key=relsize)
def standardize_tags_helper(d):
    """Return a copy of ``d`` whose keys went through ``remove_standards_prefix``.

    Dict values are rebuilt recursively so nested keys are rewritten as
    well; every other value (including lists) is carried over untouched.
    If two keys map to the same rewritten key, the one iterated last wins.

    :param d: dict to rewrite; it is not modified.
    :returns: a new dict with rewritten keys.
    """
    cleaned = {}
    for tag, content in d.iteritems():
        new_content = standardize_tags_helper(content) if is_dict(type(content)) else content
        cleaned[remove_standards_prefix(tag)] = new_content
    return cleaned
def describe(self): print "\n" print "TABLENAME: %s, # RECORDS: %d" % (self.name, self.__len__()) if len(self.rows) == 0: return c = self.rows[0] for k, v in c.iteritems(): print "\tOUTER KEY:", k if is_dict(type(v)): print "\t INNER KEYS:" print "\t ", v.keys(), "\n"
def unnest_value(row):
    """Lift nested ``"value"`` entries up to the top level of a row.

    Some data collections (RSS, for instance) wrap each field's content in
    a dict under a ``"value"`` key. For every field whose content is a dict
    containing ``"value"``, that inner value replaces the wrapper; all other
    fields are copied as they are.

    Example: ``{"title": {"value": "yahoo wins search"}}`` becomes
    ``{"title": "yahoo wins search"}``.

    :param row: dict representing one record; it is not modified.
    :returns: a new dict with the wrappers removed.
    """
    flattened = {}
    for field, content in row.iteritems():
        is_wrapped = is_dict(type(content)) and "value" in content
        flattened[field] = content["value"] if is_wrapped else content
    return flattened
def select(udf, name="", url=None, table=None, data=None, keep_standards_prefix=False):
    """Build a new table from the dicts a user function returns for each row.

    The source table is ``table`` when given; otherwise one is built with
    ``create`` from ``name``, ``data`` and ``url``. ``udf`` is called on each
    source row and only dict results are kept.

    :param udf: callable taking a row and returning a value; non-dict
        results are dropped.
    :param name: name for the resulting table; when empty and ``table`` is
        given, the source table's name is reused.
    :param url: forwarded to ``create`` when no ``table`` is given.
    :param table: existing table to read rows from. Supplying it forces
        ``keep_standards_prefix`` to True.
    :param data: forwarded to ``create`` when no ``table`` is given.
    :param keep_standards_prefix: forwarded to ``create``.
    :returns: whatever ``create`` returns for the collected results.
    """
    if table is None:
        source = create(name, data=data, url=url, keep_standards_prefix=keep_standards_prefix)
    else:
        source = table
        keep_standards_prefix = True
        if len(name) == 0:
            name = source.name

    selected = []
    for row in source.rows:
        try:
            outcome = udf(row)
        except KeyError:
            # Retry against the first element of strip_row(row); presumably
            # a simplified form of the row (verify against strip_row). Rows
            # that still raise KeyError are skipped.
            try:
                outcome = udf(strip_row(row)[0])
            except KeyError:
                continue
        if not is_dict(type(outcome)):
            continue
        selected.append(outcome)

    return create(name=name, data=selected, keep_standards_prefix=keep_standards_prefix)