예제 #1
0
def main():
    """Print every record from the stdin pipeline and report how many.

    Records come from ``stdin_reader()``, pass through ``filter_dicts`` and
    then ``sanitize_dicts``, and are serialized with ``dumper`` before being
    printed to stdout (one ``print`` call per record). The number of records
    written is reported on ``stderr``.
    """
    # Start at zero so an empty stream reports "0 records written" instead of
    # failing on an unbound name.
    records = 0
    stream = sanitize_dicts(filter_dicts(stdin_reader()))
    # Counting from 1 makes the loop variable the number of records written
    # so far rather than the index of the last one.
    for records, item in enumerate(stream, 1):
        # Timing notes for 10 minutes of data:
        #   dumper(item, sort_keys=True) with the system json: 131 seconds
        #   dumper(item) with the system json:                  66 seconds
        #   dumper(item) with simplejson:                       58 seconds
        # which is why sort_keys is not used here.
        print(dumper(item))
    print("{0} records written".format(records), file=stderr)
예제 #2
0
def main():
    """Insert every record from ``stdin_reader()`` into MongoDB.

    Each record's ``"timestamp"`` string is parsed with the format
    ``%Y-%m-%dT%H:%M:%S`` and replaced by the resulting ``datetime`` before
    the record is inserted into the ``urlhist`` collection of the ``data``
    database. ``MongoClient()`` is created without arguments, so the driver's
    default connection settings apply. The number of records read is
    reported on ``stderr``.

    Raises:
        KeyError: If a record has no ``"timestamp"`` key.
        ValueError: If a timestamp does not match the expected format.
    """
    date_fmt = "%Y-%m-%dT%H:%M:%S"
    parse_timestamp = datetime.strptime
    # Start at zero so an empty input reports "0 records read" instead of
    # failing on an unbound name.
    records = 0
    client = MongoClient()
    try:
        collection = client.data["urlhist"]
        # Counting from 1 makes the loop variable the number of records
        # processed so far rather than the index of the last one.
        for records, doc in enumerate(stdin_reader(), 1):
            doc["timestamp"] = parse_timestamp(doc["timestamp"], date_fmt)
            # NOTE(review): Collection.insert is deprecated since PyMongo 3.0
            # and removed in 4.0; switch to insert_one once the deployed
            # driver version is confirmed.
            collection.insert(doc)
    finally:
        # Release the connection even when parsing or inserting fails.
        client.close()
    print("{0} records read".format(records), file=stderr)
예제 #3
0
def main():
    """
    Build the hash -> long URL mapping and print it as a single JSON object.

    Record keys used:
    *g : bitly global hash identifier (becomes the mapping key)
    *u : Long URL (becomes the mapping value)

    The mapping is written as JSON rather than CSV because no punctuation
    mark could be found that does not also occur in the long URLs, so there
    is no safe character to split CSV columns on.
    """
    url_by_hash = {}
    for record in stdin_reader():
        global_hash = record["g"]
        long_url = record["u"]
        # A hash seen again keeps its original position and takes the newer URL.
        url_by_hash[global_hash] = long_url
    serialized = simplejson.dumps(url_by_hash)
    print(serialized)
예제 #4
0
def main(countries=None):
    """Print the filtered, sanitized stdin records as JSON and report the count.

    Records from ``stdin_reader()`` are passed to ``filter_dicts`` together
    with the result of ``load_germanwings()`` and ``countries``, then through
    ``sanitize_dicts``; each resulting item is serialized with
    ``simplejson.dumps`` and printed to stdout. Progress and the final record
    count go to ``stderr``.

    Args:
        countries: Forwarded unchanged to ``filter_dicts``; defaults to
            ``None``.

    Any exception raised while loading, filtering or printing is reported on
    ``stderr`` and not re-raised, so the function still returns normally.
    """
    try:
        germanwings = load_germanwings()
        dumper = simplejson.dumps
        print("Extracting urls", file=stderr)
        records = 0
        stream = sanitize_dicts(
            filter_dicts(stdin_reader(), germanwings, countries))
        # Counting from 1 makes the loop variable the number of records
        # written so far rather than the index of the last one.
        for records, item in enumerate(stream, 1):
            print(dumper(item))
        print("{0} records written".format(records), file=stderr)
    except Exception as exc:
        # Top-level boundary: keep the best-effort behaviour, but report on
        # stderr so the error text never mixes with the JSON on stdout.
        print(exc, file=stderr)
예제 #5
0
def main(fields, year=2015, month=3):
    """Print one JSON object per (field values, day, hour, minute) group.

    Each record from ``stdin_reader()`` is reduced to the values of
    ``fields`` plus the day, hour and minute of its parsed ``"timestamp"``.
    Records sharing all of those values are counted together and printed as
    ``{"key": ..., "timestamp": ..., "count": ...}``.

    Args:
        fields: Names of the record keys to group by. At least one name is
            expected; only the first field's value is emitted as ``"key"``.
        year: Year stamped on every emitted timestamp. Only day, hour and
            minute are taken from the data.
        month: Month stamped on every emitted timestamp.

    Raises:
        KeyError: If a record lacks ``"timestamp"`` or one of ``fields``.
    """
    fields = list(fields)
    rows = []
    for item in stdin_reader():
        dt = dateutil.parser.parse(item["timestamp"])
        rows.append(
            tuple(item[name] for name in fields) + (dt.day, dt.hour, dt.minute)
        )

    # groupby only merges adjacent equal rows, so the rows must be sorted first.
    rows.sort()
    for row, group in itertools.groupby(rows):
        # The time parts are always the last three entries, however many
        # fields precede them.
        day, hour, minute = row[-3:]
        d = {
            "key": row[0],
            # simplejson has no default encoding for datetime objects, so
            # emit the timestamp as an ISO-8601 string.
            "timestamp": datetime(year, month, day, hour, minute).isoformat(),
            # Count the group lazily instead of materializing it as a list.
            "count": sum(1 for _ in group),
        }
        print(simplejson.dumps(d))
예제 #6
0
def main(fields, year=2015, month=3):
    """Count stdin records per field value and minute, printing JSON lines.

    Every record from ``stdin_reader()`` is trimmed to the keys listed in
    ``fields`` and extended with the ``day``, ``hour`` and ``minute`` of its
    parsed ``"timestamp"``. The trimmed records are sorted and grouped on
    those values, and each group is printed as a JSON object with the keys
    ``"key"``, ``"timestamp"`` and ``"count"``.

    Args:
        fields: Names of the record keys to group by. At least one name is
            expected; only the first field's value is emitted as ``"key"``.
        year: Year used for every emitted timestamp. Only day, hour and
            minute come from the data.
        month: Month used for every emitted timestamp.

    Raises:
        KeyError: If a record lacks ``"timestamp"`` or one of ``fields``.
    """
    keys = list(fields) + ["day", "hour", "minute"]
    keyfn = itemgetter(*keys)

    data = []
    for item in stdin_reader():
        dt = dateutil.parser.parse(item["timestamp"])
        row = {k: v for k, v in item.items() if k in fields}
        row.update(day=dt.day, hour=dt.hour, minute=dt.minute)
        data.append(row)

    # groupby only merges adjacent rows, so sort on the same key first.
    data.sort(key=keyfn)
    for k, g in itertools.groupby(data, key=keyfn):
        # The time parts are the last three key entries regardless of how
        # many fields come before them.
        day, hour, minute = k[-3:]
        d = {
            "key": k[0],
            # simplejson has no default encoding for datetime objects, so
            # the timestamp is written as an ISO-8601 string.
            "timestamp": datetime(year, month, day, hour, minute).isoformat(),
            # Count without building a throwaway list of the group.
            "count": sum(1 for _ in g),
        }
        print(simplejson.dumps(d))
예제 #7
0
def main(key):
    """Print each value found under *key* with its frequency, most common first.

    Records from ``stdin_reader(stdin)`` that do not contain *key* are
    skipped. Each output line has the form ``value, count``.
    """
    tally = Counter()
    for record in stdin_reader(stdin):
        if key not in record:
            continue
        tally[record[key]] += 1
    # most_common() with no argument returns all items sorted by count,
    # highest first.
    for value, count in tally.most_common():
        print("{0}, {1}".format(value, count))