Exemplo n.º 1
0
def main():
    """Extract per-user location logs from CSV files into one output file.

    Command-line arguments (read from ``sys.argv``):
        1. input directory; every file matching ``<inputdir>/*/*.csv`` is a
           candidate
        2. path of the output file (truncated and rewritten)
        3. path of a text file with one valid user id per line

    A CSV file is processed only when the uid returned by ``get_uid`` for its
    path appears in the valid-user file; all other files are skipped.
    """
    import sys
    import glob
    import time
    import os

    start = time.time()

    inputdir = sys.argv[1]
    outputfile = sys.argv[2]
    valid_user_file = sys.argv[3]

    # Read the whitelist inside a context manager so the handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(valid_user_file) as valid_user_fileobj:
        valid_users = {line.strip() for line in valid_user_fileobj}
    split_func = split_by_5_minute

    csvfiles = glob.glob(os.path.join(inputdir, '*', '*.csv'))
    n = len(csvfiles)

    with open(outputfile, 'w') as outputfileobj:
        for idx, csvfile in enumerate(csvfiles):
            uid = get_uid(csvfile)
            if uid not in valid_users:
                continue
            # 2 << 27 bytes == 256 MiB read buffer.
            with open(csvfile, buffering=(2 << 27)) as f:
                rows = prepare_user_log(f)
                logs = extract_location(rows, split_func)
            save_to_csv(outputfileobj, uid, logs)
            # Progress counts every candidate file, including skipped ones.
            logging.info('[%d/%d]', idx + 1, n)

    logging.info('finish with time %s', str(time.time() - start))
Exemplo n.º 2
0
def start_btsync(request):
    """
        Run BitTorrent Sync application

        Redirects to '/' when BTSync is already active or after it has been
        started. Returns a plain error response when the application config
        has no 'btsync_conf_file' key.

        Side effects: may create the BTSync config file and the storage
        directory, stores the spawned process handle in the module-level
        ``pid``, and (on Linux) persists a newly obtained 'uid' into
        config.json.
    """
    import time

    global pid

    if is_btsync_active():
        return HttpResponseRedirect('/')

    # If wrong structure of config file, return an error
    if 'btsync_conf_file' not in config:
        return HttpResponse('Klucz btsync_conf_file nie istnieje w pliku konfiguracyjnym')

    # If BTSync config file doesn't exist, create a new one.
    # This must happen before the file is loaded below; otherwise a missing
    # file makes the load fail before it can ever be created.
    if not path.isfile(config['btsync_conf_file']):
        create_empty_btsync_config_file()

    # If btsync-folder doesn't exist, create it
    btsync_conf = load_json(config['btsync_conf_file'])
    if not os.path.exists(btsync_conf['storage_path']):
        os.makedirs(btsync_conf['storage_path'])

    # Start BTSync process
    system_name = platform.system()
    if system_name == 'Windows':
        pass                                # for the future
    elif system_name == 'Linux':
        pid = subprocess.Popen([config['btsync_exe_file'], '--config', config['btsync_conf_file']])
        # Wait for BTSync to start; sleep between polls instead of spinning
        # a CPU core at 100%.
        while not is_btsync_active():
            time.sleep(0.1)

        if 'uid' not in config:
            config['uid'] = get_uid(config['btsync_server_address'])
            save_json(os.path.join(config['application_path'], 'config.json'), config)

    return HttpResponseRedirect('/')
Exemplo n.º 3
0
def main():
    """Write per-user top-level and second-level domain statistics.

    Command-line arguments (read from ``sys.argv``):
        1. input directory; every file matching ``<inputdir>/*/*.csv`` is
           processed
        2. output path for the first counter returned by ``domain_statistic``
        3. output path for the second counter returned by ``domain_statistic``

    Both output files are truncated and rewritten.
    """
    import sys
    import glob
    import time
    import os

    start = time.time()

    inputdir = sys.argv[1]
    topdomainfile = sys.argv[2]
    seconddomainfile = sys.argv[3]

    csvfiles = glob.glob(os.path.join(inputdir, '*', '*.csv'))
    n = len(csvfiles)

    # Context managers guarantee both outputs are flushed and closed even if
    # processing one of the CSV files raises.
    with open(topdomainfile, 'w') as topdomainobj, \
            open(seconddomainfile, 'w') as seconddomainobj:
        for idx, csvfile in enumerate(csvfiles):
            uid = get_uid(csvfile)
            # 2 << 27 bytes == 256 MiB read buffer.
            with open(csvfile, buffering=(2 << 27)) as f:
                topcounter, secondcounter = domain_statistic(f)
            save_to_csv(topdomainobj, uid, topcounter)
            save_to_csv(seconddomainobj, uid, secondcounter)
            logging.info('[%d/%d]', idx + 1, n)

    logging.info('finish with time %s', str(time.time() - start))
Exemplo n.º 4
0
    def get_team_all_players(self):
        """Collect the ids of all players belonging to the home or away club.

        Parses the XML document at ``self.player_template`` and walks the
        children of ``root[0][0]``. For each entry, field 4 is converted to a
        club uid via ``common.get_uid`` (together with ``self.sportType``);
        when that uid equals ``self.homeid`` or ``self.awayid``, field 0 is
        added to the result as an int.

        NOTE(review): the original docstring read "add a placeholder record
        for players who did not make the squad list"; this method itself only
        gathers ids -- presumably the caller uses them for that purpose.

        Returns:
            A set of int player ids. Empty when the template cannot be
            parsed (the error is printed).
        """
        try:
            tree = ET.parse(self.player_template)
        except Exception as e:
            # Best effort, as before: report the problem. Return early so we
            # do not crash with AttributeError on a None tree below.
            print(e)
            return set()

        players = set()
        for entry in tree.getroot()[0][0]:
            clubid = common.get_uid(int(entry[4].text), self.sportType)
            if clubid == self.homeid or clubid == self.awayid:
                players.add(int(entry[0].text))

        return players
Exemplo n.º 5
0
    def get_team_all_players(self):
        """Collect the ids of all players belonging to the home or away club.

        Parses the XML document at ``self.player_template`` and walks the
        children of ``root[0][0]``. For each entry, field 4 is converted to a
        club uid via ``common.get_uid`` (together with ``self.sportType``);
        when that uid equals ``self.homeid`` or ``self.awayid``, field 0 is
        added to the result as an int.

        NOTE(review): the original docstring read "add a placeholder record
        for players who did not make the squad list"; this method itself only
        gathers ids -- presumably the caller uses them for that purpose.

        Returns:
            A set of int player ids. Empty when the template cannot be
            parsed (the error is printed).
        """
        try:
            tree = ET.parse(self.player_template)
        except Exception as e:
            # Best effort, as before: report the problem. Return early so we
            # do not crash with AttributeError on a None tree below.
            print(e)
            return set()

        players = set()
        for entry in tree.getroot()[0][0]:
            clubid = common.get_uid(int(entry[4].text), self.sportType)
            if clubid == self.homeid or clubid == self.awayid:
                players.add(int(entry[0].text))

        return players
def main():
    """Write per-user app/category statistics for whitelisted users.

    Command-line arguments (read from ``sys.argv``):
        1. input directory; every file matching ``<inputdir>/*/*.csv`` is a
           candidate
        2. path of the output file (truncated and rewritten)
        3. text file with one valid user id per line
        4. top-domain file
        5. second-domain file
        6. blacklist-domain file

    For files 4-6 only the first comma-separated field of each line is used.
    CSV files whose uid (per ``get_uid``) is not whitelisted are skipped.
    Input files are obtained through ``load_file`` using a Redis client
    connected to localhost:6379, db 0.
    """
    import sys
    import glob
    import time
    import os
    import redis

    def read_first_fields(filename):
        # Set of the first comma-separated field of every line; the file is
        # closed as soon as it has been read.
        with open(filename) as fileobj:
            return {line.strip().split(',')[0] for line in fileobj}

    start = time.time()

    inputdir = sys.argv[1]
    outputfile = sys.argv[2]
    valid_user_file = sys.argv[3]
    topdomainfile = sys.argv[4]
    seconddomainfile = sys.argv[5]
    blacklistdomainfile = sys.argv[6]

    topdomain_set = read_first_fields(topdomainfile)
    seconddomain_set = read_first_fields(seconddomainfile)
    blacklistdomain_set = read_first_fields(blacklistdomainfile)

    with open(valid_user_file) as valid_user_fileobj:
        valid_users = {line.strip() for line in valid_user_fileobj}
    split_func = split_by_5_minute
    get_domain_func = generate_get_right_domain(topdomain_set, seconddomain_set, blacklistdomain_set)

    r = redis.StrictRedis(host='localhost', port=6379, db=0)
    csvfiles = glob.glob(os.path.join(inputdir, '*', '*.csv'))
    n = len(csvfiles)

    # The output file is closed (and flushed) even if a CSV fails midway.
    with open(outputfile, 'w') as outputobj:
        for idx, csvfile in enumerate(csvfiles):
            uid = get_uid(csvfile)
            if uid not in valid_users:
                continue
            f = load_file(r, csvfile)
            try:
                counter = app_and_category_statistic(f, get_domain_func, split_func)
            finally:
                # Release the per-file handle even when the statistic raises.
                f.close()
            save_to_csv(outputobj, uid, counter)
            logging.info('[%d/%d]', idx + 1, n)

    logging.info('finish with time %s', str(time.time() - start))
Exemplo n.º 7
0
 def test_get_uid(self):
     """get_uid should return the CSV file name without directory or extension."""
     expected_uid = '0001998'
     actual_uid = com.get_uid('/anb/0001998.csv')
     self.assertEqual(expected_uid, actual_uid)
    for split_time, count in counter.iteritems():
        writer.writerow({'uid': uid, 'time': split_time, 'request_count': count})


if __name__ == '__main__':
    # Script entry point. Usage: <script> INPUTDIR OUTPUTFILE SPLIT_TYPE
    # SPLIT_TYPE 1 selects hourly buckets; any other integer selects
    # half-hour buckets.
    import glob
    import sys
    import time

    start = time.time()

    inputdir, outputfile = sys.argv[1], sys.argv[2]
    split_type = int(sys.argv[3])
    if split_type == 1:
        split_func = split_by_hour
    else:
        split_func = split_by_halfhour

    csvfiles = glob.glob(os.path.join(inputdir, '*', '*.csv'))
    n = len(csvfiles)

    # Drop any previous output so the append-mode open below starts empty.
    if os.path.isfile(outputfile):
        os.remove(outputfile)

    with open(outputfile, 'a') as outputfileobj:
        for idx, csvfile in enumerate(csvfiles):
            uid = get_uid(csvfile)
            with open(csvfile) as f:
                counter = gprs_statistic(f, split_func)
            save_to_csv(outputfileobj, uid, counter)
            logging.info('[%d/%d]' % (idx + 1, n))

    logging.info('finish with time %s', str(time.time() - start))