Exemplo n.º 1
0
def step2(step2_input, step2_output):
    """
    Handle the Step2 requests (comScore click-through) - check README for details

    For every input CSV, the first field of each data row is paired with the
    field at the header's last index. Duplicate pairs are dropped and the
    pairs are handed to process_info(). The global ``graf`` is reset at the
    start and passed to write_to_file() once all inputs have been read.

    :input: step2_input - list containing input files 
            step2_output - output file
    :return: 0 on success
             1 on error
    """

    global graf
    logger.info("###Step 2:")
    graf = defaultdict(dict)
    start2 = clock()
    # A set de-duplicates incrementally, instead of rebuilding the whole
    # collection after every chunk.
    # NOTE(review): pairs accumulate across input files (as in the original),
    # so process_info() sees earlier files' pairs again - confirm that
    # process_info() tolerates repeats.
    pairs = set()
    for input_file in step2_input:
        logger.info("Reading input file {}".format(input_file))
        with open(input_file) as csv_file:
            start_file = clock()
            rdr = csv.reader(csv_file)
            try:
                # next() with a default works on Python 2.6+ and 3 and does
                # not raise StopIteration when the file has no header line.
                header = next(rdr, None)
                if header is None:
                    logger.warning(
                        "Input file {} is empty, skipping.".format(input_file))
                    continue
                header_last = len(header) - 1
                for chunk in gen_chunks(rdr, 10000):
                    pairs.update((row[0], row[header_last]) for row in chunk)
                process_info(list(pairs))
            except csv.Error as e:
                logger.error("Exception {}: {}".format(type(e), e))
                logger.error("Check Problems section in readme for known issues.")
                return 1
            check_time(start_file, "Done reading.")
    check_time(start2, "Done generating full link dict...")
    start_write = clock()
    write_to_file(step2_output, graf)
    check_time(start_write, "Done writing output file.")
    check_time(start2, "Done with Step 2.")
    return 0
Exemplo n.º 2
0
def step3(step3_input, step3_output):
    """
    Handle the Step3 requests (comScore purchase) - check README for details

    For every input CSV, the ('machine_id', 'domain_name') pair of each row
    whose 'tran_flg' column equals '1' is collected. Duplicate pairs are
    dropped and the pairs are handed to process_info(). The global ``graf``
    is reset at the start and passed to write_to_file() once all inputs have
    been read.

    :input: step3_input - list containing input files 
            step3_output - output file
    :return: 0 on success
             1 on error
    """

    global graf
    logger.info("###Step 3:")
    start3 = clock()
    graf = defaultdict(dict)
    # A set de-duplicates incrementally, instead of rebuilding the whole
    # collection after every chunk.
    # NOTE(review): pairs accumulate across input files (as in the original),
    # so process_info() sees earlier files' pairs again - confirm that
    # process_info() tolerates repeats.
    pairs = set()
    for input_file in step3_input:
        logger.info("Reading input file {}".format(input_file))
        with open(input_file) as csv_file:
            rdr = csv.DictReader(csv_file)
            start_file = clock()
            try:
                for chunk in gen_chunks(rdr, 10000):
                    pairs.update((row['machine_id'], row['domain_name'])
                                 for row in chunk if row['tran_flg'] == '1')
                process_info(list(pairs))
            except csv.Error as e:
                logger.error("Exception {}: {}".format(type(e), e))
                logger.error("Check Problems section in readme for known issues.")
                return 1
            check_time(start_file, "Done reading.")
    # Measure against the step start, not the last file's timer; start_file
    # is also undefined when step3_input is empty.
    check_time(start3, "Done generating full link dict...")
    start_write = clock()
    write_to_file(step3_output, graf)
    check_time(start_write, "Done writing output file.")
    check_time(start3, "Done with Step 3.")
    return 0
Exemplo n.º 3
0
def main():
    """
    Run the steps selected on the command line.

    The ``step_to_run`` CLI option is split on commas; '1', '2' and '3'
    each select the matching step and 'all' selects every step. The value
    returned by each executed step is appended to the global ``retl``.
    """
    global retl

    cli_args = cndi_lib.parse_cli_opts()
    requested = cli_args.step_to_run.split(',')
    run_everything = 'all' in requested

    if run_everything or '1' in requested:
        step1_result = cndi_lib.step1(step1_input, step1_output, map_file)
        retl.append(step1_result)

    if run_everything or '2' in requested:
        step2_result = cndi_lib.step2(step2_input, step2_output)
        retl.append(step2_result)

    if run_everything or '3' in requested:
        # NOTE(review): step3 is fed step2_input, not a dedicated step3
        # input list - confirm this is intentional.
        step3_result = cndi_lib.step3(step2_input, step3_output)
        retl.append(step3_result)


if __name__ == "__main__":

    # Visually separate this run from earlier ones in the debug log.
    log_delimiter = "#"*20 + strftime("%a, %d %b %Y %X +0000", gmtime()) + "#"*10
    logger.debug("\n"*2 + log_delimiter + "\n")

    main()

    # main() appends each executed step's return value to retl; the steps
    # document 1 as their error code.
    if 1 in retl:
        logger.error("""
!!!
!!!Errors detected. Check above log or logfile for details.
!!!""")
    # A single parenthesised argument prints the same text with the Python 2
    # print statement and also parses as a Python 3 print() call.
    print("\nDebug log: '{}'\n".format(os.path.join(LOG_FILE_PATH, LOG_FILE)))