Exemplo n.º 1
0
def splitData():
    for i in range(1,FILES+1):
        print '-'*50
        print 'Split dataset 1 %d: ' % i
        rfile = file(DATA_SET,'r')
        reader = csv.reader(rfile)
        j = i + 10
        if j != 32:
            train_file = file('splited_data/%d.csv'%i,'w')
            result_file = file('splited_data/%s_%d.csv'%('result',i),'w')
            train_writer = csv.writer(train_file)
            result_writer = csv.writer(result_file)
            for line in reader:
                progressBar(reader.line_ num,DATASET_SIZE)
                if int(line[5]) >= i and int(line[5]) < j: train_writer.writerow(line)
                if int(line[5]) == j and int(line[2]) == 4: result_writer.writerow([line[0],line[1]])
            train_file.close()
            result_file.close()
        else:
            train_file = file('splited_data/for_prediction.csv','w')
            train_writer = csv.writer(train_file)
            for line in reader:
                progressBar(reader.line_num,DATASET_SIZE)
                if int(line[5]) >= i and int(line[5]) < j: train_writer.writerow(line)
            train_file.close()
        rfile.close()
Exemplo n.º 2
0
def prepare_validation(filename):
    """Compute the 561H frequency per date for every replicate.

    Reads the headerless CSV, and for each replicate builds one row of
    frequencies (sum of weighted occurrences over sum of individuals) per
    date.  Returns (dates, frequencies) where frequencies has one row per
    replicate, stacked with np.vstack.
    """
    # Column indices in the headerless CSV
    REPLICATE, DATES, INDIVIDUALS, WEIGHTED = 1, 2, 4, 8

    # Load the data and note the distinct dates and replicates
    data = pd.read_csv(filename, header=None)
    dates = data[DATES].unique().tolist()
    replicates = data[REPLICATE].unique().tolist()

    # Accumulate one frequency row per replicate
    frequencies = []
    for count, replicate in enumerate(replicates, start=1):
        subset = data[data[REPLICATE] == replicate]
        row = []
        for date in dates:
            matched = subset[subset[DATES] == date]
            row.append(sum(matched[WEIGHTED]) / sum(matched[INDIVIDUALS]))
        if len(frequencies) == 0:
            frequencies = row
        else:
            frequencies = np.vstack((frequencies, row))

        # Update the progress bar
        progressBar(count, len(replicates))

    # Return the results
    return dates, frequencies
def report(title, dates, districtData):
    """Plot the summary for every district, updating the progress bar.

    title        -- plot title passed through to rwanda.plot_summary
    dates        -- x-axis dates passed through to rwanda.plot_summary
    districtData -- per-district data, keyed by entries of rwanda.DISTRICTS
    """
    for count, district in enumerate(rwanda.DISTRICTS, start=1):
        rwanda.plot_summary(title,
                            dates,
                            districtData[district],
                            district=district)
        # BUG FIX: was progressBar(district, ...) -- the district id is not
        # a running count, so the bar did not track actual progress
        progressBar(count, len(rwanda.DISTRICTS))
Exemplo n.º 4
0
def sampling():
    cutoffLine('*')
    print 'Sampling using EasyEnsemble method'
    start_time = time.time()

    TRAIN_SET = 'training_set'
    if not os.path.exists(TRAIN_SET): os.mkdir(TRAIN_SET)
    propotion = 10
    negative_size = POSITIVE * propotion
    r_file = file(PRE_DIR + '/negative_set.csv', 'r')
    reader = csv.reader(r_file)

    positive_set = readCSV(PRE_DIR + '/positive_set.csv', int)
    negative_set = []
    set_count = 0
    for line in reader:
        progressBar(reader.line_num, NEGATIVE)
        line = map(int, line)
        if line[-1] == 1: positive_set.append(line)
        if line[-1] == 0: negative_set.append(line)
        if len(negative_set) == negative_size or reader.line_num == NEGATIVE:
            set_count += 1
            training_set = positive_set + negative_set
            random.shuffle(training_set)
            file_name =  TRAIN_SET + '/' + '%d.csv'%set_count
            writeCSV(training_set, file_name)
            negative_set = []

    r_file.close()
    end_time = time.time()
    duration = timekeeper(start_time, end_time)
    cutoffLine('*')
    print 'It takes %s to sampling' % duration
Exemplo n.º 5
0
def splitData():
    cutoffLine('*')
    print 'Start split data with window %d' % WINDOW
    start_time = time.time()

    stat_file = file(PRE_DIR + '/stat.csv', 'w')
    stat_writer = csv.writer(stat_file)
    for i in range(1, FILES + 1):
        cutoffLine('-')
        print 'Split dataset %d/%d: ' % (i, FILES)
        rfile = file(DATA_SET, 'r')
        reader = csv.reader(rfile)
        j = i + WINDOW
        if j != TOTAL_DAY + 1:
            if j == TOTAL_DAY:
                train_file_name = 'test.csv'
                result_file_name = 'result_test.csv'
            else:
                train_file_name = '%d.csv' % i
                result_file_name = '%s_%d.csv' % ('result', i)
            train_file = file(PRE_DIR + '/' + train_file_name, 'w')
            result_file = file(PRE_DIR + '/' + result_file_name, 'w')
            train_writer = csv.writer(train_file)
            result_writer = csv.writer(result_file)
            train_count = 0
            result_count = 0
            for line in reader:
                progressBar(reader.line_num, DATASET_SIZE)
                if int(line[5]) >= i and int(line[5]) < j:
                    train_writer.writerow(line)
                    train_count += 1
                if int(line[5]) == j and int(line[2]) == 4:
                    result_writer.writerow([line[0], line[1]])
                    result_count += 1
            stat_writer.writerow([train_file_name, train_count])
            stat_writer.writerow([result_file_name, result_count])
            train_file.close()
            result_file.close()
        else:
            forpredict_file_name = 'for_prediction.csv'
            train_file = file(PRE_DIR + '/' + forpredict_file_name, 'w')
            train_writer = csv.writer(train_file)
            train_count = 0
            for line in reader:
                progressBar(reader.line_num, DATASET_SIZE)
                if int(line[5]) >= i and int(line[5]) < j:
                    train_writer.writerow(line)
                    train_count += 1
            stat_writer.writerow([forpredict_file_name, train_count])
            train_file.close()
        rfile.close()

    stat_file.close()
    end_time = time.time()
    duration = timekeeper(start_time, end_time)
    cutoffLine('-')
    print 'It takes ' + duration + ' to split dataset.'
    cutoffLine('*')
Exemplo n.º 6
0
def splitData():
    """Split DATA_SET into sliding-window training/result CSV files.

    For each start day i in 1..FILES the rows whose day (column 5) lies
    in [i, i+WINDOW) are written as a training file and the action-type-4
    rows of day i+WINDOW become the matching result file.  The window
    ending on TOTAL_DAY is named test.csv/result_test.csv, and the one
    ending on TOTAL_DAY+1 (no result day exists) is written as
    for_prediction.csv.  Row counts per file are logged to PRE_DIR/stat.csv.
    """
    cutoffLine('*')
    print 'Start split data with window %d' % WINDOW
    start_time = time.time()

    stat_file = file(PRE_DIR + '/stat.csv','w')
    stat_writer = csv.writer(stat_file)
    for i in range(1,FILES+1):
        cutoffLine('-')
        print 'Split dataset %d/%d: ' % (i, FILES)
        rfile = file(DATA_SET,'r')
        reader = csv.reader(rfile)
        # j is the first day AFTER the training window (the result day)
        j = i + WINDOW
        if j != TOTAL_DAY + 1:
            if j == TOTAL_DAY:
                # Final labelled window doubles as the held-out test set
                train_file_name = 'test.csv'
                result_file_name = 'result_test.csv'
            else:
                train_file_name = '%d.csv'%i
                result_file_name = '%s_%d.csv'%('result',i)
            train_file = file(PRE_DIR + '/' + train_file_name,'w')
            result_file = file(PRE_DIR + '/' + result_file_name,'w')
            train_writer = csv.writer(train_file)
            result_writer = csv.writer(result_file)
            train_count = 0
            result_count = 0
            for line in reader:
                progressBar(reader.line_num, DATASET_SIZE)
                # Training rows: any action inside the window
                if int(line[5]) >= i and int(line[5]) < j:
                    train_writer.writerow(line)
                    train_count += 1
                # Result rows: action type 4 on the result day; only the
                # (user, item) id pair is kept
                if int(line[5]) == j and int(line[2]) == 4:
                    result_writer.writerow([line[0],line[1]])
                    result_count += 1
            stat_writer.writerow([train_file_name, train_count])
            stat_writer.writerow([result_file_name, result_count])
            train_file.close()
            result_file.close()
        else:
            # Window runs past the last labelled day: prediction input only
            forpredict_file_name = 'for_prediction.csv'
            train_file = file(PRE_DIR + '/' + forpredict_file_name,'w')
            train_writer = csv.writer(train_file)
            train_count = 0
            for line in reader:
                progressBar(reader.line_num,DATASET_SIZE)
                if int(line[5]) >= i and int(line[5]) < j:
                    train_writer.writerow(line)
                    train_count += 1
            stat_writer.writerow([forpredict_file_name, train_count])
            train_file.close()
        rfile.close()

    stat_file.close()
    end_time = time.time()
    duration = timekeeper(start_time,end_time)
    cutoffLine('-')
    print 'It takes ' + duration + ' to split dataset.'
    cutoffLine('*')
Exemplo n.º 7
0
def process_datasets():
    """Merge the last 50 replicates of each configuration into one CSV."""
    print("Preparing data sets...")
    replicates = pd.read_csv(REPLICATES_LIST, header=None)
    configurations = replicates[2].unique()

    total = len(configurations)
    progressBar(0, total)
    for count, configuration in enumerate(configurations, start=1):
        # Keep only this configuration's replicates, most recent 50 rows
        subset = replicates[replicates[2] == configuration]
        filename = os.path.join(DATASET_DIRECTORY,
                                configuration.replace('yml', 'csv'))
        merge_data(subset[-50:][3].to_numpy(), filename)
        progressBar(count, total)
def prepare(filename):
    """Load replicate data and collate the per-district report series.

    Returns (dates, districtData) where districtData[district][key] holds
    one row per replicate (stacked with np.vstack; a single 1-D series
    until a second replicate is added) and one column per date.
    """
    # Column indices in the headerless CSV
    REPLICATE, DATES, DISTRICT, INDIVIDUALS, WEIGHTED = 1, 2, 3, 4, 8

    # Load the data, note the unique dates, replicates
    data = pd.read_csv(filename, header=None)
    dates = data[DATES].unique().tolist()
    replicates = data[REPLICATE].unique().tolist()

    # Build the dictionary that will be used to store data
    districtData = {}
    for district in rwanda.DISTRICTS:
        districtData[district] = {}
        for key in rwanda.REPORT_LAYOUT:
            districtData[district][key] = []

    # Start by filtering by the replicate
    count = 0
    for replicate in replicates:
        byReplicate = data[data[REPLICATE] == replicate]

        # Load the relevant data for each district
        for district in rwanda.DISTRICTS:
            byDistrict = byReplicate[byReplicate[DISTRICT] == district]

            # Load the simple data
            for key in rwanda.REPORT_LAYOUT:
                if key != 'frequency':
                    # Append the basic information
                    index = rwanda.REPORT_LAYOUT[key][rwanda.REPORT_INDEX]
                    if len(districtData[district][key]) != 0:
                        districtData[district][key] = np.vstack(
                            (districtData[district][key], byDistrict[index]))
                    else:
                        # First replicate seeds the matrix as a 1-D series
                        districtData[district][key] = byDistrict[index]
                else:
                    # Append the 561H frequency data
                    frequency = byDistrict[WEIGHTED] / byDistrict[INDIVIDUALS]
                    if len(districtData[district][key]) != 0:
                        districtData[district][key] = np.vstack(
                            (districtData[district][key], frequency))
                    else:
                        districtData[district][key] = frequency

        # Update the progress bar
        count += 1
        progressBar(count, len(replicates))

    # Return the results
    return dates, districtData
Exemplo n.º 9
0
def main(method, filename, progress=True):
    """Compute a per-cell beta raster from PfPR, population and treatment.

    method   -- callable(calibration_csv, population_bin, treatment, pfpr)
                returning the calibrated beta for one cell
    filename -- path of the output ASC raster
    progress -- show the progress bar; also makes a negative beta fatal
    """
    # Load the relevent ASC data
    [ascHeader, pfpr] = load_asc("../../GIS/rwa_pfpr2to10.asc")
    [_, population] = load_asc("../../GIS/rwa_population.asc")
    [_, treatments] = load_asc("../../GIS/rwa_treatment.asc")

    nodata = ascHeader['nodata']
    beta = []
    for row in range(ascHeader['nrows']):
        current = []
        beta.append(current)
        for col in range(ascHeader['ncols']):
            value = pfpr[row][col]

            # Pass no-data cells through unchanged
            if value == nodata:
                current.append(nodata)
                continue

            # A PfPR of zero implies a beta of zero
            if value == 0:
                current.append(0)
                continue

            # Find the beta for this cell's population bin and treatment
            popBin = get_bin(population[row][col], POPULATION_BINS)
            target = round(value * 100.0, 8)
            result = method("data/calibration.csv", popBin,
                            treatments[row][col], target)

            # Guard against bad results before storing them
            if result is None:
                sys.stderr.write("Null value returned for beta, exiting\n")
                sys.exit(1)
            if result < 0:
                sys.stderr.write(
                    "Projected beta {} is less than zero, exiting\n".format(
                        result))
                # Only exit if the debug flag isn't set
                if progress:
                    sys.exit(1)
            current.append(result)

        # Note the progress
        if progress:
            progressBar(row + 1, ascHeader['nrows'])

    # Save the calculated beta values
    print("Saving {}".format(filename))
    write_asc(ascHeader, beta, filename)
Exemplo n.º 10
0
def prepare_national(filename):
    """Load replicate data and aggregate national-level report series.

    Returns (dates, results) where results[key] holds one row per
    replicate (stacked with np.vstack) and one column per date.  Each
    value sums the column over all rows for that date, except
    'frequency' which is sum(WEIGHTED) / sum(INDIVIDUALS).
    """
    # Column indices in the headerless CSV
    REPLICATE, DATES, INDIVIDUALS, WEIGHTED = 1, 2, 4, 8

    # Load the data, note the unique dates, replicates
    data = pd.read_csv(filename, header=None)
    dates = data[DATES].unique().tolist()
    replicates = data[REPLICATE].unique().tolist()

    # Build the dictionary for the results
    results = {}
    for key in rwanda.REPORT_LAYOUT:
        results[key] = []

    # Start by filtering by the replicate
    count = 0
    for replicate in replicates:
        byReplicate = data[data[REPLICATE] == replicate]

        # Prepare the data structure for this replicate
        values = {}
        for key in rwanda.REPORT_LAYOUT:
            values[key] = []

        # Next, filter by date so we can properly aggregate
        for date in dates:
            byDate = byReplicate[byReplicate[DATES] == date]
            for key in rwanda.REPORT_LAYOUT:
                if key != 'frequency':
                    index = rwanda.REPORT_LAYOUT[key][rwanda.REPORT_INDEX]
                    values[key].append(sum(byDate[index]))
                else:
                    # Frequency is a weighted ratio, not a simple sum
                    values[key].append(
                        sum(byDate[WEIGHTED]) / sum(byDate[INDIVIDUALS]))

        # Append this replicate to our results
        for key in rwanda.REPORT_LAYOUT:
            if len(results[key]) != 0:
                results[key] = np.vstack((results[key], values[key]))
            else:
                # First replicate seeds the matrix as a plain list
                results[key] = values[key]

        # Update the progress bar
        count += 1
        progressBar(count, len(replicates))

    # Return the results
    return dates, results
Exemplo n.º 11
0
def splitData():
    """Split DATA_SET into 10-day training windows plus result files.

    Variant of the WINDOW-based splitter with the window hard-coded to
    10 days.  Writes per-window training/result files under PRE_DIR,
    the file for the window ending on TOTAL_DAY as test.csv, and the
    unlabelled final window as for_prediction.csv; row counts go to
    splited_data/stat.csv.
    """
    # NOTE(review): stat.csv is written under 'splited_data' while the data
    # files use PRE_DIR -- confirm PRE_DIR == 'splited_data' or unify paths
    stat_file = file('splited_data/stat.csv','w')
    stat_writer = csv.writer(stat_file)
    for i in range(1,FILES+1):
        cutoffLine('-')
        print 'Split dataset %d: ' % i
        rfile = file(DATA_SET,'r')
        reader = csv.reader(rfile)
        # j is the first day AFTER the 10-day training window
        j = i + 10
        if j != TOTAL_DAY + 1:
            if j == TOTAL_DAY:
                # Final labelled window doubles as the held-out test set
                train_file_name = 'test.csv'
                result_file_name = 'result_test.csv'
            else:
                train_file_name = '%d.csv'%i
                result_file_name = '%s_%d.csv'%('result',i)
            train_file = file(PRE_DIR + '/' + train_file_name,'w')
            result_file = file(PRE_DIR + '/' + result_file_name,'w')
            train_writer = csv.writer(train_file)
            result_writer = csv.writer(result_file)
            train_count = 0
            result_count = 0
            for line in reader:
                progressBar(reader.line_num, DATASET_SIZE)
                # Training rows: any action inside the window
                if int(line[5]) >= i and int(line[5]) < j:
                    train_writer.writerow(line)
                    train_count += 1
                # Result rows: action type 4 on the result day
                if int(line[5]) == j and int(line[2]) == 4:
                    result_writer.writerow([line[0],line[1]])
                    result_count += 1
            stat_writer.writerow([train_file_name, train_count])
            stat_writer.writerow([result_file_name, result_count])
            train_file.close()
            result_file.close()
        else:
            # Window runs past the last labelled day: prediction input only
            forpredict_file_name = 'for_prediction.csv'
            train_file = file(PRE_DIR + '/' + forpredict_file_name,'w')
            train_writer = csv.writer(train_file)
            train_count = 0
            for line in reader:
                progressBar(reader.line_num,DATASET_SIZE)
                if int(line[5]) >= i and int(line[5]) < j:
                    train_writer.writerow(line)
                    train_count += 1
            stat_writer.writerow([forpredict_file_name, train_count])
            train_file.close()
        rfile.close()
Exemplo n.º 12
0
def process_annual_data(studyId):
    """Query annual data for one study and write a CSV per report year.

    studyId selects the public market treatment percentages; for each of
    2025/2030/2035 the matching day range is queried and the rows are
    written to out/<year>-<studyId>-annual-data.csv.
    """
    # Percentage of public market treatments, drawn from configuration
    # 3: Status Quo
    # 4: Rapid Private Market Elimination
    # 5: Ten Year Private Market Elimination
    # 6: Status Quo
    # 7: Rapid Private Market Elimination
    publicmarket = {
        3: {2025: 0.832, 2030: 0.832, 2035: 0.832},
        4: {2025: 1.0, 2030: 1.0, 2035: 1.0},
        5: {2025: 0.916, 2030: 1.0, 2035: 1.0},
        6: {2025: 0.832, 2030: 0.832, 2035: 0.832},
        7: {2025: 1.0, 2030: 1.0, 2035: 1.0}
    }

    # Range of dates for the year
    ranges = {
        2025: (6575, 6606, 6634, 6665, 6695, 6726, 6756, 6787, 6818, 6848, 6879, 6909),
        2030: (8401, 8432, 8460, 8491, 8521, 8552, 8582, 8613, 8644, 8674, 8705, 8735),
        2035: (10227, 10258, 10286, 10317, 10347, 10378, 10408, 10439, 10470, 10500, 10531, 10561)
    }

    # Exit if a study was not provided
    if studyId not in publicmarket:
        print("Invalid study provided, {}".format(studyId))
        exit(1)

    # Let the user know what we are doing
    print("Processing annual data...")
    header = ["filename", "replicateid", "population", "cases", "reportedcases", "clinicalper1000", "pfpr2to10"]
    total = len(ranges)
    progressBar(0, total)

    for count, year in enumerate(ranges, start=1):
        # Query and save the data
        data = get_annual_data(studyId, ranges[year], publicmarket[studyId][year])
        # NOTE(review): "wb" implies Python 2 csv usage; under Python 3 the
        # mode would need to be "w" with newline='' -- confirm interpreter
        with open("out/{}-{}-annual-data.csv".format(year, studyId), "wb") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(header)
            for row in data:
                writer.writerow(list(row))

        # Note the process
        progressBar(count, total)
Exemplo n.º 13
0
def extract_feature(file_name, line_count, start_date, result_name = ''):
    """Build a 40-slot behaviour-count feature row per (user, item) pair.

    Reads PRE_DIR/<file_name> (line_count rows, days counted from
    start_date) and writes PRE_DIR/set_<file_name>.  When result_name is
    given, each output row gets a trailing 0/1 label marking whether the
    pair appears in that result file.
    """
    r_name = PRE_DIR + '/' + file_name
    w_name = PRE_DIR + '/set_' + file_name
    r_file = file(r_name, 'r')
    w_file = file(w_name, 'w')
    reader = csv.reader(r_file)
    writer = csv.writer(w_file)

    UI_feature = {}
    for line in reader:
        progressBar(reader.line_num, line_count)
        # Key features by the (user id, item id) pair
        UI = (int(line[0]),int(line[1]))
        if UI not in UI_feature: UI_feature[UI] = [0]*40
        ## slots  0 ~  9: view counts over the previous ten days
        ## slots 10 ~ 19: favourite counts over the previous ten days
        ## slots 20 ~ 29: add-to-cart counts over the previous ten days
        ## slots 30 ~ 39: purchase counts over the previous ten days
        index = int(line[5]) - start_date
        if int(line[2]) == 1 : UI_feature[UI][index] += 1
        if int(line[2]) == 2 : UI_feature[UI][10+index] += 1
        if int(line[2]) == 3 : UI_feature[UI][20+index] += 1
        if int(line[2]) == 4 : UI_feature[UI][30+index] += 1

    r_file.close()

    ## Labelling: collect the (user, item) pairs present in the result file
    result_set = set()
    if result_name:
        r_name = PRE_DIR + '/' + result_name
        r_file = file(r_name, 'r')
        reader = csv.reader(r_file)
        for line in reader: result_set.add((int(line[0]), int(line[1])))
        r_file.close()

    if result_set:
        for UI in UI_feature:
            if UI in result_set:
                writer.writerow(list(UI) + UI_feature[UI] + [1])
            else:
                writer.writerow(list(UI) + UI_feature[UI] + [0])
    else:
        # No result file: emit unlabelled feature rows
        for UI in UI_feature: writer.writerow(list(UI) + UI_feature[UI])

    w_file.close()
Exemplo n.º 14
0
def process_summaries(replicates, burnIn, modelStartDate):
    """Generate any missing per-replicate summary files.

    replicates     -- rows exposing COMPLETE, LABEL and REPLICATEID fields
    burnIn         -- burn-in period forwarded to the save_* helpers
    modelStartDate -- forwarded to save_genotype_frequencies

    For each complete replicate, the genotype, treatment and frequency
    summaries are produced only when the target file is not on disk yet.
    """
    # Update the user
    print("Processing {} replicate summaries...".format(len(replicates)))

    # Note the progress
    total = 0
    progressBar(total, len(replicates) + 1)

    # Iterate through all of the rows
    for replicate in replicates:

        # Only download complete summaries
        # (idiom fix: was `replicate[COMPLETE] == False`)
        if not replicate[COMPLETE]: continue

        label, rid = replicate[LABEL], replicate[REPLICATEID]

        # Check each artifact and only generate the ones not already on disk
        if not os.path.exists(GENOTYPE_TEMPLATE.format(label, rid)):
            save_genotype_summary(label, rid, burnIn)

        if not os.path.exists(TREATMENT_TEMPLATE.format(label, rid)):
            save_treatment_summary(label, rid, burnIn)

        if not os.path.exists(FREQUENCIES_TEMPLATE.format(label, rid)):
            save_genotype_frequencies(label, rid, burnIn, modelStartDate)

        # Note the progress
        total = total + 1
        progressBar(total, len(replicates) + 1)

    # Note that we are done
    progressBar(len(replicates) + 1, len(replicates) + 1)
Exemplo n.º 15
0
def sampling(proportion):
    """Sample POSITIVE*proportion negatives in ten streaming passes.

    Streams PRE_DIR/negative_set.csv in sample_times equal chunks,
    shuffles each chunk and keeps negative_eachtime rows from it, then
    shuffles the positives plus sampled negatives into
    data/training_set_<proportion>.csv.
    """
    cutoffLine('*')
    start_time = time.time()
    print 'sampling with propotion %d...'%proportion
    negative_needed = POSITIVE * proportion
    sample_times = 10
    # NOTE(review): relies on Python 2 integer division; any rows after the
    # last full chunk of `mod` lines are read but never flushed into the
    # sample -- confirm NEGATIVE is a multiple of sample_times
    mod = NEGATIVE / sample_times
    negative_eachtime = negative_needed / sample_times

    training_set = readCSV(PRE_DIR + '/positive_set.csv', int)

    ## sampling negative example
    rfile = file(PRE_DIR + '/' + 'negative_set.csv', 'r')
    reader = csv.reader(rfile)
    negative_tmp = []
    for line in reader:
        progressBar(reader.line_num, NEGATIVE)
        negative_tmp.append(map(int, line))
        # At each chunk boundary keep a random slice of the buffered rows
        if reader.line_num % mod == 0:
            random.shuffle(negative_tmp)
            training_set = training_set + negative_tmp[0:negative_eachtime]
            negative_tmp = []
    rfile.close()

    wfile = file('data/training_set_%d.csv'%proportion, 'w')
    writer = csv.writer(wfile)
    random.shuffle(training_set)
    writer.writerows(training_set)
    wfile.close()

    cutoffLine('-')
    # float() forces true division for the achieved-ratio report
    print "Real proportion: %f" %((len(training_set)-POSITIVE) / float(POSITIVE))
    cutoffLine('*')
    end_time = time.time()
    duration = timekeeper(start_time, end_time)
    print 'It takes %s to sampling with proportion %d'%(duration, proportion)
Exemplo n.º 16
0
def process_frequencies(replicates, subset):
    """Aggregate per-day 580Y frequency grids, batched by rate.

    replicates -- rows of (rate, nrows, ncols, ...); rows for the same
                  rate must be contiguous so they can be batched together
    subset     -- filter forwarded to get_580y_frequency_subset

    For each rate, accumulates per-day nrows x ncols grids of
    [infected individuals, weighted occurrences, replicate count] and
    passes each finished batch to save_frequencies().
    """
    # If there are not replicates, then return
    if len(replicates) == 0: return

    # Update the user and note common data
    print("Processing {} replicate frequencies...".format(len(replicates)))
    nrows = replicates[0][1]
    ncols = replicates[0][2]

    # Note the progress
    total = 0
    progressBar(total, len(replicates) + 1)

    # Iterate through all of the rows
    currentRate = None
    for replicate in replicates:

        # Flush and reset the accumulator when a new rate starts
        if currentRate != replicate[0]:
            if currentRate is not None:
                save_frequencies(data, currentRate)
                del data
            currentRate = replicate[0]
            data = {}

        # Run a short query to see if we have anything to work with
        for row in get_580y_frequency_subset(replicate[REPLICATEID], subset):
            days = row[0]
            # BUG FIX: the grid was allocated as ncols lists of nrows cells
            # but is indexed [r][c] (row-major) below; allocate nrows x ncols
            if days not in data: data[days] = [[[0, 0, 0] for _ in range(ncols)] for _ in range(nrows)]
            c = row[1]
            r = row[2]

            # Array formatted as: 0 - infectedindividuals (query index 3)
            #                     1 - weightedoccurrences (query index 5)
            #                     2 - count
            data[days][r][c][0] += row[3]
            data[days][r][c][1] += row[5]
            data[days][r][c][2] += 1

        # Note the progress
        total = total + 1
        progressBar(total, len(replicates) + 1)

    # Save the last data set
    save_frequencies(data, currentRate)
    progressBar(total + 1, len(replicates) + 1)
Exemplo n.º 17
0
def process_replicates():
    """Fetch and cache any replicate data not already stored locally."""
    print("Querying for replicates list...")
    replicates = get_replicates()
    save_csv(REPLICATES_LIST, replicates)

    print("Processing replicates...")
    total = len(replicates)
    count = 0
    progressBar(count, total)
    for row in replicates:
        # Skip replicates whose data is already on disk
        filename = os.path.join(REPLICATE_DIRECTORY, "{}.csv".format(row[3]))
        if os.path.exists(filename): continue

        # Query and store the data
        save_csv(filename, get_replicate(row[3]))

        # Update the progress bar
        count += 1
        progressBar(count, total)

    # Skipped replicates never advanced the bar, so finish it explicitly
    if count != total: progressBar(total, total)
Exemplo n.º 18
0
def extract_feature(window, actday, file_name, line_count, start_date, result_name = ''):
    """Build windowed behaviour-count features per (user, item, category).

    Reads PRE_DIR/<file_name> (line_count rows, days counted from
    start_date), accumulates window*4 behaviour counters per
    (user, item, category) key plus an item sales-rank feature from the
    module-level ci_sale data, and writes PRE_DIR/set_<file_name>.  When
    result_name is given, rows gain a trailing 0/1 label marking whether
    the (user, item) pair appears in the result file.
    """
    r_name = PRE_DIR + '/' + file_name
    w_name = PRE_DIR + '/set_' + file_name
    r_file = file(r_name, 'r')
    w_file = file(w_name, 'w')
    reader = csv.reader(r_file)
    writer = csv.writer(w_file)

    ## Rank items within each category, avoiding future information
    ci_rank = {}
    for c in ci_sale:
        ## Total each item's sales strictly before day (actday - window)
        ci_rank[c] = {}
        for item in ci_sale[c]:
            ci_rank[c][item] = sum(ci_sale[c][item][0:actday-window])
        ## Rank by sales; better sellers rank later so items without any
        ## sales are easy to handle (they get 0)
        rank_list = sorted(ci_rank[c].iteritems(), key = lambda x: x[1])
        for index, item in enumerate(rank_list):
            item = list(item)
            item[1] = index + 1
            rank_list[index] = item
        ci_rank[c] = dict(rank_list)

    UI_feature = {}
    for line in reader:
        progressBar(reader.line_num, line_count)
        # Key features by (user id, item id, category id)
        UI = (int(line[0]),int(line[1]),int(line[4]))
        if UI not in UI_feature: UI_feature[UI] = [0]*(window*4)
        ## Per-day counts for the 4 behaviour types, one window each
        index = int(line[5]) - start_date
        if int(line[2]) == 1 : UI_feature[UI][index] += 1
        if int(line[2]) == 2 : UI_feature[UI][window+index] += 1
        if int(line[2]) == 3 : UI_feature[UI][2*window+index] += 1
        if int(line[2]) == 4 : UI_feature[UI][3*window+index] += 1

    r_file.close()

    ## Append the item's rank within its category (0 when unranked)
    for UI in UI_feature:
        if ci_rank[UI[2]].has_key(UI[1]): UI_feature[UI].append(ci_rank[UI[2]][UI[1]])
        else: UI_feature[UI].append(0)

    ## Labelling: collect the (user, item) pairs present in the result file
    result_set = set()
    if result_name:
        r_name = PRE_DIR + '/' + result_name
        r_file = file(r_name, 'r')
        reader = csv.reader(r_file)
        for line in reader: result_set.add((int(line[0]), int(line[1])))
        r_file.close()

    if result_set:
        for UI in UI_feature:
            if (UI[0],UI[1]) in result_set:
                writer.writerow(list(UI) + UI_feature[UI] + [1])
            else:
                writer.writerow(list(UI) + UI_feature[UI] + [0])
    else:
        # No result file: emit unlabelled feature rows
        for UI in UI_feature: writer.writerow(list(UI) + UI_feature[UI])

    w_file.close()