Пример #1
0
def mean_error_dist(path):
    """
    Print nearest-trajectory distances between validation and Markov data.

    For the first 49 trajectories loaded from ``path + "training/"`` the
    smallest ``traj_dist`` to any of 20 trajectories sampled at random from
    ``path + "markovchain/"`` is recorded.  The sorted list of those minima
    and their average are printed; nothing is returned.

    path: root directory (with trailing separator) that contains the
        "training/" and "markovchain/" sub-directories. str.

    Raises ValueError (from random.sample) when fewer than 20 Markov
    trajectories are available.
    """
    validation_id_traj = load.load_directory_trajectory(path + "training/")
    markov_id_traj = load.load_directory_trajectory(path + "markovchain/")

    trajectories = random.sample(list(markov_id_traj.values()), 20)

    diff_list = []
    count = 0

    for validation_traj in validation_id_traj.values():
        count += 1
        # The counter is checked before any work is done, so the 50th
        # trajectory itself is never evaluated.
        if count == 50:
            break

        min_dist = sys.maxint
        for trajectory in trajectories:
            dist = traj_dist((trajectory, validation_traj))
            if dist < min_dist:
                min_dist = dist
                print(dist)

        # Record the best match, not whichever distance happened to be
        # computed last.
        diff_list.append(min_dist)

    diff_list.sort()

    print(diff_list)
    print(np.average(diff_list))
Пример #2
0
def main():
    """
    Load the trajectories stored in the "slot/" folder of the hard-coded
    data directory and hand them to ``traj_visual``.
    """
    path = "D:/training data/KDDI/#201111.CDR-data/abf7380g/"
    traj_visual(load.load_directory_trajectory(path + "slot/"))
Пример #3
0
def evaluation(path):
    """
    Score every parameter file under ``path + "param/"``.

    For each regular file a fresh sample of 500 trajectories is drawn from
    ``path + "training/"``, a temporal policy is derived from the rewards
    implied by the file's weights, and the sample size plus the values
    returned by ``irl_nll`` and ``markov_nll`` are printed.

    path: root directory (with trailing separator) holding the "training/"
        and "param/" sub-directories. str.
    """
    id_traj = load.load_directory_trajectory(
        path + "training/")  # validation directory
    param_dir = path + "param/"

    for filename in os.listdir(param_dir):
        parampath = param_dir + filename
        if os.path.isdir(parampath):
            continue

        trajectories = random.sample(id_traj.values(), 500)

        g = load.load_graph_traj(trajectories)
        gw = gridworld.Gridworld(g, 0.9)
        feature_matrix = gw.feature_matrix(g)

        # One line per time slot, numbered from 12.  A line carries up to
        # 11 comma-separated weights; missing trailing weights stay 0.
        t_alpha = {}
        with open(parampath, 'r') as f:
            for t, line in enumerate(f, 12):
                tokens = line.strip('\n').split(",")
                param = np.zeros(11)
                for j, token in enumerate(tokens[:11]):
                    param[j] = token
                t_alpha[t] = param

        # Reward of every edge defaults to 0 for each slot 12..47.
        r = {}
        for t in range(12, 48):
            r[t] = dict.fromkeys(g.get_edges(), 0)

        # Slots that actually have weights get a feature-weighted reward.
        slots = [t for t in range(12, 48) if t in t_alpha]
        for edge in g.get_edges():
            for t in slots:
                r[t][edge] = feature_matrix[edge].dot(t_alpha[t])

        print("#######################################################")
        policy = irl.value_iteration.find_temporal_policy(
            g, r, 0.9, 46, stochastic=True)

        nll = irl_nll(policy, trajectories)
        m_nll = markov_nll(trajectories)

        print("%s %s %s" % (len(trajectories), nll, m_nll))
Пример #4
0
def main():
    """
    Run ``simulation`` for every census mesh that has observed trajectories.

    Reads the mesh population CSV (first line skipped as header; columns
    used are mesh id, jobless, workers, students).  For each row the loaded
    trajectories whose slot-12 entry starts with that mesh id are collected;
    rows without any such trajectory are skipped.  ``simulation`` is then
    called once per population group with its output directory and head
    count.  The elapsed time is printed at the end.

    Any exception is announced on stdout and re-raised.
    """
    try:
        starttime = datetime.datetime.now()

        id_traj = load.load_directory_trajectory("D:/PT_Result/trajectory/")

        with open("C:/Users/PangYanbo/Desktop/Tokyo/Census5339/2015meshpop.csv"
                  ) as f:
            next(f, None)  # skip the header row
            # Iterate the file lazily instead of loading every row at once.
            for line in f:
                print("#############################")
                print(line)
                tokens = line.strip('\n').split(',')
                mesh_id = tokens[0]

                # Compare with ``==``: calling ``__eq__`` directly may return
                # the truthy ``NotImplemented`` when the operand types
                # differ, which would select every trajectory.
                trajectories = [
                    traj for traj in id_traj.values()
                    if 12 in traj.keys() and traj[12][0] == mesh_id
                ]

                if not trajectories:
                    continue

                # Parse all three counts before simulating anything so a
                # malformed row fails before any output is produced for it.
                jobless = int(tokens[1])
                workers = int(tokens[2])
                students = int(tokens[3])

                simulation(trajectories, "D:/PT_Result/others/", mesh_id,
                           jobless)
                simulation(trajectories, "D:/PT_Result/commuter/", mesh_id,
                           workers)
                simulation(trajectories, "D:/PT_Result/student/", mesh_id,
                           students)

        endtime = datetime.datetime.now()

        print(endtime - starttime)

    except Exception:
        print("main class wrong")
        raise
Пример #5
0
def main(date, discount, epochs, learning_rate, train=True):
    """
    Train 26 temporal maximum entropy IRL parameter sets for one mesh.

    Works on the hard-coded mesh directory: makes sure its "sim/" and
    "param/" folders exist, calls ``tools.move_files`` on it and, when a
    "training/" folder is present, trains on 26 independent random samples
    of 50 trajectories, passing ``param/<index>`` as the output target.

    date: accepted but not used by this function.
    discount: MDP discount factor. float.
    epochs: Gradient descent iterations. int.
    learning_rate: Gradient descent learning rate. float.
    train: accepted but not used by this function.
    """

    try:
        starttime = datetime.datetime.now()
        path = "D:/ClosePFLOW/53393575/"

        for subdir in ("sim/", "param/"):
            if not os.path.exists(path + subdir):
                os.mkdir(path + subdir)

        tools.move_files(path)

        training_dir = path + "training/"
        if os.path.exists(training_dir):
            id_traj = load.load_directory_trajectory(training_dir)

            # one parameter set per iteration
            for i in range(26):
                trajectories = random.sample(id_traj.values(), 50)
                g = load.load_graph_traj(trajectories)
                feature_matrix = gridworld.Gridworld(
                    g, discount).feature_matrix(g)

                print("%s %s" % ("training ", path))
                maxent.t_irl(g, feature_matrix, trajectories, epochs,
                             learning_rate, path + "param/" + str(i))

        elapsed = datetime.datetime.now() - starttime

        print("finished reading files with time of" + str(elapsed))
    except Exception:
        print("mian class wrong")
        raise
Пример #6
0
def simulation(path):
    """
    Generate one simulated trajectory per parameter file.

    Every regular file in ``path + "param/"`` is read as a table of
    per-slot weights, turned into edge rewards on the graph built from all
    trajectories in ``path + "slot/"``, and passed to
    ``tools.generate_temporal_traj`` together with the fixed start mesh,
    the "sim/" output folder and the first two characters of the file name.

    path: root directory (with trailing separator) holding the "slot/" and
        "param/" sub-directories; "sim/" is created when missing. str.
    """
    id_traj = load.load_directory_trajectory(path + "slot/")

    param_dir = path + "param/"
    files = os.listdir(param_dir)

    sim_dir = path + "sim/"
    if not os.path.exists(sim_dir):
        os.mkdir(sim_dir)

    for filename in files:
        parampath = param_dir + filename
        if os.path.isdir(parampath):
            continue

        trajectories = id_traj.values()
        g = load.load_graph_traj(trajectories)
        gw = gridworld.Gridworld(g, 0.9)
        feature_matrix = gw.feature_matrix(g)

        # One line per time slot, numbered from 12.  A line carries up to
        # 11 comma-separated weights; missing trailing weights stay 0.
        t_alpha = {}
        with open(parampath, 'r') as f:
            for t, line in enumerate(f, 12):
                tokens = line.strip('\n').split(",")
                param = numpy.zeros(11)
                for j, token in enumerate(tokens[:11]):
                    param[j] = token
                t_alpha[t] = param

        # Reward of every edge defaults to 0 for each slot 12..47.
        r = {}
        for t in range(12, 48):
            r[t] = dict.fromkeys(g.get_edges(), 0)

        # Slots that actually have weights get a feature-weighted reward.
        slots = [t for t in range(12, 48) if t in t_alpha]
        for edge in g.get_edges():
            for t in slots:
                r[t][edge] = feature_matrix[edge].dot(t_alpha[t])

        start = "53397561"
        tools.generate_temporal_traj(g, r, start, 0.5, sim_dir,
                                     filename[:2])
Пример #7
0
def main(target):
    """
    Generate 36 Markov-chain trajectory files for one result group.

    Each file is fitted on a fresh random sample of 50 trajectories from
    ``D:/PT_Result/<target>/training/`` and written to
    ``D:/PT_Result/<target>/markovchain/<index>.csv``.  The chain starts at
    the slot-12 origin of a randomly chosen sampled trajectory.

    target: name of the sub-directory of "D:/PT_Result/" to process. str.

    Raises ValueError when fewer than 50 trajectories are available and
    IndexError when no sampled trajectory has an entry for slot 12.
    """
    path_observed = "D:/PT_Result/" + target + "/"
    output_dir = path_observed + "markovchain/"
    id_traj = load.load_directory_trajectory(path_observed + "training/")

    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    for i in range(36):
        # Built from the directory that was just created, so the file path
        # no longer repeats the prefix or contains a doubled separator.
        path = output_dir + str(i) + '.csv'
        trajectories = random.sample(id_traj.values(), 50)

        # Candidate start points: slot-12 origins of the sampled data.
        initial = [
            traj[12][1].get_origin() for traj in trajectories
            if 12 in traj.keys()
        ]

        pairs = makepairs(trajectories)

        cfd = nltk.ConditionalFreqDist(pairs)

        generate(cfd, path, random.choice(initial))
        print("###################################")
Пример #8
0
def main(discount, epochs, learning_rate):
    """
    Run maximum entropy inverse reinforcement learning on the gridworld MDP.

    Trains on a random sample of 20 trajectories from the hard-coded
    "slot/" directory and passes its "param/" sub-directory (created when
    missing) to ``maxent.t_irl`` as the output target.

    discount: MDP discount factor. float.
    epochs: Gradient descent iterations. int.
    learning_rate: Gradient descent learning rate. float.

    Any exception is announced on stdout and re-raised.
    """

    try:
        starttime = datetime.datetime.now()
        path = "D:/training data/KDDI/#201111.CDR-data/abf7380g/slot/"

        id_traj = load.load_directory_trajectory(path)

        print(len(id_traj))

        trajectories = random.sample(id_traj.values(), 20)
        g = load.load_graph_traj(trajectories)
        gw = gridworld.Gridworld(g, discount)
        feature_matrix = gw.feature_matrix(g)

        # train#
        # Single-argument form: two arguments inside the parentheses would
        # be printed as a tuple by the Python 2 print statement.
        print("training " + path)

        if not os.path.exists(path + "param/"):
            os.mkdir(path + "param/")

        maxent.t_irl(g, feature_matrix, trajectories, epochs, learning_rate,
                     path + "param/")

        endtime = datetime.datetime.now()

        print("finished reading files with time of" + str(endtime - starttime))
    except Exception:
        print("mian class wrong")
        raise
Пример #9
0
            mode = trajectory[slot][1].get_mode()
            temp = (origin, destination)
            pairs.append(temp)
    return pairs


def generate(cfd, path, word='53393574', num=36):
    """
    Write a random walk over a conditional frequency distribution as CSV.

    Starting from ``word``, each of the ``num`` steps draws the next word
    from ``cfd[word]`` with probability proportional to its count and
    writes one ``<slot>,<word>`` line, the slot number starting at 12.

    cfd: mapping of word -> {next word: count}, e.g. an
        nltk.ConditionalFreqDist.
    path: output file; an existing file is overwritten. str.
    word: starting word; only its successors are written. str.
    num: number of lines to write. int.

    Raises IndexError when the current word has no successor with a
    positive count.
    """
    # The context manager closes the file even when a step raises.
    with open(path, 'w') as out:
        for i in range(num):
            successors = cfd[word]
            # Repeat each candidate by its count so that a uniform draw
            # from the list follows the conditional distribution.
            arr = [nxt for nxt in successors for _ in range(successors[nxt])]

            word = random.choice(arr)
            out.write(str(12 + i) + ',' + word + '\n')


# Observed trajectories the Markov chain is fitted on.
path_observed = "D:/ClosePFLOW/53393574/training/"
id_traj = load.load_directory_trajectory(path_observed)

# Write 36 synthetic trajectories; each one is fitted on a fresh random
# sample of 50 observed trajectories and starts from generate()'s default
# word.
# NOTE(review): the pair builder is spelled `makePairs` here - confirm it
# matches the name of the definition in this file.
for i in range(36):
    trajectories = random.sample(id_traj.values(), 50)
    pairs = makePairs(trajectories)
    cfd = nltk.ConditionalFreqDist(pairs)
    path = ''.join(['D:/ClosePFLOW/53393574/markovchain/', str(i), '.csv'])
    generate(cfd, path)
Пример #10
0
def _mean_nearest_distance(validation, candidates, validation_first=False,
                           max_dist=None):
    """
    Average, over ``validation``, of the distance to the closest candidate.

    validation: trajectories to score. iterable.
    candidates: trajectories searched for the nearest neighbour; iterated
        once per validation trajectory. sequence.
    validation_first: order of the pair handed to ``traj_dist``.  Kept
        configurable because the call sites use both orders. bool.
    max_dist: when given, validation trajectories whose nearest distance is
        larger are left out of the average. number or None.

    A validation trajectory without any candidate contributes sys.maxint.
    """
    diff_list = []
    for validation_traj in validation:
        min_dist = sys.maxint
        for candidate in candidates:
            if validation_first:
                dist = traj_dist((validation_traj, candidate))
            else:
                dist = traj_dist((candidate, validation_traj))
            if dist < min_dist:
                min_dist = dist
        if max_dist is not None and min_dist > max_dist:
            continue
        diff_list.append(min_dist)
    return np.average(diff_list)


def main():
    """
    Train, simulate and score every qualifying mesh directory.

    Walks the pflow CSV tree.  A directory qualifies when its name is in
    the mesh list, it holds more than 100 files and one of them is
    "train_irl.csv".  For each such mesh the trajectories are split 60/40
    into train and validation, temporal IRL parameters are trained unless
    the mesh's parameter file exists, 80 IRL simulations and 80
    Markov-chain trajectories are generated, and one row is appended to
    "result.csv": mesh, then the mean nearest-trajectory distance of the
    validation set to the simulations, to the Markov trajectories, to a
    10% sample and to a 50% sample of the training set.

    Processing stops once more than 80 meshes have been written.
    """
    data_root = "/home/ubuntu/Data/pflow_data/pflow-csv/"
    path = "/home/ubuntu/Data/PT_Result/exp1/"

    mesh_list = read_list("/home/ubuntu/Data/Tokyo/MeshCode/Tokyo.csv")
    print(mesh_list)

    count = 0
    # Open the result file once for the whole walk: reopening it with "w"
    # for every visited directory would truncate the rows written so far.
    with open(path + "result.csv", "w") as f:
        for root, dirs, files in os.walk(data_root):
            for d in dirs:
                if d not in mesh_list:
                    continue

                mesh_dir = os.path.join(root, d)
                file_list = os.listdir(mesh_dir)
                if len(file_list) <= 100 or "train_irl.csv" not in file_list:
                    continue

                count += 1
                id_traj = load.load_trajectory(mesh_dir + "/train_irl.csv")

                train, validation = train_test_split(
                    list(id_traj.values()), test_size=0.4)

                g = load.load_graph_traj(train)
                gw = gridworld.Gridworld(g, 0.9)
                feature_matrix = gw.feature_matrix(g)

                # train
                if not os.path.exists(path + "parameter/" + d + "param.csv"):
                    maxent.t_irl(g, feature_matrix, train, 200, 0.2,
                                 path + "parameter/" + d)

                # simulation
                # NOTE(review): this reads the first file listed in
                # "parameter/" regardless of the current mesh - confirm
                # that is intended.
                t_alpha = read_param(
                    path + "parameter/" + os.listdir(path + "parameter/")[0])

                r = dict()
                for t in range(12, 48):
                    r[t] = dict.fromkeys(g.get_edges(), 0)

                for edge in g.get_edges():
                    for t in range(12, 48):
                        if t in t_alpha.keys():
                            r[t][edge] = feature_matrix[edge].dot(t_alpha[t])

                sim_dir = path + "sim/" + d + "/"
                if not os.path.exists(sim_dir):
                    os.mkdir(sim_dir)

                for i in range(80):
                    tools.generate_temporal_traj(g, r, d, 0.5, sim_dir,
                                                 d + "_" + str(i))

                # markov chain
                markov_dir = path + "markov/" + d + "/"
                if not os.path.exists(markov_dir):
                    os.mkdir(markov_dir)

                # NOTE(review): pairs/cfd look loop-invariant; hoist them if
                # makepairs is deterministic.
                for i in range(80):
                    pairs = makepairs(train)

                    cfd = nltk.ConditionalFreqDist(pairs)

                    generate(cfd, str(i), markov_dir + str(i) + ".csv", d)

                # expansion validation
                expansion10 = random.sample(train, int(len(train) * 0.1))
                expansion10_score = _mean_nearest_distance(
                    validation, expansion10)

                expansion50 = random.sample(train, int(len(train) * 0.5))
                expansion50_score = _mean_nearest_distance(
                    validation, expansion50)

                # validation
                markov_id_traj = load.load_directory_trajectory(markov_dir)
                print(markov_id_traj.keys())
                markov_score = _mean_nearest_distance(
                    validation, list(markov_id_traj.values()),
                    validation_first=True)

                sim_id_traj = load.load_directory_trajectory(sim_dir)
                # Validation trajectories farther than 10 from every
                # simulation are excluded from the simulation score only.
                sim_score = _mean_nearest_distance(
                    validation, list(sim_id_traj.values()),
                    validation_first=True, max_dist=10)

                row = ",".join([d, str(sim_score), str(markov_score),
                                str(expansion10_score),
                                str(expansion50_score)])
                print(row)
                f.write(row + "\n")

                # Stop cleanly instead of closing the file and carrying on,
                # which made the next write fail on a closed file.
                if count > 80:
                    return
Пример #11
0
def _mesh_track(trajectory, slots):
    """
    Return the plot coordinates (X, Y, Z) of ``trajectory``.

    For every slot in ``slots`` that the trajectory contains, X and Y
    receive the pair returned by ``tools.parse_MeshCode`` for the first
    element of that slot's entry and Z receives the slot's offset from 12.
    """
    X = []
    Y = []
    Z = []
    for i in slots:
        if i in trajectory.keys():
            x, y = tools.parse_MeshCode(trajectory[i][0])
            X.append(x)
            Y.append(y)
            Z.append(i - 12)
    return X, Y, Z


def main():
    """
    Plot observed trajectories and compare simulated with validation ones.

    First shows one 3-D figure with up to 80 observed trajectories.  Then,
    for every simulated trajectory, the closest remaining validation
    trajectory (by ``traj_dist``) is found, both are drawn in one figure
    (validation red, simulation blue) that is saved to the "comparison/"
    folder and shown, and the matched validation trajectory is removed so
    it cannot be matched again.  The average of the matched distances is
    printed at the end.
    """
    path_sim = "D:/ClosePFLOW/53393574/sim/"
    path_validation = "D:/ClosePFLOW/53393574/validation/"
    path_observed = "D:/PT_Result/commuter/sim/"
    path_comparison = "D:/ClosePFLOW/53393574/comparison/"

    if not os.path.exists(path_comparison):
        os.mkdir(path_comparison)

    observed_id_traj = load.load_directory_trajectory(path_observed)
    sim_id_traj = load.load_directory_trajectory(path_sim)
    validation_id_traj = load.load_directory_trajectory(path_validation)

    markov_id_traj = load_markovchain("D:/ClosePFLOW/53393574/markovchain/")
    print(len(markov_id_traj))

    # Overview of the observed data: at most 80 trajectories.
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    for count, uid in enumerate(observed_id_traj.keys(), 1):
        if count > 80:
            break
        X, Y, Z = _mesh_track(observed_id_traj[uid], range(12, 47))
        ax.plot(X, Y, Z)
    ax.set_zlabel('Hour')
    ax.set_ylabel('Lat')
    ax.set_xlabel('Lon')
    plt.show()
    plt.close(fig)

    diff_list = []

    for sim_id in sim_id_traj:
        # Every match consumes one validation trajectory; once none are
        # left the remaining simulations cannot be compared.
        if not validation_id_traj:
            break

        # Reset per simulation so a match from an earlier iteration is
        # never reused.
        min_dist = sys.maxint
        min_id = None
        for validation_id in validation_id_traj.keys():
            dist = traj_dist(
                (sim_id_traj[sim_id], validation_id_traj[validation_id]))
            if dist < min_dist:
                min_dist = dist
                min_id = validation_id

        if min_id is None:
            # No comparable trajectory: keep the sentinel distance out of
            # the average.
            continue

        diff_list.append(min_dist)
        print(min_id)

        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')

        X, Y, Z = _mesh_track(validation_id_traj[min_id], range(12, 47))
        ax.plot(X, Y, Z, color="red", linewidth=3.5)

        X, Y, Z = _mesh_track(sim_id_traj[sim_id], range(12, 46))
        ax.plot(X, Y, Z, color="blue", linewidth=3.5)

        ax.set_zlabel('time')
        ax.set_ylabel('lat')
        ax.set_xlabel('lon')
        for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
            for tick in axis.get_major_ticks():
                tick.label1.set_fontsize(6)
        ax.xaxis.set_ticks_position('none')
        plt.xlim((139, 140))
        plt.ylim((35.3, 36))
        plt.title(min_dist)
        plt.savefig("D:/ClosePFLOW/53393574/comparison/traj_compare"+sim_id+"_"+".png")
        plt.show()
        # Release the figure; one is created per simulated trajectory.
        plt.close(fig)

        validation_id_traj.pop(min_id)

    diff_list.sort()

    print(np.average(diff_list))
Пример #12
0
def main(date, discount, epochs, learning_rate, train=True):
    """
    Run temporal maximum entropy IRL per mesh directory, then train or
    simulate on overlapping groups of 100 trajectories.

    Step 1 visits every entry of "D:/ClosePFLOW/": creates its "sim/"
    folder, calls ``tools.move_files`` and, when a "training/" folder
    exists, trains if there are at least 40 trajectories and no
    "param.csv" yet, or if an existing "param.csv" is larger than 2038
    bytes.

    Step 2 slides a window of 100 ids (step 50, window starts 0..5000) over
    ``id_list``, dumps each group's trajectories to a CSV and either trains
    on the group or simulates one trajectory per start state using a
    randomly chosen parameter file.

    date: passed to ``maxent.t_irl`` as its last argument in step 2.
    discount: MDP discount factor. float.
    epochs: Gradient descent iterations. int.
    learning_rate: Gradient descent learning rate. float.
    train: step 2 trains when true and simulates otherwise. bool.

    NOTE(review): step 2 reads ``id_list``, which is not defined in this
    function, and reuses ``id_traj`` from the last directory of step 1
    that had a "training/" folder - confirm both are intended.

    Any exception is announced on stdout and re-raised.
    """

    try:
        starttime = datetime.datetime.now()
        path = "D:/ClosePFLOW/"

        for dirname in os.listdir(path):
            directory = path + dirname + "/"
            print(directory)

            if not os.path.exists(directory + "sim/"):
                os.mkdir(directory + "sim/")

            tools.move_files(directory)

            if os.path.exists(directory + "training/"):
                id_traj = load.load_directory_trajectory(directory +
                                                         "training/")
                param_file = directory + "param.csv"
                has_params = os.path.exists(param_file)
                # Only ask for the file size when the file exists;
                # os.path.getsize raises on a missing file.
                if (len(id_traj) >= 40 and not has_params) or (
                        has_params and os.path.getsize(param_file) > 2038):
                    trajectories = id_traj.values()
                    g = load.load_graph_traj(trajectories)
                    gw = gridworld.Gridworld(g, discount)
                    feature_matrix = gw.feature_matrix(g)

                    # train#
                    print("%s %s" % ("training ", directory))
                    maxent.t_irl(g, feature_matrix, trajectories, epochs,
                                 learning_rate, directory)

        indicator = 0
        i = 0

        while indicator <= 5000:
            sample_id = [id_list[k] for k in range(indicator, indicator + 100)]
            trajectories = [id_traj.get(sid) for sid in sample_id]

            start_state = [traj[12][0] for traj in trajectories]

            training_data = "C:/Users/PangYanbo/Desktop/UbiResult/TrainingTrajectoriesGroup_" + str(
                i) + ".csv"

            # One "slot,origin,destination,mode" line per recorded slot.
            with open(training_data, "wb") as f:
                for traj in trajectories:
                    for j in range(12, 47):
                        if j in traj.keys():
                            step = traj[j][1]
                            f.write(
                                str(j) + ',' +
                                step.get_origin() + ',' +
                                step.get_destination() + ',' +
                                step.get_mode() + '\n')

            # initial environment based on trajectories

            g = load.load_graph_traj(trajectories)
            gw = gridworld.Gridworld(g, discount)
            feature_matrix = gw.feature_matrix(g)

            print(g)

            if train:

                # training the model

                maxent.t_irl(g, feature_matrix, trajectories, epochs,
                             learning_rate, date)
            else:

                # simulation

                root = "C:/Users/PangYanbo/Desktop/UbiResult/param/"

                for start in start_state:

                    # read alpha from saved file
                    # Build the filtered list in one pass; removing items
                    # from a list while iterating over it skips entries.
                    para_list = [
                        candidate
                        for candidate in (os.path.join(root, name)
                                          for name in os.listdir(root))
                        if not os.path.isdir(candidate)
                    ]

                    param_path = random.choice(para_list)

                    # File name without the directory prefix and without
                    # its last four characters.
                    agent_id = param_path[len(root):-4]

                    print("%s %s" % (agent_id, param_path))

                    # One line per time slot, numbered from 12; up to 11
                    # comma-separated weights, missing ones stay 0.
                    t_alpha = {}
                    with open(param_path, 'r') as f:
                        for t, line in enumerate(f, 12):
                            tokens = line.strip('\n').split(",")
                            param = numpy.zeros(11)
                            for j, token in enumerate(tokens[:11]):
                                param[j] = token
                            t_alpha[t] = param

                    r = dict()
                    for t in range(12, 48):
                        r[t] = dict.fromkeys(g.get_edges(), 0)

                    for edge in g.get_edges():
                        for t in range(12, 48):
                            if t in t_alpha:
                                r[t][edge] = feature_matrix[edge].dot(
                                    t_alpha[t])
                    tools.generate_temporal_traj(g, r, start, 0.5, i, agent_id)

            i += 1
            indicator += 50

        endtime = datetime.datetime.now()

        print("finished reading files with time of" + str(endtime - starttime))
    except Exception:
        print("something wrong")
        raise