Exemplo n.º 1
0
def do_train_test_split(work_dir, filetype, uid2, uid3, max_dt2, max_dt3):
    """Run ``train_test_split`` on every file listed for ``work_dir``.

    The output directory is derived from ``work_dir`` by appending
    ``"_trevte"`` to its second ``/``-separated component (for example
    ``"./data/x"`` becomes ``"./data_trevte/x"``). Any existing output
    directory is removed, then recreated with ``train``, ``eval`` and
    ``test`` sub-directories.

    Args:
        work_dir: Input directory; must contain at least one ``/``.
        filetype: Forwarded unchanged to ``train_test_split``.
        uid2, uid3, max_dt2, max_dt3: Running state that is passed into each
            ``train_test_split`` call and replaced by that call's result.

    Returns:
        The tuple ``(uid2, uid3, max_dt2, max_dt3)`` from the last
        ``train_test_split`` call, or the arguments unchanged when
        ``get_file_list`` yields no paths.

    Raises:
        IndexError: If ``work_dir`` contains no ``/``.
        OSError: If the output directories cannot be created (for example
            because a stale output directory could not be removed).
    """
    print("=== Train Test Split ===")

    parts = work_dir.split('/')
    parts[1] = parts[1] + "_trevte"
    to_path = "/".join(parts)
    paths, tags = get_file_list(work_dir)

    # Best-effort cleanup of a previous run. Only filesystem errors (most
    # commonly "directory does not exist") are ignored; anything else, such
    # as KeyboardInterrupt, is allowed to propagate.
    try:
        rmt(to_path)
    except OSError:
        pass

    # makedirs also creates the "<name>_trevte" parent when the output path
    # is nested and that parent does not exist yet.
    os.makedirs(to_path)
    for subset in ("train", "eval", "test"):
        os.mkdir("{}/{}".format(to_path, subset))

    print("Working on dir: {}\nSave result to: {}".format(work_dir, to_path))

    # Wrapping the sequence (not the enumerate object) lets tqdm see its
    # length and display a total.
    for i, p in enumerate(tqdm(paths)):
        uid2, uid3, max_dt2, max_dt3 = train_test_split(
            [p, tags[i], to_path], filetype, uid2, uid3, max_dt2, max_dt3)

    return uid2, uid3, max_dt2, max_dt3
Exemplo n.º 2
0
def rmtree(path):
    """Remove ``path`` with ``rmt`` on a best-effort basis.

    Does nothing when ``path`` does not exist. Filesystem errors raised
    during removal are ignored so callers can use this as a "make sure it is
    gone if possible" helper; non-filesystem exceptions still propagate.

    Args:
        path: Path to remove.

    Returns:
        None.
    """
    if not osp.exists(path):
        return

    try:
        rmt(path)
    except OSError:
        # Deliberately best-effort: a failed removal is not fatal here.
        pass
Exemplo n.º 3
0
def do_dt_shift(df, uids):
    """Run ``dt_shift`` once per UUID and return the resulting file paths.

    The shared scratch directory ``./_tmp(all)`` is removed (if possible)
    and recreated first. ``dt_shift`` is then called for each entry of
    ``uids`` with a copy of the rows of ``df`` whose ``'UUID'`` column equals
    that entry. Finally the paths reported by ``get_file_list`` for the
    scratch directory are returned.

    Args:
        df: Table with a ``'UUID'`` column.
        uids: Iterable of UUID values to process.

    Returns:
        The ``paths`` element returned by ``get_file_list("./_tmp(all)")``.

    Raises:
        OSError: If the scratch directory cannot be created (for example
            because a stale one could not be removed).
    """
    tmp_dir = "./_tmp(all)"

    # Reset the shared scratch folder. Only filesystem errors (typically
    # "does not exist") are ignored; other exceptions propagate.
    try:
        rmt(tmp_dir)
    except OSError:
        pass
    os.mkdir(tmp_dir)

    print("Start dt shift...")

    for uid in tqdm(uids):
        # Pass a copy so dt_shift cannot modify `df` through the selection.
        dt_shift(df[df['UUID'] == uid].copy())

    print("Merging Result...")

    # Only the paths are needed; the tags are discarded.
    paths, _ = get_file_list(tmp_dir)

    print("DT Shifted, store in merged")

    return paths
Exemplo n.º 4
0
def rmtree(path):
    """Call ``rmt`` on ``path`` only when ``path`` exists; otherwise do nothing."""
    if not osp.exists(path):
        return
    rmt(path)
# Build the "ECS_<value>" label from the first element of `rank`.
ecs_rank_list = "ECS_{}".format(rank[0])

# In[ ]:

### First, merge each UUID's data at day granularity.

# In[ ]:

# Fetch the (paths, tags) listing for each of the three shift-result
# directories.
paths, tags = get_file_list("./cs_shift_result/")
c_paths, c_tags = get_file_list("./csall_shift_result/")
e_paths, e_tags = get_file_list("./csecs_shift_result/")

# In[ ]:

# Recreate the three per-day output directories from scratch.
# Each directory is cleared independently: with a single shared `try`, a
# failure on the first removal (e.g. the directory does not exist yet) would
# skip the remaining removals and leave stale directories behind, making the
# subsequent `os.mkdir` fail with FileExistsError.
for _days_dir in ("./_days_cs", "./_days_cs_all", "./_days_cs_ecs"):
    try:
        rmt(_days_dir)
    except OSError:
        # Best-effort removal; typically the directory simply does not exist.
        pass
    os.mkdir(_days_dir)

# In[ ]:


def do_date_join(date_uni, path, dataset):
    # NOTE(review): only the first two statements of this function are visible
    # in this excerpt; the rest of the body (including any use of `date_uni`
    # and `dataset`) appears to be cut off -- confirm against the full source.
    # Read the CSV file at `path`.
    mer = pd.read_csv(path)
    # List that starts out empty; how it is filled is not visible here.
    domain = []
Exemplo n.º 6
0
def rmtree(path):
    """Invoke ``rmt`` on ``path`` when it exists; a missing path is a no-op."""
    path_missing = not osp.exists(path)
    if path_missing:
        return
    rmt(path)
Exemplo n.º 7
0
 def removeFolder(path):
     """Delete the directory tree at ``path`` and print whether it worked.

     ``shutil.rmtree`` returns ``None`` on success and signals failure by
     raising, so the outcome is decided by exception handling rather than by
     inspecting a return value.

     Args:
         path: Directory to remove.

     Returns:
         None. The result is reported on standard output only.
     """
     try:
         shutil.rmtree(path)
     except OSError:
         # Missing path, permission problem, path is not a directory, ...
         print('unable to remove the path')
     else:
         print('path is removed successfully')
Exemplo n.º 8
0
# In[9]:

# Restrict pq_CS_all, pq_history and pq_CS_ECS to the same seven columns, in
# the same order, so the three share an identical column layout.
pq_CS_all = pq_CS_all[[
    'UUID', 'cate', 'date', 'datetime', 'domain', 'dt', 'type'
]]
pq_history = pq_history[[
    'UUID', 'cate', 'date', 'datetime', 'domain', 'dt', 'type'
]]
pq_CS_ECS = pq_CS_ECS[[
    'UUID', 'cate', 'date', 'datetime', 'domain', 'dt', 'type'
]]

# In[11]:

# Recreate the three shift-result output directories from scratch.
# Each directory is cleared independently: with a single shared `try`, a
# failure on the first removal (e.g. the directory does not exist yet) would
# skip the remaining removals and leave stale directories behind, making the
# subsequent `os.mkdir` fail with FileExistsError.
for _result_dir in ("./csall_shift_result", "./cs_shift_result",
                    "./csecs_shift_result"):
    try:
        rmt(_result_dir)
    except OSError:
        # Best-effort removal; typically the directory simply does not exist.
        pass
    os.mkdir(_result_dir)

# In[26]:

time_bucket = {
    "L00_5S": Interval(0, 5 * 1e3, upper_closed=False),
    "L01_20S": Interval(5 * 1e3, 20 * 1e3, upper_closed=False),
    "L02_2M": Interval(20 * 1e3, 2 * 60 * 1e3, upper_closed=False),