示例#1
0
文件: dataUtils.py 项目: LuMelon/ERD
def sortTempList(temp_list):
    """Return the ``[time, post]`` pairs of *temp_list* in ascending time order.

    Each item of *temp_list* is indexed as ``item[0]`` (the sort key) and
    ``item[1]`` (the payload).  Both columns are turned into numpy arrays and
    reordered with the key column's argsort, so the returned values are the
    elements of those arrays, not the original Python objects.
    """
    stamps = np.array([entry[0] for entry in temp_list])
    payloads = np.array([entry[1] for entry in temp_list])
    order = stamps.argsort().tolist()
    ordered_pairs = zip(stamps[order], payloads[order])
    return [list(pair) for pair in ordered_pairs]
示例#2
0
def sort_by_date(data, tpos=2):
    """
    Sort whitespace-separated records by their time field.

    input:  data    --- a list of strings; each is split on runs of
                        whitespace and must yield at least 7 fields
            tpos    --- zero-based index of the field holding the time
                        (default 2, i.e. the third field)
    output: numpy array of 7 x m dimension (one row per field, one column
            per entry), with the columns ordered by ascending converted time
        note: the time field is passed to convert_time(), which is defined
              elsewhere; the original note gives its format as
              2007-05-07T11:59:59
    """
    columns = [[] for _ in range(7)]
    stamps = []
    for ent in data:
        # Raw string: '\s' in a plain literal is an invalid escape sequence.
        fields = re.split(r'\s+', ent)
        for k in range(7):
            columns[k].append(fields[k])
        stamps.append(convert_time(fields[tpos]))

    mdata = numpy.array(columns)
    order = numpy.array(stamps).argsort()

    # Apply the same time ordering to every row in one indexing step.
    return mdata[:, order]
示例#3
0
                  help="Input HDF file.")

# Repeatable -p/--purge flag: optparse's "count" action increments
# options.purge once for every occurrence on the command line.
parser.add_option("-p", "--purge", dest="purge",
                  action="count", default=0,
                  help="remove entries that might be duplicates")

(options, args) = parser.parse_args()

# Fall back to the first positional argument when no input file was set
# through an option.
if args and options.inputFile is None:
    options.inputFile = args[0]

if options.debug:
    print(options)

# NOTE(review): the fallback above stores the path in options.inputFile, but
# the file is opened from options.input. The input option's dest is defined
# outside this section -- confirm which attribute is intended.
fh = tables.openFile(options.input, mode="a")

try:
    # Rewrite the 'time' and 'val' columns of every node under the root so
    # that rows are ordered by ascending time.
    for tbl in fh.iterNodes("/"):
        print("Sorting %s" % (tbl,))
        time = tbl.col('time')
        val = tbl.col('val')
        ts = time.argsort()
        time = time.take(ts)
        val = val.take(ts)
        tbl.modifyColumns(names=('time', 'val'), columns=(time, val))

        if options.purge:
            # Duplicate purging is not implemented yet; only placeholders
            # are printed.
            print("Purging duplicates from %s" % (tbl,))
            print("code me")
finally:
    # Close the HDF file even if sorting one of the tables fails.
    fh.close()