示例#1
0
def read_main_df():
    """Read ``up.csv`` into a DataFrame and normalise its columns.

    The file located by ``find_path('up.csv')`` is loaded through the
    project helper ``read_sv`` as tab-delimited UTF-8 text with a header.
    The ``FixationOOB`` column is then mapped from the texts ``'True'`` /
    ``'False'`` to booleans, and ``convert_type`` is asked to cast the
    timestamp, duration, coordinate and stimulus-name columns.

    Returns:
        The frame produced by ``read_sv`` after the steps above.

    Raises:
        AssertionError: if the loaded data has no ``FixationOOB`` column.
    """
    frame = read_sv(
        return_as=pd.DataFrame,
        path=find_path('up.csv'),
        encoding='utf-8',
        delimiter='\t',
        header=True,
    )
    assert 'FixationOOB' in frame.columns

    # Only the exact texts 'True' and 'False' are listed in the mapping.
    text_to_bool = {'True': True, 'False': False}
    frame['FixationOOB'] = frame['FixationOOB'].replace(text_to_bool)

    # Column name -> dtype requested from the project's convert_type helper.
    target_types = {
        'Timestamp': 'int64',
        'FixationDuration': 'int64',
        'MappedFixationPointX': 'int64',
        'MappedFixationPointY': 'int64',
        'StimuliName': 'object',
    }
    convert_type(frame, **target_types)

    return frame
示例#2
0
 def look_up_index(self, data, content, equal=True, greater=False, smaller=False):
     """
     Binary-search one index level for the entry to follow for ``content``.

     Each line of ``data`` is split on ``self.long_sep``. The first field is
     converted with ``utils.convert_type(..., self.attr_type)`` and compared
     with ``content``; the second field is split on ``self.short_sep`` and
     its first two pieces are handed back as offset and length.

     :param data: text of the index level, one entry per line (the search
         presumes ascending key order -- TODO confirm against the writer)
     :param content: value that the converted keys are compared with
     :param equal: comparison flag; together with ``greater`` it selects
         the matching line itself on an exact hit
     :param greater: comparison flag, handled exactly like ``equal`` here
     :param smaller: comparison flag, only consulted when ``equal`` and
         ``greater`` are both falsy; selects the line before an exact hit
     :return: ``[may_exist, offset, data_length]`` with offset and
         data_length as the raw string pieces, or ``[False, 0, 0]`` when an
         exact hit on the first line is queried with only ``smaller``
     :raises Exception: if none of the three flags is set
     """
     rows = data.splitlines()
     lo, hi = 0, len(rows) - 1
     mid = 0
     hit = False
     while lo <= hi:
         mid = int((hi - lo) / 2 + lo)
         # Key type is one of Integer, Real, Text, Date, Boolean.
         key = utils.convert_type(rows[mid].split(self.long_sep)[0], self.attr_type)
         if key == content:
             hit = True  # landed exactly on the target entry
             break
         if key < content:
             lo = mid + 1
         else:
             hi = mid - 1

     if not (equal or greater or smaller):
         raise Exception("This situation should be resolved earlier")

     if hit:
         if equal or greater:
             # For a pure "greater" query the caller still has to validate
             # the result further.
             chosen = rows[mid]
         elif mid == 0:
             # Nothing can sit before the first entry.
             return [False, 0, 0]
         else:
             # Strictly smaller: the preceding entry is the one to use.
             chosen = rows[mid - 1]
         may_exist = True
     else:
         # No exact key on this level; the value may still be found one
         # level further down.
         may_exist = True if (equal or greater) else hi != -1
         # NOTE(review): when content sorts before every key, hi is -1 and
         # rows[hi] resolves to the last line through negative indexing
         # (IndexError on empty data) -- confirm this is intended.
         chosen = rows[hi]

     location = chosen.split(self.long_sep)[1]
     pieces = location.split(self.short_sep)
     return [may_exist, pieces[0], pieces[1]]
示例#3
0
文件: read.py 项目: IvdBrandt/Visu
def read_main_df():
    """Read ``up.csv`` into a DataFrame and normalise its columns.

    The file located by ``find_path('up.csv')`` is loaded through the
    project helper ``read_sv`` as tab-delimited UTF-8 text with a header.
    Every ``FixationOOB`` cell is then evaluated into a Python object, and
    ``convert_type`` is asked to cast the listed columns (``'?'`` is the
    dtype code requested for ``FixationOOB``).

    Returns:
        The frame produced by ``read_sv`` after the steps above.

    Raises:
        AssertionError: if the loaded data has no ``FixationOOB`` column.
    """
    # Reading main data into memory
    df = read_sv(return_as=pd.DataFrame,
                 path=find_path('up.csv'),
                 encoding='utf-8',
                 delimiter='\t',
                 header=True)

    assert 'FixationOOB' in df.columns

    # NOTE(review): eval() executes whatever expression the file contains.
    # If up.csv can ever come from an untrusted source, switch to an
    # explicit {'True': True, 'False': False} mapping or ast.literal_eval.
    # Kept as-is here so existing inputs keep behaving the same way.
    #
    # Iterate over the index labels only: iterrows() would build a full
    # row Series per iteration that this loop never looks at.
    for label in df.index:
        df.at[label, 'FixationOOB'] = eval(df.at[label, 'FixationOOB'])

    convert_type(df,
                 Timestamp='int64',
                 FixationDuration='int64',
                 MappedFixationPointX='int64',
                 MappedFixationPointY='int64',
                 StimuliName='str',
                 FixationOOB='?')

    print('Read df as pd.DataFrame')

    return df
示例#4
0
 def create_level_index(self, level, input_list=None):
     """
     Register the index file for ``level`` and, for levels other than 0,
     write it.

     Level 0 is the meta file, level 1 the primary index, every other
     level a sparse index.

     :param level: index level; must not already be in ``self.index_dict``
     :param input_list: entries passed to ``self.save_index`` for sparse
         levels; not used for level 0 or level 1
     :return: ``None`` for level 0, otherwise whatever ``self.save_index``
         returns
     :raises Exception: if ``level`` was registered before
     """
     target_file = utils.convert_filename(
         self.csv_file, FILE_TYPE_INDEX, attr=self.attr, level=level, create_dir=True
     )
     if level in self.index_dict:
         raise Exception("Found used level, check your code")
     self.index_dict[level] = target_file

     if level == 0:
         # The meta level only records its file name.
         return None

     if level == 1:
         # Primary index: pair each converted attribute value with its row
         # position in the CSV, then order the pairs.
         frame = pd.read_csv(self.csv_file, encoding='utf-8')
         pairs = [
             [utils.convert_type(value, self.attr_type), row_no]
             for row_no, value in enumerate(frame[self.attr])
         ]
         return self.save_index(target_file, sorted(pairs))

     # Sparse level: the caller supplies the entries. The meaning of the
     # trailing True is defined by save_index (not visible here).
     return self.save_index(target_file, input_list, True)
示例#5
0
 def look_up_data(self, data, content, equal=True, greater=False, smaller=False):
     """
     Binary-search ``data`` for ``content`` and return the matching payloads.

     Each line of ``data`` is split on ``self.long_sep``. The first field is
     converted with ``utils.convert_type(..., self.attr_type)`` and compared
     with ``content``; the second field, split on ``self.short_sep``, is the
     payload of that line.

     :param data: index text, one entry per line (the search presumes
         ascending key order -- TODO confirm against the writer)
     :param content: value that the converted keys are compared with
     :param equal: include the line whose key equals ``content``
     :param greater: include the lines after the position of ``content``
     :param smaller: include the lines before the position of ``content``;
         checked before ``greater`` when both are combined with ``equal``
     :return: for range queries a list of payloads (each itself a list of
         strings), empty when nothing qualifies; for a pure ``equal`` query
         the single payload of the matching line, or ``[]`` when there is
         no match
     :raises Exception: if none of the three flags is set
     """
     lines = data.splitlines()
     total = len(lines)
     low, high = 0, total - 1
     middle = 0
     found = False
     while low <= high:
         # Integer midpoint; avoids the round trip through float division.
         middle = low + (high - low) // 2
         # Key type is one of Integer, Real, Text, Date, Boolean.
         key = utils.convert_type(lines[middle].split(self.long_sep)[0], self.attr_type)
         if key == content:
             found = True
             break
         if key < content:
             low = middle + 1
         else:
             high = middle - 1

     def payloads(start, stop):
         # Payload of every line in lines[start:stop]; both bounds are
         # always >= 0 here, so the slice never wraps around.
         return [
             line.split(self.long_sep)[1].split(self.short_sep)
             for line in lines[start:stop]
         ]

     # After a miss, `high` is the last line with a smaller key (-1 if
     # none) and `low` the first line with a greater key (`total` if none).
     if equal and smaller:
         return payloads(0, (middle + 1) if found else (high + 1))
     if equal and greater:
         # The miss case used to return the tuple (True, result_list);
         # every other range branch returns the bare list, so do that here.
         return payloads(middle if found else low, total)
     if equal:
         # Pure equality hands back the single payload, not a list of them.
         return payloads(middle, middle + 1)[0] if found else []
     if smaller:
         return payloads(0, middle if found else (high + 1))
     if greater:
         return payloads((middle + 1) if found else low, total)
     raise Exception("This situation should be resolved earlier")