def _load_survey1_row_to_doc(survey_s, field_map):
    """Convert one survey row into a ``data`` dict for indexing.

    Args:
        survey_s: One row of the survey DataFrame (as yielded by
            ``DataFrame.iterrows``), indexed by CSV column name.
        field_map: Mapping ``field -> list of specs``. Each spec is indexed
            as 0=question, 1=answer, 2=column, 3=field_type, 4=keys.

    Returns:
        The dict populated through ``elastic.convert_field``.
    """
    data = {}
    for field, specs in field_map.items():
        first = specs[0]
        answer_value = survey.answer_value_to_string(survey_s[first[2]])
        answer_value = survey.answer_value_encode(
            first[0], first[1], field, answer_value)

        if first[0] is None:
            # Column mapping, no question. With several mapped columns,
            # collect every value and pick/compose the one to store.
            answer_values = [answer_value]
            for spec in specs[1:]:
                other_value = survey.answer_value_to_string(survey_s[spec[2]])
                answer_values.append(other_value)
                if field == 'blindcode':
                    # blindcode is composed: first value plus the first
                    # three characters of each non-empty extra column.
                    if other_value != '':
                        answer_value = answer_value + '-' + other_value[:3]
                elif len(other_value) > len(answer_value):
                    # Otherwise keep the longest string seen so far.
                    answer_value = other_value
            # The original loop left its variable on the last spec; that
            # spec decides the field type and is handed to convert_field.
            last = specs[-1]
            if last[3] == 'dict':
                answer_value = survey.answer_values_dict(
                    answer_values, last[4])
            elastic.convert_field(data, field, last, answer_value)
        elif first[1][0] == '_':
            # Question mapping, no answer.
            elastic.convert_field(data, field, first, answer_value)
        else:
            # Answer mapping: every spec contributes its own encoded value.
            for spec in specs:
                value = survey.answer_value_to_string(survey_s[spec[2]])
                value = survey.answer_value_encode(
                    spec[0], spec[1], field, value)
                elastic.convert_field(data, field, spec, value)
    return data


def load_survey1(request, survey_filename, map_filename):
    """Read a survey CSV from ``data/`` and bulk-write its rows.

    The CSV is read with ``;`` as separator and ISO-8859-1 encoding. Each
    row is converted to a dict via the column mapping returned by
    ``survey.map_columns``, completed with ``survey.map_header`` and
    ``survey.map_id``, wrapped by ``elastic.convert_data_for_bulk`` as an
    'update' action and sent in batches through
    ``bulk(models.client, ...)``.

    Args:
        request: Passed through unchanged to ``survey.map_header``.
        survey_filename: File name inside ``data/``. The survey name is the
            part of the base name before the first ``-``, stripped.
        map_filename: When not empty, ``survey.qa`` is replaced by
            ``survey.qa_map(map_filename)`` before loading.

    Returns:
        None.
    """
    batch_size = 100

    if map_filename != '':
        survey.qa = survey.qa_map(map_filename)
    survey_name = os.path.splitext(survey_filename)[0].split('-', 1)[0].strip()
    ml_file = 'data/' + survey_filename

    # Only the column names are needed to build the mapping, so read just
    # the header instead of parsing the whole file twice.
    header_df = pd.read_csv(ml_file, sep=';', encoding='ISO-8859-1',
                            low_memory=False, dtype=object, nrows=0)
    # field_map[field]: [(question=0, answer=1, column=2, field_type=3, ...)]
    # col_map[column]: tuple whose index 3 is compared with 'text' below.
    field_map, col_map, _ = survey.map_columns(survey_name, header_df.columns)

    # Force columns mapped as 'text' to be read as strings.
    converters = {col: str
                  for col, spec in col_map.items() if spec[3] == 'text'}
    survey_df = pd.read_csv(ml_file, sep=';', encoding='ISO-8859-1',
                            low_memory=False, converters=converters)
    survey_df.fillna(0, inplace=True)
    survey_df.index = survey_df[field_map['resp_id'][0][2]]

    bulk_data = []
    total_count = 0
    for _, survey_s in survey_df.iterrows():
        data = _load_survey1_row_to_doc(survey_s, field_map)
        survey.map_header(request, survey_name, data)
        data['_id'] = survey.map_id(survey_name, data)
        bulk_data.append(
            elastic.convert_data_for_bulk(data, 'survey', 'survey', 'update'))

        if len(bulk_data) >= batch_size:
            bulk(models.client, actions=bulk_data, stats_only=True)
            # Count what was actually sent; the previous counter was reset
            # to 1 with an empty buffer and over-reported later batches.
            total_count += len(bulk_data)
            print("crawl_survey: written another batch, total written {0:d}".
                  format(total_count))
            bulk_data = []

    # Flush the remaining partial batch, if any.
    if bulk_data:
        bulk(models.client, actions=bulk_data, stats_only=True)
def _crawl_survey_row_to_map(survey_s, field_map, survey_filename):
    """Build a ``models.SurveyMap`` from one survey row.

    Args:
        survey_s: One row of the survey DataFrame, indexed by CSV column.
        field_map: Mapping ``field -> list of specs``; each spec is indexed
            as 0=question, 1=answer, 2=column.
        survey_filename: Stored on the object as ``survey``.

    Returns:
        The populated ``models.SurveyMap`` instance.
    """
    sl = models.SurveyMap()
    resp_id = survey.answer_value_to_string(
        survey_s[field_map['resp_id'][0][2]])
    blindcode = survey.answer_value_to_string(
        survey_s[field_map['blindcode'][0][2]])
    sl.resp_id = resp_id + "_" + blindcode
    sl.survey = survey_filename
    # Start every dict-valued attribute empty, in the original order.
    for attr_name in ('children', 'concept', 'emotion', 'fragrattr', 'mood',
                      'smell', 'suitable_product', 'suitable_stage',
                      'question'):
        setattr(sl, attr_name, {})

    for field, specs in field_map.items():
        # resp_id is the unique id of the record and is already set above.
        if field == 'resp_id':
            continue
        first = specs[0]
        answer_value = survey.answer_value_to_string(survey_s[first[2]])
        answer_value = survey.answer_value_encode(first[1], answer_value)

        if first[0] is None:
            # Column mapping, no question. With several mapped columns,
            # pick/compose the value to store.
            for spec in specs[1:]:
                other_value = survey.answer_value_to_string(survey_s[spec[2]])
                if field == 'blindcode':
                    # NOTE(review): unlike load_survey1, an empty extra
                    # value still appends '-' here; kept as-is, confirm.
                    answer_value = answer_value + '-' + other_value[:3]
                elif len(other_value) > len(answer_value):
                    answer_value = other_value
            setattr(sl, field, answer_value)
        else:
            # Answer mapping: a dict keyed by answer, one entry per spec.
            # Set first and read back, as the original did, so additions go
            # through whatever object the model exposes for the attribute.
            setattr(sl, field, {first[1]: answer_value})
            answers = getattr(sl, field)
            for spec in specs[1:]:
                value = survey.answer_value_to_string(survey_s[spec[2]])
                value = survey.answer_value_encode(spec[1], value)
                answers[spec[1]] = value
    return sl


def crawl_survey(survey_filename):
    """Read a survey CSV from ``data/`` and bulk-write it as SurveyMap rows.

    The CSV is read with ``;`` as separator and ISO-8859-1 encoding, and
    missing values are replaced by 0. Each row becomes a
    ``models.SurveyMap``, is converted with
    ``elastic.convert_for_bulk(sl, 'update')`` and sent in batches through
    ``bulk(models.client, ...)``.

    NOTE(review): ``survey.map_columns`` and ``survey.answer_value_encode``
    are called here with fewer arguments than in ``load_survey1``; the
    calls are left unchanged. Confirm against the current ``survey`` module.

    Args:
        survey_filename: File name inside ``data/``; also stored on every
            record as ``survey``.

    Returns:
        None.
    """
    batch_size = 100

    ml_file = 'data/' + survey_filename
    survey_df = pd.read_csv(ml_file, sep=';', encoding='ISO-8859-1',
                            low_memory=False)
    survey_df.fillna(0, inplace=True)
    # field_map[field]: [(question=0, answer=1, column=2)]
    field_map, _ = survey.map_columns(survey_df.columns)
    survey_df.index = survey_df[field_map['resp_id'][0][2]]

    bulk_data = []
    total_count = 0
    for _, survey_s in survey_df.iterrows():
        sl = _crawl_survey_row_to_map(survey_s, field_map, survey_filename)
        bulk_data.append(elastic.convert_for_bulk(sl, 'update'))

        if len(bulk_data) >= batch_size:
            bulk(models.client, actions=bulk_data, stats_only=True)
            # Count what was actually sent; the previous counter was reset
            # to 1 with an empty buffer and over-reported later batches.
            total_count += len(bulk_data)
            print("crawl_survey: written another batch, total written {0:d}".
                  format(total_count))
            bulk_data = []

    # Flush the remaining partial batch, if any.
    if bulk_data:
        bulk(models.client, actions=bulk_data, stats_only=True)