Exemplo n.º 1
0
def extract_semi_structured_addresses(address_file, segmentation_input_file, output_file):
  """Convert the addresses in a pickle file and pickle the converted list.

  Args:
    address_file: Path handed to ``segment.read_pickle_for_segmentation_file``
      to obtain the addresses to convert.
    segmentation_input_file: Path handed to
      ``segment.load_data_from_pickle_file``; the loaded data is installed via
      ``segment.set_segmentation_data`` (with ``factor=100``) before any
      address is converted.
    output_file: Path the converted list is pickled to. It is opened in
      ``'wb'`` mode, so an existing file is overwritten.
  """
  segment_data = segment.load_data_from_pickle_file(segmentation_input_file)
  # NOTE(review): presumably semi_structured_address depends on the state
  # installed here -- confirm before reordering these calls.
  segment.set_segmentation_data(segment_data, factor=100)
  address_list = segment.read_pickle_for_segmentation_file(address_file)
  # Materialize a real list: on Python 3 map() yields a lazy iterator, which
  # pickle refuses to serialize.
  semi_structured = [semi_structured_address(address) for address in address_list]
  with open(output_file, 'wb') as f:
    pickle.dump(semi_structured, f)
Exemplo n.º 2
0
def extract_semi_structured_addresses(address_file, segmentation_input_file,
                                      output_file):
    """Convert the addresses in a pickle file and pickle the converted list.

    Args:
        address_file: Path handed to
            ``segment.read_pickle_for_segmentation_file`` to obtain the
            addresses to convert.
        segmentation_input_file: Path handed to
            ``segment.load_data_from_pickle_file``; the loaded data is
            installed via ``segment.set_segmentation_data`` (with
            ``factor=100``) before any address is converted.
        output_file: Path the converted list is pickled to. It is opened in
            ``'wb'`` mode, so an existing file is overwritten.
    """
    segment_data = segment.load_data_from_pickle_file(segmentation_input_file)
    # NOTE(review): presumably semi_structured_address depends on the state
    # installed here -- confirm before reordering these calls.
    segment.set_segmentation_data(segment_data, factor=100)
    address_list = segment.read_pickle_for_segmentation_file(address_file)
    # Materialize a real list: on Python 3 map() yields a lazy iterator,
    # which pickle refuses to serialize.
    semi_structured = [
        semi_structured_address(address) for address in address_list
    ]
    with open(output_file, 'wb') as f:
        pickle.dump(semi_structured, f)
Exemplo n.º 3
0
def create_data_for_processing(feeder_number):
    """Build whichever intermediate data files are not yet on disk.

    Each step runs only when its output file is missing, so an existing file
    is reused as-is:

    1. ``ADDRESSES_FROM_SINGLE_FEEDER`` via ``extract_data_for_feeder``.
    2. ``DATA_FILE_FOR_WORD_SEGMENTATION`` via
       ``segment.extract_segmentation_file_from_text``.
    3. ``DATA_FILE_FOR_META_SEGMENTATION`` via the same helper with
       ``metaphone=True``.
    4. ``SEMI_STRUCTURED_ADDRESS_LIST`` via
       ``extract_semi_structured_addresses``.

    Args:
        feeder_number: Value looked up in ``ALL_FEEDERS_LIST``; its index is
            what gets passed to ``extract_data_for_feeder``. The lookup only
            happens when the single-feeder address file has to be created.

    NOTE(review): none of the output paths appear to depend on
    ``feeder_number``, so files left over from a previous feeder would be
    reused -- confirm this is intended.
    """
    if not os.path.isfile(ADDRESSES_FROM_SINGLE_FEEDER):
        extract_data_for_feeder(
            ALL_FEEDERS_LIST.index(feeder_number),
            ADDRESSES_FROM_SINGLE_FEEDER)

    # Always read the addresses back, even when every later step is skipped,
    # exactly as the original did.
    addresses = segment.read_pickle_for_segmentation_file(
        ADDRESSES_FROM_SINGLE_FEEDER)

    if not os.path.isfile(DATA_FILE_FOR_WORD_SEGMENTATION):
        segment.extract_segmentation_file_from_text(
            addresses, DATA_FILE_FOR_WORD_SEGMENTATION, 1, 8)

    if not os.path.isfile(DATA_FILE_FOR_META_SEGMENTATION):
        segment.extract_segmentation_file_from_text(
            addresses, DATA_FILE_FOR_META_SEGMENTATION, 1, 8, metaphone=True)

    if not os.path.isfile(SEMI_STRUCTURED_ADDRESS_LIST):
        extract_semi_structured_addresses(
            ADDRESSES_FROM_SINGLE_FEEDER, DATA_FILE_FOR_WORD_SEGMENTATION,
            SEMI_STRUCTURED_ADDRESS_LIST)
Exemplo n.º 4
0
def create_data_for_processing(feeder_number):
  """Build whichever intermediate data files are not yet on disk.

  Each step runs only when its output file is missing, so an existing file is
  reused as-is. In order, the outputs are ``ADDRESSES_FROM_SINGLE_FEEDER``,
  ``DATA_FILE_FOR_WORD_SEGMENTATION``, ``DATA_FILE_FOR_META_SEGMENTATION``
  (same helper as the previous one, with ``metaphone=True``) and
  ``SEMI_STRUCTURED_ADDRESS_LIST``.

  Args:
    feeder_number: Value looked up in ``ALL_FEEDERS_LIST``; its index is what
      gets passed to ``extract_data_for_feeder``. The lookup only happens when
      the single-feeder address file has to be created.

  NOTE(review): none of the output paths appear to depend on
  ``feeder_number``, so files left over from a previous feeder would be
  reused -- confirm this is intended.
  """
  if not os.path.isfile(ADDRESSES_FROM_SINGLE_FEEDER):
    feeder_index = ALL_FEEDERS_LIST.index(feeder_number)
    extract_data_for_feeder(feeder_index, ADDRESSES_FROM_SINGLE_FEEDER)

  # Always read the addresses back, even when every later step is skipped,
  # exactly as the original did.
  addresses = segment.read_pickle_for_segmentation_file(ADDRESSES_FROM_SINGLE_FEEDER)

  if not os.path.isfile(DATA_FILE_FOR_WORD_SEGMENTATION):
    segment.extract_segmentation_file_from_text(
      addresses, DATA_FILE_FOR_WORD_SEGMENTATION, 1, 8)

  if not os.path.isfile(DATA_FILE_FOR_META_SEGMENTATION):
    segment.extract_segmentation_file_from_text(
      addresses, DATA_FILE_FOR_META_SEGMENTATION, 1, 8, metaphone=True)

  if not os.path.isfile(SEMI_STRUCTURED_ADDRESS_LIST):
    extract_semi_structured_addresses(
      ADDRESSES_FROM_SINGLE_FEEDER, DATA_FILE_FOR_WORD_SEGMENTATION,
      SEMI_STRUCTURED_ADDRESS_LIST)