## Shared imports assumed by the snippets below; the project-local helpers
## (common, log, appcfg, annonutils, annon_parser, create_dbcfg, save_dbcfg,
## save_parsed_data, create_db_data, save_db_data, getBasePath, mkdir_p)
## are expected to come from the surrounding codebase.
import datetime
import glob
import json
import os
import os.path as ops
import random
import shutil
import sys
import time

import tqdm
from pymongo import MongoClient


def release_dbcfg(cfg, from_path, to_path):
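    """Create the dbcfg for the data under from_path, stamp it with a timestamp and save it under to_path; returns the saved file path."""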
    tic = time.time()
    log.info("\nrelease_dbcfg:-----------------------------")
    timestamp = ("{:%d%m%y_%H%M%S}").format(datetime.datetime.now())

    base_from_path = common.getBasePath(from_path)
    log.info("base_from_path: {}".format(base_from_path))

    base_to_path = common.getBasePath(to_path)
    log.info("base_to_path: {}".format(base_to_path))

    dbcfg = create_dbcfg(cfg, base_from_path)
    dbcfg['TIMESTAMP'] = timestamp

    filepath = save_dbcfg(base_to_path, dbcfg)
    log.info('\nDone (t={:0.2f}s)\n'.format(time.time() - tic))

    return filepath
Example #2
def remove_ann(json_file):
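    """Filter a TuSimple-style line-delimited annotation file, keeping only samples whose first lane has more than min_points valid points, and write the result to <name>-filtered-<timestamp>.json next to the input."""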

    log.info("json_file : {}".format(json_file))
    new_json = []
    ## keep only samples whose first lane has more than this many valid points
    min_points = 30

    timestamp = ("{:%d%m%y_%H%M%S}").format(datetime.datetime.now())

    with open(json_file, 'r') as file:
        json_lines = file.readlines()

        for json_line in tqdm.tqdm(json_lines):
            sample = json.loads(json_line)
            lanes = sample['lanes']

            # Keep the sample if its first lane has more than `min_points`
            # valid (non -2) points; note: only the first lane is inspected.
            if not lanes:
                continue
            count = 0
            for lane_id in lanes[0]:
                if lane_id == -2:
                    continue
                count += 1
                if count > min_points:
                    new_json.append(sample)
                    break

    json_basepath = common.getBasePath(json_file)
    json_name = json_file.split('/')[-1]
    new_json_name = json_name.split('.')[0]
    with open(
            json_basepath + '/' + new_json_name + '-filtered-' + timestamp +
            '.json', 'w') as outfile:
        for items in new_json:
            # log.info("items : {}".format(items))
            json.dump(items, outfile)
            outfile.write('\n')
def get_metadata(from_path, task, subset, year):
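    """Build the metadata dict (annotation file and path, image dir, task, year) for a COCO-style dataset layout under from_path; raises if the annotation file does not exist."""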
    metadata = {
        "annotation_file": None,
        "annotation_filepath": None,
        "image_dir": None,
        "task": None,
        "year": None,
        "base_from_path": None
    }
    base_from_path = common.getBasePath(from_path)
    log.info("base_from_path: {}".format(base_from_path))

    ## TODO: fix the subset issue
    if task == "panoptic":
        annotation_file = "{}_{}{}.json".format(task + "_instances", subset,
                                                year)
        subset = task + "_" + subset
    else:
        annotation_file = "{}_{}{}.json".format(task, subset, year)

    log.info("annotation_file: {}".format(annotation_file))
    annotation_filepath = os.path.join(base_from_path, annotation_file)
    log.info("annotation_filepath: {}".format(annotation_filepath))
    if not os.path.exists(annotation_filepath):
        raise Exception(
            "File: {} does not exist!".format(annotation_filepath))

    if subset == "minival" or subset == "valminusminival":
        subset = "val"

    image_dir = "{}/{}{}".format(base_from_path, subset, year)
    log.info("image_dir: {}".format(image_dir))

    metadata['task'] = task
    metadata['year'] = year
    metadata['base_from_path'] = base_from_path
    metadata['annotation_file'] = annotation_file
    metadata['annotation_filepath'] = annotation_filepath
    metadata['image_dir'] = image_dir

    return metadata
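
A minimal usage sketch; the /data/coco layout, task, subset and year below are hypothetical and only illustrate how the annotation path and image directory get derived (assuming common.getBasePath simply normalizes the directory path):

metadata = get_metadata('/data/coco', task='instances', subset='train', year=2017)
print(metadata['annotation_filepath'])  # e.g. /data/coco/instances_train2017.json (must exist)
print(metadata['image_dir'])            # e.g. /data/coco/train2017
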
Example #4
def via_to_tusimple(json_file):
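    """Convert a VIA project JSON (polyline regions with all_points_x / all_points_y) into TuSimple-style line-delimited JSON, downscaling every point by the resize factor, and save it next to the input file."""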
    with open(json_file, 'r') as file:
        new_json = []
        timestamp = ("{:%d%m%y_%H%M%S}").format(datetime.datetime.now())
        via = json.load(file)
        for line_index, val in enumerate(via.values()):
            tusimple = {"lanes": []}
            lanes = []
            rawfile = val['filename']
            r = val["regions"]
            resize = 1.5
            for j in r:
                lane = []
                x_axis = j["shape_attributes"]["all_points_x"]
                # print("x_axis : {}".format(x_axis))
                resized_x_axis = [int(x / resize) for x in x_axis]
                # print("resized_x_axis : {}".format(resized_x_axis))
                y_axis = j["shape_attributes"]["all_points_y"]
                # print("y_axis : {}".format(y_axis))
                resized_y_axis = [int(y / resize) for y in y_axis]
                # print("resized_y_axis : {}".format(resized_y_axis))
                lane.append(resized_x_axis)
                lane.append(resized_y_axis)
                lanes.append(lane)
            tusimple["lanes"] = lanes
            tusimple["raw_file"] = rawfile
            new_json.append(tusimple)

    json_basepath = common.getBasePath(json_file)
    json_name = json_file.split('/')[-1]
    new_json_name = json_name.split('.')[0]
    with open(json_basepath + '/' + new_json_name + '-' + timestamp + '.json',
              'w') as outfile:
        for items in new_json:
            json.dump(items, outfile)
            outfile.write('\n')
    print("Done!!")
    print("Saved in path -> {}".format(json_basepath + '/' + new_json_name +
                                       '-' + timestamp + '.json'))
Example #5
def copy_images(json_file):
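  """Copy every image referenced by 'raw_file' in a line-delimited annotation JSON into a test_images folder created next to the JSON file."""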
  assert os.path.exists(json_file), '{:s} does not exist'.format(json_file)
  print("json_file : {}".format(json_file))
  base_path = getBasePath(json_file)
  # base_path = os.path.join(os.path.dirname(json_file),'')
  print("base_path : {}".format(base_path))
  path = os.path.join(base_path,'test_images')
  print("path : {}".format(path))
  mkdir_p(path)
  with open(json_file, 'r') as fr:
    json_lines = fr.readlines()
    images = []
    for json_line in json_lines:
      sample = json.loads(json_line)
      images.append(sample['raw_file'])
    print(len(images))
    for im in images:
      shutil.copy(im, path)
Example #6
def via_to_tusimple(json_file):
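  """Convert a line-delimited JSON with x_axis / y_axis / image_name fields into TuSimple-style records (lanes, raw_file) and save the result next to the input file."""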
  with open(json_file, 'r') as file:
    new_json = []
    timestamp = ("{:%d%m%y_%H%M%S}").format(datetime.datetime.now())
    json_lines = file.readlines()
    # Iterate over each image
    for json_line in json_lines:
      tusimple = {"lanes": []}
      lanes = []
      sample = json.loads(json_line)
      x_axis = sample['x_axis']
      y_axis = sample['y_axis']
      rawfile = sample['image_name']
      for i in range(len(x_axis)):
        lanes.append([x_axis[i], y_axis[i]])
      tusimple["lanes"] = lanes
      tusimple["raw_file"] = rawfile
      new_json.append(tusimple)

  json_basepath = common.getBasePath(json_file)
  json_name = json_file.split('/')[-1]
  new_json_name = json_name.split('.')[0]
  with open(json_basepath+'/'+new_json_name+'-'+timestamp+'.json','w') as outfile:
    for items in new_json:
      json.dump(items, outfile)
      outfile.write('\n')
  print("Done!!")
  print("Saved in path -> {}".format(json_basepath+'/'+new_json_name+'-'+timestamp+'.json'))
def split_train_test(json_file):
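    """Bucket samples by their number of non-empty lanes, shuffle and split each bucket 85/15, then write train-tusimple.json and test-tusimple.json next to the input file."""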
    assert ops.exists(json_file), '{:s} does not exist'.format(json_file)

    base_path = common.getBasePath(json_file)
    print("base_path : {}".format(json_file))

    train_dir = ops.join(base_path, "train")
    test_dir = ops.join(base_path, "test")
    print("train_dir : {}".format(train_dir))
    print("test_dir : {}".format(test_dir))
    # os.makedirs(train_dir, exist_ok=True)
    # os.makedirs(test_dir, exist_ok=True)

    res_lanes = {
        '0_lanes': 0,
        '1_lanes': 0,
        '2_lanes': 0,
        '3_lanes': 0,
        '4_lanes': 0,
        '5_lanes': 0,
        '6_lanes': 0
    }
    no_of_lanes = {
        "zero_lane": [],
        "one_lane": [],
        "two_lane": [],
        "three_lane": [],
        "four_lane": [],
        "five_lane": [],
        "six_lane": []
    }
    train = []
    test = []

    with open(json_file, 'r') as file:
        json_lines = file.readlines()

        # Iterate over each image
        for line_index, val in enumerate(json_lines):
            json_line = json_lines[line_index]
            sample = json.loads(json_line)
            # image_name = sample['raw_file']
            lanes = sample['lanes']
            res_lane = []

            for lane in lanes:
                # a lane counts as present if it has at least one valid (non -2) point
                if any(lane_id != -2 for lane_id in lane):
                    res_lane.append(lane)
            # Bucket the sample by its number of non-empty lanes (0..6)
            lane_keys = ["zero_lane", "one_lane", "two_lane", "three_lane",
                         "four_lane", "five_lane", "six_lane"]
            n = len(res_lane)
            if n < len(lane_keys):
                no_of_lanes[lane_keys[n]].append(json_line)
                res_lanes['{}_lanes'.format(n)] = len(no_of_lanes[lane_keys[n]])

    print(res_lanes)

    for i, k in enumerate(no_of_lanes.keys()):
        key = no_of_lanes[k]
        random.shuffle(key)
        tmp_train = []
        tmp_test = []
        split_size = int(0.85 * len(key))
        # print("split_size :{}".format(split_size))
        if split_size > 0:
            tmp_train = key[:split_size]
            tmp_test = key[split_size:]
            train.append(tmp_train)
            test.append(tmp_test)
    train = [item for sublist in train for item in sublist]
    print('train : {}'.format(len(train)))
    test = [item for sublist in test for item in sublist]
    print('test : {}'.format(len(test)))

    with open(train_dir + "-tusimple" + ".json", 'w') as outfile:
        for item1 in train:
            outfile.write(item1)

    with open(test_dir + "-tusimple" + ".json", 'w') as outfile:
        for item2 in test:
            outfile.write(item2)
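
A brief usage sketch with a hypothetical input path; the two output files are written next to the input (assuming common.getBasePath returns the input file's directory):

split_train_test('/data/tusimple/label_data.json')
# writes /data/tusimple/train-tusimple.json and /data/tusimple/test-tusimple.json
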
Example #8
def merge_ann(cfg, from_path, to_path, move_file=False):
  """copy annotation data from multiple folders to a single destination folder
  """
  tic = time.time()
  log.info("\nrelease_anndb:-----------------------------")
  timestamp = ("{:%d%m%y_%H%M%S}").format(datetime.datetime.now())

  base_from_path = common.getBasePath(from_path)
  log.info("base_from_path: {}".format(base_from_path))

  base_to_path = common.getBasePath(to_path)
  log.info("base_to_path: {}".format(base_to_path))

  ## Get only top level directories
  ## Ref:
  ## https://stackoverflow.com/questions/141291/how-to-list-only-top-level-directories-in-python
  ## https://stackoverflow.com/questions/4568580/python-glob-multiple-filetypes
  aijobs = next(os.walk(base_from_path))[1]
  aijobs_path = [os.path.join(base_from_path,x) for x in aijobs]
  exts = appcfg['ALLOWED_IMAGE_TYPE']
  files_to_copy = {x:{
    'annotations':glob.glob(os.path.join(base_from_path, x, 'annotations', '*.json'))
    # ,'images': [item for sublist in [glob.glob(os.path.join(base_from_path, x, 'images') + '/*/*'+ext) for ext in exts  for x in aijobs ] for item in sublist]
  } for x in aijobs}
  
  images_annotated = {
    'files':[]
    ,'unique':set()
    ,'not_found':set()
  }

  stats = {}
  groups = []

  IMAGE_API = cfg['IMAGE_API']
  USE_IMAGE_API = IMAGE_API['ENABLE']
  SAVE_LOCAL_COPY = True
  # NO_OF_ANNON_FILES_THRESHOLD = 5
  
  for i, x in enumerate(files_to_copy):
    log.info("\n[{}]x:-----------------------------{}".format(i,x))
    for y in files_to_copy[x]:
      log.info("y:-------{}".format(y))
      filepaths = files_to_copy[x][y]
      if y not in stats:
        stats[y] = {'count':0, 'unique':set(), 'total':0 }
        groups.append(y)

      stats[y]['total'] += len(filepaths)
      for j, src_filepath in enumerate(filepaths):
        index = -1 if y=='annotations' else -2

        filename = os.path.basename(src_filepath)
        
        ## if annotations, read the file and fetch the referenced images from it
        if y == 'annotations':
          with open(src_filepath,'r') as fr:
            ref = annonutils.parse_annon_filename(src_filepath)
            annotations = json.load(fr)
            annon_file_name = {}
            for ak,av in annotations.items():
              # imgpath, base_path_img = annonutils.getImgPath(base_from_path, ref['image_dir'])
              base_path_img = os.path.join(base_to_path, 'images', ref['image_dir'])
              filepath_img = os.path.join(base_path_img, av['filename'])
              if av['filename'] not in annon_file_name:
                annon_file_name[av['filename']] = {
                  'annotations': []
                  ,'imagename': av['filename']
                  ,'metadata': {}
                  ,'image_dir': ref['image_dir']
                }
              annon_file_name[av['filename']]['annotations'].append(av['regions'])

              if USE_IMAGE_API:
                get_img_from_url_success = annonutils.get_image_from_url(IMAGE_API, av['filename'], base_path_img, save_local_copy=SAVE_LOCAL_COPY)
                if get_img_from_url_success:
                  images_annotated['files'].append(av['filename'])
                  images_annotated['unique'].add(av['filename'])
                else:
                  images_annotated['not_found'].add(av['filename'])

        basedir = os.path.sep.join(os.path.dirname(src_filepath).split(os.path.sep)[index:])
        dst_to_basedir = os.path.join(base_to_path, basedir)

        stats[y]['unique'].add(filename)
        stats[y]['count'] += 1
        log.info("stats[y]['count']:{}, [x:j]:[{}:{}]: Exists: {} \n src_filepath: {}".format(stats[y]['count'], x, j, os.path.exists(src_filepath), src_filepath))
        log.info("basedir: {}".format(basedir))
        log.info("dst_to_basedir: {}".format(dst_to_basedir))

        # ## Ref: https://www.pythoncentral.io/how-to-copy-a-file-in-python-with-shutil/
        common.mkdir_p(dst_to_basedir)
        shutil.copy2(src_filepath, dst_to_basedir)

  for g in groups:
    stats[g]['unique'] = len(stats[g]['unique'])
  
  
  stats['images_annotated'] = {
    'files': len(images_annotated['files'])
    ,'unique': len(images_annotated['unique'])
    ,'not_found': len(images_annotated['not_found'])
  }
  log.info("\nstats: {}".format(stats))
  log.info('\nDone (t={:0.2f}s)\n'.format(time.time()- tic))
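
A usage sketch with hypothetical paths; cfg is assumed to be the project configuration dict carrying the IMAGE_API section:

# Copies every <job>/annotations/*.json found one level below from_path into
# <to_path>/annotations, optionally pulling the referenced images through the
# image API when cfg['IMAGE_API']['ENABLE'] is set.
merge_ann(cfg, '/data/aijobs', '/data/release')
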
Example #9
def release_db(cfg, args):
  """Entry point to parse VIA based annotations for creating and saving basic data structures - IMAGES, ANNOTATIONS, LABELS and related data
  Implements the DRC - Design Rule Checks and acts as a gatekeeper, also reports any possible errors
  Create data structures to be parsed in 2nd pass to create the AIDS - AI Datasets with the actual splits 

  Test Cases:
  ## /some/path/AIML_Annotation/ods_job_230119/annotations/images-p1-230119_AT1_via205_250119.json
  ## /some/path/AIML_Annotation/ods_job_230119/annotations/
  ## /some/path/AIML_Annotation/ods_job_230119/annotations/
  """

  ## Check required args
  for d in ['from_path']:
    if d not in args:
      log.info("'{}' is not present.\n".format(d))
      sys.exit(-1)
  if not os.path.exists(args.from_path):
    raise NotADirectoryError("{}".format(args.from_path))

  dbname = None
  if 'to_path' in args and not os.path.exists(args.to_path):
    dbname = args.to_path

  from_path = args.from_path

  tic = time.time()
  log.info("\nrelease_db:-----------------------------")

  base_from_path = common.getBasePath(from_path)
  log.info("base_from_path: {}".format(base_from_path))

  uuid_rel = common.createUUID('rel')

  timestamp = cfg['RELEASE']['COLS']['timestamp'] = cfg['LOG']['COLS']['timestamp'] = cfg['TIMESTAMP']
  cfg['RELEASE']['COLS']['rel_id'] = cfg['LOG']['COLS']['rel_id'] = uuid_rel

  cfg['SAVE_TO_FILE'] = False

  log.info("-------")
  log.info("cfg: {}".format(cfg))

  if os.path.isdir(from_path):
    ## normalizes and takes care of path ending with slash or not as the user input
    files = glob.glob(os.path.join(base_from_path, cfg['ANNON_FILENAME_PREFIX']+'*.json'))
  else:
    files = [from_path]

  total_files = len(files)

  log.info("-------")
  log.debug("\nfiles: {}".format(files))
  log.info("-------")
  log.info("\nTotal files to process =======>: {}".format(total_files))

  total_annon_file_processed = 0
  total_annon_file_existed = 0

  DBCFG = cfg['DBCFG']
  ANNONCFG = DBCFG['ANNONCFG']
  mclient = MongoClient('mongodb://'+ANNONCFG['host']+':'+str(ANNONCFG['port']))
  dbname = ANNONCFG['dbname'] if not dbname else dbname
  log.info("dbname: {}".format(dbname))
  db = mclient[dbname]

  rel_tblname = annonutils.get_tblname('RELEASE')
  annonutils.create_unique_index(db, rel_tblname, 'rel_id')
  rel_collection = db.get_collection(rel_tblname)

  log_tblname = annonutils.get_tblname('LOG')
  annonutils.create_unique_index(db, log_tblname, 'created_on')
  log_collection = db.get_collection(log_tblname)

  for annon_filepath in files:
    log.info("-------")
    tic2 = time.time()
    annon_filename = os.path.basename(annon_filepath)

    ## check if the file is parsed: skip the processing in normal mode of the already parsed file
    # res = log_collection.find_one({'rel_filename': annon_filename})
    res = log_collection.find_one({'rel_filepath': annon_filepath})
    
    ## TODO: in update mode
    ## delete the entries of annotations and images before inserting the values of the same file again 
    if not res:
      log.info(" annon_filename: {} \n annon_filepath: {}".format(annon_filename, annon_filepath))

      created_on  = common.now()
      cfg['RELEASE']['COLS']['created_on'] = cfg['LOG']['COLS']['created_on'] = created_on
      log.info("created_on: {}".format(created_on))

      cfg['LOG']['COLS']['rel_filename'] = annon_filename
      cfg['LOG']['COLS']['rel_filepath'] = annon_filepath
      annondata = annon_parser.parse_annon_file(cfg, annon_filepath, base_from_path)
      total_annon_file_processed += 1

      save_parsed_data(cfg, annondata, db=db)

      cfg['LOG']['COLS']['modified_on'] = None

      toc2 = time.time()
      cfg['LOG']['COLS']['total_exec_time'] = '{:0.2f}s'.format(toc2 - tic)

      ## if an exception occurs or the run terminates, save what has been processed so far in the log, instead of a one-shot update of the log outside the for loop
      ## this helps to recover from an abrupt termination and resume from the previously successfully processed file
      log_collection.update_one(
        {'created_on': created_on}
        ,{'$setOnInsert': cfg['LOG']['COLS']}
        ,upsert=True
      )

      log.info("=======> Total Execution Time: {:0.2f}s, Processed files: {}, Remaning files: {}".format(toc2 - tic2, total_annon_file_processed, total_files - total_annon_file_processed))

      ## Update the LOG table here itself
    else:
      log.info("Already Exist in Database: annon_filename: {} \n annon_filepath: {}".format(annon_filename, annon_filepath))
      log.info("Use update / delete command to process this file again")
      total_annon_file_existed += 1


  cfg['RELEASE']['COLS']['total_annon_file_processed'] = total_annon_file_processed
  # cfg['RELEASE']['COLS']['total_exec_time'] = '{:0.2f}s'.format(time.time() - tic)
  cfg['RELEASE']['COLS']['total_exec_time_in_sec'] = '{:0.2f}'.format(time.time() - tic)

  if total_annon_file_processed:
    rel_collection.update_one(
      {'rel_id': uuid_rel}
      ,{'$setOnInsert': cfg['RELEASE']['COLS']}
      ,upsert=True
    )

  log.info("total_files, total_annon_file_processed, total_annon_file_existed: {} = {} + {}".format(total_files, total_annon_file_processed, total_annon_file_existed))

  mclient.close()

  return timestamp
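
A hypothetical invocation sketch; cfg is assumed to be the loaded project configuration (with its DBCFG, ANNON_FILENAME_PREFIX, RELEASE and LOG sections) and a MongoDB instance reachable at the configured host and port:

import argparse

# a to_path that is not an existing filesystem path is used as the MongoDB database name
args = argparse.Namespace(from_path='/data/annotations', to_path='ods_rel_db')
rel_timestamp = release_db(cfg, args)
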
Example #10
def release_files(cfg, args):
  """Entry point to parse VIA based annotations for creating and saving basic data structures - IMAGES, ANNOTATIONS, LABELS and related data
  Implements the DRC - Design Rule Checks and acts as a gatekeeper, also reports any possible errors
  Create data structures to be parsed in 2nd pass to create the AIDS - AI Datasets with the actual splits 

  Test Cases:
  ## /some/path/AIML_Annotation/ods_job_230119/annotations/images-p1-230119_AT1_via205_250119.json
  ## /some/path/AIML_Annotation/ods_job_230119/annotations/
  ## /some/path/AIML_Annotation/ods_job_230119/annotations/
  """

  ## Check required args
  for d in ['from_path', 'to_path']:
    if d not in args:
      log.info("'{}' is not present.\n".format(d))
      sys.exit(-1)
  if not os.path.exists(args.from_path):
    raise NotADirectoryError("{}".format(args.from_path))
  if not os.path.exists(args.to_path):
    raise NotADirectoryError("{}".format(args.to_path))

  from_path, to_path = args.from_path, args.to_path

  tic = time.time()
  log.info("\nrelease_db:-----------------------------")
  cfg['TIMESTAMP'] = ("{:%d%m%y_%H%M%S}").format(datetime.datetime.now())

  base_from_path = common.getBasePath(from_path)
  log.info("base_from_path: {}".format(base_from_path))

  base_to_path = common.getBasePath(to_path)
  log.info("base_to_path: {}".format(base_to_path))

  cfg['LOG']['COLS']['timestamp'] = cfg['TIMESTAMP']

  ## Create Base Directories
  db_dir = os.path.join(base_to_path, cfg['BASEDIR_NAME']['DB'])
  log.info("db_dir: {}".format(db_dir))
  common.mkdir_p(db_dir)

  db_data_dir = os.path.join(db_dir, cfg['TIMESTAMP'])
  log.info("ANNDB db_data_dir: {}".format(db_data_dir))
  common.mkdir_p(db_data_dir)

  rel_dir = os.path.join(base_to_path, cfg['BASEDIR_NAME']['RELEASE'], cfg['TIMESTAMP'])
  log.info("rel_dir: {}".format(rel_dir))
  common.mkdir_p(rel_dir)

  log_dir = os.path.join(base_to_path, cfg['BASEDIR_NAME']['LOG'])
  log.info("log_dir: {}".format(log_dir))
  common.mkdir_p(log_dir)

  ant_data_dir = os.path.join(db_data_dir,cfg["BASEDIR_NAME"]["ANNON"])
  log.info("ant_data_dir: {}".format(ant_data_dir))
  common.mkdir_p(ant_data_dir)

  cfg['BASE_PATH']['DB_DIR'] = db_dir
  cfg['BASE_PATH']['DB_DATA_DIR'] = db_data_dir
  cfg['BASE_PATH']['RELEASE_DIR'] = rel_dir
  cfg['BASE_PATH']['LOG_DIR'] = log_dir
  cfg['BASE_PATH']['ANT_DATA_DIR'] = ant_data_dir

  log.info("-------")
  log.info("cfg: {}".format(cfg))

  if os.path.isdir(from_path):
    ## normalizes and takes care of path ending with slash or not as the user input
    files = glob.glob(os.path.join(base_from_path,cfg['ANNON_FILENAME_PREFIX']+'*.json'))
  else:
    files = [from_path]

  log.info("-------")
  log.info("\nfiles: {}".format(files))
  log.info("-------")
  log.info("\nTotal files to process =======>: {}".format(len(files)))

  total_annon_file_processed = 0

  log_tblname = annonutils.get_tblname('LOG')
  for annon_filepath in files:
    log.info("-------")
    tic2 = time.time()
    annon_filename = os.path.basename(annon_filepath)

    ## TODO: check if the file is parsed: skip the processing in normal mode of the already parsed file
    res = False
    
    ## TODO: in update mode
    ## delete the entries of annotations and images before inserting the values of the same file again 
    if not res:
      log.info(" annon_filename: {} \n annon_filepath: {}".format(annon_filename, annon_filepath))

      created_on = cfg['LOG']['COLS']['created_on'] = common.now()
      log.info("created_on: {}".format(created_on))

      cfg['LOG']['COLS']['rel_filename'] = annon_filename
      cfg['LOG']['COLS']['rel_filepath'] = annon_filepath
      annondata = annon_parser.parse_annon_file(cfg, annon_filepath, base_from_path)
      total_annon_file_processed += 1

      ## if annon_filepath is an absolute path, the base path gets ignored by os.path.join
      ## and dst_dir would end up being the file's own directory, e.g.:
      ## dst_dir = os.path.join(base_from_path, os.path.splitext(annon_filepath)[0])
      ## dst_dir = os.path.join(db_dir, os.path.splitext(annon_filepath)[0])

      dst_dir = os.path.join(rel_dir, os.path.splitext(annon_filename)[0])
      ## log.info("dst_dir: {}".format(dst_dir))
      common.mkdir_p(dst_dir)
      save_parsed_data(cfg, annondata, dst_dir=dst_dir, ant_data_dir=ant_data_dir, annon_filepath=annon_filepath)

      cfg['LOG']['COLS']['modified_on'] = None

      toc2 = time.time()
      total_exec_time = '{:0.2f}s'.format(toc2 - tic)
      cfg['LOG']['COLS']['total_exec_time'] = total_exec_time

      ##TODO:
      ## if an exception occurs or the run terminates, save what has been processed so far in the log, instead of a one-shot update of the log outside the for loop
      ## this helps to recover from an abrupt termination and resume from the previously successfully processed file

      log.info("=======> Total Execution Time: {:0.2f}s, Processed files: {}, Remaning files: {}".format(toc2 - tic2, total_annon_file_processed, len(files) - total_annon_file_processed))

      ## Update the LOG table here itself
    else:
      log.info("Already Exist in Database: annon_filename: {} \n annon_filepath: {}".format(annon_filename, annon_filepath))
      log.info("Use update / delete command to process this file again")

  ## Every execution of the script is a release
  ## For every release, recreate the entire database either for directory or specific file release
  
  ## create and save db data i.e. consolidated data with index structure
  db_data = create_db_data(cfg, rel_dir)

  save_db_data(cfg, db_dir, db_data)

  log.info("total_annon_file_processed: {}".format(total_annon_file_processed))

  return db_data_dir
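
A hypothetical invocation sketch; both paths must already exist, and cfg is assumed to carry the BASEDIR_NAME, BASE_PATH, ANNON_FILENAME_PREFIX and LOG sections used above:

import argparse

args = argparse.Namespace(from_path='/data/annotations', to_path='/data/release')
db_data_dir = release_files(cfg, args)  # returns the timestamped ANNDB data directory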