def __init__(self, task, pov_replace_unk):
    """Initialize the code-generation scorer.

    Sets up a BLEU smoothing function, downloads the NLTK ``wordnet``
    data (quietly), and selects a language-specific parse method based
    on the dataset name taken from ``task.args.data``.

    Args:
        task: Task object; ``task.args.data`` is a dataset spec string
            whose part before the first ``:`` is a path whose final
            component names the dataset (e.g. "java-...", "python-...").
        pov_replace_unk: Stored on the instance as-is; consumed
            elsewhere in this class (semantics not visible here).

    Raises:
        RuntimeError: If the dataset name starts with neither "java"
            nor "python".
    """
    import nltk
    from nltk.translate.bleu_score import SmoothingFunction

    self._smooth_fn = SmoothingFunction().method4
    nltk.download('wordnet', quiet=True)
    self.pov_replace_unk = pov_replace_unk

    # Dataset name = last path component of the part before the first ':'.
    dataset_name = PurePosixPath(task.args.data.split(':')[0]).name
    if dataset_name.startswith('java'):
        # javalang is only required for Java datasets, so import lazily.
        import javalang
        self._javalang = javalang
        self._parse_method = self._parse_java
    elif dataset_name.startswith('python'):
        self._javalang = None
        self._parse_method = self._parse_python
        # To fix the bug of the original dataset (a temporary solution).
        # NOTE(review): this attribute is only set on the python branch;
        # confirm no java-path code reads _fix_python_orig_bug.
        self._fix_python_orig_bug = dataset_name.endswith('orig')
    else:
        raise RuntimeError(
            'code generation scorer only supports Java and Python for now; '
            'dataset name must start with "java" or "python"')
    self.reset()
def parseBed2Json(bedfile):
    """Convert a tab-separated BED file into a JSON file.

    The first line of the BED file is treated as the header; each
    subsequent line with the same number of columns as the header
    becomes one dict (header -> value). Rows with a different column
    count are silently skipped. The result is written as an indented
    JSON array to "<basename>.json" in the current working directory
    (a trailing ".bed" extension is replaced rather than appended).

    Args:
        bedfile: Path to the input BED file.

    Prints an error message instead of raising if the file is missing.
    """
    try:
        # Resolve to an absolute path so the output name is derived
        # from the file's basename regardless of how it was given.
        my_abs_path_file = Path(bedfile).resolve()

        json_data = []
        # Context manager guarantees the input handle is closed
        # (the original leaked it via open(...).readlines()).
        with open(bedfile, 'r') as infile:
            lines = infile.readlines()

        header = lines.pop(0).strip('\n').split("\t")
        for line in lines:
            data = line.strip('\n').split("\t")
            # Keep only complete rows matching the header width.
            if len(data) == len(header):
                json_data.append(dict(zip(header, data)))

        # Output filename: strip a trailing ".bed" if present.
        # Slicing off the suffix (instead of split(".bed")[0]) avoids
        # truncating names that contain ".bed" in the middle.
        filename = PurePosixPath(my_abs_path_file).name
        if filename.endswith('.bed'):
            json_file = filename[:-len('.bed')] + ".json"
        else:
            json_file = filename + ".json"

        with open(json_file, 'w') as outfile:
            json.dump(json_data, outfile, indent=4)
    except FileNotFoundError:
        print("your file does not exist")