def categorize_json(self, folder_name):
    """Split the JSON files of a folder into FSE files and domain-data files.

    Only entries whose name ends with ``.json`` are considered. Names starting
    with ``fse_data`` go to the first list, names starting with
    ``domain_data`` go to the second; everything else is ignored.

    :param folder_name: directory to scan (with or without a trailing
        path separator)
    :return: tuple ``(json_fse, json_dd)`` of lists of paths, each path being
        ``folder_name`` joined with the file name, in ``os.listdir`` order
    :raises OSError: if ``folder_name`` cannot be listed
    """
    log.info("Dividing data between raw_fse and raw_dd.")
    json_fse, json_dd = [], []
    # Scan the directory once so both lists are built from the same snapshot.
    for entry in os.listdir(folder_name):
        if not entry.endswith('.json'):
            continue
        # os.path.join inserts the separator only when folder_name lacks one,
        # so callers that already pass a trailing separator get the same path.
        if entry.startswith('fse_data'):
            json_fse.append(os.path.join(folder_name, entry))
        elif entry.startswith('domain_data'):
            json_dd.append(os.path.join(folder_name, entry))
    return json_fse, json_dd
def cleanup_json(json: str) -> str:
    """Remove codeblocks, if present.

    Handles fenced blocks (three backticks on both ends, optionally followed
    by a ``json`` or ``py`` language tag) and inline blocks (single
    backticks). Text without surrounding backticks is returned unchanged.
    Whitespace inside the fence (e.g. the newline after the tag) is kept.

    :param json: raw text that may be wrapped in a Markdown code block
    :return: the text with the surrounding backticks and language tag removed
    """
    if json.startswith("```") and json.endswith("```"):
        # str.strip(chars) treats its argument as a SET of characters, so
        # stripping "```json" / "```py" would also eat payload characters
        # such as j, s, o, n, p, y next to the fence. Strip only the
        # backticks, then drop the language tag as an exact prefix.
        body = json.strip("`")
        for tag in ("json", "py"):  # "py" is not documented but accepted too
            if body.startswith(tag):
                return body[len(tag):]
        return body
    elif json.startswith("`") and json.endswith("`"):
        # inline codeblocks
        return json.strip("`")
    return json
def convert_annotation_offline(result_dir):
    """
    Load offline annotation JSON files and group them by camera generation.

    Walks ``result_dir/<entry>/{Old_Camera,New_Camera}/<video>/`` and loads
    every ``*.json`` file found in a video directory with ``read_json``.
    A file is filed under the old camera when its file name contains
    ``'old'``, otherwise under the new camera; results are keyed by file name.

    :param result_dir: directory where the annotation results are saved
    :return: old-camera results, new-camera results, number of annotation
        files loaded (i.e. how many times images were annotated)
    """
    old_cam_ann, new_cam_ann = defaultdict(list), defaultdict(list)
    num = 0
    for entry in os.listdir(result_dir):
        for camera in ('Old_Camera', 'New_Camera'):
            camera_path = os.path.join(result_dir, entry, camera)
            for video in os.listdir(camera_path):
                video_path = os.path.join(camera_path, video)
                if not os.path.isdir(video_path):
                    continue
                for file_name in os.listdir(video_path):
                    if not file_name.endswith('.json'):
                        continue
                    ann = read_json(os.path.join(video_path, file_name))
                    # Classification is by file name, not by camera folder.
                    target = old_cam_ann if 'old' in file_name else new_cam_ann
                    target[file_name].append(ann)
                    num += 1
    return old_cam_ann, new_cam_ann, num
def strip_json(json):
    """Trim *json* to the span from its first ``{`` to its last ``}``.

    Surrounding whitespace is removed first. Text before the first opening
    brace and after the last closing brace is discarded.

    :param json: text containing a brace-delimited payload
    :return: the substring starting at ``{`` and ending at ``}``
    :raises ValueError: if a required brace cannot be found
    """
    text = json.strip()
    # str.index / str.rindex raise ValueError when the brace is missing.
    start = 0 if text.startswith("{") else text.index("{")
    text = text[start:]
    stop = len(text) if text.endswith("}") else text.rindex("}") + 1
    return text[:stop]
def strip_json(json):
    """Cut away everything outside the outermost braces of *json*.

    The input is whitespace-stripped, then anything preceding the first
    ``{`` and anything following the last ``}`` is dropped.

    :param json: text that embeds a ``{...}`` payload
    :return: the payload, beginning with ``{`` and ending with ``}``
    :raises ValueError: when an opening or closing brace is absent
    """
    trimmed = json.strip()
    if trimmed[:1] != "{":
        # Drop any preamble in front of the first opening brace.
        opening = trimmed.index("{")
        trimmed = trimmed[opening:]
    if trimmed[-1:] != "}":
        # Drop any trailer behind the last closing brace.
        closing = trimmed.rindex("}")
        trimmed = trimmed[:closing + 1]
    return trimmed
def loadFromFile():
    """Load every tweet stored as a ``.json`` file under ``data/<collectionName>``.

    Reads the module-level ``collectionName`` to locate the directory, parses
    each ``*.json`` file in it with ``json.load`` and prints progress
    messages to stdout.

    :return: list with one decoded JSON document per file, or ``None`` when
        the collection directory cannot be listed
    """
    print("Loading tweets from file...")
    directory = "data/" + collectionName
    try:
        # The loop variable must not be called ``json``: that would shadow
        # the json module used below.
        file_names = [
            name for name in os.listdir(directory) if name.endswith('.json')
        ]
    except OSError:
        # Only filesystem errors mean "directory missing/unreadable"; other
        # exceptions are programming errors and must not be hidden.
        print(
            "Cannot load tweets from specified path. No directory called: " +
            collectionName)
        return None
    tweets = []
    for file_name in file_names:
        with open(directory + "/" + file_name) as handle:
            tweets.append(json.load(handle))
    print("Done. Tweets correctly loaded")
    return tweets
def main():
    """Plot TF-IDF scores over time for the requested terms across all speeches.

    Reads the module-level ``terms_normal`` (terms to score) and ``args``
    (``args.path``: directory of speech ``.json`` files, ``args.title``: chart
    title, ``args.output``: file the chart is saved to). Each speech file is
    expected to provide the keys ``"Speech"`` and ``"Date"``.

    Exits via ``sys.exit()`` after printing a message when more than five
    terms are given, when a term is an English stop word, or when
    ``args.path`` cannot be listed.
    """
    # 1. TAKING CARE OF STOPWORDS AND NUMBER OF ARGUMENTS
    if len(terms_normal) > 5:
        print("Hey! You have used more than 5 terms, which is not allowed.")
        sys.exit()
    stop_words = set(stopwords.words('english'))
    for term in terms_normal:
        if term in stop_words:
            print(
                "Error: You have used a stop-word in the terms, which is not allowed. Try another word."
            )
            sys.exit()

    # 2.1 HANDLING THE SPEECHES
    path = args.path
    try:
        speech_files = [
            name for name in os.listdir(path) if name.endswith('.json')
        ]
    except OSError:
        print("Path not found.")
        sys.exit()

    corpus, combined_jsons = [], []
    for file_name in speech_files:
        # Open each file from the same directory that was just listed.
        with open(os.path.join(path, file_name)) as element:
            json_text = json.load(element)
        combined_jsons.append(json_text)
        corpus.append(json_text["Speech"].lower())

    # 2.2 CREATING VECTORS
    vectorizer = TfidfVectorizer(ngram_range=(1, 3), stop_words="english")
    vectors = vectorizer.fit_transform(corpus)
    vocab = vectorizer.vocabulary_

    # 2.3 CREATING DATAFRAMES
    columns = ['Date', 'Score', 'Term']
    # Dates do not depend on the term, so parse them once per speech.
    dates = [
        pd.to_datetime(speech_item['Date'], yearfirst=True)
        for speech_item in combined_jsons
    ]
    term_frames = []
    for term in terms_normal:
        # A term can occur in the text as a substring without being a
        # vocabulary entry; treat that as score 0 instead of a KeyError.
        column = vocab.get(term)
        rows = []
        for index, speech in enumerate(corpus):
            if column is not None and term in speech:
                score = vectors[index, column]
            else:
                score = float(0)
            rows.append([dates[index], score, term])
        term_frames.append(pd.DataFrame(rows, columns=columns))
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    if term_frames:
        speeches_df = pd.concat(term_frames)
    else:
        speeches_df = pd.DataFrame(columns=columns)

    # 3. MAKING CHARTS
    sns.set_theme()
    plot = sns.relplot(data=speeches_df,
                       x="Date",
                       y="Score",
                       kind="line",
                       hue="Term",
                       legend="auto")
    plt.title(args.title)
    plot.savefig(args.output)
    plt.show()
def import_jsons(json_folder_path):
    """List the names of the ``.json`` entries in a directory.

    :param json_folder_path: directory to scan
    :return: entry names (not full paths) ending in ``.json``, in the order
        reported by ``os.listdir``
    """
    json_files = []
    for entry in os.listdir(json_folder_path):
        if entry.endswith('.json'):
            json_files.append(entry)
    return json_files
def read_jsons(folder):
    """Return the names of all entries in *folder* that end with ``.json``.

    Only names are returned, not full paths and not file contents; the
    order is whatever ``os.listdir`` yields.
    """
    entries = os.listdir(folder)
    return list(filter(lambda name: name.endswith(".json"), entries))