def social_reconstruction(keyword_list_file, relations_file): keyword_list = csv_io.read_csv(keyword_list_file) relations = json_io.read_json(relations_file) relation_graph = {'nodes': [], 'links': []} node_index = {} index = 0 for keyword in keyword_list: if keyword not in node_index: relation_graph['nodes'].append({'name': keyword, 'group': index, 'ID': index}) node_index[keyword] = index index += 1 for name, relation in relations.iteritems(): #total = sum(relation.values()) for person in relation: #if total != 0 and (float(relation[person]) / total > (1.0/len(relation)) - 0.03 ): relation_graph['links'].append({'source': node_index[name], 'target': node_index[person], 'value': relation[person], 'label': person }) relation_graph['links'].append({'source': node_index[person], 'target': node_index[name], 'value': relation[person], 'label': name }) print relation_graph json_io.write_json('output/result/relation_graph.json', relation_graph)
def face_recongnition(position_merge_file): frame_list = json_io.read_json(position_merge_file) # Read face image for frame in frame_list: img_name = frame_list[frame]['keyword'].encode('utf8') + str(frame_list[frame]['frame_position']) + '.jpg' frame_list[frame]['img'] = cv2.imread( OUTPUT_PATH + "img/" + img_name , 0) # transforamt to keyword as key keyword_list = {} for frame in frame_list: keyword = frame_list[frame]['keyword'] face_id = frame_list[frame]['face_id'] if keyword not in keyword_list: keyword_list[keyword] = {} if face_id not in keyword_list[keyword]: keyword_list[keyword][face_id] = [] keyword_list[keyword][face_id].append(frame_list[frame]) for keyword, frame_list in keyword_list.iteritems(): print keyword for frame in frame_list: for face in frame_list[frame]: print face['ID'], print global detector global matcher detector, matcher = cv_face.init_feature('orb') threadLock = threading.Lock() thread_count = 0 threads = [] match_rate = {} for keyword, frame_list in keyword_list.iteritems(): thread = Pthread(thread_count, 'Thread-'+str(thread_count), frame_list, threadLock) thread.start() threads.append(thread) thread_count += 1 # wait all threads complete for thread in threads: thread.join() for keyword, frame_list in keyword_list.iteritems(): print keyword for frame in frame_list: for face in frame_list[frame]: if 'img' in face: del face['img'] print face['ID'], print json_io.write_json('output/face_recongnition.json', keyword_list)
def build_bipartite_graph(keyword_dic_file):
    """Normalise the keyword dictionary and save it with its pair form.

    The dictionary read from ``keyword_dic_file`` is passed through
    ``weight_normalize``; the result of ``to_pair`` on it is written to
    ``OUTPUT_PATH + 'pair_graph.json'`` and the normalised dictionary
    itself to ``OUTPUT_PATH + 'single_graph.json'``.
    """
    single_graph = weight_normalize(json_io.read_json(keyword_dic_file))
    pair_graph = to_pair(single_graph)

    outputs = (
        ('pair_graph.json', pair_graph),
        ('single_graph.json', single_graph),
    )
    for file_name, graph in outputs:
        json_io.write_json(OUTPUT_PATH + file_name, graph)
def relationship_minig(min_votes, iter_stop): single_graph_file ='scripts/output/single_graph.json' pair_graph_file = 'scripts/output/pair_graph.json' social_graph_file = 'scripts/output/single_graph.json' dir_file = 'scripts/input/dir_rel.json' clip_file = 'scripts/input/clip_rel.json' single_graph = json_io.read_json(single_graph_file) pair_graph = json_io.read_json(pair_graph_file) bi_graph, social_graph = graph_init(single_graph, pair_graph, social_graph_file, dir_file, clip_file) output_graph = {'nodes':[], "links":[]} node_dic = {} change = True itr = 0 # iterator algorithm1 while change: role_pair, dominant_keyword, votes = bi_graph.dominant_pair() if role_pair is None: break source, target, dir_prob = bi_graph.get_direction(role_pair, dominant_keyword) valid_tag = valid_checking(social_graph, source, target, dominant_keyword) if source not in node_dic: node_dic[source] = len(node_dic) output_graph['nodes'].append({"group": node_dic[source], "name": source, "ID": node_dic[source]}) if target not in node_dic: node_dic[target] = len(node_dic) output_graph['nodes'].append({"group": node_dic[target], "name": target, "ID": node_dic[target]}) if valid_tag != False and votes >= int(min_votes): if type(valid_tag) != unicode: print source, '-->', dominant_keyword, '-->', target add_links(output_graph, source, target, dominant_keyword, votes, node_dic) social_graph.relationship_tagging(source, target, dominant_keyword, votes) else: print source, '-->', valid_tag, '-->', target add_links(output_graph, source, target, valid_tag, votes, node_dic) social_graph.relationship_tagging(source, target, valid_tag, votes) print votes, dir_prob bi_graph.update_weighting(role_pair, dominant_keyword) if valid_tag: bi_graph.remove_keyword(role_pair, dominant_keyword) else: bi_graph.remove_edges(role_pair, dominant_keyword) itr += 1 if itr == int(iter_stop): break json_io.write_json('result/social_graph.json', output_graph) social_graph.clear() social_graph.shutdown()
def movie_prosessing(movie_file, two_entity_file, search_result_file): two_entity_set = json_io.read_json(two_entity_file) keyword_search_result = csv_io.read_csv(search_result_file) # load video videoInput = cv2.VideoCapture(movie_file) # crate a start_frame to end_frame dictionary for two_entity_set look up start_end = {} for row in keyword_search_result: start_frame, end_frame = time_format.to_frame(row) while start_frame in start_end: start_frame = start_frame + 0.001 while end_frame in start_end: end_frame = end_frame + 0.001 start_end[start_frame] = end_frame frame = {} face_count = 0 for keyword in two_entity_set: for start_frame in two_entity_set[keyword]: frame_position = int(start_frame) - 24 * 10 finish_frame = start_end[start_frame] + 24 * 10 while frame_position <= finish_frame: print keyword videoInput.set(cv2.cv.CV_CAP_PROP_POS_FRAMES, frame_position) flag, img = videoInput.read() gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) gray = cv2.equalizeHist(gray) face_position_list, rects = cv_image.face_detect(gray, frame_position, (85, 85)) #face_position_list, rects = faceDetection(gray, frame_position) if 0xFF & cv2.waitKey(5) == 27: cv2.destroyAllWindows() sys.exit(1) if len(face_position_list) == 1: print 'detected' image_name = keyword + str(frame_position) + '.jpg' cv_image.output_image(rects, img, OUTPUT_PATH + '/img/' + image_name) for face_position in face_position_list: face_count += 1 print face_count frame[face_count] = { 'keyword' : keyword, 'face_position': face_position.tolist(), 'ID' : face_count, 'frame_position': frame_position, 'face_id': face_count} frame_position += FRAME_INTERVAL #close video videoInput.release() json_io.write_json(OUTPUT_PATH + 'frame.json', frame)
def tf(terms, output_path):
    """Compute relative term frequencies and write them as JSON.

    Each distinct term is mapped to ``occurrences / len(terms)``.  The
    previous version divided a loop-local variable only, so the file
    received raw counts instead of frequencies.

    Args:
        terms: sequence of hashable terms; may be empty (an empty mapping
            is then written).
        output_path: prefix of the output file name.
    """
    term_tf = {}
    for term in terms:
        term_tf[term] = term_tf.get(term, 0.0) + 1.0

    # Store the normalised value back into the mapping.  Counts are floats,
    # so this is true division on Python 2 as well.
    length = len(terms)
    for term in term_tf:
        term_tf[term] = term_tf[term] / length

    # NOTE(review): ``doc`` is not defined in this function; it is
    # presumably a module-level name -- confirm, otherwise this raises
    # NameError.
    json_io.write_json(output_path + doc + '.json', term_tf)
def tf(terms, output_path):
    """Compute relative term frequencies and write them as JSON.

    Each distinct term is mapped to ``occurrences / len(terms)``.  The
    previous version divided a loop-local variable only, so the file
    received raw counts instead of frequencies.

    Args:
        terms: sequence of hashable terms; may be empty (an empty mapping
            is then written).
        output_path: prefix of the output file name.
    """
    term_tf = {}
    for term in terms:
        term_tf[term] = term_tf.get(term, 0.0) + 1.0

    # Store the normalised value back into the mapping.  Counts are floats,
    # so this is true division on Python 2 as well.
    length = len(terms)
    for term in term_tf:
        term_tf[term] = term_tf[term] / length

    # NOTE(review): ``doc`` is not defined in this function; it is
    # presumably a module-level name -- confirm, otherwise this raises
    # NameError.
    json_io.write_json(output_path + doc + '.json', term_tf)
def position_merge(frame_file): frame = json_io.read_json(frame_file) keys = frame.keys() for i in range(0, len(frame)): for j in range(i+1, len(frame)): if is_near( frame[keys[i]], frame[keys[j]] ): frame[keys[j]]['face_id'] = frame[keys[i]]['face_id'] print 111 json_io.write_json(OUTPUT_PATH + 'merge_position.json', frame)
def init_docs(document_list, doc_foldr='data/'):
    """Create a ``Document`` per entry and save the name/id lookup tables.

    Ids are assigned from 1 in list order.  The name -> id table is written
    to ``output/doc_hash.json`` and the id -> name table to
    ``output/id_hash.json``.

    Args:
        document_list: iterable of document names.
        doc_foldr: folder argument forwarded to ``Document``.

    Returns:
        The list of created ``Document`` objects, in input order.
    """
    documents = []
    name_to_id = {}
    id_to_name = {}

    for number, name in enumerate(document_list, 1):
        documents.append(Document(number, name, doc_foldr))
        name_to_id[name] = number
        id_to_name[number] = name

    json_io.write_json('output/doc_hash.json', name_to_id)
    json_io.write_json('output/id_hash.json', id_to_name)
    return documents
def find_relation(keyword_list_file, search_result_file, time_interval): time_to_keyword = csv_io.read_csv(search_result_file) keyword_list = csv_io.read_csv(keyword_list_file) leading_keyword = keyword_list[0] frame_to_keyword = {} for row in time_to_keyword: start_frame, end_frame = time_format.to_frame(row) while start_frame in frame_to_keyword: start_frame = start_frame + 0.001 while end_frame in frame_to_keyword: end_frame = end_frame + 0.001 frame_to_keyword[start_frame] = row[1] # Transfrom to timeline format frame_list = frame_to_keyword.keys() frame_list.sort() relations = {} for i in range(1, len(keyword_list)): relations.update( {keyword_list[i] : count_ralation(keyword_list[i], frame_list, frame_to_keyword, time_interval)} ) count = 0 proper_relation = {} for name, relation in relations.iteritems(): total = sum(relation.values()) proper_relation[name] = {} print name, for person in relation: if proper_test(total, leading_keyword, person, relation): proper_relation[name][person] = relation[person] print person , relation[person], count += 1 print print str(time_interval/(24*60)) + ',' + str(count) json_io.write_json('output/relations.json', proper_relation)
pages_dic[movie_title] = curr_count return pages, graph def get_movie_title(page): soup = BeautifulSoup(page) movie_title = soup.find('h1', class_='header') \ .find('span', class_='itemprop') \ .get_text() return soup, movie_title def http_get(url): return urllib2.urlopen(url).read() if __name__=='__main__': if len(sys.argv) != 2: print >> sys.stderr, "Usage: <file>" exit(-1) f = open(sys.argv[1]) urls = f.readlines() pages, graph = movie_graph(urls) f.close() json_io.write_json('dataset/imdb.json', pages) json_io.write_json('dataset/graph_9.json', graph)
def video_processing(movie_file, role_list_file, search_result_file,
                     role_input_way):
    """Sample the video around each keyword hit and record which roles appear.

    Each search-result row supplies a start frame (``row[0]``), an end
    frame (``row[1]``) and a keyword (``row[2]``).  The frames from
    ``round(start) + 24 * BACKWORD_EXPAND_TIME`` to
    ``round(end) + 24 * FORWORD_EXPAND_TIME`` are visited in steps of
    ``FRAME_INTERVAL``.  When faces are found, the frame is saved and each
    face is assigned a role.  The result maps
    ``'<keyword>_t<keyword_id>'`` -> role name -> record and is written to
    ``OUTPUT_PATH + 'keywordt_roles.json'``.

    The first role stored for a keyword occurrence gets ``'speaker': True``;
    further roles get ``False``; a repeated role only has its ``'weight'``
    incremented.

    NOTE(review): with ``role_input_way == 'auto'`` the face loop breaks
    immediately, so the ``role_identify`` call below is never executed and
    no roles are recorded in that mode -- confirm this is intended.
    """
    # load frame-keyword files
    keyword_search_result = csv_io.read_csv(search_result_file)
    role_list = csv_io.read_csv(role_list_file)
    # load video
    videoInput = cv2.VideoCapture(movie_file)
    frame = {}
    keyword_id = 0
    frame_number = 0
    for row in keyword_search_result:
        start_frame, end_frame, keyword = float(row[0]), float(row[1]), row[2]
        frame_position = round(start_frame) + 24 * BACKWORD_EXPAND_TIME
        finish_frame = round(end_frame) + 24 * FORWORD_EXPAND_TIME
        keyword_id += 1
        # Unique key per keyword occurrence, e.g. '<keyword>_t3'.
        keyword_time = keyword + '_t' + str(keyword_id)
        while frame_position <= finish_frame:
            face_position_list, rects, img = frame_caputre(
                videoInput, frame_position)
            if len(face_position_list) >= 1:
                print "detect face..."
                image_name = OUTPUT_PATH + 'img/' + keyword_time + str(
                    frame_number)
                # (a former guard `if role_input_way == 0:` is disabled)
                cv_image.output_image(rects, img, image_name)
                count = 0
                for face_position in face_position_list:
                    if role_input_way == 'auto':
                        break
                        # Unreachable: the break above leaves the loop first.
                        role_name = role_identify(
                            image_name + '-' + str(count) + '.jpg', role_list)
                    else:
                        role_name = role_input(role_list)
                    count += 1
                    # -1 means no role was given for this face; skip it.
                    if role_name == -1:
                        continue
                    else:
                        if keyword_time not in frame:
                            print keyword_time, role_name
                            frame[keyword_time] = {}
                            frame[keyword_time][role_name] = {
                                'keyword': keyword,
                                'face_position': face_position.tolist(),
                                'frame_position': frame_position,
                                'keyword_id': keyword_id,
                                'weight': 1,
                                'speaker': True
                            }
                        elif role_name in frame[keyword_time]:
                            frame[keyword_time][role_name]['weight'] += 1
                        else:
                            frame[keyword_time][role_name] = {
                                'keyword': keyword,
                                'face_position': face_position.tolist(),
                                'frame_position': frame_position,
                                'keyword_id': keyword_id,
                                'weight': 1,
                                'speaker': False
                            }
            # frame_number feeds the saved image name.
            frame_number += 1
            frame_position += FRAME_INTERVAL
    # close video
    videoInput.release()
    json_io.write_json(OUTPUT_PATH + 'keywordt_roles.json', frame)
if j > 0: graph[node].append(node - 1) if j < (grid_size): graph[node].append(node + 1) node += 1 return graph def rewire_edge(graph, prob): for node, out_nodes in graph.iteritems(): if random.uniform(0, 1) < prob: pick = random.randint(0, len(out_nodes)-1) new_edge = random.randint(1, len(graph.keys())) while new_edge in out_nodes or new_edge == node: new_edge = random.randint(1, len(graph.keys())) out_nodes.append(new_edge) out_nodes.pop(pick) return graph if __name__=='__main__': p1 = 0.2 p2 = 0.8 lp_graph = get_lp_graph(4) rewire_graph1 = rewire_edge(lp_graph.copy(), p1) rewire_graph2 = rewire_edge(lp_graph.copy(), p2) json_io.write_json('dataset/graph_7.json', rewire_graph1) json_io.write_json('dataset/graph_8.json', rewire_graph2)
if j < (grid_size): graph[node].append(node + 1) node += 1 return graph def rewire_edge(graph, prob): for node, out_nodes in graph.iteritems(): if random.uniform(0, 1) < prob: pick = random.randint(0, len(out_nodes) - 1) new_edge = random.randint(1, len(graph.keys())) while new_edge in out_nodes or new_edge == node: new_edge = random.randint(1, len(graph.keys())) out_nodes.append(new_edge) out_nodes.pop(pick) return graph if __name__ == '__main__': p1 = 0.2 p2 = 0.8 lp_graph = get_lp_graph(4) rewire_graph1 = rewire_edge(lp_graph.copy(), p1) rewire_graph2 = rewire_edge(lp_graph.copy(), p2) json_io.write_json('dataset/graph_7.json', rewire_graph1) json_io.write_json('dataset/graph_8.json', rewire_graph2)
def video_processing(movie_file, role_list_file, search_result_file, role_input_way):
    """Sample the video around each keyword hit and record which roles appear.

    Each search-result row supplies a start frame (``row[0]``), an end
    frame (``row[1]``) and a keyword (``row[2]``).  The frames from
    ``round(start) + 24 * BACKWORD_EXPAND_TIME`` to
    ``round(end) + 24 * FORWORD_EXPAND_TIME`` are visited in steps of
    ``FRAME_INTERVAL``.  When faces are found, the frame is saved and each
    face is assigned a role.  The result maps
    ``'<keyword>_t<keyword_id>'`` -> role name -> record and is written to
    ``OUTPUT_PATH + 'keywordt_roles.json'``.

    The first role stored for a keyword occurrence gets ``'speaker': True``;
    further roles get ``False``; a repeated role only has its ``'weight'``
    incremented.

    NOTE(review): with ``role_input_way == 'auto'`` the face loop breaks
    immediately, so the ``role_identify`` call below is never executed and
    no roles are recorded in that mode -- confirm this is intended.
    """
    # load frame-keyword files
    keyword_search_result = csv_io.read_csv(search_result_file)
    role_list = csv_io.read_csv(role_list_file)
    # load video
    videoInput = cv2.VideoCapture(movie_file)
    frame = {}
    keyword_id = 0
    frame_number = 0
    for row in keyword_search_result:
        start_frame, end_frame, keyword = float(row[0]), float(row[1]), row[2]
        frame_position = round(start_frame) + 24 * BACKWORD_EXPAND_TIME
        finish_frame = round(end_frame) + 24 * FORWORD_EXPAND_TIME
        keyword_id += 1
        # Unique key per keyword occurrence, e.g. '<keyword>_t3'.
        keyword_time = keyword + '_t' + str(keyword_id)
        while frame_position <= finish_frame:
            face_position_list, rects, img = frame_caputre(videoInput, frame_position)
            if len(face_position_list) >= 1:
                print "detect face..."
                image_name = OUTPUT_PATH + 'img/' + keyword_time + str(frame_number)
                # (a former guard `if role_input_way == 0:` is disabled)
                cv_image.output_image(rects, img, image_name)
                count = 0
                for face_position in face_position_list:
                    if role_input_way == 'auto':
                        break
                        # Unreachable: the break above leaves the loop first.
                        role_name = role_identify( image_name + '-' + str(count) + '.jpg', role_list)
                    else:
                        role_name = role_input(role_list)
                    count += 1
                    # -1 means no role was given for this face; skip it.
                    if role_name == -1:
                        continue
                    else:
                        if keyword_time not in frame:
                            print keyword_time, role_name
                            frame[keyword_time] = {}
                            frame[keyword_time][role_name] = {'keyword' : keyword,
                                                              'face_position' : face_position.tolist(),
                                                              'frame_position' : frame_position,
                                                              'keyword_id' : keyword_id,
                                                              'weight' : 1,
                                                              'speaker': True}
                        elif role_name in frame[keyword_time]:
                            frame[keyword_time][role_name]['weight'] += 1
                        else:
                            frame[keyword_time][role_name] = {'keyword' : keyword,
                                                              'face_position' : face_position.tolist(),
                                                              'frame_position' : frame_position,
                                                              'keyword_id' : keyword_id,
                                                              'weight' : 1,
                                                              'speaker': False}
            # frame_number feeds the saved image name.
            frame_number += 1
            frame_position += FRAME_INTERVAL
    # close video
    videoInput.release()
    json_io.write_json(OUTPUT_PATH + 'keywordt_roles.json', frame)
def relationship_minig(min_votes, iter_stop): single_graph_file = 'scripts/output/single_graph.json' pair_graph_file = 'scripts/output/pair_graph.json' social_graph_file = 'scripts/output/single_graph.json' dir_file = 'scripts/input/dir_rel.json' clip_file = 'scripts/input/clip_rel.json' single_graph = json_io.read_json(single_graph_file) pair_graph = json_io.read_json(pair_graph_file) bi_graph, social_graph = graph_init(single_graph, pair_graph, social_graph_file, dir_file, clip_file) output_graph = {'nodes': [], "links": []} node_dic = {} change = True itr = 0 # iterator algorithm1 while change: role_pair, dominant_keyword, votes = bi_graph.dominant_pair() if role_pair is None: break source, target, dir_prob = bi_graph.get_direction( role_pair, dominant_keyword) valid_tag = valid_checking(social_graph, source, target, dominant_keyword) if source not in node_dic: node_dic[source] = len(node_dic) output_graph['nodes'].append({ "group": node_dic[source], "name": source, "ID": node_dic[source] }) if target not in node_dic: node_dic[target] = len(node_dic) output_graph['nodes'].append({ "group": node_dic[target], "name": target, "ID": node_dic[target] }) if valid_tag != False and votes >= int(min_votes): if type(valid_tag) != unicode: print source, '-->', dominant_keyword, '-->', target add_links(output_graph, source, target, dominant_keyword, votes, node_dic) social_graph.relationship_tagging(source, target, dominant_keyword, votes) else: print source, '-->', valid_tag, '-->', target add_links(output_graph, source, target, valid_tag, votes, node_dic) social_graph.relationship_tagging(source, target, valid_tag, votes) print votes, dir_prob bi_graph.update_weighting(role_pair, dominant_keyword) if valid_tag: bi_graph.remove_keyword(role_pair, dominant_keyword) else: bi_graph.remove_edges(role_pair, dominant_keyword) itr += 1 if itr == int(iter_stop): break json_io.write_json('result/social_graph.json', output_graph) social_graph.clear() social_graph.shutdown()
nodes.append(str(i)) graph = Graph(graph, nodes) s_rank = datetime.datetime.now() rank = page_rank(graph, 20, 0.85) e_rank = datetime.datetime.now() s_hits = datetime.datetime.now() auth, hubs = hits(graph, 20) sorted_auth = sorted(auth.items(), key=operator.itemgetter(1)) sorted_hubs = sorted(hubs.items(), key=operator.itemgetter(1)) e_hits = datetime.datetime.now() print rank print auth print hubs output_path = 'dist/' + sys.argv[1].split('/')[1][:-4] if sys.argv[1][-4:] != 'json': sim = sim_rank(graph) np.savetxt(output_path + '_sim_rank', sim, fmt='%.2e') f.close() json_io.write_json(output_path + '_rank.json', rank) json_io.write_json(output_path + '_auth.json', auth) json_io.write_json(output_path + '_hubs.json', hubs) t_rank = e_rank - s_rank t_hits = e_hits - s_hits print t_rank.microseconds print t_hits.microseconds