def social_reconstruction(keyword_list_file, relations_file):
    keyword_list = csv_io.read_csv(keyword_list_file)
    relations = json_io.read_json(relations_file)

    # Build one node per keyword, then symmetric links for every relation
    relation_graph = {'nodes': [], 'links': []}
    node_index = {}
    index = 0
    for keyword in keyword_list:
        if keyword not in node_index:
            relation_graph['nodes'].append({'name': keyword, 'group': index, 'ID': index})
            node_index[keyword] = index
            index += 1
    for name, relation in relations.iteritems():
        #total = sum(relation.values())
        for person in relation:
            #if total != 0 and (float(relation[person]) / total > (1.0 / len(relation)) - 0.03):
            relation_graph['links'].append({'source': node_index[name],
                                            'target': node_index[person],
                                            'value': relation[person],
                                            'label': person})
            relation_graph['links'].append({'source': node_index[person],
                                            'target': node_index[name],
                                            'value': relation[person],
                                            'label': name})
    print relation_graph
    json_io.write_json('output/result/relation_graph.json', relation_graph)
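# A minimal sketch of the inputs social_reconstruction() expects, inferred from
# the loops above; the file names and values are illustrative assumptions, not
# files shipped with the project.
#
#   keyword_list.csv -> one keyword (node name) per row, e.g. Alice
#   relations.json   -> {"Alice": {"Bob": 3, "Carol": 1}, ...}
#
#   social_reconstruction('input/keyword_list.csv', 'input/relations.json')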
def face_recongnition(position_merge_file):
    frame_list = json_io.read_json(position_merge_file)

    # Attach the cropped face image to every frame record
    for frame in frame_list:
        img_name = frame_list[frame]['keyword'].encode('utf8') \
            + str(frame_list[frame]['frame_position']) + '.jpg'
        frame_list[frame]['img'] = cv2.imread(OUTPUT_PATH + "img/" + img_name, 0)

    # Re-index the flat frame list as keyword -> face_id -> [faces]
    keyword_list = {}
    for frame in frame_list:
        keyword = frame_list[frame]['keyword']
        face_id = frame_list[frame]['face_id']
        if keyword not in keyword_list:
            keyword_list[keyword] = {}
        if face_id not in keyword_list[keyword]:
            keyword_list[keyword][face_id] = []
        keyword_list[keyword][face_id].append(frame_list[frame])

    for keyword, frame_list in keyword_list.iteritems():
        print keyword
        for frame in frame_list:
            for face in frame_list[frame]:
                print face['ID'],
            print

    global detector
    global matcher
    detector, matcher = cv_face.init_feature('orb')

    # Run the matching for each keyword in its own thread
    threadLock = threading.Lock()
    thread_count = 0
    threads = []
    for keyword, frame_list in keyword_list.iteritems():
        thread = Pthread(thread_count, 'Thread-' + str(thread_count), frame_list, threadLock)
        thread.start()
        threads.append(thread)
        thread_count += 1

    # Wait until all threads complete
    for thread in threads:
        thread.join()

    # Drop the image buffers before serializing the result to JSON
    for keyword, frame_list in keyword_list.iteritems():
        print keyword
        for frame in frame_list:
            for face in frame_list[frame]:
                if 'img' in face:
                    del face['img']
                print face['ID'],
            print

    json_io.write_json('output/face_recongnition.json', keyword_list)
def build_bipartite_graph(keyword_dic_file):
    keyword_dic = json_io.read_json(keyword_dic_file)
    keyword_dic = weight_normalize(keyword_dic)
    pair_bipartite_graph = to_pair(keyword_dic)
    json_io.write_json(OUTPUT_PATH + 'pair_graph.json', pair_bipartite_graph)
    json_io.write_json(OUTPUT_PATH + 'single_graph.json', keyword_dic)
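# Usage sketch; the input file name is an assumption, and weight_normalize()
# and to_pair() are helpers defined elsewhere in this module.
#
#   build_bipartite_graph('scripts/output/keyword_dic.json')
#
# Afterwards OUTPUT_PATH holds single_graph.json (normalized keyword weights)
# and pair_graph.json (the pairwise bipartite projection consumed by
# relationship_minig()).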
def to_db(mydb, term_id, document_list, doc_hash, input_dir):
    for doc in document_list:
        terms_tf = json_io.read_json(input_dir + doc)
        for term, tf in terms_tf.iteritems():
            # Strip quotes and clamp overlong terms so they fit the column
            term = term.replace("'", "")
            if len(term) > 255:
                term = term[:254]
            # doc[:-5] drops the '.json' extension to recover the title
            sql = "INSERT INTO doc_lookups (doc_id,title,tf,term_id) VALUES (" \
                + "'" + str(doc_hash[doc[:-5]]) + "','" + doc[:-5] + "','" \
                + str(tf) + "','" + str(term_id[term]) + "');"
            mydb.exe_sql(sql)
def idf(input_dir):
    # Relies on the module-level document_list built in __main__
    term_idf = {}
    for doc in document_list:
        terms = json_io.read_json(input_dir + doc)
        for term in terms:
            if term not in term_idf:
                term_idf[term] = []
            # Record the document, so len(term_idf[term]) is the document frequency
            term_idf[term].append(doc)
    return term_idf
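# A sketch of how the document lists collected by idf() would feed the usual
# idf weighting; the log form is a standard choice and an assumption here,
# since the snippet does not show the project's exact formula.
#
#   import math
#   term_doc_list = idf(input_dir)
#   doc_number = len(document_list)
#   idf_score = dict((term, math.log(float(doc_number) / len(docs)))
#                    for term, docs in term_doc_list.iteritems())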
def movie_prosessing(movie_file, two_entity_file, search_result_file):
    two_entity_set = json_io.read_json(two_entity_file)
    keyword_search_result = csv_io.read_csv(search_result_file)

    # Load the video
    videoInput = cv2.VideoCapture(movie_file)

    # Create a start_frame -> end_frame dictionary for two_entity_set lookups;
    # duplicated keys are nudged by 0.001 so every interval stays addressable
    start_end = {}
    for row in keyword_search_result:
        start_frame, end_frame = time_format.to_frame(row)
        while start_frame in start_end:
            start_frame = start_frame + 0.001
        while end_frame in start_end:
            end_frame = end_frame + 0.001
        start_end[start_frame] = end_frame

    frame = {}
    face_count = 0
    for keyword in two_entity_set:
        for start_frame in two_entity_set[keyword]:
            # Scan from 10 seconds (at 24 fps) before the match to 10 seconds after it
            frame_position = int(start_frame) - 24 * 10
            finish_frame = start_end[start_frame] + 24 * 10
            while frame_position <= finish_frame:
                print keyword
                videoInput.set(cv2.cv.CV_CAP_PROP_POS_FRAMES, frame_position)
                flag, img = videoInput.read()
                # Skip frames the decoder could not read
                if not flag:
                    frame_position += FRAME_INTERVAL
                    continue
                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                gray = cv2.equalizeHist(gray)
                face_position_list, rects = cv_image.face_detect(gray, frame_position, (85, 85))
                #face_position_list, rects = faceDetection(gray, frame_position)
                if 0xFF & cv2.waitKey(5) == 27:
                    cv2.destroyAllWindows()
                    sys.exit(1)
                # Keep only frames where exactly one face was detected
                if len(face_position_list) == 1:
                    print 'detected'
                    image_name = keyword + str(frame_position) + '.jpg'
                    cv_image.output_image(rects, img, OUTPUT_PATH + '/img/' + image_name)
                    for face_position in face_position_list:
                        face_count += 1
                        print face_count
                        frame[face_count] = {'keyword': keyword,
                                             'face_position': face_position.tolist(),
                                             'ID': face_count,
                                             'frame_position': frame_position,
                                             'face_id': face_count}
                frame_position += FRAME_INTERVAL

    # Close the video
    videoInput.release()
    json_io.write_json(OUTPUT_PATH + 'frame.json', frame)
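# movie_prosessing() assumes a few module-level names defined elsewhere in this
# script; the values sketched here are illustrative assumptions, not the
# project's configuration.
#
#   OUTPUT_PATH = 'scripts/output/'  # destination for cropped faces and frame.json
#   FRAME_INTERVAL = 24              # sample one frame per second of 24 fps video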
def position_merge(frame_file):
    frame = json_io.read_json(frame_file)
    keys = frame.keys()
    # Pairwise pass: faces detected near the same position share one face_id
    for i in range(0, len(frame)):
        for j in range(i + 1, len(frame)):
            if is_near(frame[keys[i]], frame[keys[j]]):
                frame[keys[j]]['face_id'] = frame[keys[i]]['face_id']
    json_io.write_json(OUTPUT_PATH + 'merge_position.json', frame)
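# is_near() is used above but not defined in this file. A minimal sketch of one
# plausible implementation, assuming face_position is an [x1, y1, x2, y2]
# rectangle and that a 50-pixel tolerance is acceptable; both the field layout
# and the threshold are assumptions, not the project's actual values.
def is_near(face_a, face_b, tolerance=50):
    ax, ay = face_a['face_position'][:2]
    bx, by = face_b['face_position'][:2]
    return abs(ax - bx) <= tolerance and abs(ay - by) <= tolerance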
def neo4j_db(neo4j_url, data_path):
    gdb = GraphDatabase(neo4j_url)
    data = json_io.read_json(data_path)
    if 'nodes' in data and 'links' in data:
        # One Neo4j node per graph node, one 'Knows' relationship per link
        nodes = []
        for node in data['nodes']:
            nodes.append(gdb.nodes.create(name=node['name']))
        for edge in data['links']:
            source = nodes[edge['source']]
            target = nodes[edge['target']]
            source.relationships.create('Knows', target)
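# Usage sketch, assuming GraphDatabase comes from neo4jrestclient and a local
# server; the URL below is that client's conventional default, and the JSON
# path matches the node-link file written by relationship_minig().
#
#   from neo4jrestclient.client import GraphDatabase
#   neo4j_db('http://localhost:7474/db/data/', 'result/social_graph.json')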
def file_to_db(self, data_path):
    data = json_io.read_json(data_path)
    with self.db.transaction:
        for source_name, targets in data.iteritems():
            # Reuse cached nodes so each name maps to a single db node
            if source_name in self.nodes:
                source = self.nodes[source_name]
            else:
                source = self.db.node(name=source_name)
                self.nodes[source_name] = source
            for target_name in targets:
                if target_name in self.nodes:
                    target = self.nodes[target_name]
                else:
                    target = self.db.node(name=target_name)
                    self.nodes[target_name] = target
                #for attr, val in targets[target_name].iteritems():
                self.rels.append(source.knows(target))
    return self.nodes
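# Shape of the JSON that file_to_db() walks, inferred from the loops above;
# the names and the empty attribute dicts are illustrative assumptions.
#
#   {"Alice": {"Bob": {}, "Carol": {}},
#    "Bob":   {"Carol": {}}}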
#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-
from modules import json_io
from db.MyDB import MyDB

if __name__ == '__main__':
    codebook_path = 'modules/codebook/'
    config = json_io.read_json('config.json')
    db_config = config[u'database']
    mydb = MyDB(db_config[u'dbtype'], db_config[u'host'], db_config[u'dbname'],
                db_config[u'username'], db_config[u'password'], db_config[u'encoding'], "")
    tables = config[u'table']
    # Create each table from its codebook (one codebook JSON per table prefix)
    for category, table_info in tables.iteritems():
        for name, years in table_info.iteritems():
            f = codebook_path + 'codebook_' + name[:6] + '.json'
            table_format = json_io.read_json(f)
            mydb.create_table(name, table_format.keys(), table_format.values())
    mydb.close()
if __name__ == '__main__':
    if len(sys.argv) > 1:
        input_dir = sys.argv[1]
    else:
        input_dir = 'output/en_tf/'
    config = json_io.read_json('config.json')[u'database']
    doc_hash = json_io.read_json('output/doc_hash.json')
    document_list = get_docs_list(input_dir)
    mydb = DataDB(config[u'dbtype'], config[u'host'], config[u'dbname'],
                  config[u'username'], config[u'password'], config[u'encoding'], "")
    # Continue numbering from the largest term id already in the table
    try:
        term_id = mydb.select('SELECT id FROM terms order by id desc limit 1;')[0][0]
    except IndexError:
        term_id = 1

    # Get idf
    term_doc_list = idf(input_dir)
    term_hash = {}
    doc_number = len(document_list)
def load_pattern(self, dir_file, clip_file):
    self.dir_patterns = json_io.read_json(dir_file)
    self.clip_patterns = json_io.read_json(clip_file)
    if match:
        target = match.groups()[0]
    return source, target


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print >> sys.stderr, "Usage: <file>"
        exit(-1)

    # Accept either an edge-list text file or a JSON graph
    if sys.argv[1][-4:] != 'json':
        f = open(sys.argv[1])
        graph = read_graph(f)
    else:
        graph = json_io.read_json(sys.argv[1])

    if len(sys.argv) == 3:
        movie_dic = json_io.read_json(sys.argv[2])
        nodes = movie_dic.keys()
        for i in xrange(len(nodes)):
            nodes[i] = str(nodes[i])
    else:
        nodes = []
        for i in range(1, 26):
            nodes.append(str(i))

    graph = Graph(graph, nodes)
    s_rank = datetime.datetime.now()
    rank = page_rank(graph, 20, 0.85)  # presumably 20 iterations, 0.85 damping
def split_text(text):
    # Peel off the next 10-character chunk for fixed-width column display
    if len(text) >= 10:
        line = text[:10]
        text = text[10:]
    else:
        line = text
        text = None
    return text, line


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print >> sys.stderr, "Usage: <query string>"
        exit(-1)

    config = json_io.read_json('config.json')[u'database']
    mydb = IServDB(config[u'dbtype'], config[u'host'], config[u'dbname'],
                   config[u'username'], config[u'password'], config[u'encoding'], "")
    sql_query = ("SELECT * FROM \"twitter\" WHERE q = '%s' ORDER BY user_id ASC"
                 % (sys.argv[1]))
    response = mydb.select(sql_query)
    response = sorted(response, key=lambda row: row[3], reverse=True)

    if len(response) == 0:
        print "There is no data matching the query string."
    else:
        print '---------------------------------------------'
        print ("%10s | %10s | %10s" % ("text", "user_name", "user_id"))
        for row in response:
def reconstruct_role(recongition_merge_file, keword_list_file):
    keyword_to_frame = json_io.read_json(recongition_merge_file)
    keword_list = csv_io.read_csv(keword_list_file)
    leading_keyword = keword_list[0]

    # Attach the cropped face image to every detected face
    for keyword, frame_list in keyword_to_frame.iteritems():
        for frame in frame_list:
            for face in frame_list[frame]:
                name = keyword + str(face['frame_position']) + '.jpg'
                face['img'] = cv2.imread(OUTPUT_PATH + '/img/' + name)

    detector, matcher = cv_face.init_feature('orb')

    # Find other characters: group faces by face_id, rank the groups by size,
    # and keep the largest group as the keyword's main character
    face_list = {}
    character_list = {}
    for keyword, frame_list in keyword_to_frame.iteritems():
        print keyword
        for frame in frame_list:
            for face in frame_list[frame]:
                if face and face['face_id'] not in face_list:
                    face_list[face['face_id']] = []
                if face:
                    face_list[face['face_id']].append(face)
        rank = sorted(face_list, key=lambda k: len(face_list[k]), reverse=True)
        character_list[keyword] = [face_list[rank[0]]]
        i = 0
        #for face in face_list[rank[5]]:
        #    i += 1
        for j in rank:
            face = face_list[j][0]
            cv2.imwrite(OUTPUT_PATH + '/result2/' + keyword + str(i) + '.jpg', face['img'])
            i += 1
        # A hyphenated keyword names a pair, so keep a second face group too
        if len(rank) > 1 and '-' in keyword:
            character_list[keyword].append(face_list[rank[1]])
        '''
        for i in range(1, len(rank)):
            if cv_face.list_match(MIN_MATCH, character_list[keyword][0],
                                  face_list[rank[i]], detector, matcher):
                continue
            else:
                character_list[keyword].append(face_list[rank[i]])
                break
        if len(character_list[keyword]) == 1:
            character_list[keyword].append(face_list[rank[1]])'''
        face_list = {}
        print

    role_list = {}
    # Use the leading-role images to decide which candidate face group in a
    # 'lead-other' pair actually belongs to the other person
    lead_role_list = character_list[leading_keyword]
    for keyword, characters in character_list.iteritems():
        if keyword == leading_keyword or len(characters) < 2:
            continue
        if leading_keyword in keyword:
            print keyword, '---'
            match_count1 = 0
            match_count2 = 0
            for face in character_list[leading_keyword][0]:
                match_count1 += cv_face.get_match_rate(face['img'], characters[0][0]['img'])
            cv2.imwrite(OUTPUT_PATH + '/result/' + '000' + keyword + '.jpg', characters[0][0]['img'])
            for face in character_list[leading_keyword][0]:
                match_count2 += cv_face.get_match_rate(face['img'], characters[1][0]['img'])
            cv2.imwrite(OUTPUT_PATH + '/result/' + '001' + keyword + '.jpg', characters[1][0]['img'])
            # The group that matches the lead better is the lead; drop it and
            # keep the leftover group as the co-star
            if match_count1 > match_count2:
                print 'characters1', match_count1, match_count2
                del characters[0]
            else:
                print 'characters2', match_count1, match_count2
                del characters[1]
            role_list[keyword.split('-')[0]] = characters[0]

    for keyword, characters in character_list.iteritems():
        if leading_keyword in keyword or len(characters) < 2:
            continue
        important_person = keyword.split('-')[1]
        if important_person in role_list:
            print keyword, important_person, '---'
            match_count1 = 0
            match_count2 = 0
            for face in role_list[important_person]:
                match_count1 += cv_face.get_match_rate(face['img'], characters[0][0]['img'])
            #cv2.imwrite(OUTPUT_PATH + '/result/' + '000' + keyword + '.jpg', characters[0][0]['img'])
            for face in role_list[important_person]:
                match_count2 += cv_face.get_match_rate(face['img'], characters[1][0]['img'])
            #cv2.imwrite(OUTPUT_PATH + '/result/' + '001' + keyword + '.jpg', characters[1][0]['img'])
            if match_count1 > match_count2:
                print 'characters1', match_count1, match_count2
                del characters[0]
            else:
                print 'characters2', match_count1, match_count2
                del characters[1]
        else:
            del characters[1]

    # Output one representative image per character
    for keyword, characters in character_list.iteritems():
        for character in characters:
            if '-' in keyword:
                keyword = keyword.split('-')[0]
            cv2.imwrite(OUTPUT_PATH + '/result/' + keyword + '.jpg', character[0]['img'])
# -*- coding: utf-8 -*-
import sys

from my_class.DataDB import DataDB
from my_class.Document import Document
from modules import json_io
from doc_preprocessing import get_docs_list

if __name__ == '__main__':
    if len(sys.argv) >= 2:
        data_dir = sys.argv[1]
    else:
        data_dir = 'output/processed_data/'
    config = json_io.read_json('config.json')[u'database']
    doc_hash = json_io.read_json('output/doc_hash.json')
    document_list = get_docs_list(data_dir)
    mydb = DataDB(config[u'dbtype'], config[u'host'], config[u'dbname'],
                  config[u'username'], config[u'password'], config[u'encoding'], "")

    table_name = "documents"
    key_list = ['doc_id', 'content']
    for doc in document_list:
        doc_obj = Document(doc_hash[doc], doc, data_dir)
        # Swap single quotes for double quotes so the content survives SQL quoting
        content = doc_obj.read().replace("'", '"')
        data_list = [str(doc_hash[doc]), content]
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import urllib2

from modules import json_io

pages = json_io.read_json('urls.json')
for category, urls in pages.iteritems():
    page_count = 0
    for url in urls:
        page_count += 1
        file_name = category + str(page_count)
        # Fetch each page and store it under pages/<category><n>
        with open('pages/' + file_name, 'w') as output_file:
            page = urllib2.urlopen(url).read()
            output_file.write(page)
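# Shape of urls.json as consumed above; the category name and URLs are
# illustrative assumptions.
#
#   {"news": ["http://example.com/page1", "http://example.com/page2"]}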
import os

from modules import json_io
from modules.CodeBook import CodeBook
from db.MyDB import MyDB


def get_table_name(table_dic, encoding, year):
    category = table_dic[encoding.decode('utf-8')]
    for table_name, years in category.iteritems():
        if year >= years[0] and year <= years[1]:
            return table_name


if __name__ == '__main__':
    ''' Read NHI .dat format data, decode it, and insert it into the db '''
    config = json_io.read_json('config.json')
    codebook = CodeBook(config[u'codebook'][u'path'])
    db_config = config[u'database']
    mydb = MyDB(db_config[u'dbtype'], db_config[u'host'], db_config[u'dbname'],
                db_config[u'username'], db_config[u'password'], db_config[u'encoding'], None)
    table_dic = config[u'table']
    for root, _, files in os.walk(config[u'data'][u'folder_path']):
        for f in files:
            print f
            # File name layout: characters 5-6 hold the encoding, 7-10 the year
            encoding = f[5:7]
            year = int(f[7:11])
            data = codebook.decode_file(os.path.join(root, f), encoding, year)
            table_name = get_table_name(table_dic, encoding, year)
def relationship_minig(min_votes, iter_stop):
    single_graph_file = 'scripts/output/single_graph.json'
    pair_graph_file = 'scripts/output/pair_graph.json'
    social_graph_file = 'scripts/output/single_graph.json'
    dir_file = 'scripts/input/dir_rel.json'
    clip_file = 'scripts/input/clip_rel.json'
    single_graph = json_io.read_json(single_graph_file)
    pair_graph = json_io.read_json(pair_graph_file)
    bi_graph, social_graph = graph_init(single_graph, pair_graph,
                                        social_graph_file, dir_file, clip_file)
    output_graph = {'nodes': [], 'links': []}
    node_dic = {}
    itr = 0  # iteration counter for Algorithm 1

    while True:
        # Pick the role pair whose dominant keyword currently has the most votes
        role_pair, dominant_keyword, votes = bi_graph.dominant_pair()
        if role_pair is None:
            break
        source, target, dir_prob = bi_graph.get_direction(role_pair, dominant_keyword)
        valid_tag = valid_checking(social_graph, source, target, dominant_keyword)

        if source not in node_dic:
            node_dic[source] = len(node_dic)
            output_graph['nodes'].append({'group': node_dic[source],
                                          'name': source,
                                          'ID': node_dic[source]})
        if target not in node_dic:
            node_dic[target] = len(node_dic)
            output_graph['nodes'].append({'group': node_dic[target],
                                          'name': target,
                                          'ID': node_dic[target]})

        if valid_tag is not False and votes >= int(min_votes):
            # valid_checking() may return True or a corrected relationship tag
            if not isinstance(valid_tag, unicode):
                print source, '-->', dominant_keyword, '-->', target
                add_links(output_graph, source, target, dominant_keyword, votes, node_dic)
                social_graph.relationship_tagging(source, target, dominant_keyword, votes)
            else:
                print source, '-->', valid_tag, '-->', target
                add_links(output_graph, source, target, valid_tag, votes, node_dic)
                social_graph.relationship_tagging(source, target, valid_tag, votes)
            print votes, dir_prob

        bi_graph.update_weighting(role_pair, dominant_keyword)
        if valid_tag:
            bi_graph.remove_keyword(role_pair, dominant_keyword)
        else:
            bi_graph.remove_edges(role_pair, dominant_keyword)

        itr += 1
        if itr == int(iter_stop):
            break

    json_io.write_json('result/social_graph.json', output_graph)
    social_graph.clear()
    social_graph.shutdown()
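# Usage sketch: mine relationships that have at least 3 supporting votes,
# capped at 100 iterations. Both thresholds are illustrative; the surrounding
# script normally passes them in from the command line.
#
#   relationship_minig(3, 100)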