Exemplo n.º 1
0
def reset_basic_data(interaction_list, negative_interaction_list, lncRNA_list, protein_list):
    """Rebuild every interaction object against the given node lists.

    All nodes in ``lncRNA_list`` and ``protein_list`` first get an empty
    ``interaction_list``.  Then, for each entry of ``interaction_list``
    (rebuilt with ``y=1``) and ``negative_interaction_list`` (rebuilt with
    ``y=0``), the matching nodes are located through their
    ``serial_number`` and a new ``LncRNA_Protein_Interaction`` is created
    and attached to both nodes.

    Returns:
        A 4-tuple ``(new positive interactions, new negative interactions,
        lncRNA_list, protein_list)``.  The two node lists are the same
        objects that were passed in, mutated in place.
    """
    # Detach whatever interactions the nodes currently hold.
    for node in lncRNA_list:
        node.interaction_list = []
    for node in protein_list:
        node.interaction_list = []

    # serial_number -> position of the node inside its list.
    lncRNA_position = nodeSerialNumber_listIndex_dict_generation(lncRNA_list)
    protein_position = nodeSerialNumber_listIndex_dict_generation(protein_list)

    def rebuild(old_interactions, label):
        # Re-create each interaction so that it points at the nodes of
        # lncRNA_list / protein_list, and register it on both of them.
        rebuilt = []
        for old in old_interactions:
            lncRNA_pos = lncRNA_position[old.lncRNA.serial_number]
            protein_pos = protein_position[old.protein.serial_number]
            lncRNA_node = lncRNA_list[lncRNA_pos]
            protein_node = protein_list[protein_pos]

            fresh = LncRNA_Protein_Interaction(lncRNA=lncRNA_node, protein=protein_node, y=label)
            rebuilt.append(fresh)
            lncRNA_node.interaction_list.append(fresh)
            protein_node.interaction_list.append(fresh)
        return rebuilt

    # Positives are processed before negatives, so each node's
    # interaction_list holds its positive samples first.
    new_interaction_list = rebuild(interaction_list, 1)
    new_negative_interaction_list = rebuild(negative_interaction_list, 0)

    return new_interaction_list, new_negative_interaction_list, lncRNA_list, protein_list
Exemplo n.º 2
0
def negative_interaction_generation():
    """Randomly sample as many negative pairs as there are positive ones.

    Works entirely on module-level state: nodes are drawn uniformly from
    ``lncRNA_list`` and ``protein_list``; a drawn pair is rejected when its
    ``(lncRNA serial_number, protein serial_number)`` key is already in
    ``set_interactionKey`` or has already been picked as a negative.  Every
    accepted pair becomes a ``LncRNA_Protein_Interaction`` with label 0 that
    is appended to ``negative_interaction_list`` and to the
    ``interaction_list`` of both nodes.

    ``set_negativeInteractionKey`` is reset to an empty set first, before
    the check for pre-existing negatives.

    Raises:
        Exception: if ``negative_interaction_list`` is not empty.
        ValueError: if there are fewer unused (lncRNA, protein) pairs than
            positive interactions, which would otherwise make the sampling
            loop run forever.
    """
    global lncRNA_list, protein_list, interaction_list, negative_interaction_list, set_interactionKey, set_negativeInteractionKey
    set_negativeInteractionKey = set()

    if len(negative_interaction_list) != 0:
        raise Exception('negative interactions exist')

    num_of_interaction = len(interaction_list)
    num_of_lncRNA = len(lncRNA_list)
    num_of_protein = len(protein_list)

    if num_of_interaction > 0:
        # Rejection sampling only terminates if enough distinct keys are
        # still free, so count them before entering the loop.
        lncRNA_serials = {lncRNA.serial_number for lncRNA in lncRNA_list}
        protein_serials = {protein.serial_number for protein in protein_list}
        # Only 2-tuples can ever compare equal to a sampled key, so other
        # entries of set_interactionKey cannot block a candidate pair.
        num_of_blocked = sum(
            1
            for key in set_interactionKey
            if isinstance(key, tuple)
            and len(key) == 2
            and key[0] in lncRNA_serials
            and key[1] in protein_serials
        )
        num_of_candidate = len(lncRNA_serials) * len(protein_serials) - num_of_blocked
        if num_of_candidate < num_of_interaction:
            raise ValueError(
                'cannot generate {:d} negative samples: only {:d} candidate pairs available'.format(
                    num_of_interaction, num_of_candidate))

    negative_interaction_count = 0
    while negative_interaction_count < num_of_interaction:
        random_index_lncRNA = random.randint(0, num_of_lncRNA - 1)
        random_index_protein = random.randint(0, num_of_protein - 1)
        temp_lncRNA = lncRNA_list[random_index_lncRNA]
        temp_protein = protein_list[random_index_protein]
        # Check whether the randomly chosen lncRNA and protein already have
        # a known interaction, or were already picked as a negative sample.
        key_negativeInteraction = (temp_lncRNA.serial_number,
                                   temp_protein.serial_number)
        if key_negativeInteraction in set_interactionKey:
            continue
        if key_negativeInteraction in set_negativeInteractionKey:
            continue

        # The pair passed both checks, so it can be used as a negative sample.
        set_negativeInteractionKey.add(key_negativeInteraction)
        temp_interaction = LncRNA_Protein_Interaction(temp_lncRNA,
                                                      temp_protein, 0,
                                                      key_negativeInteraction)
        negative_interaction_list.append(temp_interaction)
        temp_lncRNA.interaction_list.append(temp_interaction)
        temp_protein.interaction_list.append(temp_interaction)
        negative_interaction_count += 1
    print('generate ', len(negative_interaction_list), ' negative samples')
Exemplo n.º 3
0
def read_interaction_dataset(dataset_path, dataset_name):
    """Load lncRNA-protein interactions from the first sheet of an xlsx file.

    The first row of the sheet is skipped.  Every following row must unpack
    into exactly three cell values: lncRNA name, protein name and a label
    that ``int()`` turns into 1 (positive) or 0 (negative).  Nodes are
    created on first sight and appended to the module-level ``lncRNA_list``
    / ``protein_list``; ``lncRNA_name_index_dict`` and
    ``protein_name_index_dict`` map a name to its position in those lists so
    that repeated names are found quickly.  Each row yields one
    ``LncRNA_Protein_Interaction`` stored in ``interaction_list`` or
    ``negative_interaction_list`` and attached to both of its nodes.

    Args:
        dataset_path: path of the xlsx workbook.
        dataset_name: name used only in the invalid-label error message.

    Raises:
        Exception: if ``dataset_path`` does not exist, or a label is
            neither 0 nor 1.
    """
    global interaction_list, negative_interaction_list, lncRNA_list, protein_list, lncRNA_name_index_dict, protein_name_index_dict
    if not osp.exists(dataset_path):
        raise Exception('interaction datset does not exist')
    print('start reading xlsx file')
    workbook = load_workbook(dataset_path)
    first_sheet = workbook.worksheets[0]   # only the first sheet is read

    # Drop the first row of the sheet before reading the data rows.
    data_rows = iter(first_sheet.rows)
    next(data_rows, None)

    next_serial = 0      # one counter shared by lncRNA and protein nodes
    lncRNA_total = 0
    protein_total = 0

    for cells in data_rows:
        # One row == one interaction: (lncRNA name, protein name, label).
        lncRNA_name, protein_name, label = [cell.value for cell in cells]
        label = int(label)

        if lncRNA_name in lncRNA_name_index_dict:
            # Already seen in this dataset: reuse the stored object.
            lncRNA_node = lncRNA_list[lncRNA_name_index_dict[lncRNA_name]]
        else:
            # First occurrence: create the node and remember its position.
            lncRNA_node = LncRNA(lncRNA_name, next_serial, 'LncRNA')
            lncRNA_list.append(lncRNA_node)
            lncRNA_name_index_dict[lncRNA_name] = lncRNA_total
            next_serial += 1
            lncRNA_total += 1

        if protein_name in protein_name_index_dict:
            # Already seen in this dataset: reuse the stored object.
            protein_node = protein_list[protein_name_index_dict[protein_name]]
        else:
            # First occurrence: create the node and remember its position.
            protein_node = Protein(protein_name, next_serial, 'Protein')
            protein_list.append(protein_node)
            protein_name_index_dict[protein_name] = protein_total
            next_serial += 1
            protein_total += 1

        record = LncRNA_Protein_Interaction(lncRNA_node, protein_node, label)

        if label == 1:
            destination = interaction_list
        elif label == 0:
            destination = negative_interaction_list
        else:
            print(label)
            raise Exception('{dataset_name}中有除了0和1之外的label'.format(dataset_name=dataset_name))
        destination.append(record)

        lncRNA_node.interaction_list.append(record)
        protein_node.interaction_list.append(record)

    print('number of lncRNA:{:d}, number of protein:{:d}, number of node:{:d}'.format(lncRNA_total, protein_total, lncRNA_total + protein_total))
    print(f'number of positive samples:{len(interaction_list)}, number of negative samples:{len(negative_interaction_list)}\n')
Exemplo n.º 4
0
def rebuild_all_negativeInteraction(set_negativeInteractionKey):
    """Materialise negative samples from a collection of interaction keys.

    Each key is indexed as ``key[0]`` (looked up among the lncRNA nodes) and
    ``key[1]`` (looked up among the protein nodes).  For every key a
    ``LncRNA_Protein_Interaction`` with label 0 is created, appended to the
    module-level ``negative_interaction_list`` and attached to the
    ``interaction_list`` of both nodes.

    Args:
        set_negativeInteractionKey: iterable of keys; it is only iterated
            over here and not modified.
    """
    global lncRNA_list, protein_list, negative_interaction_list
    # Lookup tables indexed by the two components of each key.
    lncRNA_by_serial = build_dict_serialNumber_node(lncRNA_list)
    protein_by_serial = build_dict_serialNumber_node(protein_list)

    # Build the negative sample set from the given keys.
    for key in set_negativeInteractionKey:
        lncRNA_node = lncRNA_by_serial[key[0]]
        protein_node = protein_by_serial[key[1]]

        # Create the negative sample and register it everywhere it belongs.
        negative_sample = LncRNA_Protein_Interaction(lncRNA_node, protein_node, 0, key)
        negative_interaction_list.append(negative_sample)
        lncRNA_node.interaction_list.append(negative_sample)
        protein_node.interaction_list.append(negative_sample)