# NOTE(review): the physical line that follows this note starts with '#', so
# the interpreter treats all of it as a single comment and executes none of
# it. The original line breaks and indentation appear to have been lost and
# must be restored from the original script before this fragment can run.
#
# Read as statements, in textual order, the fragment:
#   * binds regions_SS and regions_AS to lists of integers (the inline
#     comments label them "Secondary Structure" and "Active Site");
#   * loops i over range(len(alignRecords)) and sets mut and mut3d to "";
#   * loops j over range(len(wildtype.seq)) and compares wildtype.seq[j]
#     with alignRecords[i].seq[j];
#   * builds notation3d as str(wildtype.seq[j]) + domain_data.notation_3D(j)
#     + str(alignRecords[i].seq[j]);
#   * tests that notation3d contains no '-';
#   * appends wildtype residue + str(j+1) + aligned residue to mut;
#   * appends notation3d to mutRegion_AS[domain_data.whatRegion_AS(j+1)] and
#     to mutRegion_2D[domain_data.whatRegion_2D(j+1)];
#   * appends notation3d to mut3d, then csv_token to both mut and mut3d;
#   * builds line and line3d as description + csv_token + id + csv_token
#     followed by mut and mut3d respectively.
#
# TODO confirm: the nesting level of the statements after each ':' cannot be
# determined from this text. In particular the two csv_token appends could
# sit inside the "no '-'" test, inside the mismatch test, or directly in the
# j loop; each placement yields a different number of separators per row.
# TODO confirm: notation_3D is called with the 0-based j while whatRegion_AS
# and whatRegion_2D are called with j+1 -- verify the indexing each helper
# expects.
# NOTE(review): line and line3d are not consumed anywhere in this fragment;
# presumably they are written out by code that is not shown here.
# Regions for Secondary Structure regions_SS=[81,95,103,104,109,113,118,120,125,127,136,137,140,155,161,172,178,179,186,188,198,200,209,210,218,231,234,236,244,251,259,261,269,279,284,286,292,300,307,310,317,350,353,355,361,367,377,387,393,397,400,401,408,409,413,418,430,436,448,469] # Regions for Active Site regions_AS=[82,117,118,119,150,151,152,155,156,176,177,222,223,224,225,227,228,276,277,278,292,293,294,295,367,368,401,402] # print csv file with the sequences for i in range(len(alignRecords)): mut="" mut3d="" for j in range(len(wildtype.seq)): # if a mutation appears then show that if wildtype.seq[j] != alignRecords[i].seq[j]: # record the Accession Number, the mutation and country j3d=domain_data.notation_3D(j) notation3d = str(wildtype.seq[j])+j3d+str(alignRecords[i].seq[j]) # notation for 3D structure. # check what ist the region in the sequence by active site. if not('-' in notation3d): # to notation issues j+1 to print. mut = mut+str(wildtype.seq[j])+str(j+1)+str(alignRecords[i].seq[j]) # Active Site mutRegion_AS[domain_data.whatRegion_AS(j+1)].append(notation3d) # Secondary structure mutRegion_2D[domain_data.whatRegion_2D(j+1)].append(notation3d) mut3d = mut3d+notation3d mut = mut+csv_token mut3d = mut3d+csv_token line=str(alignRecords[i].description)+csv_token+str(alignRecords[i].id)+csv_token+mut line3d=str(alignRecords[i].description)+csv_token+str(alignRecords[i].id)+csv_token+mut3d
# NOTE(review): the physical line that follows this note starts with '#', so
# the interpreter treats all of it as a single comment and executes none of
# it. It reads as the interior of loops over i and j whose headers are not
# part of this line, and it ends with a `for m in mutations_list:` header
# whose body is not shown. Line breaks and indentation must be restored from
# the original script before this fragment can run.
#
# Read as statements, in textual order, the fragment:
#   * calls db.lookup(accession=alignRecords[i].id) and copies the returned
#     record's description onto alignRecords[i].description;
#   * tests that wildtype.seq[j] differs from alignRecords[i].seq[j] and that
#     the aligned residue is not '-';
#   * calls sleep(1), then getCountry_fromGBDescription on the description;
#   * appends Mutation.Mutation(wildtype.id, alignRecords[i].id,
#     wildtype.seq[j], j, alignRecords[i].seq[j], country) to mutations_list;
#   * takes the index of the entry just appended (len(mutations_list)-1) and
#     computes ss_reg / as_reg by passing its position_1D to
#     domain_data.whatRegion together with regions_SS / regions_AS;
#   * writes one row through dataWriter.writerow with the fields sequence_AN,
#     old_aa, domain_data.notation_3D(position_1D), new_aa, str(country),
#     ss_reg, as_reg;
#   * calls pbar.update(i+1) and pbar.finish(), writes '\n' to sys.stdout and
#     prints two banner lines;
#   * sets country_list and country_csv_files to empty dicts.
#
# NOTE(review): `print 20*'-'` and `print 'Creating country files ...'` are
# Python 2 print statements and are a syntax error under Python 3.
# TODO confirm: whether db.lookup runs once per record or once per alignment
# position depends on indentation that is not recoverable from this text.
# TODO confirm: sleep(1) appears next to the per-mutation statements;
# presumably it throttles a remote lookup, but the reason is not visible here.
# TODO confirm: Mutation.Mutation receives the 0-based j as its position --
# presumably exposed as position_1D; verify against the Mutation class.
# if a mutation appears then show that seq_record=db.lookup(accession=alignRecords[i].id) alignRecords[i].description=seq_record.description # if a mutation exist and is not an insertion or deletion if (wildtype.seq[j] != alignRecords[i].seq[j]) and (alignRecords[i].seq[j]!='-'): # record the Accession Number, the mutation and country sleep(1) country=getCountry_fromGBDescription(alignRecords[i].description) # add the found mutation to the mutation_list mutations_list.append(Mutation.Mutation(wildtype.id,alignRecords[i].id,wildtype.seq[j],j,alignRecords[i].seq[j],country)) # write data in csv file in 3D NOTATION mut_act_pos=len(mutations_list)-1 # Define regions for SS and AS ss_reg=domain_data.whatRegion(mutations_list[mut_act_pos].position_1D,regions_SS) as_reg=domain_data.whatRegion(mutations_list[mut_act_pos].position_1D,regions_AS) csvrow=[mutations_list[mut_act_pos].sequence_AN,mutations_list[mut_act_pos].old_aa,domain_data.notation_3D(mutations_list[mut_act_pos].position_1D),mutations_list[mut_act_pos].new_aa,str(mutations_list[mut_act_pos].country),ss_reg,as_reg] #print csvrow dataWriter.writerow(csvrow) # increase progressbar pbar.update(i+1) # finish progressbar pbar.finish() sys.stdout.write('\n') print 20*'-' print 'Creating country files ...' # put country_list for accession number mutation country_list={} country_csv_files={} for m in mutations_list: