def _process_pathway(cls, download_file, stage_output_file, section, source, is_public, config=None):
        '''Parse a pathway input file (eg: kegg, reactome, go) and write it out as JSON.

        INPUT file format (tab separated, one pathway per line):
        Pathway name <tab> Pathway url <tab> List of entrez ids
        REACTOME_RNA_POL_I_TRANSCRIPTION_TERMINATION
        http://www.broadinstitute.org/gsea/msigdb/cards/REACTOME_RNA_POL_I_TRANSCRIPTION_TERMINATION1022
        2068    2071    25885    284119    2965    2966    2967    2968    4331

        OUTPUT: a file named like stage_output_file with ".out" replaced by ".json",
        holding {"docs":[ ... ]} with one object per pathway. Each object has the keys
        pathway_name, pathway_url, gene_sets, source and is_public.

        The ids from the third column onwards are translated through the mapping
        returned by Gene._entrez_ensembl_lookup; ids missing from that mapping are
        left out of gene_sets. Blank input lines are skipped.

        Once the JSON file is written and closed, cls._load_pathway_mappings(section)
        is called and its return value is printed.
        '''
        json_target_file_path = stage_output_file.replace(".out", ".json")

        # Read the input once. With QUOTE_NONE a field can never span lines, so
        # every physical line is exactly one row and len(rows) is the line count.
        with open(download_file, encoding='utf-8') as csvfile:
            rows = list(csv.reader(csvfile, delimiter='\t', quoting=csv.QUOTE_NONE))
        logger.debug('Number of lines in the file ' + str(len(rows)))

        # csv.reader yields an empty list for a blank line (e.g. a trailing one);
        # such rows have no pathway name/url and would fail on row[0].
        rows = [row for row in rows if row]

        # Collect every id first so the lookup is done in a single call.
        gene_sets = []
        for row in rows:
            gene_sets.extend(row[2:])
        ens_look_up = Gene._entrez_ensembl_lookup(gene_sets, section, config)

        count = 0
        # The with-block guarantees the JSON is flushed and closed before the
        # mappings are loaded, and also when an exception interrupts the loop.
        with open(json_target_file_path, mode='w', encoding='utf-8') as json_target_file:
            json_target_file.write('{"docs":[\n')

            for row in rows:
                path_object = {
                    "pathway_name": row[0],
                    "pathway_url": row[1],
                    "gene_sets": [ens_look_up[entrez] for entrez in row[2:] if entrez in ens_look_up],
                    "source": source,
                    "is_public": is_public,
                }
                # Separator goes before every document but the first, so the
                # output stays valid JSON regardless of how many rows were skipped.
                if count:
                    json_target_file.write(',\n')
                json_target_file.write(json.dumps(path_object))
                count += 1

            if count:
                json_target_file.write('\n')
            json_target_file.write('\n]}')

        logger.debug("No. genes to load "+str(count))
        logger.debug("Json written to " + json_target_file_path)
        logger.debug("Load mappings")

        status = cls._load_pathway_mappings(section)
        print(status)
    def get_ensemb_ids(self, entrez_list):
        '''Run Gene._entrez_ensembl_lookup for entrez_list against a fixed gene history index.

        The section (index 'genes_hg38_v0.0.2', index_type 'gene_history') is hard-coded
        here and is also passed inside the config under the 'GENE_HISTORY' key.
        The helper's result is returned unchanged; other callers in this file use it
        as a mapping keyed by entrez id.
        '''
        gene_history_section = {
            'index': 'genes_hg38_v0.0.2',
            'index_type': 'gene_history',
        }
        # The same dict object is used both as the section and as the config entry.
        lookup_config = {'GENE_HISTORY': gene_history_section}
        return Gene._entrez_ensembl_lookup(entrez_list, gene_history_section, lookup_config)
    def _process_bioplex(cls, download_file, stage_output_file, section, config):
        '''Function to process bioplex data files. Interactors are in the first two columns
        (GeneA, GeneB); they are converted to ensembl ids and stored in a temporary .out file.

        Input File format:
        GeneA    GeneB    UniprotA    UniprotB    SymbolA    SymbolB    pW    pNI    pInt
        100    728378    P00813    A5A3E0    ADA    POTEF    2.38086E-09    0.000331856    0.999668142
        100    345651    P00813    Q562R1    ADA    ACTBL2    9.79E-18    0.211914437    0.788085563

        Output file format:
        interactorA    interactorB
        ENSG00000196839    ENSG00000196604
        ENSG00000196839    ENSG00000169067

        A row is written only when both of its ids are present in the mapping returned
        by Gene._entrez_ensembl_lookup; otherwise the row is skipped and the ids that
        had no mapping are logged at debug level. In the summary log line, "Mapped" is
        the number of rows written and "Unmapped" is the number of id occurrences
        without a mapping.

        After the output file is closed it is handed to
        cls._process_interaction_out_file(stage_output_file, section, False).
        '''
        # Read the two interactor columns once; the pairs are needed both to build
        # the lookup request and to write the converted rows.
        with open(download_file, encoding='utf-8') as csvfile:
            reader = csv.DictReader(csvfile, delimiter='\t', quoting=csv.QUOTE_NONE)
            interactions = [(row['GeneA'], row['GeneB']) for row in reader]

        gene_sets = [gene_id for pair in interactions for gene_id in pair]
        ens_look_up = Gene._entrez_ensembl_lookup(gene_sets, section, config)

        mapped_counter = 0
        unmapped_ids = []
        # The with-block closes the output even if a write or lookup raises.
        with open(stage_output_file, 'w') as stage_output_file_handler:
            stage_output_file_handler.write('interactorA' + '\t' + 'interactorB\n')

            for interactor_a, interactor_b in interactions:
                if interactor_a in ens_look_up and interactor_b in ens_look_up:
                    line = ens_look_up[interactor_a] + '\t' + ens_look_up[interactor_b] + '\n'
                    stage_output_file_handler.write(line)
                    mapped_counter += 1
                else:
                    # Record only the ids that really are missing; the partner of
                    # an unmapped id may itself have a valid mapping.
                    unmapped_ids.extend(
                        gene_id for gene_id in (interactor_a, interactor_b)
                        if gene_id not in ens_look_up)

        logger.debug("\n".join(unmapped_ids))
        logger.debug("Mapped {}  Unmapped {} " . format(mapped_counter, len(unmapped_ids)))

        cls._process_interaction_out_file(stage_output_file, section, False)