Exemplo n.º 1
0
    def post_dump(self, *args, **kwargs):
        """Decompress the dumped files and merge the three MedDRA tables.

        Reads ``meddra_freq.tsv``, ``meddra_all_se.tsv`` and
        ``meddra_all_indications.tsv`` from ``self.new_data_folder``,
        outer-joins them on their shared identifier columns, sorts the result
        by ``stitch_id(flat)`` and writes it to
        ``merged_freq_all_se_indications.tsv`` in the same folder.

        ``*args`` and ``**kwargs`` are accepted but not used.
        """
        # gunzipall is defined outside this block; presumably it expands the
        # gzip archives in the folder so the .tsv files below exist -- confirm.
        gunzipall(self.new_data_folder)
        self.logger.info("Merging files")
        FREQ = os.path.join(self.new_data_folder, "meddra_freq.tsv")
        ALL_SE = os.path.join(self.new_data_folder, "meddra_all_se.tsv")
        ALL_INDICATIONS = os.path.join(self.new_data_folder, "meddra_all_indications.tsv")
        MERGED = os.path.join(self.new_data_folder, "merged_freq_all_se_indications.tsv")

        # NOTE(review): read_csv is called without header=None, so the first
        # line of each file is consumed as a header row before the column
        # labels are overwritten below. If the files carry no header line,
        # that first record is dropped -- confirm against the data files.

        # Merge the first two files: side effects with frequency, and all
        # side effects. Column labels are assigned explicitly after loading.
        df1 = pd.read_csv(FREQ, delimiter='\t')
        df1.columns = ['stitch_id(flat)', 'stitch_id(stereo)', 'umls_id(label)', 'is_placebo',
                       'desc_type', 'lower', 'upper', 'meddra_type', 'umls_id(meddra)', 'se_name']
        df2 = pd.read_csv(ALL_SE, delimiter='\t')
        df2.columns = ['stitch_id(flat)', 'stitch_id(stereo)', 'umls_id(label)', 'meddra_type',
                       'umls_id(meddra)', 'se_name']
        s1 = pd.merge(df1, df2, how='outer',
                      on=['stitch_id(flat)', 'stitch_id(stereo)', 'umls_id(label)',
                          'meddra_type', 'umls_id(meddra)', 'se_name'])

        # Merge the result above with the indications file.
        df4 = pd.read_csv(ALL_INDICATIONS, delimiter='\t')
        df4.columns = ['stitch_id(flat)', 'umls_id(label)', 'method_of_detection', 'concept_name',
                       'meddra_type', 'umls_id(meddra)', 'concept_name(meddra)']
        s2 = pd.merge(s1, df4, how='outer',
                      on=['stitch_id(flat)', 'umls_id(label)', 'meddra_type', 'umls_id(meddra)'])

        # DataFrame.sort() was removed from pandas; sort_values() is the
        # replacement and orders the rows by the given column.
        s3 = s2.sort_values('stitch_id(flat)')
        # to_csv is used with its defaults (comma separator, index column
        # written) even though the target file name ends in .tsv.
        s3.to_csv(MERGED)
        self.logger.info("Files successfully merged, ready to be uploaded")
Exemplo n.º 2
0
 def post_dump(self, *args, **kwargs):
     """Unpack the dumped files and cut ``mvi_ca`` into fixed-size chunks.

     After running ``gunzipall`` on ``self.new_data_folder``, the external
     ``split`` command is invoked on the ``mvi_ca`` file in that folder with
     ``-l`` set to the class attribute ``CHUNK_SIZE``; the pieces are written
     next to the input using the ``<input>.split.`` prefix. A non-zero exit
     status from ``split`` raises ``subprocess.CalledProcessError``.

     ``*args`` and ``**kwargs`` are accepted but not used.
     """
     folder = self.new_data_folder
     self.logger.info("Unzipping files in '%s'" % folder)
     gunzipall(folder)

     source_path = os.path.join(folder, "mvi_ca")
     self.logger.info("Split file in chunks")

     # Arguments are passed as a list, so no shell is involved.
     lines_per_chunk = str(self.__class__.CHUNK_SIZE)
     chunk_prefix = "%s.split." % source_path
     split_command = ["split", "-l", lines_per_chunk, source_path, chunk_prefix]
     subprocess.check_call(split_command)
Exemplo n.º 3
0
    def post_dump(self, *args, **kwargs):
        """Decompress the dumped files and combine the MedDRA tables into one.

        Three tab-separated files in ``self.new_data_folder`` are loaded
        (``meddra_freq.tsv``, ``meddra_all_se.tsv``,
        ``meddra_all_indications.tsv``), relabelled, outer-joined, sorted by
        ``stitch_id(flat)`` and written to
        ``merged_freq_all_se_indications.tsv`` in the same folder.

        ``*args`` and ``**kwargs`` are accepted but not used.
        """
        folder = self.new_data_folder
        # gunzipall comes from outside this block; presumably it expands the
        # gzip archives in the folder -- confirm against the helper.
        gunzipall(folder)
        self.logger.info("Merging files")

        merged_path = os.path.join(folder,
                                   "merged_freq_all_se_indications.tsv")

        # Column labels for the side-effect table; the same list, in the same
        # order, is also the join key between the frequency and side-effect
        # tables.
        side_effect_columns = [
            'stitch_id(flat)', 'stitch_id(stereo)', 'umls_id(label)',
            'meddra_type', 'umls_id(meddra)', 'se_name'
        ]
        frequency_columns = [
            'stitch_id(flat)', 'stitch_id(stereo)', 'umls_id(label)',
            'is_placebo', 'desc_type', 'lower', 'upper', 'meddra_type',
            'umls_id(meddra)', 'se_name'
        ]
        indication_columns = [
            'stitch_id(flat)', 'umls_id(label)', 'method_of_detection',
            'concept_name', 'meddra_type', 'umls_id(meddra)',
            'concept_name(meddra)'
        ]
        indication_keys = [
            'stitch_id(flat)', 'umls_id(label)', 'meddra_type',
            'umls_id(meddra)'
        ]

        def load_table(filename, labels):
            # Read one tab-separated file and overwrite its column labels.
            # NOTE(review): no header=None is passed, so the first line of the
            # file is consumed as a header row before the labels are replaced;
            # if the files have no header line that record is lost -- confirm.
            table = pd.read_csv(os.path.join(folder, filename), sep='\t')
            table.columns = list(labels)
            return table

        # Step 1: side effects with frequency joined with all side effects.
        frequency = load_table("meddra_freq.tsv", frequency_columns)
        side_effects = load_table("meddra_all_se.tsv", side_effect_columns)
        combined = frequency.merge(side_effects,
                                   how='outer',
                                   on=list(side_effect_columns))

        # Step 2: join the result with the indications table.
        indications = load_table("meddra_all_indications.tsv",
                                 indication_columns)
        combined = combined.merge(indications,
                                  how='outer',
                                  on=indication_keys)

        # Written with to_csv defaults despite the .tsv file name.
        combined.sort_values('stitch_id(flat)').to_csv(merged_path)
        self.logger.info("Files successfully merged, ready to be uploaded")
Exemplo n.º 4
0
 def post_dump(self):
     """Log the data folder, then run ``gunzipall`` on it.

     ``gunzipall`` is defined outside this block; going by the log message
     it uncompresses the files found in ``self.new_data_folder``.
     """
     folder = self.new_data_folder
     message = "Uncompressing files in '%s'" % folder
     self.logger.info(message)
     gunzipall(folder)
Exemplo n.º 5
0
 def post_dump(self):
     """Run ``gunzipall`` on ``self.new_data_folder``.

     ``gunzipall`` is defined outside this block; presumably it expands the
     gzip archives in that folder -- confirm against the helper.
     """
     folder = self.new_data_folder
     gunzipall(folder)
Exemplo n.º 6
0
 def post_dump(self, *args, **kwargs):
     """Run ``gunzipall`` on ``self.new_data_folder``.

     ``*args`` and ``**kwargs`` are accepted but not used. ``gunzipall`` is
     defined outside this block; presumably it expands the gzip archives in
     that folder -- confirm against the helper.
     """
     folder = self.new_data_folder
     gunzipall(folder)
Exemplo n.º 7
0
 def post_dump(self):
     """Run ``gunzipall`` on ``self.new_data_folder``.

     ``gunzipall`` is defined outside this block; presumably it expands the
     gzip archives in that folder -- confirm against the helper.
     """
     folder = self.new_data_folder
     gunzipall(folder)