def CreateMetadata(pd_seq_list,metadata_destination,extract_all_samples,pd_envoi_qenome_quebec,pd_sgil_extract):
    """Assemble the metadata table for the requested sequences.

    The metadata is first fetched through
    ``MySQLcovid19Selector.GetMetadataAsPdDataFrame``. When both
    ``pd_envoi_qenome_quebec`` and ``pd_sgil_extract`` are provided, samples
    reported missing by ``CheckMissingSpec`` are looked up first with
    ``AddMissingFromSgilExtract`` and then with
    ``AddMissingFromEnvoisGenomeQuebec``, and the rows found are appended.

    Args:
        pd_seq_list: Input handed to ``BuildSeqList`` to get the sequence list.
        metadata_destination: Forwarded unchanged to the metadata selector.
        extract_all_samples: Forwarded unchanged to the metadata selector.
        pd_envoi_qenome_quebec: Fallback source, or ``None`` to skip fallbacks.
        pd_sgil_extract: Fallback source, or ``None`` to skip fallbacks.

    Returns:
        A two-element list ``[pd_metadata, pd_missing_spec]``: the metadata
        with ``sample_date`` formatted as ``%Y-%m-%d`` strings and duplicate
        ``sample`` rows dropped (first kept), and the last result of
        ``CheckMissingSpec``.
    """

    def _strip_prefix_and_find_missing(frame):
        # Drop the 'LSPQ-' prefix from sample ids, then ask which requested
        # sequences are still absent from the frame.
        frame['sample'] = frame['sample'].str.replace('LSPQ-','')
        return CheckMissingSpec(frame, seq_list)

    MySQLcovid19.SetConnection()
    seq_list = BuildSeqList(pd_seq_list)
    db_connection = MySQLcovid19.GetConnection()
    pd_metadata = MySQLcovid19Selector.GetMetadataAsPdDataFrame(
        db_connection, seq_list, metadata_destination, extract_all_samples)

    pd_missing_spec = _strip_prefix_and_find_missing(pd_metadata)

    # Fallback lookups only run when BOTH auxiliary tables were supplied.
    fallbacks_available = not (pd_envoi_qenome_quebec is None
                               or pd_sgil_extract is None)
    if fallbacks_available:
        found_in_sgil = AddMissingFromSgilExtract(
            pd_missing_spec, pd_sgil_extract, pd_metadata.columns)
        print(found_in_sgil)
        pd_metadata = pd.concat([pd_metadata, found_in_sgil])
        pd_missing_spec = _strip_prefix_and_find_missing(pd_metadata)

        found_in_envois = AddMissingFromEnvoisGenomeQuebec(
            pd_missing_spec, pd_envoi_qenome_quebec, pd_metadata.columns)
        pd_metadata = pd.concat([pd_metadata, found_in_envois])
        pd_missing_spec = _strip_prefix_and_find_missing(pd_metadata)

    # Parse the dates, then store them back as ISO-formatted strings.
    parsed_dates = pd.to_datetime(pd_metadata.sample_date)
    pd_metadata['sample_date'] = parsed_dates.dt.strftime('%Y-%m-%d')

    deduplicated = pd_metadata.drop_duplicates(subset='sample', keep='first')
    return [deduplicated, pd_missing_spec]
def Main():
    """Register every entry of the base directory as a plate and build reports.

    Opens the database connection, stores the directory returned by
    ``PlateDirManager.GetBaseDir(_DEBUG)`` in the module-level ``basedir``,
    adds each name listed in that directory to a new ``PlateManager`` and
    passes the manager to ``BuildSeqReports``.
    """
    global basedir

    MySQLcovid19.SetConnection()
    basedir = PlateDirManager.GetBaseDir(_DEBUG)

    plates = PlateManager()
    for entry_name in os.listdir(basedir):
        plates.AddPlate(entry_name)

    BuildSeqReports(plates)
def GetQcDataframeFromDSPdb(id_list):
    """Return the selector's metadata result for ``id_list``.

    Opens the database connection, then forwards the connection and
    ``id_list`` to ``MySQLcovid19Selector.GetMetadataAsPdDataFrame`` and
    returns its result unchanged.
    """
    MySQLcovid19.SetConnection()
    db_connection = MySQLcovid19.GetConnection()
    return MySQLcovid19Selector.GetMetadataAsPdDataFrame(db_connection, id_list)
 def GetSampleDate(self):
     """Return the selector's sample date for this object's sample name.

     The name comes from ``self.sample.GetSampleName()`` and the lookup is
     delegated to ``MySQLcovid19Selector.GetSampleDate`` using the cursor
     from ``MySQLcovid19.GetCursor()``.
     """
     db_cursor = MySQLcovid19.GetCursor()
     sample_name = self.sample.GetSampleName()
     return MySQLcovid19Selector.GetSampleDate(db_cursor, sample_name)
def CreateMetadataForAllDspDbSamples(metadata_destination):
    """Return the selector's metadata with the extract-all flag set to True.

    Opens the database connection and calls
    ``MySQLcovid19Selector.GetMetadataAsPdDataFrame`` with a one-element
    list holding an empty string as the sample list, the given
    ``metadata_destination`` and ``True`` as the last argument.
    """
    MySQLcovid19.SetConnection()
    db_connection = MySQLcovid19.GetConnection()
    placeholder_ids = [""]
    return MySQLcovid19Selector.GetMetadataAsPdDataFrame(
        db_connection, placeholder_ids, metadata_destination, True)
def GetMetadataDfFromCovBank(id_list):
    """Return the selector's metadata result for ``id_list``.

    Opens the database connection, then hands the connection and ``id_list``
    to ``MySQLcovid19Selector.GetMetadataAsPdDataFrame`` and returns whatever
    it produces.
    """
    MySQLcovid19.SetConnection()
    db_connection = MySQLcovid19.GetConnection()
    return MySQLcovid19Selector.GetMetadataAsPdDataFrame(db_connection, id_list)
    def CreateMetadata(self, max_sample_date, tolerated_rej_samples):
        """Build ``self.pd_metadata`` for the samples in ``self.samples_list``.

        Steps performed by the visible code:

        1. Fetch metadata from the database for the ids produced by
           ``self.GetUpperCorrectedSamplesList`` (destination ``'LSPQ'``,
           last selector argument ``False``).
        2. Map the returned ``sample`` ids through
           ``self.GetBelugaIdFromMySQLid``.
        3. Fill in samples reported missing by ``self.CheckMissingSpec`` from
           ``self.pd_sgil_extract`` and then ``self.pd_envoi_qenome_quebec``.
        4. Normalise ``sample_date``, drop duplicate samples, and set aside
           rows whose ``rss`` is ``'INDETERMINE'`` in
           ``self.pd_samples_missing_rss``.
        5. Filter on the sample date range and, when ``_outbreak_`` is
           truthy, drop non-outbreak rows listed in ``tolerated_rej_samples``.

        Also sets ``self.mysqlId_to_belugalistId``, re-orders
        ``self.samples_list`` and sets ``self.pd_missing_samples``.

        Args:
            max_sample_date: Upper bound (inclusive) compared against the
                ``sample_date`` column.
            tolerated_rej_samples: Values tested with ``Series.isin`` against
                the ``sample`` column in the ``_outbreak_`` filter.
        """
        MySQLcovid19.SetConnection()

        # Lower bound used in the date filter further down (start of 2020).
        year_2020 = datetime.datetime.strptime("2020", "%Y")

        # NOTE(review): sgil_corrected_samples_list is not used anywhere in
        # the visible part of this method; confirm whether it is still needed
        # (the map still calls self.GetSGILfoldernoFromOrdno on every sample).
        sgil_corrected_samples_list = list(
            map(self.GetSGILfoldernoFromOrdno, self.samples_list))
        upper_corrected_samples_list = list(
            map(self.GetUpperCorrectedSamplesList, self.samples_list))
        # Pair each corrected id with its original id, sort the pairs, and
        # keep both the lookup dict and the originals in that sorted order.
        zip_list = list(zip(upper_corrected_samples_list, self.samples_list))
        zip_list.sort()
        self.mysqlId_to_belugalistId = dict(zip_list)
        self.samples_list = [x[1] for x in zip_list]

        self.pd_metadata = MySQLcovid19Selector.GetMetadataAsPdDataFrame(
            MySQLcovid19.GetConnection(), upper_corrected_samples_list, 'LSPQ',
            False)
        #self.pd_metadata['sample'] = self.pd_metadata['sample'].str.replace('LSPQ-','') not necessary

        # Strip surrounding spaces, then replace each id with the value
        # returned by self.GetBelugaIdFromMySQLid (via a temporary column).
        self.pd_metadata['sample'] = self.pd_metadata['sample'].str.strip(' ')
        self.pd_metadata['temp'] = self.pd_metadata['sample'].apply(
            self.GetBelugaIdFromMySQLid)
        self.pd_metadata['sample'] = self.pd_metadata['temp']
        self.pd_metadata = self.pd_metadata.drop(columns=['temp'])

        pd_missing_samples = self.CheckMissingSpec(self.pd_metadata,
                                                   self.samples_list)

        # First fallback: look the missing samples up in the SGIL extract.
        pd_missing_get_from_sgil_extract = self.AddMissingFromSgilExtract(
            pd_missing_samples, self.pd_sgil_extract, self.pd_metadata.columns)
        self.pd_metadata = pd.concat(
            [self.pd_metadata, pd_missing_get_from_sgil_extract])

        #self.pd_metadata.to_csv("/home/[email protected]/temp/20201111/test.tsv",sep="\t",index=False)
        #self.pd_metadata['sample'] = self.pd_metadata['sample'].str.replace('LSPQ-','')
        self.pd_metadata['sample'] = self.pd_metadata['sample'].str.strip(' ')
        pd_missing_samples = self.CheckMissingSpec(self.pd_metadata,
                                                   self.samples_list)

        # Second fallback: look up what is still missing in the
        # "envois Genome Quebec" table.
        pd_missing_get_from_EnvoisGenomeQuebec = self.AddMissingFromEnvoisGenomeQuebec(
            pd_missing_samples, self.pd_envoi_qenome_quebec,
            self.pd_metadata.columns)
        self.pd_metadata = pd.concat(
            [self.pd_metadata, pd_missing_get_from_EnvoisGenomeQuebec])

        #self.pd_metadata['sample'] = self.pd_metadata['sample'].str.replace('LSPQ-','') not necessary
        self.pd_metadata['sample'] = self.pd_metadata['sample'].str.strip(' ')
        # Final list of samples still missing after both fallbacks.
        self.pd_missing_samples = self.CheckMissingSpec(
            self.pd_metadata, self.samples_list)

        # Parse the dates and store them as '%Y-%m-%d' strings; they are
        # converted back to datetime64[ns] further down, before filtering.
        self.pd_metadata['sample_date'] = pd.to_datetime(
            self.pd_metadata.sample_date)
        self.pd_metadata['sample_date'] = self.pd_metadata[
            'sample_date'].dt.strftime('%Y-%m-%d')

        self.pd_metadata = self.pd_metadata.drop_duplicates(subset='sample',
                                                            keep='first')

        # The concatenations above keep each frame's own index; renumber it.
        self.pd_metadata.reset_index(drop=True, inplace=True)

        # Set aside the samples whose rss is 'INDETERMINE' and remove those
        # rows from the metadata.
        self.pd_samples_missing_rss = self.pd_metadata.loc[
            self.pd_metadata['rss'] == 'INDETERMINE', ['sample']]
        self.pd_metadata = self.pd_metadata.loc[
            self.pd_metadata['rss'] != 'INDETERMINE', :]
        #print(self.pd_metadata.index)
        #print(self.pd_metadata[self.pd_metadata.index.duplicated()])
        # Append '2D' to every sample id containing 'HGA-'.
        # NOTE(review): this assigns through .loc on a frame obtained by
        # filtering; pandas may emit SettingWithCopyWarning here - confirm.
        self.pd_metadata.loc[self.pd_metadata['sample'].str.contains('HGA-'),
                             'sample'] = self.pd_metadata['sample'] + '2D'
        self.pd_metadata = self.pd_metadata.sort_values(by=['sample'])

        self.pd_metadata['sample_date'] = self.pd_metadata[
            'sample_date'].astype('datetime64[ns]')
        #print(self.pd_metadata.dtypes)

        #self.pd_metadata = self.pd_metadata.loc[(self.pd_metadata['sample_date'] <= max_sample_date) & (self.pd_metadata['sample_date'] >= year_2020 ),:]
        # NOTE(review): min_sample_date is neither a parameter nor a local of
        # this method; presumably a module-level name - confirm it is defined,
        # otherwise this line raises NameError.
        self.pd_metadata = self.pd_metadata.loc[
            (self.pd_metadata['sample_date'] >= min_sample_date) &
            (self.pd_metadata['sample_date'] <= max_sample_date) &
            (self.pd_metadata['sample_date'] >= year_2020), :]
        #print(self.pd_metadata)
        #print(self.pd_samples_missing_rss)

        # Rows with a null or 'NA' OUTBREAK value get the label
        # 'NoOutbreakRelated'.
        self.pd_metadata.loc[(self.pd_metadata['OUTBREAK'].isnull()) |
                             (self.pd_metadata['OUTBREAK'] == 'NA'),
                             ['OUTBREAK']] = 'NoOutbreakRelated'

        # When _outbreak_ is truthy, drop the rows that are both labelled
        # 'NoOutbreakRelated' and listed in tolerated_rej_samples.
        if _outbreak_:
            self.pd_metadata = self.pd_metadata.loc[~(
                (self.pd_metadata['OUTBREAK'] == 'NoOutbreakRelated') &
                (self.pd_metadata['sample'].isin(tolerated_rej_samples))), :]