def _serialize_acr(args):
    """
    Writes the ACR result of one character to tab-separated files in the working directory.

    A parameter file is always written; for marginal methods the marginal probabilities
    are additionally written to their own file.

    :param args: a pair (acr_result, work_dir): the ACR result dictionary and the directory to write into.
    :type args: tuple
    :return: void
    """
    acr_result, work_dir = args
    logger = logging.getLogger('pastml')

    method = acr_result[METHOD]
    model = acr_result[MODEL] if MODEL in acr_result else None
    character = acr_result[CHARACTER]
    out_param_file = os.path.join(work_dir,
                                  get_pastml_parameter_file(method=method, model=model, column=character))

    # These entries are not written as plain "name<TAB>value" rows.
    skipped_names = [FREQUENCIES, STATES, MARGINAL_PROBABILITIES]

    # Plain writes are used instead of DataFrames to speed up document writing.
    with open(out_param_file, 'w+') as f:
        f.write('parameter\tvalue\n')
        f.write('pastml_version\t{}\n'.format(PASTML_VERSION))
        for name in sorted(acr_result.keys()):
            if name in skipped_names:
                continue
            # The number of scenarios is written in general ({:g}) number format.
            row_template = '{}\t{:g}\n' if NUM_SCENARIOS == name else '{}\t{}\n'
            f.write(row_template.format(name, acr_result[name]))
        if is_ml(method):
            # For ML methods each state gets a row with its frequency.
            f.writelines('{}\t{}\n'.format(state, freq)
                         for state, freq in zip(acr_result[STATES], acr_result[FREQUENCIES]))
    logger.debug('Serialized ACR parameters and statistics for {} to {}.'
                 .format(character, out_param_file))

    if not is_marginal(method):
        return

    out_mp_file = os.path.join(work_dir,
                               get_pastml_marginal_prob_file(method=method, model=acr_result[MODEL],
                                                             column=character))
    acr_result[MARGINAL_PROBABILITIES].to_csv(out_mp_file, sep='\t', index_label='node')
    logger.debug('Serialized marginal probabilities for {} to {}.'
                 .format(character, out_mp_file))
def detail_full(request, id):
    """
    Renders the results page of an analysis with the 'full' flag set in the template context.

    The context is left empty when the compressed HTML of the analysis does not exist on disk.
    'rec_error' is set when the pastml_<analysis id>.zip archive expected next to it is missing.

    :param request: the incoming HTTP request.
    :param id: primary key of the Analysis to show (404 if there is no such analysis).
    :return: the rendered 'pastmlapp/layout.html' response.
    """
    analysis = get_object_or_404(Analysis, pk=id)
    compressed_html = analysis.html_compressed

    context = {}
    if os.path.exists(compressed_html):
        column_names = [entry.column for entry in Column.objects.filter(analysis=analysis)]
        # The model is only meaningful for maximum likelihood methods.
        model = analysis.model if is_ml(analysis.prediction_method) else None
        context = {
            'id': id,
            'full': 1,
            'model': model,
            'prediction_method': analysis.prediction_method,
            'columns': ', '.join(column_names),
        }
        zip_path = compressed_html.replace('{}.compressed.html'.format(analysis.id),
                                           'pastml_{}.zip'.format(analysis.id))
        if not os.path.exists(zip_path):
            context['rec_error'] = True

    content = render_to_string('pastmlapp/detail.html', request=request, context=context)
    return render(request, 'pastmlapp/layout.html', {'title': 'Results', 'content': content})
def get_states(method, model, column):
    """
    Determines the states of the given annotation column.

    Note that the annotation table `df` is not a parameter: it is taken from the enclosing scope.
    For ML methods with the HKY or JTT model, the model's predefined states are returned,
    and every value of the column that is not one of them is replaced by '' in `df`.
    Otherwise the sorted non-null, non-empty values found in the column are returned.

    :param method: str, the ancestral state prediction method.
    :param model: str, the state evolution model.
    :param column: str, the name of the annotation column.
    :return: the states for this column.
    :raises ValueError: if the model has predefined states and none of them is found in the column.
    """
    observed_states = [value for value in df[column].unique()
                       if pd.notnull(value) and value != '']

    has_predefined_states = is_ml(method) and model in {HKY, JTT}
    if not has_predefined_states:
        return np.sort(observed_states)

    if HKY == model:
        states = HKY_STATES
    else:
        states = JTT_STATES
    allowed_states = set(states)

    if allowed_states.isdisjoint(observed_states):
        raise ValueError('The allowed states for model {} are {}, '
                         'but your annotation file specifies {} as states in column {}.'
                         .format(model, ', '.join(states), ', '.join(observed_states), column))

    # Blank out everything that is not a state of the model.
    df[column] = df[column].apply(lambda value: value if value in allowed_states else '')
    return states
def get_pastml_parameter_file(method, model, column):
    """
    Get the name of the file where the PastML parameters are saved.
    This file is inside the work_dir that can be specified for the pastml_pipeline method.

    The name is built from the ML template if the method is a maximum likelihood one,
    and from the MP template otherwise.

    :param method: str, the ancestral state prediction method used by PASTML.
    :param model: str, the state evolution model used by PASTML.
    :param column: str, the column for which ancestral states are reconstructed with PASTML.
    :return: str, filename
    """
    if is_ml(method):
        filename_template = PASTML_ML_PARAMS_TAB
    else:
        filename_template = PASTML_MP_PARAMS_TAB
    # Column and method are passed through get_column_method before being put into the name.
    state_label, method_label = get_column_method(column, method)
    return filename_template.format(state=state_label, method=method_label, model=model)
def detail(request, id):
    """
    Renders the results page of an analysis.

    The context is left empty when the compressed HTML of the analysis does not exist on disk.
    Otherwise, besides the analysis settings, it may contain:
    'itol' -- the first line of pastml_<id>_itol.txt found next to the compressed HTML;
    'other_html' -- set to 1 if the '.full.html' counterpart of the compressed HTML exists;
    'rec_error' -- set to True if the pastml_<analysis id>.zip archive is missing.

    :param request: the incoming HTTP request.
    :param id: primary key of the Analysis to show (404 if there is no such analysis).
    :return: the rendered 'pastmlapp/layout.html' response.
    """
    analysis = get_object_or_404(Analysis, pk=id)
    compressed_html = analysis.html_compressed

    context = {}
    if os.path.exists(compressed_html):
        column_names = [entry.column for entry in Column.objects.filter(analysis=analysis)]
        # The model is only meaningful for maximum likelihood methods.
        model = analysis.model if is_ml(analysis.prediction_method) else None
        context = {
            'id': id,
            'model': model,
            'prediction_method': analysis.prediction_method,
            'columns': ', '.join(column_names),
        }

        itol_id_file = os.path.join(os.path.dirname(compressed_html),
                                    'pastml_{}_itol.txt'.format(id))
        if os.path.exists(itol_id_file):
            with open(itol_id_file, 'r') as f:
                context['itol'] = f.readline().strip('\n')

        full_html = compressed_html.replace('.compressed.html', '.full.html')
        if os.path.exists(full_html):
            context['other_html'] = 1

        zip_path = compressed_html.replace('{}.compressed.html'.format(analysis.id),
                                           'pastml_{}.zip'.format(analysis.id))
        if not os.path.exists(zip_path):
            context['rec_error'] = True

    content = render_to_string('pastmlapp/detail.html', request=request, context=context)
    return render(request, 'pastmlapp/layout.html', {'title': 'Results', 'content': content})
def clean_prediction_method(self):
    """
    Returns the cleaned 'prediction_method' value unchanged.

    As a side effect, the 'model' field is made optional when the chosen method
    is not a maximum likelihood one.
    """
    prediction_method = self.cleaned_data['prediction_method']
    if is_ml(prediction_method):
        return prediction_method
    self.fields['model'].required = False
    return prediction_method
def reconstruct_ancestral_states(tree, character, states, prediction_method=MPPA, model=F81,
                                 params=None, avg_br_len=None, num_nodes=None, num_tips=None,
                                 force_joint=True):
    """
    Reconstructs ancestral states for the given character on the given tree.

    :param character: character whose ancestral states are to be reconstructed.
    :type character: str
    :param tree: tree whose ancestral state are to be reconstructed,
        annotated with the feature specified as `character` containing node states when known.
    :type tree: ete3.Tree
    :param states: possible character states.
    :type states: numpy.array
    :param avg_br_len: (optional) average non-zero branch length for this tree. If not specified, will be calculated.
    :type avg_br_len: float
    :param model: (optional, default is F81) state evolution model to be used by PASTML.
    :type model: str
    :param prediction_method: (optional, default is MPPA) ancestral state prediction method to be used by PASTML.
    :type prediction_method: str
    :param num_nodes: (optional) total number of nodes in the given tree (including tips).
        If not specified, will be calculated.
    :type num_nodes: int
    :param num_tips: (optional) total number of tips in the given tree.
        If not specified, will be calculated.
    :type num_tips: int
    :param params: an optional way to fix some parameters, must be in a form {param: value},
        where param can be a state (then the value should specify its frequency between 0 and 1),
        or "scaling factor" (then the value should be the scaling factor for tree branches,
        e.g. set to 1 to keep the original branches). Could also be in a form path_to_param_file.
        Only makes sense for ML methods.
    :type params: dict or str
    :param force_joint: (optional, default is True) passed as is to the ML reconstruction (ml_acr);
        has no effect for the other methods.
    :type force_joint: bool

    :return: ACR result dictionary whose values depend on the prediction method.
    :rtype: dict
    :raises ValueError: if the prediction method is neither COPY, nor an ML, nor an MP method.
    """
    logging.getLogger('pastml').debug('ACR settings for {}:\n\tMethod:\t{}{}.'
                                      .format(character, prediction_method,
                                              '\n\tModel:\t{}'.format(model)
                                              if model and is_ml(prediction_method) else ''))
    if COPY == prediction_method:
        # Nothing to reconstruct: the result only describes the character.
        return {CHARACTER: character, STATES: states, METHOD: prediction_method}
    if not num_nodes:
        num_nodes = sum(1 for _ in tree.traverse())
    if not num_tips:
        num_tips = len(tree)
    if is_ml(prediction_method):
        if avg_br_len is None:
            # np.mean needs a sequence: given a generator it wraps it into a 0-d object array
            # and fails with a TypeError, hence the list.
            avg_br_len = np.mean([n.dist for n in tree.traverse() if n.dist])
        freqs, sf, kappa = None, None, None
        if params is not None:
            freqs, sf, kappa = _parse_pastml_parameters(params, states)
        return ml_acr(tree=tree, character=character, prediction_method=prediction_method, model=model,
                      states=states, avg_br_len=avg_br_len, num_nodes=num_nodes, num_tips=num_tips,
                      freqs=freqs, sf=sf, kappa=kappa, force_joint=force_joint)
    if is_parsimonious(prediction_method):
        return parsimonious_acr(tree, character, prediction_method, states, num_nodes, num_tips)

    raise ValueError('Method {} is unknown, should be one of ML ({}), one of MP ({}) or {}'
                     .format(prediction_method, ', '.join(ML_METHODS), ', '.join(MP_METHODS), COPY))
def send_analysis_email(email, url, id, title, columns, model, prediction_method, itol_id, error=None):
    """
    Sends an email when PastML analysis is finished.

    A success message is sent if no error is given, and a failure message
    (blind-copied to an additional address) otherwise.
    Note that the log record is written before the email is actually sent.

    :param email: address of the recipient.
    :param url: host part used to build the http:// links put into the message.
    :param id: id of the analysis, used to build the link to its results page.
    :param title: subject of the email; a default subject is used if it is empty.
    :param columns: names of the analysed columns.
    :param model: state evolution model, only mentioned for ML methods.
    :param prediction_method: ancestral state prediction method.
    :param itol_id: id of the tree at iTOL; if empty, no iTOL link is included.
    :param error: (optional) error to report; if specified, the failure message is sent.
    :return: the value returned by EmailMessage.send.
    """
    logger.info("Sent analysis is ready email")

    from django.core.mail import EmailMessage
    from pastml.ml import is_ml

    result_url = 'http://{}{}'.format(url, reverse('pastmlapp:detail', args=(id, )))
    help_url = 'http://{}{}'.format(url, reverse('pastmlapp:help'))
    feedback_url = 'http://{}{}'.format(url, reverse('pastmlapp:feedback'))
    if itol_id:
        itol_url = 'http://itol.embl.de/external.cgi?tree={}'.format(itol_id)
    else:
        itol_url = None

    # The model is only mentioned for maximum likelihood methods.
    if is_ml(prediction_method):
        method_description = '{} (model {})'.format(prediction_method, model)
    else:
        method_description = prediction_method
    column_list = ', '.join(columns)

    if error:
        itol_note = ', {}'.format(itol_url) if itol_url else ''
        body = """Dear PastML user, Unfortunately we did not manage to reconstruct the ancestral scenario for your data (see {url}{itol}). \nWe tried to perform ancestral character reconstruction with {method} for {columns}, but got the following error: "{error}" Please make sure that your input data was correctly formatted (see our help page: {help} for input data format). On our side, we were informed about this problem and are trying to fix it. If you wish to give us any additional details, please contact us via the feedback form ({feedback}) or send an email to [email protected]. Kind regards, PastML team -- Evolutionary Bioinformatics Unit Department of Computational Biology, Institut Pasteur Paris, France """.format(
            url=result_url, help=help_url, feedback=feedback_url, columns=column_list,
            method=method_description, error=error, itol=itol_note)
    else:
        if itol_url:
            itol_note = 'The full tree visualisation is also available at iTOL: {} (do not forget to turn on the annotations on top right).' \
                .format(itol_url)
        else:
            itol_note = ''
        body = """Dear PastML user, You PastML ancestral scenario reconstruction is now ready and available at {url}. {itol} We reconstructed ancestral characters with {method} for {columns}. If you want to know more about PastML ancestral character reconstruction and visualisation algorithms please have a look at our help page: {help}, or check out the article: Ishikawa SA, Zhukova A, Iwasaki W, Gascuel O, A Fast Likelihood Method to Reconstruct and Visualize Ancestral Scenarios, Molecular Biology and Evolution, msz131 (https://doi.org/10.1093/molbev/msz131). If you have experienced any problem or have suggestions on how to improve PastML, please contact us via the feedback form ({feedback}) or send an email to [email protected]. Kind regards, PastML team -- Evolutionary Bioinformatics Unit Department of Computational Biology, Institut Pasteur Paris, France """.format(
            url=result_url, help=help_url, feedback=feedback_url, columns=column_list,
            method=method_description, itol=itol_note)

    message = EmailMessage(
        subject=title if title else 'Your PastML analysis is ready',
        body=body,
        to=(email, ),
        attachments=None,
        headers=None,
        cc=None,
        bcc=('*****@*****.**', ) if error else None)
    return message.send(fail_silently=False)