Example #1
 def glue_metric(self,
                 glue_ref: str,
                 metric: str,
                 run: mlflow.entities.run.Run = None):
     """Glue the parameter 'metric' to the glue reference name 'glue_ref'
     By default for the configured latest run, but you are free to supply any Run object."""
     if run is None: run = self.latest_run
     glue(glue_ref, run.data.metrics[metric], display=False)
Example #2
 def glue_all_metrics(self,
                      glue_ref: str,
                      transpose: bool = False,
                      run: mlflow.entities.run.Run = None):
     """Glue all metrics as a dataframe table. Transpose option.
     By default for the configured latest run, but you are free to supply any Run object."""
     if run is None: run = self.latest_run
     metrics_df = self.get_metrics_as_df(run)
     if transpose: metrics_df = metrics_df.transpose()
     glue(glue_ref, metrics_df, display=False)
Example #3
from PIL import Image
from myst_nb import glue


def glue_image_by_uri(glue_name, image_uri):
    """Glue image by absolute URI

    Args:
        glue_name: string
        image_uri: absolute path of the image file.

    Returns:
        -
    """
    im = Image.open(image_uri)
    glue(glue_name, im, display=False)
Example #4
    def glue_model_reference_metadata(self,
                                      glue_ref: str,
                                      run: mlflow.entities.run.Run = None):
        """Glue commit hash, run id and the run's end time as a dictionary.
        By default for the configured latest run, but you are free to supply any Run object.

        Args:
            run: any MLFlow Run object.
            glue_ref: name by which to refer to the glued object.

        Returns:
            -
        """
        if run is None: run = self.latest_run
        internal_meta_data = dict(
            commit_hash=run.data.tags.get('mlflow.source.git.commit', 'N/A'),
            run_id=run.info.run_id,
            run_end_time_local=time.strftime(
                '%Y-%m-%d %H:%M:%S',
                time.localtime(run.info.end_time / 1000.)),
        )
        if glue_ref is None: glue_ref = 'model_ref_metadata'
        glue(glue_ref, internal_meta_data, display=False)
# [section-9]
from scipy.spatial.transform import Rotation

# Build the Z-X-Z Euler-angle rotation from the orbital angles and apply it to
# the position and velocity vectors.
R = Rotation.from_euler("ZXZ", [-omega, -i, -Omega])
r_rot = r_w @ R.as_matrix()
v_rot = v_w @ R.as_matrix()

# [section-10]
try:
    import warnings

    warnings.simplefilter("ignore")
    from myst_nb import glue

    glue("orbital-elements-radius", r, display=False)
    glue("orbital-elements-velocity", v, display=False)
    glue("orbital-elements-v_r", v_r, display=False)
    glue("orbital-elements-v_p", v_p, display=False)
    glue("orbital-elements-h_vec-I", h_vec[0], display=False)
    glue("orbital-elements-h_vec-J", h_vec[1], display=False)
    glue("orbital-elements-h_vec-K", abs(h_vec[2]), display=False)
    glue("orbital-elements-h", h, display=False)
    glue("orbital-elements-i", np.degrees(i), display=False)
    glue("orbital-elements-N_Y", N_vec[1], display=False)
    glue("orbital-elements-raan", np.degrees(Omega), display=False)
    glue("orbital-elements-e", e, display=False)
    glue("orbital-elements-e_Z", e_vec[2], display=False)
    glue("orbital-elements-aop", np.degrees(omega), display=False)
    glue("orbital-elements-true-anomaly", np.degrees(nu), display=False)
    glue("orbital-elements-r_w-I", r_w[0], display=False)
Example #6
- **`NOT`** produces an output that is the opposite of its input; it therefore represents the negation of its input.

The possible inputs and output values of these Boolean operations are presented in the following table.

from myst_nb import glue

import pandas as pd
booleanas = pd.DataFrame({
    'P': [False, False, True, True,], 
    'Q': [False, True, False, True]
    })
booleanas['P AND Q'] = booleanas['P'] & booleanas['Q']
booleanas['P OR Q'] = booleanas['P'] | booleanas['Q']
booleanas['P XOR Q'] = booleanas['P'] != booleanas['Q']
booleanas['NOT P'] = ~ booleanas['P']
glue('tab-booleanas', booleanas.style.hide_index())

```{glue:figure} tab-booleanas
:figwidth: 370px
:align: center
Inputs and outputs of Boolean operations.
```

## Hexadecimal notation

The internal activities of a computer operate on patterns of _bits_, some of which may contain many digits, for example `1011 0100 1000`. Because such strings of digits are hard to follow, their representation is simplified by means of hexadecimal notation.

Hexadecimal notation uses one symbol to represent a pattern of four _bits_. Thus, for example, a string of twelve _bits_ can be represented by three hexadecimal symbols. In the following table, the left-hand column lists all possible _bit_ patterns of length four; the right-hand column shows the symbol used in hexadecimal notation to represent the _bit_ pattern to its left.
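
As a quick illustration (a minimal sketch using only the standard library and pandas, independent of the `dec2hex`/`dec2bin` helpers imported below), the full four-_bit_-to-hexadecimal table can be generated as follows:

import pandas as pd

# All 16 possible 4-bit patterns and the hexadecimal symbol assigned to each.
hex_table = pd.DataFrame({
    'Bit pattern': [format(n, '04b') for n in range(16)],
    'Hexadecimal symbol': [format(n, 'X') for n in range(16)],
})
hex_table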

from src.data_convert import dec2hex, dec2bin
Example #7
plt.xkcd()
fig = plt.figure(figsize=(10,4))
ax1 = fig.add_subplot(1,2,1)
ax2 = fig.add_subplot(1,2,2,sharey=ax1)
ax1.plot(xx, norm.pdf(xx,scale=1), 'k')
ax1.set_title('Normal Distribution', size=25)

ax2.plot(xx, norm.pdf(xx,scale=1), 'k')
ax2.scatter(-0.4, 0.28, s=300, linewidth=2.5, facecolors='none', edgecolors='k')
ax2.scatter(0.4, 0.28, s=300, linewidth=2.5, facecolors='none', edgecolors='k')
ax2.plot(xx,-0.02*np.cos(3*xx), 'k')
ax2.set_title('Paranormal Distribution', size=25)
plt.show()

from myst_nb import glue
glue("paranormal", fig, display=False)

````{toggle} Normal vs Paranormal
```{glue:} paranormal
```
````


````{toggle}
```
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt

xx = np.linspace(-2.75,2.75,100)
plt.xkcd()
Example #8
from myst_nb import glue
fid = open('./data/binary-msg.txt', 'r')
msg = fid.readlines()[0]
fid.close()
glue('msg-binary', msg)

# Exercises

1. What is the value of the least significant _bit_ in the _bit_ patterns represented by the following hexadecimal notations?
    - 9A
    - 1B
    - 6E

1. What improvement is obtained by increasing the rotation speed of a disk or CD?

1. What factors allow all {term}`CD`, {term}`DVD`, and Blu-ray discs to be read by a single drive?

1. What advantages does flash memory offer compared with the other mass-storage systems?

1. What characteristics of {term}`HDD` drives keep them competitive?

1. The following code shows a message encoded in {term}`ASCII` using 8 _bits_ per symbol. Interpret what it says.
    {glue:text}`msg-binary`

1. Encode the following sentences in {term}`ASCII`.
    - "Wait!" The traffic light is red.
    - 12 + 21 = 33?

1. Convert the following binary representations to their base-10 equivalents.
    - 101010
    - 100001
Example #9
    for j_col, name in enumerate(row_names):
        file_ = meta_results[name]
        display = plotting.plot_stat_map(
            file_,
            annotate=False,
            axes=axes[i_row, j_col],
            cmap="RdBu_r",
            cut_coords=[5, -15, 10],
            draw_cross=False,
            figure=fig,
        )
        axes[i_row, j_col].set_title(name)

        colorbar = display._cbar
        colorbar_ticks = colorbar.get_ticks()
        if colorbar_ticks[0] < 0:
            new_ticks = [colorbar_ticks[0], 0, colorbar_ticks[-1]]
        else:
            new_ticks = [colorbar_ticks[0], colorbar_ticks[-1]]
        colorbar.set_ticks(new_ticks, update_ticks=True)

glue("figure_uncorr_ibma", fig, display=False)

# ```{glue:figure} figure_uncorr_ibma
# :name: figure_uncorr_ibma
# :align: center
#
# An array of plots of the statistical maps produced by the image-based meta-analysis methods.
# The likelihood-based meta-analyses are run on atlases instead of voxelwise.
# ```
Example #10
Having reviewed techniques for storing _bits_, we now turn to the ways information is encoded as _bit_ patterns. We present some popular methods for encoding text, numeric data, and images.

## Text

Text is normally represented by means of a code in which each of the different symbols (e.g., letters of the alphabet, punctuation marks, digits) is assigned a unique _bit_ pattern. The text is then represented as a long string of _bits_ in which successive patterns represent the successive symbols in the original text.

import pandas as pd
from src.data_convert import word2charlist, dec2bin
from myst_nb import glue

word = word2charlist('Hola!')
table = pd.DataFrame({'Codificación ASCII': word})
for i in range(len(table)):
    decimal = ord(table.loc[i,'Codificación ASCII'])
    table.loc[i, 'Patrón binario'] = dec2bin(decimal, 8)
glue('tab_hola_ascii', table.style.hide_index())

```{glue:figure} tab_hola_ascii
:name: tab-hola_ascii
:align: left

The word "Hola!" encoded in {term}`ASCII`.
``` 

During the 1940s and 1950s, many such codes were designed and used in connection with different pieces of equipment, producing a corresponding proliferation of communication problems. To alleviate this situation, {term}`ANSI` adopted {term}`ASCII`. This code ([see the code](https://www.ascii-code.com/)) uses seven-_bit_ patterns to represent the uppercase and lowercase letters of the English alphabet, punctuation symbols, the digits 0 through 9, and certain control information such as line feeds and carriage returns. The code is extended to an eight-_bit_-per-symbol format by adding a 0 at the most significant end of each seven-_bit_ pattern. This technique not only produces a code in which each pattern fits conveniently into a typical _byte_-sized memory cell, but also provides 128 additional _bit_ patterns (obtained by assigning the value 1 to the extra _bit_) that can be used to represent symbols beyond the English alphabet and its punctuation.

The [ISO](https://www.iso.org/) has developed a number of extensions to {term}`ASCII`, each designed to accommodate a major group of languages, for instance by including the symbols needed to express the text of most languages. However, two drawbacks arose. First, the number of additional _bit_ patterns available in extended {term}`ASCII` is insufficient to accommodate the alphabets of many languages (e.g., Asian and Eastern European languages). Second, because a given document was restricted to the symbols of the single standard selected, documents containing text from disparate language groups could not be supported. To address this deficiency, [Unicode](https://home.unicode.org/) was developed through the cooperation of several leading hardware and software manufacturers {cite}`vacca_unicode_1991`. This code uses a unique pattern of up to 21 _bits_ to represent each symbol. When the Unicode character set is combined with the UTF-8 encoding standard, the original {term}`ASCII` characters can still be represented with 8 _bits_, while the thousands of additional characters in languages such as Chinese, Japanese, and Hebrew can be represented with 16 _bits_. Beyond the characters required for all the world's commonly used languages, UTF-8 uses 24- or 32-_bit_ patterns to represent more obscure Unicode symbols, leaving ample room for future expansion.

```{note}
UTF-8 is a character encoding format for Unicode and [ISO 10646](https://www.iso.org/standard/69119.html) that uses variable-length symbols. It divides the Unicode characters into several groups according to the number of _bytes_ needed to encode them. The number of _bytes_ depends solely on the character code assigned by Unicode and the number of _bytes_ needed to represent it.
```
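
A small sketch of this variable-length behaviour, using Python's built-in `str.encode` (the characters chosen are arbitrary examples):

# UTF-8 uses one byte for the original ASCII range and more bytes for other symbols.
for character in ['A', 'ñ', '€', '中']:
    print(character, '->', len(character.encode('utf-8')), 'byte(s)')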
Example #11
df.head()

# We drew some scatterplots to help us examine the relationship between the amount of sleep I get, and my grumpiness the following day.

# In[2]:

from myst_nb import glue
import seaborn as sns
sns.set_context("notebook", font_scale=1.5)
ax = sns.scatterplot(data=df, x='dan_sleep', y='dan_grump')
ax.set(title='Grumpiness and sleep',
       ylabel='My grumpiness (0-100)',
       xlabel='My sleep (hours)')
sns.despine()

glue("sleepycorrelation_fig", ax, display=False)

#  ```{glue:figure} sleepycorrelation_fig
# :figwidth: 600px
# :name: fig-sleepycorrelation
#
# Scatterplot showing grumpiness as a function of hours slept.
#
# ```
#

# The actual scatterplot that we draw is the one shown in {numref}`fig-sleepycorrelation`, and as we saw previously this corresponds to a correlation of $r=-.90$, but what we find ourselves secretly imagining is something that looks closer to the left panel in {numref}`fig-sleep_regressions_1`. That is, we mentally draw a straight line through the middle of the data. In statistics, this line that we're drawing is called a **_regression line_**. Notice that -- since we're not idiots -- the regression line goes through the middle of the data. We don't find ourselves imagining anything like the rather silly plot shown in the right panel in {numref}`fig-sleep_regressions_1`.

# In[3]:

import numpy as np
Example #12
def embed_py(path, id_=None):
    """Glue the contents of a Python file as a fenced code block, keyed by ``id_`` (or by the path string if no key is given)."""
    content = read_text(path, pre_sty='```python\n', post_sty='\n```')
    if id_ is None:
        glue(str(path), content)
    else:
        glue(id_, content)
Example #13
# In[4]:

cogatlas = extract.download_cognitive_atlas(data_dir=data_path,
                                            overwrite=False)
id_df = pd.read_csv(cogatlas["ids"])
rel_df = pd.read_csv(cogatlas["relationships"])

cogat_counts_df, rep_text_df = annotate.cogat.extract_cogat(
    neurosynth_dset_first_500.texts, id_df, text_column="abstract")

# In[5]:

example_forms = id_df.loc[id_df["name"] == "dot motion task"][[
    "id", "name", "alias"
]]
glue("table_cogat_forms", example_forms)

# ```{glue:figure} table_cogat_forms
# :name: "tbl:table_cogat_forms"
# :align: center
#
# An example of alternate forms characterized by the Cognitive Atlas and extrapolated by NiMARE.
# Certain alternate forms (i.e., synonyms) are specified within the Cognitive Atlas, while others are inferred automatically by NiMARE according to certain rules (e.g., removing parentheses).
# ```

# In[6]:

# Define a weighting scheme.
# In this scheme, observed terms will also count toward any hypernyms (isKindOf),
# holonyms (isPartOf), and parent categories (inCategory) as well.
weights = {"isKindOf": 1, "isPartOf": 1, "inCategory": 1}
Example #14
import pandas as pd
import geopandas as gpd
from myst_nb import glue

data_path = "./../../data/eia"

# %%
gf = a.GenFuel(loc=data_path)

# %%
gf.df.head()

# %%
start_year = gf.df.query("quantity > 0").year.min()
end_year = gf.df.query("quantity > 0").year.max()
glue("start_year", start_year)
glue("end_year", end_year)

subtitle = (
    f"{start_year}-{end_year}, US EIA https://www.eia.gov/electricity/data/eia923/"
)

# %% [markdown]
"""
## Largest NYC plants (by generation)
"""

# %%
gf.df_nyc.head()

# %%
Example #15
    amygdala_mask,
    annotate=False,
    draw_cross=False,
    axes=axes[0],
    figure=fig,
)
axes[0].set_title("Amygdala ROI")
display = plotting.plot_roi(
    sphere_img,
    annotate=False,
    draw_cross=False,
    axes=axes[1],
    figure=fig,
)
axes[1].set_title("Spherical ROI")
glue("figure_macm_rois", fig, display=False)

# ```{glue:figure} figure_macm_rois
# :name: figure_macm_rois
# :align: center
#
# Region of interest masks for (1) a target mask-based MACM and (2) a coordinate-based MACM.
# ```

# Once the `Dataset` has been reduced to studies with coordinates within the mask or sphere requested, any of the supported CBMA Estimators can be run.

# In[5]:

from nimare import meta

meta_amyg = meta.cbma.ale.ALE(kernel__sample_size=20)
Example #16
#
# When decoding unthresholded statistical maps (such as {numref}`figure_map_to_decode`), the most common approaches are to simply correlate the input map with maps from the database, or to compute the dot product between the two maps.
# In Neurosynth, meta-analyses are performed for each label (i.e., term or topic) in the database and then the input image is correlated with the resulting unthresholded statistical map from each meta-analysis.
# Performing statistical inference on the resulting correlations is not straightforward, however, as voxels display strong spatial correlations, and the true degrees of freedom are consequently unknown (and likely far smaller than the nominal number of voxels).
# In order to interpret the results of this decoding approach, users typically select some arbitrary number of top correlation coefficients ahead of time, and use the associated labels to describe the input map.
# However, such results should be interpreted with great caution.
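
# As a toy illustration of the correlation-based approach described above (this is
# *not* NiMARE's decoding API; `input_map` and `label_maps` are assumed to be a 1D
# array of voxel values and a dict mapping labels to 1D meta-analytic maps on the
# same grid), labels could be ranked like this:

import numpy as np

def rank_labels_by_correlation(input_map, label_maps, top_n=10):
    """Return the top_n (label, r) pairs, sorted by Pearson correlation."""
    scores = {
        label: np.corrcoef(input_map, meta_map)[0, 1]
        for label, meta_map in label_maps.items()
    }
    return sorted(scores.items(), key=lambda item: item[1], reverse=True)[:top_n]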

# In[3]:

fig, ax = plt.subplots(figsize=(10, 6))
plotting.plot_stat_map(continuous_map,
                       axes=ax,
                       figure=fig,
                       annotate=False,
                       draw_cross=False)
glue("figure_map_to_decode", fig, display=False)

# ```{glue:figure} figure_map_to_decode
# :name: figure_map_to_decode
# :align: center
#
# The unthresholded statistical map that will be used for continuous decoding.
# ```
#
# This approach can also be applied to an image-based database like NeuroVault, either by correlating input data with meta-analyzed statistical maps, or by deriving distributions of correlation coefficients by grouping statistical maps in the database according to label.
# Using these distributions, it is possible to statistically compare labels in order to assess label significance.
# NiMARE includes methods for both correlation-based decoding and correlation distribution-based decoding, although the correlation-based decoding is better established and should be preferred over the correlation distribution-based decoding.
# As such, we will only show the {py:class}`~nimare.decode.continuous.CorrelationDecoder` here.
#
# ```{important}
# {py:class}`~nimare.decode.continuous.CorrelationDecoder` currently runs _very_ slowly.
Example #17
# from myst_nb import glue
# my_variable = "here is some text!"
# glue("glued_text", my_variable)
# ```
#
# Here is an example of how to glue text: {glue:}`glued_text`
# ``````
#
# **Result**:

# In[2]:

from myst_nb import glue

my_variable = "here is some text!"
glue("glued_text", my_variable)

# Here is an example of how to glue text: {glue:}`glued_text`
#
# See {ref}`glue/gluing` for more information.
#
# ### Gluing numbers
#
# **Example**:
#
# ``````md
# ```{code-cell} ipython3
# from myst_nb import glue
# import numpy as np
# import pandas as pd
#
Example #18
**The priors might vary between the target population and the available set.** In this experiment, we model that 5% of the kids in $\mathcal{U}$ have ASD. In our sample $(X,Y)$, on the other hand, we simulated a larger proportion of ASD children: 20%. This means that, for the data we have available, the chance of sampling an ASD child when selecting a child at random is four times higher. Overrepresenting the minority class when empirical data is collected is a common and reasonable practice. We want to learn key features that represent both classes, and we need a considerable number of samples from both categories. Sampling at random from $\mathcal{U}$ would be very inefficient in terms of getting samples from the minority class (since we are considering imbalanced problems). Instead, we can work with a set that over-represents the minority class and correct the metrics so that the performance matches the prior in the actual population, as we discuss in the following. ADDREF
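
To make "correct the metrics" concrete, here is a minimal sketch (the function and the example score are illustrative, not part of the experiment) of how a posterior estimated on the 20%-prevalence sample can be re-weighted to the 5%-prevalence population:

def correct_for_prior(p_sample, prior_sample=0.20, prior_population=0.05):
    """Re-weight P(y=1|x) estimated on the over-sampled data to the population prior."""
    adjustment = (prior_population / prior_sample) / ((1 - prior_population) / (1 - prior_sample))
    odds = (p_sample / (1 - p_sample)) * adjustment
    return odds / (1 + odds)

correct_for_prior(0.5)  # a score of 0.5 on the sample corresponds to ~0.17 in the population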

## Identifying and assessing descriptive features
Let's start looking into the data and try to get some idea about the following questions: **Q1: Is the head-turn delay a useful feature for the diagnosis of ASD? If the answer is "Yes," Q2: How reliable is this biomarker?**

To answer the previous questions, we can start by looking at the data we have available $(X,Y)$:

# Compute and plot the histogram of X values for each class.
df = pd.DataFrame({'X':X,'Y':Y}); 
fig = plt.figure()
sns.distplot(df.query('Y==0')['X'],bins=100, kde=True); 
sns.distplot(df.query('Y==1')['X'],bins=100, kde=True); 
plt.xlabel('Head turn delay in seconds (X)'); plt.xlim([0,5]);

glue("hist_head_turn_lab", fig, display=False)

```{glue:figure} hist_head_turn_lab
:figwidth: 50%
:name: "fig:hist_head_turn_lab"

Distribution of the head-turn values in the sample data (X,Y). The blue and orange distributions show the histograms for the non-ASD and ASD groups, respectively ($P(X|Y)$).
```

Looking at the results of {numref}`fig:hist_head_turn_lab`, the answer to Q1 seems to be YES! Head-turn delay looks like a descriptive biomarker for autism (recall this is just simulated data and a toy example). For the moment, we are answering the question informally, just by looking at the blue and orange distributions and observing that _they look different_. Of course a more formal statistical approach can be adopted, which is the subject of Section {ref}`sec:hypothesis_testing`.  

This toy data suggests that kids in the non-ASD group turn their heads faster after a name call. It also looks like, if the delay is below one second, we can be almost certain the kid belongs to the non-ASD group. At the same time, for values larger than 2 s, there is a higher chance that the kid is in the ASD group. Finally, if $x_i\approx 2$ we do not have much information about whether subject $s_i$ is in one group or the other. Given these observations, answering Q2 (how reliable is this feature?) is not trivial; the first answer seems to be: "it depends where in these distributions you fall." We will address this formally in Section {ref}`subsec:posterior_odds`.

## The importance of priors in imbalanced problems

The distributions shown in {numref}`fig:hist_head_turn_lab` can be misleading, since they only illustrate the distribution of the feature ($X$) within each class (i.e., $P(X|y=0)$ and $P(X|y=1)$); they do not take into account that one of the classes occurs much more frequently than the other (i.e., $p(y=1) \ll p(y=0)$).
Example #19
fig, axes = plt.subplots(figsize=(6, 4), nrows=2)

for i_meta, (name, file_) in enumerate(meta_results.items()):
    display = plotting.plot_stat_map(
        file_,
        annotate=False,
        axes=axes[i_meta],
        draw_cross=False,
        cmap="Reds",
        cut_coords=[0, 0, 0],
        figure=fig,
    )
    axes[i_meta].set_title(name)

    colorbar = display._cbar
    colorbar_ticks = colorbar.get_ticks()
    if colorbar_ticks[0] < 0:
        new_ticks = [colorbar_ticks[0], 0, colorbar_ticks[-1]]
    else:
        new_ticks = [colorbar_ticks[0], colorbar_ticks[-1]]
    colorbar.set_ticks(new_ticks, update_ticks=True)

glue("figure_corr_cbma", fig, display=False)

# ```{glue:figure} figure_corr_cbma
# :name: figure_corr_cbma
# :align: center
#
# An array of plots of the corrected statistical maps produced by the different multiple comparisons correction methods.
# ```
Example #20
    return np.flip(top_eigvals)

X_eigvals = get_eigvals(XXt, n_eigvals=4)
L_eigvals = get_eigvals(L, n_eigvals=4)
n_covariates = X.shape[1]
n_components = 3

amin = (L_eigvals[n_components - 1] - L_eigvals[n_components]) / X_eigvals[0]
if n_covariates > n_components:
    amax = L_eigvals[0] / (
        X_eigvals[n_components - 1] - X_eigvals[n_components]
    )
else:
    amax = L_eigvals[0] / X_eigvals[n_covariates - 1]
    
glue("amin", amin, display=False)
glue("amax", amax, display=False)

Using these equations, we get a minimum weight of {glue:}`amin` and a maximum weight of {glue:}`amax`.

#### Searching with K-Means

We have a range of possible weights to search through, but we don't have the best one. To find it, we'll embed with Covariate-Assisted Clustering, using all the tricks described previously, for as many alpha-values in our range as we're willing to test. Then, we'll simply pick the value which best lets us distinguish between the different communities in our network. 

To figure out which $\alpha$ is best, we need to cluster our data using a machine learning algorithm. The algorithm of choice will be scikit-learn's faster implementation of k-means. K-means is a simple algorithm capable of clustering most datasets very quickly and efficiently, often in only a few iterations. It works by initially sticking some number of predetermined cluster centers in essentially random places in our data, and then iterating through a searching procedure until all the cluster centers are in nice places. If you want more information, you can check out the original paper by Stuart Lloyd<sup>2</sup>, or scikit-learn's tutorial describing K-means<sup>3</sup>.

We also need to define exactly what it means to check which values produce the best clustering. Fortunately, K-means comes out-of-the-box with a fine definition: its objective function, the sum of squared distances of each point from its cluster center. In KMeans, this is called the "inertia".

Below is Python code which searches through our range of possible $\alpha$ values, and then tests a clustering using each value. Because it's quicker, we'll only look through ten values, but in principle the more values you test the better (and the slower).

from sklearn.cluster import KMeans
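
Something along the following lines (a simplified sketch rather than the full pipeline; `L`, `XXt`, `amin`, `amax`, and `n_components` are assumed from the earlier cells, and the embedding step is a bare-bones stand-in for the Covariate-Assisted Clustering embedding):

import numpy as np
from scipy.linalg import eigh

def case_embedding(L, XXt, alpha, n_components):
    """Embed with the eigenvectors of L + alpha * X @ X.T for the largest eigenvalues."""
    eigvals, eigvecs = eigh(L + alpha * XXt)
    return eigvecs[:, -n_components:]

# KMeans was imported above; keep the weight whose clustering has the lowest inertia.
best_alpha, best_inertia = None, np.inf
for alpha in np.linspace(amin, amax, 10):  # ten candidate weights, as in the text
    latents = case_embedding(L, XXt, alpha, n_components)
    kmeans = KMeans(n_clusters=n_components).fit(latents)
    if kmeans.inertia_ < best_inertia:
        best_alpha, best_inertia = alpha, kmeans.inertia_
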
Example #21

Some systems implicitly open a file when the first reference to it is made, and the file is closed automatically when the job or program that opened it terminates. Most systems, however, require the programmer to open a file explicitly with a system call before the file can be used. This operation takes the file name and searches the directory, copying the directory entry into the open-file table. The call may also accept access-mode information: create, read-only, read-write, append-only, and so on. This mode is checked against the file's permissions; if the requested mode is allowed, the file is opened for the process.
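
As a rough analogue of those access modes (a sketch using the POSIX-style flags exposed by Python's `os` module; the file name is just an example):

import os

# Create the file if it does not exist, then reopen it with other access modes.
fd = os.open('example.txt', os.O_WRONLY | os.O_CREAT, 0o644)
os.close(fd)
fd = os.open('example.txt', os.O_RDONLY)               # read-only
os.close(fd)
fd = os.open('example.txt', os.O_RDWR | os.O_APPEND)   # read-write, appending writes
os.close(fd)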

The information associated with an open file can be summarized as: the current read/write position, an open-file count, the information needed to locate the file on disk, and the file's access permissions, used to grant or deny requests. Some {term}`OS`s provide facilities for locking a file, preventing other processes from accessing it. This functionality prevents, for example, a file shared by several processes from being modified simultaneously until the lock is released. In other cases, read access may still be granted to a shared file that is locked by some process.

## File types

If an {term}`OS` recognizes the type of a file, it can then operate on the file in reasonable ways. A common technique for implementing file types is to include the type as part of the file name. The name of a file is split into a name and an extension, usually separated by a period. In this way, the user and the {term}`OS` can tell, from the name alone, what type a file is.

Most operating systems allow users to specify a file name as a sequence of characters followed by a period and ending with an extension made up of additional characters.
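
For instance (a minimal illustration using Python's standard library; the file name is just an example):

import os

name, extension = os.path.splitext('report.pdf')
print(name, extension)  # report .pdf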

from myst_nb import glue

import pandas as pd

tipos = pd.read_csv('./data/tipos_archivos.csv', delimiter="\t")
glue('tab-tipos-archivos', tipos.style.hide_index())

```{glue:figure} tab-tipos-archivos
:name: tab-tipos-archivos
:align: left

Examples of file types.
``` 

The {term}`OS` uses the extension to indicate the type of the file and the kind of operations that can be performed on it. Application programs also use extensions to indicate the file types they are interested in. These extensions are not always required, so a user may specify a file without the extension (to save typing), and the application will look for a file with the expected name and extension.

## File structure

File types can also be used to indicate the internal structure of a file. The following terms ({numref}`fig-estructura-archivo`) are in common use in this context {cite}`stallings_operating_2015`:

```{figure} ../images/estructura-archivo.png
Example #22
# In[1]:

from myst_nb import glue
import pandas as pd

data = {
    'Department': ['A', 'B', 'C', 'D', 'E', 'F'],
    'Male Applicants': [825, 560, 325, 417, 191, 272],
    'Male Percent Admitted': ['62%', '63%', '37%', '33%', '28%', '6%'],
    'Female Applicants': [108, 25, 593, 375, 393, 341],
    'Female Percent Admitted': ['82%', '68%', '34%', '35%', '24%', '7%']
}

df = pd.DataFrame(data)

glue("berkley-table", df, display=False)

# ```{glue:figure} berkley-table
# :figwidth: 600px
# :name: fig-berkleytable
#
# Admission figures for the six largest departments by gender
# ```

# Remarkably, most departments had a *higher* rate of admissions for females than for males! Yet the overall rate of admission across the university for females was *lower* than for males. How can this be? How can both of these statements be true at the same time?
#
# Here's what's going on. Firstly, notice that the departments are *not* equal to one another in terms of their admission percentages: some departments (e.g., engineering, chemistry) tended to admit a high percentage of the qualified applicants, whereas others (e.g., English) tended to reject most of the candidates, even if they were high quality. So, among the six departments shown above, notice that department A is the most generous, followed by B, C, D, E and F in that order. Next, notice that males and females tended to apply to different departments. If we rank the departments in terms of the total number of male applicants, we get **A**>**B**>D>C>F>E (the "easy" departments are in bold). On the whole, males tended to apply to the departments that had high admission rates. Now compare this to how the female applicants distributed themselves. Ranking the departments in terms of the total number of female applicants produces a quite different ordering C>E>D>F>**A**>**B**. In other words, what these data seem to be suggesting is that the female applicants tended to apply to "harder" departments. And in fact, if we look at the figure below, we see that this trend is systematic, and quite striking. This effect is known as Simpson's paradox. It's not common, but it does happen in real life, and most people are very surprised by it when they first encounter it, and many people refuse to even believe that it's real. It is very real. And while there are lots of very subtle statistical lessons buried in there, I want to use it to make a much more important point ...doing research is hard, and there are *lots* of subtle, counterintuitive traps lying in wait for the unwary.  That's reason #2 why scientists love statistics, and why we teach research methods. Because science is hard, and the truth is sometimes cunningly hidden in the nooks and crannies of complicated data.
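
# The rankings described above can be reproduced directly from the table
# (`df` is the DataFrame glued as "berkley-table" in the previous cell):

male_order = df.sort_values('Male Applicants', ascending=False)['Department'].tolist()
female_order = df.sort_values('Female Applicants', ascending=False)['Department'].tolist()
print(male_order)    # ['A', 'B', 'D', 'C', 'F', 'E']
print(female_order)  # ['C', 'E', 'D', 'F', 'A', 'B']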

# In[2]:

from myst_nb import glue
Example #23
             matsimp[i,j]=matriu[i,j].full_simplify()
    
    return matsimp

Since the first-principles calculation of the vibration modes begins by establishing the equilibrium geometry of the crystal, we verify with the supplied data that monolayer $BN$ is a two-dimensional hexagonal crystal with a diatomic basis, whose unit cell is given by (supplied data):

$$
\vec a_1=a(1,0);\qquad\vec a_2=a\left(-\frac{1}{2},\frac{\sqrt{3}}{2}\right);
$$

var('a', domain='positive')
a_1=a*vector([1,0])
a_2=a*vector([-1/2,sqrt(3)/2])

angle=arccos(a_1*a_2/(norm(a_1)*norm(a_2)))
glue("angle_cela", angle, display=False)
# In-line text: {glue:}`angle_cela`

We check that the two basis vectors indeed form an angle of {glue:}`angle_cela` radians.

We label the unit cells with a vector index $\vec l=\left( l_1, l_2\right)$.


The lattice-point positions are $\vec R_{\vec l}=l_1\vec{a}_1+l_2\vec{a}_2$.
We visualize a region of the hexagonal lattice with the corresponding lattice points (which are not atoms), as well as the corresponding unit cell,

nucs=points([l_1*a_1/a+l_2*a_2/a for l_1 in range(-3, 4) for l_2 in range(-3,4)], 
             size=40, color="blue", frame=False)

show(nucs+
    line([(0,0),(a_1/a)],color="red")+
Example #24
        cut_coords=[5, 0, 29],
        draw_cross=False,
        figure=fig,
        vmax=vmax,
    )
    axes[i_meta].set_title(name)

    colorbar = display._cbar
    colorbar_ticks = colorbar.get_ticks()
    if colorbar_ticks[0] < 0:
        new_ticks = [colorbar_ticks[0], 0, colorbar_ticks[-1]]
    else:
        new_ticks = [colorbar_ticks[0], colorbar_ticks[-1]]
    colorbar.set_ticks(new_ticks, update_ticks=True)

glue("figure_ma_maps", fig, display=False)

# In[5]:

# Here we delete the recent variables for the sake of reducing memory usage
del mkda_ma_maps, kda_ma_maps, ale_ma_maps

# ```{glue:figure} figure_ma_maps
# :name: "figure_ma_maps"
# :align: center
#
# Modeled activation maps produced by NiMARE's `KernelTransformer` classes.
# ```

# In[6]:
Example #25
    sub_results.get_map("z_desc-group1MinusGroup2", return_type="image"),
    annotate=False,
    axes=ax,
    cmap="RdBu_r",
    cut_coords=[0, 0, 0],
    draw_cross=False,
    figure=fig,
)
ax.set_title("ALE Subtraction")

colorbar = display._cbar
colorbar_ticks = colorbar.get_ticks()
if colorbar_ticks[0] < 0:
    new_ticks = [colorbar_ticks[0], 0, colorbar_ticks[-1]]
else:
    new_ticks = [colorbar_ticks[0], colorbar_ticks[-1]]
colorbar.set_ticks(new_ticks, update_ticks=True)
glue("figure_subtraction", fig, display=False)

# ```{glue:figure} figure_subtraction
# :name: figure_subtraction
# :align: center
#
# Unthresholded z-statistic map for the subtraction analysis of the two example Sleuth-based `Datasets`.
# ```
#
# Alternatively, MKDA Chi-squared analysis is inherently a subtraction analysis method, in that it compares foci from two groups of studies.
# Generally, one of these groups is a sample of interest, while the other is a meta-analytic database (minus the studies in the sample).
# With this setup, meta-analysts can infer whether there is greater convergence of foci in a voxel as compared to the baseline across the field (as estimated with the meta-analytic database), much like SCALE.
# However, if the database is replaced with a second sample of interest, the analysis ends up comparing convergence between the two groups.
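
# A sketch of that two-sample setup (the two `Dataset` objects, here called
# `dset_group1` and `dset_group2`, are assumed placeholders; see the NiMARE
# documentation for the MKDA Chi-squared estimator's details):

from nimare import meta

mkda_chi2 = meta.cbma.mkda.MKDAChiSquare()
chi2_results = mkda_chi2.fit(dset_group1, dset_group2)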
Example #26
# %%
fm.head()

# %%
fm_yr = u.get_fuel_mix(data_folder_path=dfp, agg="year")

# %%
fm_yr.info()

# %%
fm_yr.head()

# %%
start_year = f"{fm_yr.year.min():.0f}"
end_year = f"{fm_yr.year.max():.0f}"
glue("nyiso_start_year", start_year)
glue("nyiso_end_year", end_year)
subtitle = f"{start_year}-{end_year}, NYISO, http://mis.nyiso.com/public/P-63list.htm"

# %%
fm_yr_general = fm_yr.groupby(["year", "general"],
                              as_index=False).agg({"gen_gw": "sum"})

# %%
color = alt.Color("general",
                  legend=alt.Legend(title="Fuel type"),
                  sort="descending")

# %%
fm_yr_chrt = (alt.Chart(fm_yr_general).mark_bar().encode(
    x="year:O",
Example #27
# (glue/gluing)=
# ### Gluing variables in your notebook
#
# You can use `myst_nb.glue()` to assign the value of a variable to
# a key of your choice. `glue` will store all of the information that is normally used to **display**
# that variable (i.e., whatever happens when you display the variable by putting it at the end of a
# code cell). Choose a key that you will remember, as you will use it later.
#
# The following code glues a variable inside the notebook to the key `"cool_text"`:

# In[7]:

from myst_nb import glue
my_variable = "here is some text!"
glue("cool_text", my_variable)

# You can then insert it into your text. Adding
# `` {glue:}`cool_text` `` to your content results in the
# following: {glue:}`cool_text`.
#
# #### Gluing numbers, plots, and tables
#
# You can glue anything in your notebook and display it later with `{glue:}`. Here
# we'll show how to glue and paste **numbers and images**. We'll simulate some
# data and run a simple bootstrap on it. We'll hide most of this process below,
# to focus on the glueing part.

# In[8]:

# Simulate some data and bootstrap the mean of the data