Skip to content

asaf.mpd

Module for handling Macrostate Probability Distribution (MPD) data.

MPD

MPD(
    dataframe: DataFrame,
    temperature: float,
    beta_mu: Optional[float] = None,
    fugacity: Optional[float] = None,
    metadata: Optional[dict[str, Any]] = None,
    order: int = 50,
    tolerance: float = 10.0,
)

Class for storing and processing macrostate probability distribution.

Parameters:

  • dataframe (DataFrame) –

    a pandas dataframe with state specific data

  • temperature (float) –

    temperature (in K) at which the simulation was performed

  • beta_mu (Optional[float], default: None ) –

    beta_mu (unitless) at which the simulation was performed. At least one of beta_mu or fugacity must be specified

  • fugacity (Optional[float], default: None ) –

    fugacity (in Pa) at which the simulation was performed. At least one of beta_mu or fugacity must be specified

  • metadata (Optional[dict[str, Any]], default: None ) –

    a dictionary with the simulation metadata

  • order (int, default: 50 ) –

    how many points on each side to use when searching for a minimum in lnp

  • tolerance (float, default: 10.0 ) –

    used when checking the probability at lnp tail

Source code in src/asaf/mpd.py
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
def __init__(
    self,
    dataframe: pd.DataFrame,
    temperature: float,
    beta_mu: Optional[float] = None,
    fugacity: Optional[float] = None,
    metadata: Optional[dict[str, Any]] = None,
    order: int = 50,
    tolerance: float = 10.0,
) -> None:
    """Initialize the MPD class.

    Parameters
    ----------
    dataframe
        a pandas dataframe with state specific data. It must contain either the
        columns ``macrostate`` and ``lnp`` or the columns ``macrostate``, ``P_up``
        and ``P_down``. The data are copied, so later in-place operations on the
        MPD do not modify the caller's dataframe.
    temperature
        temperature (in K) at which the simulation was performed
    beta_mu
        beta_mu (unitless) at which the simulation was performed. At least one of beta_mu
        or fugacity must be specified. When beta_mu is given, the ``fugacity``
        argument is not used.
    fugacity
        fugacity (in Pa) at which the simulation was performed. At least one of beta_mu
        or fugacity must be specified
    metadata
        a dictionary with the simulation metadata
    order
        how many points on each side to use when searching for a minimum in lnp
    tolerance
        used when checking the probability at lnp tail

    Raises
    ------
    ValueError
        If neither ``beta_mu`` nor ``fugacity`` is given, or if the dataframe
        contains neither the lnp columns nor the transition probability columns.
    """
    # Temperature is assigned first because `self.beta` is read below.
    self.temperature = temperature

    if (beta_mu is None) and (fugacity is None):
        raise ValueError("Must provide `beta_mu` or/and `fugacity`.")
    if beta_mu is None:
        # NOTE(review): relies on the fugacity setter making `self.mu` available.
        self.fugacity = fugacity
        self._beta_mu = self.beta * self.mu
    else:
        self._beta_mu = beta_mu
        self.mu = beta_mu / self.beta

    lnp_headers = ["macrostate", "lnp"]
    prob_headers = ["macrostate", "P_up", "P_down"]

    have_lnp = set(lnp_headers).issubset(dataframe.columns)
    have_prob = set(prob_headers).issubset(dataframe.columns)

    if not (have_lnp or have_prob):
        all_required = set(lnp_headers) | set(prob_headers)
        missing = all_required - set(dataframe.columns)
        raise ValueError(f"Some of the columns names {missing} are missing.")

    if have_lnp:
        # Defensive copy: `reweight_to_fug(inplace=True)` overwrites the "lnp"
        # column of the stored frame and must not touch the caller's object.
        self._dataframe = dataframe.copy()
    else:
        # Derive lnp from the transition probabilities and keep the remaining
        # input columns next to it (the merge already returns a new frame).
        lnp_df = calculate_lnp(dataframe[prob_headers])
        self._dataframe = lnp_df.merge(
            dataframe, on="macrostate", how="left", suffixes=("", "_inp")
        )

    self._metadata = metadata or {}
    self.order = order
    self.tolerance = tolerance

    # Set before `system_size` is assigned; keep this order.
    self._system_size_prod = 1

    if "system_size" in self.metadata:
        self.system_size = self.metadata["system_size"]
    else:
        self.system_size = [1, 1, 1]

    self.check_tail(order, tolerance)

beta property writable

beta: float

Return the beta (in J^-1).

beta_mu property

beta_mu: float

Return the beta_mu (unitless).

fugacity property writable

fugacity: float

Return the fugacity (in Pa).

lnp property

lnp: DataFrame

Return a dataframe with the natural logarithm of the macrostate probability.

metadata property writable

metadata: Dict[str, Any]

Return the metadata dictionary.

mu property writable

mu: float

Return the chemical potential (in J A^-3).

order property writable

order: int

Return the order used to find minimum in lnp.

system_size property writable

system_size: List[int]

Return the system size as a list of integers.

temperature property writable

temperature: float

Return the temperature (in K).

tolerance property writable

tolerance: float

Return the tolerance used when checking the probability at lnp tail.

average_macrostate

average_macrostate(
    lnp: Optional[DataFrame] = None,
) -> float

Calculate the average macrostate from the MPD data.

Note that this function does not check for multiple phases. Use average_macrostate_at_fugacity to calculate the average macrostate at a given fugacity, which checks for multiple phases.

Source code in src/asaf/mpd.py
337
338
339
340
341
342
343
344
345
def average_macrostate(self, lnp: Optional[pd.DataFrame] = None) -> float:
    """Return the probability-weighted sum of the macrostate values.

    The weights are ``exp(lnp)``; when ``lnp`` is not supplied, ``self.lnp`` is used.

    This function does not look for multiple phases. Use `average_macrostate_at_fugacity`
    for an average at a given fugacity that does check for multiple phases.
    """
    data = self.lnp if lnp is None else lnp
    probabilities = np.exp(data["lnp"])
    weighted = probabilities * data["macrostate"]
    return weighted.sum()

average_macrostate_at_fugacity

average_macrostate_at_fugacity(
    fug: float, order: Optional[int] = None
) -> List[float]

Calculate the average macrostate at a given fugacity.

Source code in src/asaf/mpd.py
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
def average_macrostate_at_fugacity(
    self, fug: float, order: Optional[int] = None
) -> List[float]:
    """Calculate the average macrostate at a given fugacity.

    The distribution is reweighted to ``fug``. If no local minimum is found in
    the reweighted lnp, a single average is returned. Otherwise the distribution
    is split at the lowest minimum (the minimum point itself is left out) and one
    average per side is returned, the side with the larger total probability
    first. All averages are divided by ``self._system_size_prod``.
    """
    reference_beta = self._beta
    reference_mu = self._mu
    target_mu = fugacity_to_mu(fug, reference_beta)
    reweighted = self.reweight(reference_beta * (target_mu - reference_mu))

    window = self.order if order is None else order
    minima = self.minimums(lnp=reweighted["lnp"], order=window)
    scale = self._system_size_prod

    # Single phase: no barrier was detected.
    if len(minima) == 0:
        return [self.average_macrostate(reweighted) / scale]

    barrier = minima[minima.lnp == minima.lnp.min()].index[0]

    phases = []
    for segment in (reweighted[:barrier].copy(), reweighted[barrier + 1 :].copy()):
        # Total weight is taken before the segment is renormalized.
        weight = np.exp(segment["lnp"]).sum()
        segment["lnp"] = normalize(segment["lnp"])
        phases.append((weight, self.average_macrostate(segment) / scale))

    (weight_low, mean_low), (weight_high, mean_high) = phases
    if weight_low > weight_high:
        return [mean_low, mean_high]
    return [mean_high, mean_low]

calculate_isotherm

calculate_isotherm(
    fugacity: ArrayLike,
    saturation_fugacity: Optional[float] = None,
    pressure: Optional[ArrayLike] = None,
    order: Optional[int] = None,
    return_dataframe: bool = True,
) -> Union[DataFrame | Isotherm]

Calculate the adsorption isotherm.

Parameters:

  • fugacity (ArrayLike) –

    Array of fugacities.

  • saturation_fugacity (Optional[float], default: None ) –

    Saturation fugacity (f0) used to express the fugacity on a relative scale (f/f0).

  • pressure (Optional[ArrayLike], default: None ) –

    Array of pressures corresponding to the fugacities.

  • order (Optional[int], default: None ) –

    How many points on each side to use when searching for a minimum in lnp.

  • return_dataframe (bool, default: True ) –

    Whether to return the adsorption isotherm as a dataframe or Isotherm instance.

Returns:

  • DataFrame or Isotherm

    DataFrame containing the adsorption isotherm or Isotherm instance if return_dataframe is False.

  • Args ( Union[DataFrame | Isotherm] ) –

    pressure:

Source code in src/asaf/mpd.py
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
def calculate_isotherm(
    self,
    fugacity: ArrayLike,
    saturation_fugacity: Optional[float] = None,
    pressure: Optional[ArrayLike] = None,
    order: Optional[int] = None,
    return_dataframe: bool = True,
) -> Union[pd.DataFrame | Isotherm]:
    """Calculate the adsorption isotherm.

    Parameters
    ----------
    fugacity
        Array of fugacities. One output row is produced per entry, in the
        order given.
    saturation_fugacity
        Saturation fugacity (f0). When given, a column ``f/f0`` with the
        relative fugacity is added.
    pressure
        Array of pressures corresponding to the fugacities (same length and
        order as ``fugacity``). When given, a column ``pressure`` is added.
    order
        How many points on each side to use when searching for a minimum in lnp.
        Defaults to ``self.order``.
    return_dataframe
        Whether to return the adsorption isotherm as a dataframe or Isotherm instance.

    Returns
    -------
    pd.DataFrame or Isotherm
        DataFrame containing the adsorption isotherm or Isotherm instance if return_dataframe is False.
        The columns ``metastable_gas`` / ``metastable_liq`` are present only if
        at least one fugacity produced such a phase; other rows hold NaN there.
    """
    if order is None:
        order = self.order

    fugacities = []
    stable_phase = []
    metastable_gas = []
    metastable_liq = []
    has_gas = False
    has_liq = False

    for fug in fugacity:
        uptake = self.average_macrostate_at_fugacity(fug, order=order)
        fugacities.append(fug)
        # The first entry is always recorded as the stable phase.
        stable_phase.append(uptake[0])

        gas = np.nan
        liq = np.nan
        if len(uptake) > 1:
            # The second entry is the metastable phase: labelled gas when it
            # has the lower uptake, liquid otherwise.
            if uptake[0] > uptake[1]:
                gas = uptake[1]
                has_gas = True
            else:
                liq = uptake[1]
                has_liq = True
        metastable_gas.append(gas)
        metastable_liq.append(liq)

    # Build the table row-by-row instead of outer-merging on "fugacity":
    # an outer merge sorts the keys and multiplies duplicate keys, which
    # breaks the positional alignment with `pressure` below.
    isotherm = pd.DataFrame({"fugacity": fugacities, "uptake": stable_phase})
    if has_gas:
        isotherm["metastable_gas"] = metastable_gas
    if has_liq:
        isotherm["metastable_liq"] = metastable_liq

    if saturation_fugacity is not None:
        isotherm.insert(1, "f/f0", isotherm["fugacity"] / saturation_fugacity)

    if pressure is not None:
        isotherm.insert(1, "pressure", np.array(pressure))

    if return_dataframe:
        return isotherm

    # Imported only when needed, so the DataFrame path has no dependency on it.
    from asaf import Isotherm

    return Isotherm(
        data=isotherm,
        saturation_fugacity=saturation_fugacity,
        metadata=self.metadata,
    )

check_tail

check_tail(
    order: int,
    tolerance: float,
    lnp: Optional[DataFrame] = None,
) -> None

Check the probability at the tail of the lnp distribution.

Source code in src/asaf/mpd.py
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
def check_tail(
    self, order: int, tolerance: float, lnp: Optional[pd.DataFrame] = None
) -> None:
    """Warn when the lnp distribution has not decayed enough at its tail.

    The drop from the maximum of lnp to its last value is compared with
    ``tolerance``. If local minima are found, only the part of the distribution
    after the lowest minimum is considered. A warning is printed when the drop
    is smaller than ``tolerance``; nothing is returned.
    """
    data = self.lnp if lnp is None else lnp
    minima = self.minimums(lnp=data["lnp"], order=order)

    if len(minima) == 0:
        tail = data["lnp"]
    else:
        lowest = minima[minima.lnp == minima.lnp.min()].index[0]
        tail = data[lowest + 1 :].copy()["lnp"]

    difference = tail.max() - tail.iloc[-1]

    if difference < tolerance:
        print(
            f"WARNING! lnPi at N_max has a relative value higher ({difference:.1f}) than tolerance ({tolerance:.1f}).",
            "The results may be erroneous. Provide data for higher macrostate values.",
            sep="\n",
        )

dataframe

dataframe() -> DataFrame

Return dataframe.

Source code in src/asaf/mpd.py
137
138
139
def dataframe(self) -> pd.DataFrame:
    """Return the dataframe stored on this MPD.

    The stored object itself is returned (no copy is made), so changes made to
    the returned dataframe are visible to the MPD instance.
    """
    return self._dataframe

extrapolate

extrapolate(
    temperature: float,
    energy: Optional[DataFrame | Series] = None,
    terms: int = 1,
) -> "MPD"

Extrapolates the MPD to a new temperature.

Parameters:

  • temperature (float) –

    Temperature (in K) to which to extrapolate MPD.

  • energy (Optional[DataFrame | Series], default: None ) –

    Energy fluctuation data. If None, ASAF will look for the data (column term_1) in the dataframe stored on the MPD. Unit must be J.

  • terms (int, default: 1 ) –

    Number of Taylor series terms used for extrapolation. Note that energy must contain columns named term_1, term_2, ..., term_n where n is the number of terms.

Returns:

  • MPD

    Extrapolated MPD.

Source code in src/asaf/mpd.py
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
def extrapolate(
    self,
    temperature: float,
    energy: Optional[pd.DataFrame | pd.Series] = None,
    terms: int = 1,
) -> "MPD":
    """Extrapolates the MPD to a new temperature.

    Parameters
    ----------
    temperature
        Temperature (in K) to which to extrapolate MPD.
    energy
        Energy fluctuation data. If None, the columns `term_1`, ..., `term_n`
        are taken from the dataframe stored on this MPD. Unit must be J.
    terms
        Number of Taylor series terms used for extrapolation. Note that `energy` must contain columns
        named `term_1`, `term_2`, ..., `term_n` where n is the number of terms.

    Returns
    -------
    MPD
        Extrapolated MPD. It is created with the same metadata, `order` and
        `tolerance` as this instance.

    Raises
    ------
    ValueError
        If `terms` is smaller than 1, or if `energy` is None and the stored
        dataframe lacks any of the required `term_i` columns.
    """
    from math import factorial

    if terms < 1:
        raise ValueError("Number of terms must be at least 1.")

    if energy is None:
        # Every requested term is needed, not just term_1; otherwise the
        # higher-order loop below fails with a KeyError for terms > 1.
        term_columns = [f"term_{i}" for i in range(1, terms + 1)]
        missing = [
            column
            for column in term_columns
            if column not in self._dataframe.columns
        ]
        if missing:
            raise ValueError(
                f"Energy related data is missing. Required columns not found: {missing}."
            )
        energy = self._dataframe[["macrostate", *term_columns]].copy()

    beta = temperature_to_beta(temperature)
    delta_beta = beta - self.beta
    lnp_extrapolated = self.lnp.copy()

    # First-order term of the expansion in delta_beta.
    lnp_extrapolated["lnp"] += (
        self.mu * lnp_extrapolated["macrostate"] - energy["term_1"]
    ) * delta_beta
    lnp_extrapolated["lnp"] = normalize(lnp_extrapolated["lnp"])

    # Higher-order terms, renormalizing after each one.
    for i in range(2, terms + 1):
        lnp_extrapolated["lnp"] += (
            1 / factorial(i) * energy[f"term_{i}"] * np.power(delta_beta, i)
        )
        lnp_extrapolated["lnp"] = normalize(lnp_extrapolated["lnp"])

    return MPD(
        dataframe=lnp_extrapolated,
        temperature=temperature,
        fugacity=mu_to_fugacity(self.mu, beta),
        metadata=self.metadata,
        # Carry over the analysis settings instead of silently falling back
        # to the constructor defaults.
        order=self.order,
        tolerance=self.tolerance,
    )

find_phase_equilibrium

find_phase_equilibrium(
    tolerance: float = 1e-06,
    max_iterations: int = 100,
    return_probabilities: bool = False,
) -> Union[Tuple[float, float, float], float]

Find the fugacity at which the two phases are in equilibrium.

Parameters:

  • tolerance (float, default: 1e-06 ) –

    Tolerance for the root finding algorithm.

  • max_iterations (int, default: 100 ) –

    Maximum number of iterations for the root finding algorithm.

  • return_probabilities (bool, default: False ) –

    Whether to return the probabilities of the two phases at equilibrium.

Returns:

  • float or Tuple[float, float, float]

    The fugacity at which the two phases are in equilibrium. If return_probabilities is True, also returns the probabilities of the two phases at equilibrium.

Source code in src/asaf/mpd.py
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
def find_phase_equilibrium(
    self,
    tolerance: float = 1e-6,
    max_iterations: int = 100,
    return_probabilities: bool = False,
) -> Union[Tuple[float, float, float], float]:
    """Find the fugacity at which the two phases are in equilibrium.

    Parameters
    ----------
    tolerance
        Tolerance for the root finding algorithm.
    max_iterations
        Maximum number of iterations for the root finding algorithm.
    return_probabilities
        Whether to return the probabilities of the two phases at equilibrium.

    Returns
    -------
    float or Tuple[float, float, float]
        The fugacity at which the two phases are in equilibrium. If `return_probabilities`
        is True, also returns the probabilities of the two phases at equilibrium.

    Raises
    ------
    IndexError
        If no local minimum separating two phases is found in the distribution
        reweighted to the located fugacity.
    """
    from scipy.optimize import newton
    from scipy.special import logsumexp

    def objective(beta_mu) -> float:
        """Difference of the log-probabilities on both sides of the lowest minimum."""
        delta_beta_mu = beta_mu - self.beta_mu
        lnp = self.reweight(delta_beta_mu)
        min_index = self.minimums(order=self._order, lnp=lnp["lnp"])
        if len(min_index) == 0:
            # No minimum found: fall back to the squared difference between
            # the first and the last lnp value.
            return float(lnp.lnp.values[0] - lnp.lnp.values[-1]) ** 2
        min_index = min_index[min_index.lnp == min_index.lnp.min()].index[0]
        logsum_low = logsumexp(lnp.lnp[:min_index])
        logsum_high = logsumexp(lnp.lnp[min_index:])
        return logsum_low - logsum_high

    equilibrium_beta_mu = newton(
        objective, self.beta_mu, tol=tolerance, maxiter=max_iterations
    )
    equilibrium_fugacity = mu_to_fugacity(
        equilibrium_beta_mu / self._beta, self._beta
    )

    equilibrium_lnp = self.reweight_to_fug(equilibrium_fugacity, inplace=False)
    # Search the "lnp" column only, exactly as the objective does; passing the
    # whole frame would look for extrema in every column.
    equilibrium_minima = self.minimums(
        order=self._order, lnp=equilibrium_lnp["lnp"]
    )
    if len(equilibrium_minima) == 0:
        raise IndexError(
            "No local minimum separating two phases was found at the located "
            "fugacity; the phase probabilities cannot be computed."
        )
    equilibrium_min_index = equilibrium_minima[
        equilibrium_minima.lnp == equilibrium_minima.lnp.min()
    ].index[0]
    equilibrium_logsum_low = logsumexp(equilibrium_lnp.lnp[:equilibrium_min_index])
    equilibrium_logsum_high = logsumexp(equilibrium_lnp.lnp[equilibrium_min_index:])
    equilibrium_p_low = float(np.exp(equilibrium_logsum_low))
    equilibrium_p_high = float(np.exp(equilibrium_logsum_high))

    if return_probabilities:
        return equilibrium_fugacity, equilibrium_p_low, equilibrium_p_high
    else:
        return equilibrium_fugacity

free_energy_at_fugacity

free_energy_at_fugacity(fug: float) -> DataFrame

Calculate the free energy profile at a given fugacity.

Source code in src/asaf/mpd.py
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
def free_energy_at_fugacity(self, fug: float) -> pd.DataFrame:
    """Calculate the free energy profile at a given fugacity.

    The distribution is reweighted to ``fug``; the reweighted lnp is then
    scaled by ``-0.001 * _BOLTZMANN_CONSTANT * _AVOGADRO_CONSTANT * temperature``
    and shifted so that its minimum is zero. The result has the columns
    ``macrostate`` and ``free_energy_kJ/mol``.
    """
    reference_beta = self._beta
    reference_mu = self._mu
    target_mu = fugacity_to_mu(fug, reference_beta)
    reweighted = self.reweight(reference_beta * (target_mu - reference_mu))

    prefactor = (
        -0.001 * _BOLTZMANN_CONSTANT * _AVOGADRO_CONSTANT * self._temperature
    )
    profile = prefactor * reweighted["lnp"]
    profile = profile - profile.min()

    result = pd.DataFrame(
        {
            "macrostate": reweighted["macrostate"].copy(),
            "free_energy_kJ/mol": profile,
        }
    )
    return result

from_csv classmethod

from_csv(file_name: str, **kwargs: object) -> MPD

Read natural logarithm of macrostates probability or transition probabilities from a csv file.

Source code in src/asaf/mpd.py
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
@classmethod
def from_csv(cls, file_name: str, **kwargs: object) -> MPD:
    """Read natural logarithm of macrostates probability or transition probabilities from a csv file.

    The simulation metadata are read from a JSON file located next to the csv
    file: the ``.csv`` suffix (if present) is removed from ``file_name`` and
    ``.metadata.json`` is appended.

    Parameters
    ----------
    file_name
        Path of the csv file. A string or any ``os.PathLike`` object is accepted.
    **kwargs
        Passed on to ``pandas.read_csv``.

    Returns
    -------
    MPD
        Instance built from the csv data and the metadata file.

    Raises
    ------
    ValueError
        If the metadata lack 'temperature', or lack both 'beta_mu' and 'fugacity'.
    """
    import os

    # Normalize path-like input so the suffix handling below works on a str.
    csv_path = os.fspath(file_name)
    df = pd.read_csv(csv_path, **kwargs)

    metadata_filename = csv_path.removesuffix(".csv") + ".metadata.json"
    # Explicit encoding: do not depend on the platform's locale default.
    with open(metadata_filename, encoding="utf-8") as f:
        metadata = json.load(f)

    temperature = metadata.get("temperature")

    if temperature is None:
        raise ValueError("Metadata must contain 'temperature'.")

    beta_mu = metadata.get("beta_mu")
    fugacity = metadata.get("fugacity")

    if (beta_mu is None) and (fugacity is None):
        raise ValueError("Metadata must contain 'beta_mu' or/and 'fugacity'.")

    return cls(
        dataframe=df,
        temperature=temperature,
        beta_mu=beta_mu,
        fugacity=fugacity,
        metadata=metadata,
    )

minimums

minimums(
    order: int, lnp: Optional[Series] = None
) -> DataFrame

Find the local minimums in the lnp data.

Source code in src/asaf/mpd.py
347
348
349
350
351
352
353
354
def minimums(self, order: int, lnp: Optional[pd.Series] = None) -> pd.DataFrame:
    """Find the local minimums in the lnp data.

    Parameters
    ----------
    order
        How many points on each side to use when searching for a minimum in lnp.
    lnp
        The lnp values to search, positionally aligned with the stored
        dataframe. Defaults to the stored ``lnp`` column. A dataframe with an
        ``lnp`` column is accepted as well.

    Returns
    -------
    pd.DataFrame
        The rows of the stored dataframe at the positions of the minima. The
        ``lnp`` column holds the values of the data that were searched, so that
        minima of a reweighted distribution can be compared with each other.
    """
    if lnp is None:
        lnp = self._dataframe["lnp"]
    elif isinstance(lnp, pd.DataFrame):
        # Search only the lnp column; a 2-D input would report extrema found
        # in any column.
        lnp = lnp["lnp"]

    values = np.asarray(lnp)
    min_loc = argrelextrema(values, np.less, order=order)[0]
    # Keep only minima more than 10 points away from either end
    # (presumably to ignore spurious minima at the edges — TODO confirm).
    min_loc = min_loc[(10 < min_loc) & (min_loc < values.shape[0] - 10)]

    found = self._dataframe.iloc[min_loc].copy()
    # Report the searched values, not the stored ones: callers select the
    # lowest minimum through this column.
    found["lnp"] = values[min_loc]
    return found

plot

plot(
    fig: Optional[Figure] = None,
    name: Optional[str] = None,
    show: bool = True,
) -> None

Plot the MPD data using plotly.

Source code in src/asaf/mpd.py
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
def plot(
    self,
    fig: Optional[go.Figure] = None,
    name: Optional[str] = None,
    show: bool = True,
) -> None:
    """Plot the MPD data using plotly.

    A line trace of lnp against the macrostate is added to ``fig`` (a new figure
    is created when ``fig`` is None), the layout is styled, and the figure is
    displayed when ``show`` is True.
    """
    figure = go.Figure() if fig is None else fig

    # The same style dictionary is applied to both axes.
    axis_style = dict(
        showline=True,
        linewidth=1,
        linecolor="black",
        gridcolor="lightgrey",
        mirror=True,
        zeroline=False,
        ticks="inside",
    )
    text_font = dict(family="Helvetica Neue", size=14, color="black")

    curve = go.Scatter(
        x=self.lnp["macrostate"],
        y=self.lnp["lnp"],
        mode="lines",
        name=name,
    )
    figure.add_trace(curve)

    figure.update_layout(
        font=text_font,
        xaxis=axis_style,
        xaxis_title="Macrostate",
        yaxis=axis_style,
        yaxis_title="lnΠ",
        plot_bgcolor="white",
        width=700,
        height=500,
        margin={"l": 30, "r": 30, "t": 30, "b": 30},
    )

    if not show:
        return
    figure.show()

reweight

reweight(delta_beta_mu: float) -> DataFrame

Reweight the MPD to a new mu / fugacity value using delta_beta_mu.

Source code in src/asaf/mpd.py
252
253
254
255
256
257
258
259
def reweight(self, delta_beta_mu: float) -> pd.DataFrame:
    """Reweight the MPD to a new mu / fugacity value using `delta_beta_mu`.

    A copy of ``self.lnp`` is shifted by ``delta_beta_mu * macrostate``,
    normalized, checked with `check_tail`, and returned. The instance itself is
    not modified here.
    """
    reweighted = self.lnp.copy()
    shifted = reweighted["lnp"] + delta_beta_mu * reweighted["macrostate"]
    reweighted["lnp"] = normalize(shifted)

    # The tail check may print a warning for the reweighted distribution.
    self.check_tail(lnp=reweighted, order=self.order, tolerance=self.tolerance)
    return reweighted

reweight_to_fug

reweight_to_fug(
    fugacity: float, inplace: bool = True
) -> None | DataFrame

Reweight the MPD to a new mu / fugacity value using desired fugacity.

Source code in src/asaf/mpd.py
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
def reweight_to_fug(
    self, fugacity: float, inplace: bool = True
) -> None | pd.DataFrame:
    """Reweight the MPD to a new mu / fugacity value using desired fugacity.

    Parameters
    ----------
    fugacity
        Fugacity to which the distribution is reweighted.
    inplace
        If True, the stored ``lnp`` column, the fugacity and beta_mu of this
        instance are updated and None is returned. If False, the instance is
        left unchanged and the reweighted dataframe is returned.

    Returns
    -------
    None or pd.DataFrame
        The reweighted lnp dataframe when ``inplace`` is False, otherwise None.
    """
    beta_0 = self.beta
    mu_0 = self.mu
    mu = fugacity_to_mu(fugacity, beta_0)
    delta_beta_mu = beta_0 * (mu - mu_0)
    lnp_rw = self.reweight(delta_beta_mu)
    if inplace:
        self._dataframe["lnp"] = lnp_rw["lnp"]
        self.fugacity = fugacity
        # Keep beta_mu in step with the new state, the same way the constructor
        # derives it after assigning the fugacity; it is the reference point for
        # later reweighting in `find_phase_equilibrium`.
        self._beta_mu = self.beta * self.mu
        return None
    else:
        return lnp_rw

options: filters: ["!^_"]