Source code for mdgo.util

# coding: utf-8
# Copyright (c) Tingzheng Hou.
# Distributed under the terms of the MIT License.

"""
This module implements utility functions for other modules in the package.
"""


import string
from io import StringIO
import os
import re
import math
import sys
from typing import List, Dict, Union, Tuple, Optional, Any
from typing_extensions import Final
import numpy as np
import pandas as pd

from pymatgen.core import Molecule
from pymatgen.io.lammps.data import CombinedData

from MDAnalysis import Universe
from MDAnalysis.core.groups import Atom, Residue, AtomGroup

from mdgo.volume import molecular_volume

__author__ = "Tingzheng Hou"
__version__ = "1.0"
__maintainer__ = "Tingzheng Hou"
__email__ = "tingzheng_hou@berkeley.edu"
__date__ = "Feb 9, 2021"

MM_of_Elements: Final[Dict[str, float]] = {
    "H": 1.00794,
    "He": 4.002602,
    "Li": 6.941,
    "Be": 9.012182,
    "B": 10.811,
    "C": 12.0107,
    "N": 14.0067,
    "O": 15.9994,
    "F": 18.9984032,
    "Ne": 20.1797,
    "Na": 22.98976928,
    "Mg": 24.305,
    "Al": 26.9815386,
    "Si": 28.0855,
    "P": 30.973762,
    "S": 32.065,
    "Cl": 35.453,
    "Ar": 39.948,
    "K": 39.0983,
    "Ca": 40.078,
    "Sc": 44.955912,
    "Ti": 47.867,
    "V": 50.9415,
    "Cr": 51.9961,
    "Mn": 54.938045,
    "Fe": 55.845,
    "Co": 58.933195,
    "Ni": 58.6934,
    "Cu": 63.546,
    "Zn": 65.409,
    "Ga": 69.723,
    "Ge": 72.64,
    "As": 74.9216,
    "Se": 78.96,
    "Br": 79.904,
    "Kr": 83.798,
    "Rb": 85.4678,
    "Sr": 87.62,
    "Y": 88.90585,
    "Zr": 91.224,
    "Nb": 92.90638,
    "Mo": 95.94,
    "Tc": 98.9063,
    "Ru": 101.07,
    "Rh": 102.9055,
    "Pd": 106.42,
    "Ag": 107.8682,
    "Cd": 112.411,
    "In": 114.818,
    "Sn": 118.71,
    "Sb": 121.760,
    "Te": 127.6,
    "I": 126.90447,
    "Xe": 131.293,
    "Cs": 132.9054519,
    "Ba": 137.327,
    "La": 138.90547,
    "Ce": 140.116,
    "Pr": 140.90465,
    "Nd": 144.242,
    "Pm": 146.9151,
    "Sm": 150.36,
    "Eu": 151.964,
    "Gd": 157.25,
    "Tb": 158.92535,
    "Dy": 162.5,
    "Ho": 164.93032,
    "Er": 167.259,
    "Tm": 168.93421,
    "Yb": 173.04,
    "Lu": 174.967,
    "Hf": 178.49,
    "Ta": 180.9479,
    "W": 183.84,
    "Re": 186.207,
    "Os": 190.23,
    "Ir": 192.217,
    "Pt": 195.084,
    "Au": 196.966569,
    "Hg": 200.59,
    "Tl": 204.3833,
    "Pb": 207.2,
    "Bi": 208.9804,
    "Po": 208.9824,
    "At": 209.9871,
    "Rn": 222.0176,
    "Fr": 223.0197,
    "Ra": 226.0254,
    "Ac": 227.0278,
    "Th": 232.03806,
    "Pa": 231.03588,
    "U": 238.02891,
    "Np": 237.0482,
    "Pu": 244.0642,
    "Am": 243.0614,
    "Cm": 247.0703,
    "Bk": 247.0703,
    "Cf": 251.0796,
    "Es": 252.0829,
    "Fm": 257.0951,
    "Md": 258.0951,
    "No": 259.1009,
    "Lr": 262,
    "Rf": 267,
    "Db": 268,
    "Sg": 271,
    "Bh": 270,
    "Hs": 269,
    "Mt": 278,
    "Ds": 281,
    "Rg": 281,
    "Cn": 285,
    "Nh": 284,
    "Fl": 289,
    "Mc": 289,
    "Lv": 292,
    "Ts": 294,
    "Og": 294,
    "ZERO": 0,
}

SECTION_SORTER: Final[Dict[str, Dict[str, Any]]] = {
    "atoms": {
        "in_kw": None,
        "in_header": ["atom", "charge", "sigma", "epsilon"],
        "sec_number": None,
        "desired_split": None,
        "desired_cols": None,
        "out_kw": None,
        "ff_header": ["epsilon", "sigma"],
        "topo_header": ["mol-id", "type", "charge", "x", "y", "z"],
    },
    "bonds": {
        "in_kw": "Stretch",
        "in_header": ["atom1", "atom2", "k", "r0"],
        "sec_number": 5,
        "desired_split": 2,
        "desired_cols": 4,
        "out_kw": ["Bond Coeffs", "Bonds"],
        "ff_header": ["k", "r0"],
        "topo_header": ["type", "atom1", "atom2"],
    },
    "angles": {
        "in_kw": "Bending",
        "in_header": ["atom1", "atom2", "atom3", "k", "theta0"],
        "sec_number": 6,
        "desired_split": 1,
        "desired_cols": 5,
        "out_kw": ["Angle Coeffs", "Angles"],
        "ff_header": ["k", "theta0"],
        "topo_header": ["type", "atom1", "atom2", "atom3"],
    },
    "dihedrals": {
        "in_kw": "proper Torsion",
        "in_header": ["atom1", "atom2", "atom3", "atom4", "v1", "v2", "v3", "v4"],
        "sec_number": 7,
        "desired_split": 1,
        "desired_cols": 8,
        "out_kw": ["Dihedral Coeffs", "Dihedrals"],
        "ff_header": ["v1", "v2", "v3", "v4"],
        "topo_header": ["type", "atom1", "atom2", "atom3", "atom4"],
    },
    "impropers": {
        "in_kw": "improper Torsion",
        "in_header": ["atom1", "atom2", "atom3", "atom4", "v2"],
        "sec_number": 8,
        "desired_split": 1,
        "desired_cols": 5,
        "out_kw": ["Improper Coeffs", "Impropers"],
        "ff_header": ["v1", "v2", "v3"],
        "topo_header": ["type", "atom1", "atom2", "atom3", "atom4"],
    },
}

BOX: Final[
    str
] = """{0:6f} {1:6f} xlo xhi
{0:6f} {1:6f} ylo yhi
{0:6f} {1:6f} zlo zhi"""

MOLAR_VOLUME: Final[Dict[str, float]] = {"lipf6": 18, "litfsi": 100}  # empirical value

ALIAS: Final[Dict[str, str]] = {
    "ethylene carbonate": "ec",
    "ec": "ec",
    "propylene carbonate": "pc",
    "pc": "pc",
    "dimethyl carbonate": "dmc",
    "dmc": "dmc",
    "diethyl carbonate": "dec",
    "dec": "dec",
    "ethyl methyl carbonate": "emc",
    "emc": "emc",
    "fluoroethylene carbonate": "fec",
    "fec": "fec",
    "vinyl carbonate": "vc",
    "vinylene carbonate": "vc",
    "vc": "vc",
    "1,3-dioxolane": "dol",
    "dioxolane": "dol",
    "dol": "dol",
    "ethylene glycol monomethyl ether": "egme",
    "2-methoxyethanol": "egme",
    "egme": "egme",
    "dme": "dme",
    "1,2-dimethoxyethane": "dme",
    "glyme": "dme",
    "monoglyme": "dme",
    "2-methoxyethyl ether": "diglyme",
    "diglyme": "diglyme",
    "triglyme": "triglyme",
    "tetraglyme": "tetraglyme",
    "acetonitrile": "acn",
    "acn": "acn",
    "water": "water",
    "h2o": "water",
}

# From PubChem
MOLAR_MASS: Final[Dict[str, float]] = {
    "ec": 88.06,
    "pc": 102.09,
    "dec": 118.13,
    "dmc": 90.08,
    "emc": 104.05,
    "fec": 106.05,
    "vc": 86.05,
    "dol": 74.08,
    "egme": 76.09,
    "dme": 90.12,
    "diglyme": 134.17,
    "triglyme": 178.23,
    "tetraglyme": 222.28,
    "acn": 41.05,
    "water": 18.01528,
}

# from Sigma-Aldrich
DENSITY: Final[Dict[str, float]] = {
    "ec": 1.321,
    "pc": 1.204,
    "dec": 0.975,
    "dmc": 1.069,
    "emc": 1.006,
    "fec": 1.454,  # from qm-ht.com
    "vc": 1.355,
    "dol": 1.06,
    "dme": 0.867,
    "egme": 0.965,
    "diglyme": 0.939,
    "triglyme": 0.986,
    "tetraglyme": 1.009,
    "acn": 0.786,
    "water": 0.99707,
}


[docs]def atom_vec(atom1: Atom, atom2: Atom, dimension: np.ndarray) -> np.ndarray: """ Calculate the vector of the positions from atom2 to atom1. Args: atom1: Atom obj 1. atom2: Atom obj 2. dimension: box dimension. Return: The obtained vector """ vec = [0, 0, 0] for i in range(3): diff = atom1.position[i] - atom2.position[i] if diff > dimension[i] / 2: vec[i] = diff - dimension[i] elif diff < -dimension[i] / 2: vec[i] = diff + dimension[i] else: vec[i] = diff return np.array(vec)
[docs]def position_vec( pos1: Union[List[float], np.ndarray], pos2: Union[List[float], np.ndarray], dimension: Union[List[float], np.ndarray], ) -> np.ndarray: """ Calculate the vector from pos2 to pos2. Args: pos1: Array of 3d coordinates 1. pos2: Array of 3d coordinates 2. dimension: box dimension. Return: The obtained vector. """ vec: List[Union[int, float, np.floating]] = [0, 0, 0] for i in range(3): diff = pos1[i] - pos2[i] if diff > dimension[i] / 2: vec[i] = diff - dimension[i] elif diff < -dimension[i] / 2: vec[i] = diff + dimension[i] else: vec[i] = diff return np.array(vec)
[docs]def angle(a: np.ndarray, b: np.ndarray, c: np.ndarray) -> np.floating: """ Calculate the angle between three atoms. Args: a: Coordinates of atom A. b: Coordinates of atom B. c: Coordinates of atom C. Returns: The degree A-B-C. """ ba = a - b bc = c - b cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc)) cosine_angle = np.clip(cosine_angle, -1.0, 1.0) angle_in_radian = np.arccos(cosine_angle) return np.degrees(angle_in_radian)
[docs]def mass_to_name(masses: np.ndarray) -> np.ndarray: """ Map atom names to element names. Args: masses: The masses array of atoms in an ``Universe``. Return: The element name array. """ names = [] for mass in masses: for item in MM_of_Elements.items(): if math.isclose(mass, item[1], abs_tol=0.1): names.append(item[0]) assert len(masses) == len(names), "Invalid mass found." return np.array(names)
[docs]def lmp_mass_to_name(df: pd.DataFrame) -> Dict[int, str]: """ Create a dict for mapping atom type id to element from the mass information. Args: df: The masses attribute from LammpsData object Return: The element dict. """ atoms = {} for row in df.index: for item in MM_of_Elements.items(): if math.isclose(df["mass"][row], item[1], abs_tol=0.01): atoms[int(row)] = item[0] return atoms
[docs]def assign_name(u: Universe, names: np.ndarray): """ Assign resnames to residues in a MDAnalysis.universe object. The function will not overwrite existing names. Args: u: The universe object to assign resnames to. names: The element name array. """ u.add_TopologyAttr("name", values=names)
[docs]def assign_resname(u: Universe, res_dict: Dict[str, str]): """ Assign resnames to residues in a MDAnalysis.universe object. The function will not overwrite existing resnames. Args: u: The universe object to assign resnames to. res_dict: A dictionary of resnames, where each resname is a key and the corresponding values are the selection language. """ u.add_TopologyAttr("resname") for key, val in res_dict.items(): res_group = u.select_atoms(val) res_names = res_group.residues.resnames res_names[res_names == ""] = key res_group.residues.resnames = res_names
[docs]def res_dict_from_select_dict(u: Universe, select_dict: Dict[str, str]) -> Dict[str, str]: """ Infer res_dict (residue selection) from select_dict (atom selection) in a MDAnalysis.universe object. Args: u: The universe object to assign resnames to. select_dict: A dictionary of atom species, where each atom species name is a key and the corresponding values are the selection language. return: A dictionary of resnames. """ saved_select = [] res_dict = {} for key, val in select_dict.items(): res_select = "same resid as (" + val + ")" res_group = u.select_atoms(res_select) if key in ["cation", "anion"] or res_group not in saved_select: saved_select.append(res_group) res_dict[key] = res_select if ( "cation" in res_dict and "anion" in res_dict and u.select_atoms(res_dict.get("cation")) == u.select_atoms(res_dict.get("anion")) ): res_dict.pop("anion") res_dict["salt"] = res_dict.pop("cation") return res_dict
[docs]def res_dict_from_datafile(filename: str) -> Dict[str, str]: """ Infer res_dict (residue selection) from a LAMMPS data file. Args: filename: Path to the data file. The data file must be generated by a CombinedData object. return: A dictionary of resnames. """ res_dict = {} with open(filename, "r") as f: lines = f.readlines() if lines[0] == "Generated by pymatgen.io.lammps.data.LammpsData\n" and lines[1].startswith("#"): elyte_info = re.findall(r"\w+", lines[1]) it = iter(elyte_info) idx = 1 for num in it: name = next(it) if name.isnumeric(): frag = int(name) name = next(it) names = [name + c for c in string.ascii_lowercase[0:frag]] start = idx idx += int(num) * frag for i, n in enumerate(names): res_dict[n] = "same mass as resid " + str(start + i) else: start = idx idx += int(num) end = idx res_dict[name] = "resid " + str(start) + "-" + str(end - 1) return res_dict raise ValueError("The LAMMPS data file should be generated by pymatgen.io.lammps.data.")
[docs]def res_dict_from_lammpsdata(lammps_data: CombinedData) -> Dict[str, str]: """ Infer res_dict (residue selection) from a LAMMPS data file. Args: lammps_data: A CombinedData object. return: A dictionary of resnames. """ assert isinstance(lammps_data, CombinedData) idx = 1 res_dict = {} if hasattr(lammps_data, "frags"): for name, num, frag in zip(lammps_data.names, lammps_data.nums, lammps_data.frags): if frag == 1: start = idx idx += num end = idx res_dict[name] = "resid " + str(start) + "-" + str(end - 1) else: names = [name + c for c in string.ascii_lowercase[0:frag]] start = idx idx += int(num) * frag for i, n in enumerate(names): res_dict[n] = "same mass as resid " + str(start + i) else: for name, num in zip(lammps_data.names, lammps_data.nums): start = idx idx += num end = idx res_dict[name] = "resid " + str(start) + "-" + str(end - 1) return res_dict
[docs]def select_dict_from_resname(u: Universe) -> Dict[str, str]: """ Infer select_dict (possibly interested atom species selection) from resnames in a MDAnalysis.universe object. The resname must be pre-assigned already. Args: u: The universe object to work with. return: A dictionary of atom species. """ select_dict: Dict[str, str] = {} resnames = np.unique(u.residues.resnames) for resname in resnames: if resname == "": continue residue = u.select_atoms("resname " + resname).residues[0] if np.isclose(residue.charge, 0, atol=1e-5): # np.sum(residue.atoms.charges) if len(residue.atoms.fragments) == 2: for i, frag in enumerate(residue.atoms.fragments): charge = np.sum(frag.charges) if charge > 0.001: extract_atom_from_ion(True, frag, select_dict) elif charge < -0.001: extract_atom_from_ion(False, frag, select_dict) else: extract_atom_from_molecule(resname, frag, select_dict, number=i + 1) elif len(residue.atoms.fragments) >= 2: cation_number = 1 anion_number = 1 molecule_number = 1 for frag in residue.atoms.fragments: charge = np.sum(frag.charges) if charge > 0.001: extract_atom_from_ion(True, frag, select_dict, cation_number) cation_number += 1 elif charge < -0.001: extract_atom_from_ion(False, frag, select_dict, anion_number) anion_number += 1 else: extract_atom_from_molecule(resname, frag, select_dict, molecule_number) molecule_number += 1 else: extract_atom_from_molecule(resname, residue, select_dict) elif residue.charge > 0: extract_atom_from_ion(True, residue, select_dict) else: extract_atom_from_ion(False, residue, select_dict) return select_dict
[docs]def extract_atom_from_ion(positive: bool, ion: Union[Residue, AtomGroup], select_dict: Dict[str, str], number: int = 0): """ Assign the most most charged atom and/or one unique atom in the ion into select_dict. Args: positive: Whether the charge of ion is positive. Otherwise negative. Default to True. ion: Residue or AtomGroup select_dict: A dictionary of atom species, where each atom species name is a key and the corresponding values are the selection language. number: The serial number of the ion. """ if positive: if number == 0: cation_name = "cation" else: cation_name = "cation_" + str(number) if len(ion.atoms.types) == 1: select_dict[cation_name] = "type " + ion.atoms.types[0] else: # The most positively charged atom in the cation pos_center = ion.atoms[np.argmax(ion.atoms.charges)] unique_types = np.unique(ion.atoms.types, return_counts=True) # One unique atom in the cation uni_center = unique_types[0][np.argmin(unique_types[1])] if pos_center.type == uni_center: select_dict[cation_name] = "type " + uni_center else: select_dict[cation_name + "_" + pos_center.name + pos_center.type] = "type " + pos_center.type select_dict[cation_name] = "type " + uni_center else: if number == 0: anion_name = "anion" else: anion_name = "anion_" + str(number) if len(ion.atoms.types) == 1: select_dict[anion_name] = "type " + ion.atoms.types[0] else: # The most negatively charged atom in the anion neg_center = ion.atoms[np.argmin(ion.atoms.charges)] unique_types = np.unique(ion.atoms.types, return_counts=True) # One unique atom in the anion uni_center = unique_types[0][np.argmin(unique_types[1])] if neg_center.type == uni_center: select_dict[anion_name] = "type " + uni_center else: select_dict[anion_name + "_" + neg_center.name + neg_center.type] = "type " + neg_center.type select_dict[anion_name] = "type " + uni_center
[docs]def extract_atom_from_molecule( resname: str, molecule: Union[Residue, AtomGroup], select_dict: Dict[str, str], number: int = 0 ): """ Assign the most negatively charged atom in the molecule into select_dict Args: resname: The name of the molecule molecule: The Residue or AtomGroup obj of the molecule. select_dict: A dictionary of atom species, where each atom species name is a key and the corresponding values are the selection language. number: The serial number of the molecule under the name of resname. """ # neg_center = residue.atoms[np.argmin(residue.atoms.charges)] # select_dict[resname + "-" + neg_center.name + neg_center.type] = "type " + neg_center.type # pos_center = residue.atoms[np.argmax(residue.atoms.charges)] # select_dict[resname + "+" + pos_center.name + pos_center.type] = "type " + pos_center.type # The most negatively charged atom in the anion if number > 0: resname = resname + "_" + str(number) neg_center = molecule.atoms[np.argmin(molecule.atoms.charges)] select_dict[resname] = "type " + neg_center.type
[docs]def ff_parser(ff_dir: str, xyz_dir: str) -> str: """ A parser to convert a force field field from Maestro format to LAMMPS data format. Args: ff_dir: The path to the Maestro force field file. xyz_dir: The path to the xyz structure file. Return: The output LAMMPS data string. """ with open(xyz_dir, "r") as f_xyz: molecule = pd.read_table(f_xyz, skiprows=2, delim_whitespace=True, names=["atom", "x", "y", "z"]) coordinates = molecule[["x", "y", "z"]] lo = coordinates.min().min() - 0.5 hi = coordinates.max().max() + 0.5 with open(ff_dir, "r") as f: lines_org = f.read() lines = lines_org.split("\n\n") atoms = "\n".join(lines[4].split("\n", 4)[4].split("\n")[:-1]) dfs = {} dfs["atoms"] = pd.read_csv( StringIO(atoms), names=SECTION_SORTER.get("atoms", {}).get("in_header"), delim_whitespace=True, usecols=[0, 4, 5, 6], ) dfs["atoms"] = pd.concat([dfs["atoms"], coordinates], axis=1) dfs["atoms"].index += 1 dfs["atoms"].index.name = "type" dfs["atoms"] = dfs["atoms"].reset_index() dfs["atoms"].index += 1 types = dfs["atoms"].copy().reset_index().set_index("atom")["type"] replace_dict = { "atom1": dict(types), "atom2": dict(types), "atom3": dict(types), "atom4": dict(types), } counts = {} counts["atoms"] = len(dfs["atoms"].index) mass_list = [] for index, row in dfs["atoms"].iterrows(): mass_list.append(MM_of_Elements.get(re.split(r"(\d+)", row["atom"])[0])) mass_df = pd.DataFrame(mass_list) mass_df.index += 1 mass_string = mass_df.to_string(header=False, index_names=False, float_format="{:.3f}".format) masses = ["Masses", mass_string] ff = ["Pair Coeffs"] dfs["atoms"]["mol-id"] = 1 atom_ff_string = dfs["atoms"][SECTION_SORTER["atoms"]["ff_header"]].to_string(header=False, index_names=False) ff.append(atom_ff_string) topo = ["Atoms"] atom_topo_string = dfs["atoms"][SECTION_SORTER["atoms"]["topo_header"]].to_string( header=False, index_names=False ) topo.append(atom_topo_string) for section in list(SECTION_SORTER.keys())[1:]: if SECTION_SORTER[section]["in_kw"] in lines_org: a, b, c, d = ( SECTION_SORTER[section]["sec_number"], SECTION_SORTER[section]["desired_split"], SECTION_SORTER[section]["desired_cols"], SECTION_SORTER[section]["in_header"], ) section_str = lines[a].split("\n", b)[b] dfs[section] = pd.read_csv( StringIO(section_str), names=d, delim_whitespace=True, usecols=list(range(c)), ) dfs[section].index += 1 dfs[section].index.name = "type" dfs[section] = dfs[section].replace(replace_dict) dfs[section] = dfs[section].reset_index() dfs[section].index += 1 if section == "impropers": dfs[section]["v1"] = dfs[section]["v2"] / 2 dfs[section]["v2"] = -1 dfs[section]["v3"] = 2 ff_string = dfs[section][SECTION_SORTER[section]["ff_header"]].to_string( header=False, index_names=False ) ff.append(SECTION_SORTER[section]["out_kw"][0]) ff.append(ff_string) topo_string = dfs[section][SECTION_SORTER[section]["topo_header"]].to_string( header=False, index_names=False ) topo.append(SECTION_SORTER[section]["out_kw"][1]) topo.append(topo_string) counts[section] = len(dfs[section].index) max_stats = len(str(max(list(counts.values())))) stats_template = "{:>" + str(max_stats) + "} {}" count_lines = [stats_template.format(v, k) for k, v in counts.items()] type_lines = [stats_template.format(v, k[:-1] + " types") for k, v in counts.items()] stats = "\n".join(count_lines + [""] + type_lines) header = [ f"LAMMPS data file created by mdgo (by {__author__})\n" "# OPLS force field: harmonic, harmonic, opls, cvff", stats, BOX.format(lo, hi), ] data_string = "\n\n".join(header + masses + ff + topo) + "\n" return data_string
[docs]def concentration_matcher( concentration: float, salt: Union[float, int, str, Molecule], solvents: List[Union[str, Dict[str, float]]], solv_ratio: List[float], num_salt: int = 100, mode: str = "v", radii_type: str = "Bondi", ) -> Tuple[List, float]: """ Estimate the number of molecules of each species in a box, given the salt concentration, salt type, solvent molecular weight, solvent density, solvent ratio and total number of salt. TODO: Auto box size according to Debye screening length Args: concentration: Salt concentration in mol/L. salt: Four types of input are accepted: 1. The salt name in string ('lipf6' or 'litfsi') 2. Salt molar volume in as a float/int (cm^3/mol) 3. A pymatgen Molecule object of the salt structure 4. The path to the salt structure xyz file Valid names are listed in the MOLAR_VOLUME dictionary at the beginning of this file and currently include only 'lipf6' or 'litfsi' If a Molecule or structure file is provided, mdgo will estimate the molar volume according to the VdW radii of the atoms. The specific radii used depend on the value of the 'radii_type' kwarg (see below). solvents: A list of solvent molecules. A molecule could either be a name (e.g. "water" or "ethylene carbonate") or a dict containing two keys "mass" and "density" in g/mol and g/mL, respectively. Valid names are listed in the ALIAS dictionary at the beginning of this file. solv_ratio: A list of relative weights or volumes of solvents. Must be the same length as solvents. For example, for a 30% / 70% (w/w) mix of two solvent, pass [0.3, 0.7] or [30, 70]. The sum of weights / volumes does not need to be normalized. num_salt: The number of salt in the box. mode: Weight mode (Weight/weight/W/w/W./w.) or volume mode (Volume/volume/V/v/V./v.) for interpreting the ratio of solvents. radii_type: "Bondi", "Lange", or "pymatgen". Bondi and Lange vdW radii are compiled in this package for H, B, C, N, O, F, Si, P, S, Cl, Br, and I. Choose 'pymatgen' to use the vdW radii from pymatgen.Element, which are available for most elements and reflect the latest values in the CRC handbook. Returns: A list of number of molecules in the simulation box, starting with the salt and followed by each solvent in 'solvents'. The list is followed by a float of the approximate length of one side of the box in Å. """ n_solvent = [] n = len(solv_ratio) if n != len(solvents): raise ValueError("solvents and solv_ratio must be the same length!") if isinstance(salt, (float, int)): salt_molar_volume = salt elif isinstance(salt, Molecule): salt_molar_volume = molecular_volume(salt, salt.composition.reduced_formula, radii_type=radii_type) elif isinstance(salt, str): if MOLAR_VOLUME.get(salt.lower()): salt_molar_volume = MOLAR_VOLUME.get(salt.lower(), 0) else: if not os.path.exists(salt): print(f"\nError: Input file '{salt}' not found.\n") sys.exit(1) name = os.path.splitext(os.path.split(salt)[-1])[0] ext = os.path.splitext(os.path.split(salt)[-1])[1] if not ext == ".xyz": print("Error: Wrong file format, please use a .xyz file.\n") sys.exit(1) salt_molar_volume = molecular_volume(salt, name, radii_type=radii_type) else: raise ValueError("Invalid salt type! Salt must be a number, string, or Molecule.") solv_mass = [] solv_density = [] for solv in solvents: if isinstance(solv, dict): solv_mass.append(solv.get("mass")) solv_density.append(solv.get("density")) else: solv_mass.append(MOLAR_MASS[ALIAS[solv.lower()]]) solv_density.append(DENSITY[ALIAS[solv.lower()]]) if mode.lower().startswith("v"): for i in range(n): n_solvent.append(solv_ratio[i] * solv_density[i] / solv_mass[i]) # type: ignore v_solv = sum(solv_ratio) n_salt = v_solv / (1000 / concentration - salt_molar_volume) n_all = [int(m / n_salt * num_salt) for m in n_solvent] n_all.insert(0, num_salt) volume = ((v_solv + salt_molar_volume * n_salt) / n_salt * num_salt) / 6.022e23 return n_all, volume ** (1 / 3) * 1e8 if mode.lower().startswith("w"): for i in range(n): n_solvent.append(solv_ratio[i] / solv_mass[i]) # type: ignore v_solv = np.divide(solv_ratio, solv_density).sum() n_salt = v_solv / (1000 / concentration - salt_molar_volume) n_all = [int(m / n_salt * num_salt) for m in n_solvent] n_all.insert(0, num_salt) volume = ((v_solv + salt_molar_volume * n_salt) / n_salt * num_salt) / 6.022e23 return n_all, volume ** (1 / 3) * 1e8 mode = input("Volume or weight ratio? (w or v): ") return concentration_matcher(concentration, salt_molar_volume, solvents, solv_ratio, num_salt=num_salt, mode=mode)
[docs]def sdf_to_pdb( sdf_file: str, pdb_file: str, write_title: bool = True, version: bool = True, credit: bool = True, pubchem: bool = True, ): """ Convert SDF file to PDB file. Args: sdf_file: Path to the input sdf file. pdb_file: Path to the output pdb file. write_title: Whether to write title in the pdb file. Default to True. version: Whether to version line (remark 4) line in the pdb file. Default to True. credit: Whether to credit line (remark 888) in the pdb file. Default to True. pubchem: Whether the sdf is downloaded from PubChem. Default to True. """ # parse sdf file file with open(sdf_file, "r") as inp: sdf_lines = inp.readlines() sdf = list(map(str.strip, sdf_lines)) if pubchem: title = "cid_" else: title = "" pdb_atoms: List[Dict[str, Any]] = [] # create pdb list of dictionaries atoms = 0 bonds = 0 atom1s = [] atom2s = [] orders = [] for i, line in enumerate(sdf): if i == 0: title += line.strip() + " " elif i in [1, 2]: pass elif i == 3: line_split = line.split() atoms = int(line_split[0]) bonds = int(line_split[1]) elif line.startswith("M END"): break elif i in list(range(4, 4 + atoms)): line_split = line.split() newline = { "ATOM": "HETATM", "serial": int(i - 3), "name": str(line_split[3]), "resName": "UNK", "resSeq": 900, "x": float(line_split[0]), "y": float(line_split[1]), "z": float(line_split[2]), "occupancy": 1.00, "tempFactor": 0.00, "altLoc": str(""), "chainID": str(""), "iCode": str(""), "element": str(line_split[3]), "charge": str(""), "segment": str(""), } pdb_atoms.append(newline) elif i in list(range(4 + atoms, 4 + atoms + bonds)): line_split = line.split() atom1 = int(line_split[0]) atom2 = int(line_split[1]) order = int(line_split[2]) atom1s.append(atom1) atom2s.append(atom2) while order > 1: orders.append([atom1, atom2]) orders.append([atom2, atom1]) order -= 1 else: pass # write pdb file with open(pdb_file, "wt") as outp: if write_title: outp.write(f"TITLE {title:70s}\n") if version: outp.write("REMARK 4 COMPLIES WITH FORMAT V. 3.3, 21-NOV-2012\n") if credit: outp.write("REMARK 888\n" "REMARK 888 WRITTEN BY MDGO (CREATED BY TINGZHENG HOU)\n") for n in range(atoms): line_dict = pdb_atoms[n].copy() if len(line_dict["name"]) == 3: line_dict["name"] = " " + line_dict["name"] # format pdb formatted_line = ( "{:<6s}{:>5d} {:^4s}{:1s}{:>3s} {:1s}{:>4.4}{:1s} " # pylint: disable=C0209 "{:>8.3f}{:>8.3f}{:>8.3f}{:>6.2f}{:>6.2f} " "{:<4s}{:>2s}{:<2s}" ).format( line_dict["ATOM"], line_dict["serial"], line_dict["name"], line_dict["altLoc"], line_dict["resName"], line_dict["chainID"], str(line_dict["resSeq"]), line_dict["iCode"], line_dict["x"], line_dict["y"], line_dict["z"], line_dict["occupancy"], line_dict["tempFactor"], line_dict["segment"], line_dict["element"], line_dict["charge"], ) # write outp.write(formatted_line + "\n") bond_lines = [[i] for i in range(atoms + 1)] for i, atom in enumerate(atom1s): bond_lines[atom].append(atom2s[i]) for i, atom in enumerate(atom2s): bond_lines[atom].append(atom1s[i]) for i, odr in enumerate(orders): for j, ln in enumerate(bond_lines): if ln[0] == odr[0]: bond_lines.insert(j + 1, odr) break for i in range(1, len(bond_lines)): bond_lines[i][1:] = sorted(bond_lines[i][1:]) for i in range(1, len(bond_lines)): outp.write("CONECT" + "".join(f"{num:>5d}" for num in bond_lines[i]) + "\n") outp.write("END\n") # final 'END'
[docs]def strip_zeros(items: Union[List[Union[str, float, int]], str]) -> Optional[List[int]]: """ Strip the trailing zeros of a sequence. Args: items: The sequence. Return: A new list of numbers. """ new_items = [int(i) for i in items] while new_items[-1] == 0: new_items.pop() while new_items[0] == 0: new_items.pop(0) if len(new_items) == 0: return None return new_items
if __name__ == "__main__": """ litfsi = Molecule.from_file( "/Users/th/Downloads/package/packmol-17.163/LiTFSI.xyz" ) mols, box_len = concentration_matcher(1.083, "litfsi", ["ec", "emc"], [0.3, 0.7], num_salt=166, mode="w") print(mols) print(box_len) """ sdf_to_pdb( "/Users/th/Downloads/test_mdgo/EC_7303.sdf", "/Users/th/Downloads/test_mdgo/test_util.pdb", )