Source code for bioat.foldtools
import inspect
import Bio
from Bio.Seq import Seq
from bioat.lib.libpdb import pdb2fasta, show_ref_cut
from bioat.logger import LoggerManager
# Module-level logger manager shared by the FoldTools class below; its
# class/function names and log level are updated on each method call.
lm = LoggerManager(mod_name="bioat.foldtools")
[docs]
class FoldTools:
    """Folding toolbox.

    Thin command-style wrappers around the helpers imported from
    ``bioat.lib.libpdb``. Each method configures the module-level logger
    manager, logs its parameters at DEBUG level and then delegates.
    """

    # Runs once, at class-definition time, to tag log records with the class.
    lm.set_names(cls_name="FoldTools")

    def __init__(self):
        """Create the toolbox; it holds no instance state."""
        pass

    # NOTE(review): the ``Bio.PDB.Structure.Structure`` annotations below are
    # evaluated when the method is defined and only ``import Bio`` is visible
    # here -- presumably ``bioat.lib.libpdb`` loads ``Bio.PDB``; confirm.
    def show_ref_cut(
        self,
        ref_seq: str | Seq,
        ref_pdb: str | Bio.PDB.Structure.Structure,
        cut_seq: str | Seq | None = None,
        cut_pdb: str | Bio.PDB.Structure.Structure | None = None,
        ref_color: str = "blue",
        ref_color_base: str | None = None,
        ref_value_dict: dict | None = None,
        cut_color="green",
        gap_color="red",
        ref_style="cartoon",
        cut_style="cartoon",
        gap_style="cartoon",
        ref_value_random: bool = False,
        output_fig: str | None = None,
        log_level="WARNING",
        **kwargs,
    ):
        """Visualize a ref/cut sequence alignment on PDB structures with py3Dmol.

        Delegates to ``bioat.lib.libpdb.show_ref_cut`` with the named
        arguments below.

        Args:
            ref_seq (str or Seq): Amino acid sequence content for the ref
                protein.
            ref_pdb (str or Bio.PDB.Structure.Structure): PDB file path, or
                structure object, of the reference structure.
            cut_seq (str, Seq or None, optional): Amino acid sequence content
                for the cut protein.
            cut_pdb (str, Bio.PDB.Structure.Structure or None, optional): PDB
                file path, or structure object, of the cut structure.
            ref_color (str, optional): Color for reference residues.
            ref_color_base (str or None, optional): ref_color_base will be
                used as base color, and ref_color will be used as target
                color. If None, do not apply color mapping. Defaults to None.
            ref_value_dict (dict or None, optional): A dictionary of values
                for the ref color map, it will be normalized to the range of
                [0 - 1]. If None, all residues will be colored with the same
                color. e.g. ref_value_dict = {'V_0': 0.4177215189873418,
                'S_1': 0.8185654008438819, 'K_2': 0.9915611814345991,
                'G_3': 0.42616033755274263, ...}
            cut_color (str, optional): Color for cut residues.
            gap_color (str, optional): Color for gaps or removed residues.
            ref_style (str, optional): "stick", "sphere", "cartoon", or "line"
            cut_style (str, optional): "stick", "sphere", "cartoon", or "line"
            gap_style (str, optional): "stick", "sphere", "cartoon", or "line"
            ref_value_random (bool, optional): If True, ref_value_dict will be
                randomly generated. Defaults to False.
            output_fig (str or None, optional): Output figure file path. If
                None, the figure will not be saved in html format. Defaults
                to None.
            log_level (str, optional): Log level. Defaults to "WARNING".
            **kwargs: Accepted for call compatibility only; these are logged
                at DEBUG level and are NOT forwarded to the library function.

        Returns:
            None
        """
        lm.set_names(func_name="show_ref_cut")
        lm.set_level(log_level)

        # Call tracing goes through the logger (not stdout) so it obeys
        # ``log_level``. Inside a method the bare name ``show_ref_cut``
        # resolves to the module-level import, not to this method.
        lm.logger.debug(f"Calling show_ref_cut with {kwargs}")
        lm.logger.debug(
            f"Actual show_ref_cut module: {inspect.getmodule(show_ref_cut)}"
        )

        # Log the parameters before delegating so they are still recorded
        # when the library call raises.
        lm.logger.debug(
            f"""\
Params:
-------
ref_seq: {ref_seq}
ref_pdb: {ref_pdb}
cut_seq: {cut_seq}
cut_pdb: {cut_pdb}
ref_color: {ref_color}
ref_color_base: {ref_color_base}
cut_color: {cut_color}
gap_color: {gap_color}
ref_style: {ref_style}
cut_style: {cut_style}
gap_style: {gap_style}
ref_value_random: {ref_value_random},
output_fig : {output_fig},
log_level: {log_level}"""
        )

        show_ref_cut(
            ref_seq=ref_seq,
            cut_seq=cut_seq,
            ref_pdb=ref_pdb,
            cut_pdb=cut_pdb,
            ref_color=ref_color,
            ref_color_base=ref_color_base,
            ref_value_dict=ref_value_dict,
            cut_color=cut_color,
            gap_color=gap_color,
            ref_style=ref_style,
            cut_style=cut_style,
            gap_style=gap_style,
            ref_value_random=ref_value_random,
            output_fig=output_fig,
            log_level=log_level,
        )

    def pdb2fasta(self, input: str, output: str | None = None, log_level="WARNING"):
        """Converts a PDB file to a FASTA file.

        This function processes the provided PDB file and extracts protein,
        DNA, RNA sequences, and other molecules appropriately to create a
        FASTA file. The work is delegated to ``bioat.lib.libpdb.pdb2fasta``.

        Details:
            1. **Proteins**:
                The protein sequence for each chain will be extracted as
                "Chain X Protein".
            2. **DNA and RNA**:
                Bases for DNA (A, T, G, C) will be saved as "Chain X DNA",
                and bases for RNA (A, U, G, C) will be saved as "Chain X RNA".
            3. **Other molecules**:
                Any unrecognized molecules (e.g., ions, modified molecules)
                will be labeled as [residue] and stored as
                "Chain X Other molecules".
            4. **Multi-chain complexes**:
                The program supports multi-chain structures in complexes, and
                the content of each chain will be recorded separately.

        Args:
            input (str):
                Input file path.
            output (str, optional):
                Output file path. If None, the output file will be named as
                the basename of the input file with a ".fa" extension.
                Defaults to None.
            log_level (str, optional):
                Logging level. Defaults to "WARNING".

        Returns:
            None
        """
        lm.set_names(func_name="pdb2fasta")
        lm.set_level(log_level)
        lm.logger.debug(
            f"""\
Params:
-------
input: {input}
output: {output}
log_level: {log_level}"""
        )
        # The public parameter names differ from the library's keyword names.
        pdb2fasta(pdb_file=input, output_fasta=output, log_level=log_level)