# Source code for bioat.crisprtools
"""crisprtools.py.

This module provides a toolbox for mining CRISPR-related sequences in metagenomic data.
It includes functionality for identifying Cas candidates associated with CRISPR loci
and for annotating Cas13 candidates from protein sequences. The main class, `CrisprTools`,
provides methods for calling external executables such as Prodigal and Pilercr for protein
prediction and CRISPR identification.

Classes:
    CrisprTools: A class containing methods for identifying Cas candidates and Cas13
        candidates from genomic data.

Methods:
    cas_finder: A method for de novo annotation of Cas candidates from CRISPR loci,
        utilizing input fasta files and producing various output files.
    cas13_finder: A method for the annotation of Cas13 candidates from protein
        fasta files.

Usage:
    To use this module, create an instance of `CrisprTools` and call the methods
    `cas_finder` or `cas13_finder` with the appropriate parameters to perform
    CRISPR analysis on your dataset.
"""
from bioat.lib.libfastx import cas13_finder, cas_finder
from bioat.lib.libpath import check_executable
class CrisprTools:
    """CRISPR mining toolbox.

    This class provides methods for performing CRISPR analysis on datasets,
    including finding Cas proteins and CRISPR sequences. It holds no state:
    each method forwards its arguments, by keyword, to the module-level
    function of the same name imported from ``bioat.lib.libfastx``.

    Attributes:
        None
    """

    def cas_finder(
        self,
        input_fa,
        output_faa=None,
        output_contig_fa=None,
        output_crispr_info_tab=None,
        lmin=3000,  # 3001 in Nature Methods paper
        lmax=None,
        extend=10_000,
        temp_dir=None,
        prodigal=None,
        prodigal_mode="meta",
        pilercr=None,
        rm_temp=True,
        log_level="INFO",
    ):
        """De novo annotation for Cas candidates from neighbors of CRISPR loci.

        Both executables are passed to ``check_executable`` before any work
        is delegated to the imported ``cas_finder`` function.

        Args:
            input_fa (str): Path to the input metagenome fasta file containing many contigs.
            output_faa (str, optional): Path to save the de novo annotated Cas candidates.
            output_contig_fa (str, optional): Path to save the whole contigs of de novo annotated Cas candidates.
            output_crispr_info_tab (str, optional): Path to save the de novo annotated CRISPR info table (CSV format).
            lmin (int, optional): Minimum length for a contig. Defaults to 3000.
            lmax (int, optional): Maximum length for a contig. Defaults to None.
            extend (int, optional): Distance over which proteins are considered from the start/end of the CRISPR loci. Defaults to 10000.
            temp_dir (str, optional): Directory to store temporary files. Defaults to None.
            prodigal (str, optional): Path to the Prodigal executable. Defaults to None.
            prodigal_mode (str, optional): Mode for Prodigal annotation. Can be "meta" or "single". Defaults to "meta".
            pilercr (str, optional): Path to the Pilercr executable. Defaults to None.
            rm_temp (bool, optional): If False, temporary files will be kept. Defaults to True.
            log_level (str, optional): Logging level; set to "DEBUG" to see logs from prodigal and pilercr. Defaults to "INFO".
        """
        # NOTE(review): with the defaults, ``None`` is handed to
        # check_executable for both tools -- confirm that helper accepts None.
        check_executable(name=prodigal)
        check_executable(name=pilercr)
        # Inside a method body the bare name ``cas_finder`` resolves to the
        # module-level import, not to this method (class scope is not searched).
        cas_finder(
            input_fa=input_fa,
            output_faa=output_faa,
            output_contig_fa=output_contig_fa,
            output_crispr_info_tab=output_crispr_info_tab,
            lmin=lmin,
            lmax=lmax,
            extend=extend,
            temp_dir=temp_dir,
            prodigal=prodigal,
            prodigal_mode=prodigal_mode,
            pilercr=pilercr,
            rm_temp=rm_temp,
            log_level=log_level,
        )

    # TODO: cas12_finder (Cas12 mining toolbox) is under development.

    def cas13_finder(
        self,
        input_faa,
        output_faa=None,
        lmin=200,
        lmax=1500,
        log_level="INFO",
    ):
        """De novo annotation for Cas13 candidates from proteins.faa.

        Args:
            input_faa (str): The input file containing Cas candidates in .faa format.
            output_faa (str, optional): The output file for Cas13 candidates in .faa format. Defaults to None.
            lmin (int, optional): Minimum length for a Cas candidate. Defaults to 200.
            lmax (int, optional): Maximum length for a Cas candidate. Defaults to 1500.
            log_level (str, optional): The logging level. Options are 'CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG', 'NOTSET'. Defaults to 'INFO'.
        """
        # As in cas_finder, the bare name here is the imported function.
        cas13_finder(
            input_faa=input_faa,
            output_faa=output_faa,
            lmin=lmin,
            lmax=lmax,
            log_level=log_level,
        )
# Script entry point: intentionally a no-op, so running this file directly
# does nothing beyond importing the definitions above.
if __name__ == "__main__":
    pass