# Source code for bioat.metatools
from bioat.lib.libjgi import JGIOperator
from bioat.logger import LoggerManager
# Module-level logger manager for "bioat.metatools"; MetaTools registers its
# class and method names on it and emits its debug messages through it.
lm = LoggerManager(mod_name="bioat.metatools")
[docs]
class MetaTools:
    """Toolbox of metagenome-related utilities."""

    # Register the class name with the module-level logger manager.
    lm.set_names(cls_name="MetaTools")

    def JGI_query(
        self,
        query_info: str | None = None,
        xml: str | None = None,
        log_fails: str | None = None,
        nretry: int = 4,
        timeout: int = 60,
        regex: str | None = None,
        all_get: bool = False,
        overwrite_conf: bool = False,
        filter_files: bool = False,
        proxy_pool: str | None = None,
        just_query_xml: bool = False,
        syntax_help: bool = False,
        usage: bool = False,
        log_level: str = "INFO",
    ):
        """JGI_query: Download files from the JGI-IMG database.

        Lists the files JGI offers for a query organism (via the curl API)
        and retrieves the selected ones. Internally a ``JGIOperator`` is
        built from the arguments and its ``query``, ``parse_xml`` and
        ``download`` steps are run in that order.
        Adapted from https://github.com/glarue/jgi-query.

        Args:
            query_info (str | None):
                JGI abbreviation of the organism, or the full URL of its
                'Info' page. Example: 'Nematostella vectensis' is known to
                JGI as 'Nemve1'. To find the abbreviation, search for the
                organism on JGI; the name appearing in the URL of its 'Info'
                page is the abbreviation to use.
            xml (str | None):
                Path of a local XML file to query, used in place of
                fetching a fresh copy from JGI.
            log_fails (str | None):
                Log file holding URLs whose download should be retried
                after a failure.
            nretry (int):
                How many times a file that errored is retried.
                0 skips such files.
            timeout (int):
                Download timeout in seconds. -1 disables it.
            regex (str | None):
                Regex pattern used to select and download files
                automatically, without interaction.
            all_get (bool):
                If True, select and download every file of the query
                automatically, without interaction.
            overwrite_conf (bool):
                If True, start the configuration dialog so the existing
                user/password configuration is overwritten.
            filter_files (bool):
                Under development. Filter the organism results by config
                categories rather than reporting every file JGI lists
                for the query.
            proxy_pool (str | None):
                URL of a proxy pool, e.g., http://abc.com:port. See
                https://github.com/hermanzhaozzzz/proxy_pool.
            just_query_xml (bool):
                Set True when only the XML file should be saved.
            syntax_help (bool):
                If True, provide syntax help in doc mode.
            usage (bool):
                If True, print verbose usage information and exit.
            log_level (str):
                Logging level; one of 'CRITICAL', 'ERROR', 'WARNING',
                'INFO', 'DEBUG', 'NOTSET'.
        """
        # Everything the operator needs, forwarded under the same names.
        operator_options = dict(
            query_info=query_info,
            xml=xml,
            log_fails=log_fails,
            nretry=nretry,
            timeout=timeout,
            regex=regex,
            all_get=all_get,
            overwrite_conf=overwrite_conf,
            filter_files=filter_files,
            proxy_pool=proxy_pool,
            just_query_xml=just_query_xml,
            syntax_help=syntax_help,
            usage=usage,
            log_level=log_level,
        )
        # Building the operator loads or creates the JGI account info and
        # checks automatically whether the user info must be overwritten.
        jgi = JGIOperator(**operator_options)

        lm.set_names(func_name="JGI_query")
        lm.set_log_level(log_level)

        # (operator method, debug message) pairs, executed strictly in order.
        # Methods are resolved lazily, right before each call.
        stages = (
            ("query", "run query"),
            ("parse_xml", "parse xml to json"),
            (
                "download",
                "start to download; calculate and display total size of selected data",
            ),
        )
        for method_name, message in stages:
            lm.logger.debug(message)
            getattr(jgi, method_name)()