pycdhit#
A Python interface for the CD-HIT package.
Functions
| read_fasta | Parse file in fasta format. |
| write_fasta | Write fasta data to a file. |
| read_clstr | Parse file in clstr format. |
| cd_hit | Run command cd-hit. |
| cd_hit_2d | Run command cd-hit-2d. |
| cd_hit_est | Run command cd-hit-est. |
| cd_hit_est_2d | Run command cd-hit-est-2d. |
| cd_hit_div | Run command cd-hit-div. |
| cd_hit_454 | Run command cd-hit-454. |
| cd_hit_dup | Run command cd-hit-dup. |
| cd_hit_lap | Run command cd-hit-lap. |
Classes
| CommandBase | Base class for command-line programs. |
| CDHIT | Class for CD-HIT programs. |
- class CommandBase(prog: str, path: str | None = None)#
Base class for command-line programs.
- Parameters:
prog – Name of the program.
path – Path of the program. Default None.
- help()#
Print help message.
- set_options(**kwargs) CommandBase #
Set and update options and arguments.
- Parameters:
**kwargs – Options and arguments of the command.
- Returns:
Instance of self with options updated.
- run()#
Run the program.
- Returns:
The CompletedProcess.
- class CDHIT(prog: str = 'cd-hit', path: str | None = None)#
Class for CD-HIT programs.
- Parameters:
prog – Name of the program. {‘cd-hit’, ‘cd-hit-2d’, ‘cd-hit-est’, ‘cd-hit-est-2d’, ‘cd-hit-454’}, default ‘cd-hit’.
path – Path of the program. Default None.
- help()#
Print help message.
- set_options(**kwargs) CDHIT #
Set and update options and arguments.
- Parameters:
**kwargs – Options and arguments of the command.
- Returns:
Instance of self with options updated.
- cluster(input1: pandas.DataFrame, input2: pandas.DataFrame | None = None) Tuple[pandas.DataFrame, ...] #
Run the program with DataFrame input.
- Parameters:
input1 – Input fasta data.
input2 – Input fasta data. Required for 2D programs.
- Returns:
The output fasta and clstr data, as a tuple of DataFrame.
Note
Specify the option ‘o’ to keep the output files.
- run()#
Run the program.
- Returns:
The CompletedProcess.
- read_fasta(file: str | Path) pandas.DataFrame #
Parse file in fasta format.
- Parameters:
file – A file path.
- Returns:
The data of fasta file.
- write_fasta(file: str | Path, data: pandas.DataFrame)#
Write fasta data to a file.
- Parameters:
file – A file path.
data – A DataFrame with columns ‘identifier’ and ‘sequence’.
- read_clstr(file: str | Path) pandas.DataFrame #
Parse file in clstr format.
- Parameters:
file – A file path.
- Returns:
The data of clstr file.
- cd_hit(**kwargs) CompletedProcess #
Run command cd-hit.
If the environment variable CD_HIT_DIR exists, it will be used as the path of the program.
- Parameters:
**kwargs – Parameters and arguments of the command.
- Returns:
The CompletedProcess.
- Raises:
CalledProcessError – If command returns non-zero exit status.
FileNotFoundError – If program is not installed.
- cd_hit_2d(**kwargs) CompletedProcess #
Run command cd-hit-2d.
If the environment variable CD_HIT_DIR exists, it will be used as the path of the program.
- Parameters:
**kwargs – Parameters and arguments of the command.
- Returns:
The CompletedProcess.
- Raises:
CalledProcessError – If command returns non-zero exit status.
FileNotFoundError – If program is not installed.
- cd_hit_est(**kwargs) CompletedProcess #
Run command cd-hit-est.
If the environment variable CD_HIT_DIR exists, it will be used as the path of the program.
- Parameters:
**kwargs – Parameters and arguments of the command.
- Returns:
The CompletedProcess.
- Raises:
CalledProcessError – If command returns non-zero exit status.
FileNotFoundError – If program is not installed.
- cd_hit_est_2d(**kwargs) CompletedProcess #
Run command cd-hit-est-2d.
If the environment variable CD_HIT_DIR exists, it will be used as the path of the program.
- Parameters:
**kwargs – Parameters and arguments of the command.
- Returns:
The CompletedProcess.
- Raises:
CalledProcessError – If command returns non-zero exit status.
FileNotFoundError – If program is not installed.
- cd_hit_div(**kwargs) CompletedProcess #
Run command cd-hit-div.
If the environment variable CD_HIT_DIR exists, it will be used as the path of the program.
- Parameters:
**kwargs – Parameters and arguments of the command.
- Returns:
The CompletedProcess.
- Raises:
CalledProcessError – If command returns non-zero exit status.
FileNotFoundError – If program is not installed.
- cd_hit_454(**kwargs) CompletedProcess #
Run command cd-hit-454.
If the environment variable CD_HIT_DIR exists, it will be used as the path of the program.
- Parameters:
**kwargs – Parameters and arguments of the command.
- Returns:
The CompletedProcess.
- Raises:
CalledProcessError – If command returns non-zero exit status.
FileNotFoundError – If program is not installed.
- cd_hit_dup(**kwargs) CompletedProcess #
Run command cd-hit-dup.
If the environment variable CD_HIT_AUXTOOLS_DIR exists, it will be used as the path of the program.
- Parameters:
**kwargs – Parameters and arguments of the command.
- Returns:
The CompletedProcess.
- Raises:
CalledProcessError – If command returns non-zero exit status.
FileNotFoundError – If program is not installed.
- cd_hit_lap(**kwargs) CompletedProcess #
Run command cd-hit-lap.
If the environment variable CD_HIT_AUXTOOLS_DIR exists, it will be used as the path of the program.
- Parameters:
**kwargs – Parameters and arguments of the command.
- Returns:
The CompletedProcess.
- Raises:
CalledProcessError – If command returns non-zero exit status.
FileNotFoundError – If program is not installed.