ALIGN_CODES = | 'all' | codes of proteins in the alignment |
MATRIX_FILE = | 'family.mat' | the filename of the pairwise distance matrix |
OUTPUT_DIRECTORY = | '' | output directory |
The ALIGN_CODES variable is only used for output, not in calculations, so it does not have to be set.
In addition to the output in the log file, this routine creates the file OUTPUT_DIRECTORY/MATRIX_FILE with pairwise sequence distances that can be used directly as the input to the tree-making programs of the PHYLIP package, such as KITSCH [Felsenstein, 1985], and also for the DENDROGRAM and PRINCIPAL_COMPONENTS commands. A more general version of this command, which allows a user-specified measure for residue-residue differences, is SEQUENCE_COMPARISON.
# Example for: ID_TABLE, SEQUENCE_COMPARISON, PRINCIPAL_COMPONENTS, DENDROGRAM

# Pairwise sequence identity between sequences in the alignment.

# Read all entries in this alignment:
READ_ALIGNMENT FILE = 'toxin.ali'

# Calculate pairwise sequence identities
# (the distance matrix is written to MATRIX_FILE):
ID_TABLE MATRIX_FILE = 'toxin_id.mat'

# Calculate pairwise sequence similarities:
SET RR_FILE = '$(LIB)/as1.sim.mat', MAX_GAPS_MATCH = 1
READ_MODEL FILE = '2ctx', MODEL_SEGMENT = '1:' '71:'
SEQUENCE_COMPARISON MATRIX_FILE = 'toxin.mat', VARIABILITY_FILE = 'toxin.var'
WRITE_MODEL FILE = '2ctx.var'

# Do principal components clustering using sequence similarities
# (reads the matrix produced by SEQUENCE_COMPARISON above):
PRINCIPAL_COMPONENTS MATRIX_FILE = 'toxin.mat', FILE = 'toxin.princ'

# Dendrogram in the log file:
DENDROGRAM