/**********************************************************************
 * File: distancemat.h
 * Author: Kevin Howe
 * Copyright (C) Genome Research Limited, 2002-
 *-------------------------------------------------------------------
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *   http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *-------------------------------------------------------------------
 * NOTES:
 * Functions and types for the manipulation of Distance Matrices
 **********************************************************************/
#ifndef _DISTANCEMAT
#define _DISTANCEMAT

#include <math.h>

#include "util.h"
#include "align.h"
#include "time.h"

/* Upper bound associated with sequence names in phylip-format files.
   NOTE(review): presumably the name-buffer limit used by
   read_phylip_DistanceMatrix -- confirm against the implementation. */
#define MAX_PHYLIP_NAME_LEN 100

/******************* structure definitions ****************************/

/* A single pairwise distance value (single-precision float). */
typedef float Distance;

/* A distance matrix. According to the function notes in this header,
   matrices are produced in bottom-left triangular format, so entries
   should be read through index_DistanceMatrix rather than directly. */
struct DistanceMatrix {
  Distance **data;  /* array of row pointers to the distance values */
  int size;         /* order of the matrix; presumably the number of
                       sequences it covers -- TODO confirm */
};


/********************** function prototypes ***************************/


/*********************************************************************
 FUNCTION: calc_DistanceMatrix
 DESCRIPTION: 
   Fills in the given distance matrix from the given multiple alignment
 RETURNS: nothing (the result is written into the supplied matrix)
 ARGS:
   mat            A DistanceMatrix to fill in
   aln            A multiple alignment
   use_rand_cols  A boolean indicating whether or not random columns 
                  should be used for purposes of bootstrapping
   use_kimura     A boolean indicating whether the Kimura distance 
                  adjustment is to be used or not.
 NOTES:
   0. The given DistanceMatrix and Alignment should be of the same order

   1. The matrix produced is in bottom-left triangular format; don't you
   go trying to access that top-right section (I'm warning you...)

   2. At the moment, the function calculates distance based on sequence
   identity, using Kimura's function if that option is raised.

   3. If use_rand_cols is true, then the matrix is constructed using
   random sampling of columns, for the purposes of bootstrapping. At 
   the moment, the native function 'rand' is used to do this, suitably 
   seeded by time (by the caller). This may prove unsatisfactory...

   4. Where no information is available to determine the distance 
   between two sequences, a value of twice the maximum observed 
   distance is assigned (inspiration from ISMB99 poster by Huson,
   Smith and Warnow).

 *********************************************************************/
void calc_DistanceMatrix(struct DistanceMatrix *mat,
                         struct Alignment *aln,
                         unsigned int use_rand_cols,
                         unsigned int use_kimura);

/*********************************************************************
 FUNCTION: clone_DistanceMatrix
 DESCRIPTION: 
   Produces a brand new DistanceMatrix, identical to the source
 RETURNS: struct DistanceMatrix * (the newly created copy)
 ARGS: 
   source  The distance matrix to copy
 NOTES: 
   1. The matrix produced is in bottom-left triangular format; don't you
   go trying to access that top-right section (I'm warning you...)
 *********************************************************************/
struct DistanceMatrix *clone_DistanceMatrix(struct DistanceMatrix *source);

/*********************************************************************
 FUNCTION: empty_DistanceMatrix
 DESCRIPTION: 
   Produces an empty distance matrix of the given size; its entries 
   are left uninitialised
 RETURNS: struct DistanceMatrix * (the newly created matrix)
 ARGS: 
   size  The size of the matrix to be created
 NOTES: 
   1. The matrix produced is in bottom-left triangular format; don't you
   go trying to access that top-right section (I'm warning you...)
 *********************************************************************/
struct DistanceMatrix *empty_DistanceMatrix(unsigned int size);

/*********************************************************************
 FUNCTION: free_DistanceMatrix
 DESCRIPTION: 
   Frees the memory for the given distance matrix
 RETURNS: void *
   NOTE(review): the returned value is not specified in this header;
   presumably NULL, so that callers can reset their pointer with
   mat = free_DistanceMatrix( mat ) -- confirm in the implementation.
 ARGS: 
   mat  The distance matrix to free
 NOTES: 
 *********************************************************************/
void *free_DistanceMatrix(struct DistanceMatrix *mat);

/********************************************************************** 
 FUNCTION: index_DistanceMatrix
 DESCRIPTION: 
   Indexes the given distance matrix with the given indices,
   returning the appropriate distance.
 RETURNS: Distance (float)
 ARGS: 
   mat  The distance matrix to look up
   row  Row index
   col  Column index
 NOTES: 
   This function is necessary to account for the fact that the distance 
   matrix may be implemented as a symmetrical or triangular matrix.
   It therefore abstracts the internals of the distance matrix, at the
   cost of a function call for each lookup (is this wise...?)
 **********************************************************************/
Distance index_DistanceMatrix(struct DistanceMatrix *mat,
                              unsigned int row,
                              unsigned int col);

/*********************************************************************
 FUNCTION: print_DistanceMatrix
 DESCRIPTION: 
   Prints the given distance matrix to the given file handle.
 RETURNS: nothing
 ARGS: 
   handle  The file handle to print to
   mat     The distance matrix to print
 NOTES: 
   A DistanceMatrix does not exist in isolation in practice but as
   part of a Cluster (this is to maintain the tight coupling between 
   the matrix and the sequences for which it is expressing the distances). 
   Therefore, to read or write a useful distance
   matrix (for compatibility with the phylip package for example)
   use write_phylip_Cluster
 *********************************************************************/
void print_DistanceMatrix(FILE *handle, struct DistanceMatrix *mat);


/********************************************************************* 
 FUNCTION: read_phylip_DistanceMatrix
 DESCRIPTION: 
   This function creates a DistanceMatrix from the given input file.
   It also creates a dummy alignment (sequences with just names) and
   puts it in the given Alignment pointer
 RETURNS: struct DistanceMatrix * (the matrix read from the file)
 ARGS: 
   handle   A file handle to read from
   aln_ptr  A pointer to an Alignment pointer, which receives the 
            dummy alignment
 NOTES: 
   The file is assumed to be in the distance matrix file format used
   by the phylip package:

     4
  Name_1  0.0000   0.6776   0.6786  0.2342
  Name_2  0.6776   0.0000   0.1111  0.9999
  Name_3  0.6786   0.1111   0.0000  0.4444
  Name_4  0.2342   0.9999   0.4444  0.0000
 *********************************************************************/
struct DistanceMatrix *read_phylip_DistanceMatrix(FILE *handle,
                                                  struct Alignment **aln_ptr);

/********************************************************************* 
 FUNCTION: write_phylip_DistanceMatrix
 DESCRIPTION: 
   This function takes the given DistanceMatrix and writes it to the
   given file handle in phylip format. The alignment is needed for the
   sequence names
 RETURNS: nothing
 ARGS: 
   handle  A file handle to write to
   mat     A DistanceMatrix pointer
   aln     An Alignment pointer (supplies the sequence names)
 NOTES: 
   The file is written in the distance matrix file format used
   by the phylip package:

     4
  Name_1  0.0000   0.6776   0.6786  0.2342
  Name_2  0.6776   0.0000   0.1111  0.9999
  Name_3  0.6786   0.1111   0.0000  0.4444
  Name_4  0.2342   0.9999   0.4444  0.0000
*********************************************************************/

void write_phylip_DistanceMatrix(FILE *handle,
                                 struct DistanceMatrix *mat,
                                 struct Alignment *aln);


#endif
