// Copyright (C) 2014  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_LDA_ABSTRACT_Hh_
#ifdef DLIB_LDA_ABSTRACT_Hh_

#include <map>
#include "../matrix.h"
#include <vector>

namespace dlib
{

// ----------------------------------------------------------------------------------------

    template <
        typename T
        >
    void compute_lda_transform (
        matrix<T>& X,
        matrix<T,0,1>& M,
        const std::vector<unsigned long>& row_labels,
        unsigned long lda_dims = 500,
        unsigned long extra_pca_dims = 200
    );
    /*!
        requires
            - X.size() != 0
            - row_labels.size() == X.nr()
            - The number of distinct values in row_labels > 1
            - lda_dims != 0
        ensures
            - We interpret X as a collection of X.nr() input vectors, where each row of
              X is one of the vectors.
            - We interpret row_labels[i] as the label of the vector rowm(X,i).
            - This function performs the dimensionality reducing version of linear
              discriminant analysis.  That is, you give it a set of labeled vectors and it
              returns a linear transform that maps the input vectors into a new space that
              is good for distinguishing between the different classes.  In particular,
              this function finds matrices Z and M such that:
                - Given an input vector x (taken as a column vector with X.nc()
                  elements), Z*x-M is the transformed version of x.  That is, Z*x-M
                  maps x into a space where x vectors that share the same class label
                  are near each other.
                - Z*x-M results in the transformed vectors having zero expected mean.
                - Z.nr() <= lda_dims
                  (it might be less than lda_dims if there are not enough distinct class
                  labels to support lda_dims dimensions).
                - Z.nc() == X.nc()
                - We overwrite the input matrix X and store Z in it.  M is likewise an
                  output argument.  Therefore, the outputs of this function are in X
                  and M.
            - In order to deal with very high dimensional inputs, we perform PCA internally
              to map the input vectors into a space of at most lda_dims+extra_pca_dims
              dimensions prior to performing LDA.
    !*/

// ----------------------------------------------------------------------------------------

    std::pair<double,double> equal_error_rate (
        const std::vector<double>& low_vals,
        const std::vector<double>& high_vals 
    );
    /*!
        ensures
            - This function finds a threshold T that best separates the elements of
              low_vals from the elements of high_vals by selecting the threshold with
              equal error rate.  In particular, we try to pick a threshold T such that:
                - for all valid i:
                    - high_vals[i] >= T
                - for all valid i:
                    - low_vals[i] < T
              Where the best T is determined such that the fraction of low_vals >= T is the
              same as the fraction of high_vals < T.
            - Let ERR == the equal error rate.  I.e. the fraction of times low_vals >= T
              and high_vals < T.  Note that 0 <= ERR <= 1.
            - returns make_pair(ERR,T).  That is, the .first member of the returned pair
              is the error rate ERR and the .second member is the threshold T.
    !*/

// ----------------------------------------------------------------------------------------

    struct roc_point
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This object is a single point on a ROC (receiver operating
                characteristic) curve.  It is the element type of the array returned
                by compute_roc_curve().  The three fields are related as follows:  if
                you were to accept all detections with a score >= detection_threshold
                then you would obtain a true positive rate of true_positive_rate and a
                false positive rate of false_positive_rate.
        !*/

        double true_positive_rate;
        double false_positive_rate;
        double detection_threshold;
    };

    std::vector<roc_point> compute_roc_curve (
        const std::vector<double>& true_detections,
        const std::vector<double>& false_detections 
    );
    /*!
        requires
            - true_detections.size() != 0
            - false_detections.size() != 0
        ensures
            - This function computes the ROC (receiver operating characteristic) curve
              of the given data.  Therefore, we interpret true_detections as
              containing detection scores for a bunch of true detections and
              false_detections as detection scores from a bunch of false detections.  A
              perfect detector would always give higher scores to true detections than to
              false detections, resulting in a true positive rate of 1 and a false positive
              rate of 0, for some appropriate detection threshold.
            - Returns an array, ROC, such that:
                - ROC.size() == true_detections.size()+false_detections.size()
                - for all valid i:
                    - If you were to accept all detections with a score >= ROC[i].detection_threshold
                      then you would obtain a true positive rate of ROC[i].true_positive_rate and a
                      false positive rate of ROC[i].false_positive_rate.
                - ROC is ordered such that low detection rates come first.  That is, the
                  curve is swept from a high detection threshold to a low detection
                  threshold.
    !*/

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_LDA_ABSTRACT_Hh_