// Copyright (C) 2010  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_OPTIMIZATION_OCA_ABsTRACT_Hh_
#ifdef DLIB_OPTIMIZATION_OCA_ABsTRACT_Hh_

#include <chrono>

// ----------------------------------------------------------------------------------------

namespace dlib
{
    template <typename matrix_type>
    class oca_problem
    {
        /*!
            REQUIREMENTS ON matrix_type
                - matrix_type == a dlib::matrix capable of storing column vectors

            WHAT THIS OBJECT REPRESENTS
                This object is the interface used to define the optimization 
                problems solved by the oca optimizer defined later in this file.

                OCA solves optimization problems with the following form:
                    Minimize: f(w) == 0.5*length_squared(w) + C*R(w)

                    Where R(w) is a user-supplied convex function and C > 0.  Optionally,
                    there can also be non-negativity constraints on some or all of the 
                    elements of w.

                Or it can alternatively solve:
                    Minimize: f(w) == 0.5*length_squared(w-prior) + C*R(w)

                    Where prior is a user supplied vector and R(w) has the same
                    interpretation as above.
                       
                Or it can use the elastic net regularizer:
                    Minimize: f(w) == 0.5*(1-lasso_lambda)*length_squared(w) + lasso_lambda*sum(abs(w)) + C*R(w)

                    Where lasso_lambda is a number in the range [0, 1) and controls
                    trade-off between doing L1 and L2 regularization.  R(w) has the same
                    interpretation as above.
                       

                Note that the stopping condition must be provided by the user
                in the form of the optimization_status() function.
        !*/

    public:

        typedef typename matrix_type::type scalar_type;

        virtual ~oca_problem() {}

        virtual bool risk_has_lower_bound (
            scalar_type& lower_bound
        ) const { return false; }
        /*!
            ensures
                - if (R(w) >= a constant for all values of w) then
                    - returns true
                    - #lower_bound == the constant that lower bounds R(w)
                - else
                    - returns false
        !*/

        virtual bool optimization_status (
            scalar_type current_objective_value,
            scalar_type current_error_gap,
            scalar_type current_risk_value,
            scalar_type current_risk_gap,
            unsigned long num_cutting_planes,
            unsigned long num_iterations
        ) const = 0;
        /*!
            requires
                - This function is called by the OCA optimizer each iteration.  
                - current_objective_value == the current value of the objective function f(w)
                - current_error_gap == The bound on how much lower the objective function
                  can drop before we reach the optimal point.  At the optimal solution the
                  error gap is equal to 0.
                - current_risk_value == the current value of the R(w) term of the objective function.
                - current_risk_gap == the bound on how much lower the risk term can go.  At the optimal
                  solution the risk gap is zero.
                - num_cutting_planes == the number of cutting planes the algorithm is currently using.
                - num_iterations == A count of the total number of iterations that have executed
                  since we started running the optimization.
            ensures
                - If it is appropriate to terminate the optimization then this function returns true
                  and false otherwise.
        !*/

        virtual scalar_type get_c (
        ) const = 0;
        /*!
            ensures
                - returns the C parameter
        !*/

        virtual long get_num_dimensions (
        ) const = 0;
        /*!
            ensures
                - returns the number of free variables in this optimization problem
        !*/

        virtual void get_risk (
            matrix_type& current_solution,
            scalar_type& risk_value,
            matrix_type& risk_subgradient
        ) const = 0;
        /*!
            requires
                - is_col_vector(current_solution) == true
                - current_solution.size() == get_num_dimensions()
            ensures
                - #current_solution will be set to one of the following:
                    - current_solution (i.e. it won't be modified at all)
                    - The result of a line search passing through current_solution.  
                - #risk_value == R(#current_solution) 
                - #risk_subgradient == an element of the subgradient of R() at the 
                  point #current_solution
                - Note that #risk_value and #risk_subgradient are NOT multiplied by get_c()
        !*/

    };

// ----------------------------------------------------------------------------------------

    class oca
    {
        /*!
            INITIAL VALUE
                - get_subproblem_epsilon() == 1e-2
                - get_subproblem_max_iterations() == 50000
                - get_inactive_plane_threshold() == 20
                - get_max_runtime() == std::chrono::hours(24*356*290)
                  (i.e. 290 years, so basically forever)

            WHAT THIS OBJECT REPRESENTS
                This object is a tool for solving the optimization problem defined above
                by the oca_problem abstract class.  

                For reference, OCA solves optimization problems with the following form:
                    Minimize: f(w) == 0.5*length_squared(w) + C*R(w)

                    Where R(w) is a user-supplied convex function and C > 0.  Optionally,
                    this object can also add non-negativity constraints to some or all
                    of the elements of w.

                Or it can alternatively solve:
                    Minimize: f(w) == 0.5*length_squared(w-prior) + C*R(w)

                    Where prior is a user supplied vector and R(w) has the same
                    interpretation as above.
                       
                Or it can use the elastic net regularizer:
                    Minimize: f(w) == 0.5*(1-lasso_lambda)*length_squared(w) + lasso_lambda*sum(abs(w)) + C*R(w)

                    Where lasso_lambda is a number in the range [0, 1) and controls
                    trade-off between doing L1 and L2 regularization.  R(w) has the same
                    interpretation as above.
                       

                For a detailed discussion you should consult the following papers
                from the Journal of Machine Learning Research:
                    Optimized Cutting Plane Algorithm for Large-Scale Risk Minimization
                        Vojtech Franc, Soren Sonnenburg; 10(Oct):2157--2192, 2009. 

                    Bundle Methods for Regularized Risk Minimization
                        Choon Hui Teo, S.V.N. Vishwanthan, Alex J. Smola, Quoc V. Le; 11(Jan):311-365, 2010. 
        !*/
    public:

        oca (
        ); 
        /*!
            ensures
                - this object is properly initialized
        !*/

        template <
            typename matrix_type
            >
        typename matrix_type::type operator() (
            const oca_problem<matrix_type>& problem,
            matrix_type& w,
            unsigned long num_nonnegative = 0,
            unsigned long force_weight_to_1 = std::numeric_limits<unsigned long>::max()
        ) const;
        /*!
            requires
                - problem.get_c() > 0
                - problem.get_num_dimensions() > 0
            ensures
                - solves the given oca problem and stores the solution in #w.  In particular,
                  this function solves:
                    Minimize: f(w) == 0.5*length_squared(w) + C*R(w)
                - The optimization algorithm runs until problem.optimization_status() 
                  indicates it is time to stop.
                - returns the objective value at the solution #w
                - if (num_nonnegative != 0) then
                    - Adds the constraint that #w(i) >= 0 for all i < num_nonnegative.
                      That is, the first num_nonnegative elements of #w will always be
                      non-negative.  This includes the copies of w passed to get_risk()
                      in the form of the current_solution vector as well as the final
                      output of this function.
                - if (force_weight_to_1 < problem.get_num_dimensions()) then
                    - The optimizer enforces the following constraints:
                        - #w(force_weight_to_1) == 1
                        - for all i > force_weight_to_1:
                            - #w(i) == 0 
                        - That is, the element in the weight vector at the index indicated
                          by force_weight_to_1 will have a value of 1 upon completion of
                          this function, while all subsequent elements of w will have
                          values of 0.
        !*/

        template <
            typename matrix_type
            >
        typename matrix_type::type operator() (
            const oca_problem<matrix_type>& problem,
            matrix_type& w,
            const matrix_type& prior
        ) const;
        /*!
            requires
                - problem.get_c() > 0
                - problem.get_num_dimensions() > 0
                - is_col_vector(prior) == true
                - prior.size() == problem.get_num_dimensions()
            ensures
                - solves the given oca problem and stores the solution in #w.
                - In this mode, we solve a version of the problem with a different
                  regularizer.  In particular, this function solves:
                    Minimize: f(w) == 0.5*length_squared(w-prior) + C*R(w)
                - The optimization algorithm runs until problem.optimization_status() 
                  indicates it is time to stop.
                - returns the objective value at the solution #w
        !*/

        template <
            typename matrix_type
            >
        typename matrix_type::type solve_with_elastic_net (
            const oca_problem<matrix_type>& problem,
            matrix_type& w,
            scalar_type lasso_lambda,
            unsigned long force_weight_to_1 = std::numeric_limits<unsigned long>::max()
        ) const;
        /*!
            requires
                - problem.get_c() > 0
                - problem.get_num_dimensions() > 0
                - 0 <= lasso_lambda < 1
            ensures
                - Solves the given oca problem and stores the solution in #w, but uses an
                  elastic net regularizer instead of the normal L2 regularizer.  In
                  particular, this function solves:
                    Minimize: f(w) == 0.5*(1-lasso_lambda)*length_squared(w) + lasso_lambda*sum(abs(w)) + C*R(w)
                - The optimization algorithm runs until problem.optimization_status() 
                  indicates it is time to stop.
                - returns the objective value at the solution #w
                - if (force_weight_to_1 < problem.get_num_dimensions()) then
                    - The optimizer enforces the following constraints:
                        - #w(force_weight_to_1) == 1
                        - for all i > force_weight_to_1:
                            - #w(i) == 0 
                        - That is, the element in the weight vector at the index indicated
                          by force_weight_to_1 will have a value of 1 upon completion of
                          this function, while all subsequent elements of w will have
                          values of 0.
        !*/

        void set_subproblem_epsilon (
            double eps
        ); 
        /*!
            requires
                - eps > 0
            ensures
                - #get_subproblem_epsilon() == eps 
        !*/

        double get_subproblem_epsilon (
        ) const; 
        /*!
            ensures
                - returns the accuracy used in solving the quadratic programming
                  subproblem that is part of the overall OCA algorithm.
        !*/

        void set_subproblem_max_iterations (
            unsigned long sub_max_iter
        ); 
        /*!
            requires
                - sub_max_iter > 0
            ensures
                - #get_subproblem_max_iterations() == sub_max_iter
        !*/

        unsigned long get_subproblem_max_iterations (
        ) const; 
        /*!
            ensures
                - returns the maximum number of iterations this object will perform
                  while attempting to solve each quadratic programming subproblem.
        !*/

        void set_inactive_plane_threshold (
            unsigned long inactive_thresh
        ); 
        /*!
            requires
                - inactive_thresh > 0
            ensures
                - #get_inactive_plane_threshold() == inactive_thresh
        !*/

        unsigned long get_inactive_plane_threshold (
        ) const; 
        /*!
            ensures
                - As OCA runs it builds up a set of cutting planes.  Typically
                  cutting planes become inactive after a certain point and can then
                  be removed.  This function returns the number of iterations of
                  inactivity required before a cutting plane is removed.
        !*/

        void set_max_runtime (
            const std::chrono::nanoseconds& max_runtime
        ) const;
        /*!
            ensures
                - #get_max_runtime() == max_runtime
        !*/

        std::chrono::nanoseconds get_max_runtime (
        ) const;
        /*!
            ensures
                - returns the maximum amount of time we will let the solver run before 
                  making it terminate.
        !*/

    };
}

// ----------------------------------------------------------------------------------------

#endif // DLIB_OPTIMIZATION_OCA_ABsTRACT_Hh_