/******************************************************************

    CBRLS.C

    Ismo Kärkkäinen

    Randomized local search for a single number of clusters.

******************************************************************/

/* Program identification strings; printed by PrintInfo() and
   PrintOperatingInfo(). */
#define ProgName       "CBRLS"
#define VersionNumber  "Version 0.2.9"
#define LastUpdated    "6.4.2004" /* iak */

/* Not referenced in the visible part of this file after this point.
   Presumably names the parameter definition file consumed by the included
   parametr.c -- TODO confirm. */
#define  FACTFILE  "cbrls.fac"

#include "parametr.c"
#include "rls.h"
#include "cb.h"
#include "file.h"
#include "interfc.h"
#include "memctrl.h"
#include "random.h"
#include "cb_util.h"
#include "DistCrit/distance.h"
#include "DistCrit/criteria.h"
#include "iter.h"
#include "sortcb.h"

/* Size, in chars, of each static file name buffer declared in this file. */
#define MAXFILENAME 512

/* Fallback two-argument maximum, defined only when no earlier header has
   already supplied a max macro. The selected argument is evaluated twice,
   so do not pass expressions with side effects. */
#ifndef max
#define max(a,b) ((a) > (b) ? (a) : (b))
#endif

/* Print the program banner (name, version, last modification date and
   build date/time) together with the command-line usage line, then the
   option list produced by PrintOptions() and a closing blank line. */
void PrintInfo() {
    PrintMessage(
        "%s\t%s\n(last modified %s, compiled %s, %s)\n\n"
        "Randomized local search for fixed number of clusters\n"
        "Usage: %s data.ts out.cb [out.pa [in.cb [in.pa]]] %coptions\n\n",
        ProgName,
        VersionNumber,
        LastUpdated,
        __DATE__,
        __TIME__,
        ProgName,
        OPTION_SYMBOL);

    PrintOptions();
    PrintMessage("\n");
}

/* Echo the program identification and the options in effect.
   Nothing is printed when the QuietLevel parameter is zero. */
void PrintOperatingInfo() {
    if (!Value(QuietLevel)) {
        return;
    }

    PrintMessage("\n%s %s %s\n\n", ProgName, VersionNumber, LastUpdated);
    PrintSelectedOptions();
}

/* File name buffers for the positional command-line arguments. They start
   out empty, and the checks in this file treat an empty name as "not
   given". They are reachable through paraminfo, which main() hands to
   ParseParameters(); presumably that call fills them in -- TODO confirm. */
static char DataName[MAXFILENAME] = "\0";
static char OutCBName[MAXFILENAME] = "\0";
static char OutPAName[MAXFILENAME] = "\0";
static char InCBName[MAXFILENAME] = "\0";
static char InPAName[MAXFILENAME] = "\0";

/* Number of entries in paraminfo; passed to ParseParameters() in main(). */
#define PARAMETER_FILENAME_COUNT 5

/* One entry per file name, in the same order as the usage line printed by
   PrintInfo(): data.ts out.cb [out.pa [in.cb [in.pa]]].
   NOTE(review): the third field (0, 0, 1, 2, 3) looks like an optionality
   level matching the bracket nesting of the usage line, and the second
   field like a default extension or name -- confirm both against the
   ParameterInfo declaration. */
static ParameterInfo paraminfo[PARAMETER_FILENAME_COUNT] = { 
    { DataName, "", 0, INFILE },
    { OutCBName, "", 0, OUTFILE },
    { OutPAName, "", 1, OUTFILE },
    { InCBName, "", 2, INFILE },
    { InPAName, "", 3, INFILE } };

/* Validate the parsed parameters and file names, and normalize the
   parameters that depend on each other. Every failed check reports its
   message through ErrorMessage() and then calls ExitProcessing() with a
   code of its own (1-7). */
void CheckParameters() {
    /* Replace the "default" distance setting with the default distance of
       the selected evaluation function. */
    if (Value(Distance) == EvaluationFunctionDefault) {
        SetValue(Distance,
            ciCriterionDefaultDistance(Value(EvaluationFunction)));
    }

    /* For T-times the limit is a multiplier expressed in thousandths. */
    if (Value(HaltingCriterion) == TTimes) {
        if (Value(HaltingLimit) < 1001) {
            ErrorMessage(
                "Multiplier for T-times should be at least 1001 (1.001).\n");
            ExitProcessing(1);
        }
    }

    if (Value(SwapMethod) == NoSwap) {
        if (Value(GLAIterations) == 0) {
            ErrorMessage("No swap and no GLA-iterations: nothing happens.\n");
            ExitProcessing(2);
        }
    }

    /* Existing output files are rejected unless overwriting is allowed. */
    if (ExistFile(OutCBName) && !Value(Overwrite)) {
        ErrorMessage("Output codebook exists and no overwrite specified.\n");
        ExitProcessing(3);
    }
    if (ExistFile(OutPAName) && !Value(Overwrite)) {
        ErrorMessage("Output partition exists and no overwrite specified.\n");
        ExitProcessing(4);
    }

    /* Optional inputs only have to exist when a name was given. */
    if (InCBName[0] != '\0' && !ExistFile(InCBName)) {
        ErrorMessage("Input codebook not found.\n");
        ExitProcessing(5);
    }
    if (InPAName[0] != '\0' && !ExistFile(InPAName)) {
        ErrorMessage("Input partition not found.\n");
        ExitProcessing(6);
    }

    /* The data file is mandatory. */
    if (DataName[0] == '\0' || !ExistFile(DataName)) {
        ErrorMessage("Input data file not found.\n");
        ExitProcessing(7);
    }

    /* Store 1000000 / multiplier, i.e. the reciprocal of the T-times
       multiplier in millionths; main() divides HaltingLimit by 1000000
       before handing it to itNew(). */
    if (Value(HaltingCriterion) == TTimes) {
        SetValue(HaltingLimit,
            (int)(1000000 / (Value(HaltingLimit) / 1000.0)));
    }
}

/* Forward declaration: Initialize() calls Finalize() on its early-exit
   path, ahead of the definition of Finalize(). */
static void Finalize(TRAININGSET* TS, CODEBOOK* CB, PARTITIONING* PA);

/* Properties of the input data file. Their addresses are handed to
   ReadGenericData() in Initialize(), and their values are passed on to the
   writer calls in Finalize(). */
static float MinimumValue = 0;
static float MaximumValue = 0;
static int FileFormat = 0;

/* Read the data set and build the starting solution for the search.

   TS receives the data read from DataName. CB is either read from
   InCBName or created with NumberOfClusters entries and filled by
   GenerateRandomCodebook(). PA is either read from InPAName or generated
   from TS and CB. On return the frequency of every codebook entry has
   been copied from the matching partition.

   Failures are reported with ErrorMessage() followed by ExitProcessing()
   (codes 10-14). When the data set is not larger than the codebook and
   ClampBookSize is set, the data itself is written out as the solution
   through Finalize() and ExitProcessing(0) is called. */
static void Initialize(TRAININGSET* TS, CODEBOOK* CB, PARTITIONING* PA) {
    int i;
    int unusedFormat;
    float unusedMin, unusedMax;

    if (ReadGenericData(DataName, TS,
            &FileFormat, &MinimumValue, &MaximumValue)) {
        ErrorMessage("Failed to read data.\n");
        ExitProcessing(12);
    }

    if (InCBName[0] != '\0') {
        /* Start from the given centroids; the format and value range
           reported for this file are not used. */
        if (ReadGenericData(InCBName, CB,
                &unusedFormat, &unusedMin, &unusedMax)) {
            ErrorMessage("Failed to read centroids.\n");
            ExitProcessing(13);
        }
    } else {
        /* No input codebook. Duplicate data vectors are removed only when
           the data was not read as RGD_CB and no output partition was
           requested. */
        if (FileFormat != RGD_CB && OutPAName[0] == '\0') {
            const int verbose = (Value(QuietLevel) > 1);

            if (verbose) {
                PrintMessage(
                    "Data vectors before duplicate removal: %i,",
                    BookSize(TS));
            }
            RemoveDuplicatesFromCodebook(TS);
            if (verbose) {
                PrintMessage(" after: %i.\n", BookSize(TS));
            }
        }
        CreateNewCodebook(CB, Value(NumberOfClusters), TS);
    }

    if (BookSize(TS) > BookSize(CB)) {
        /* More data vectors than clusters: draw the random start unless
           centroids were read from a file. */
        if (InCBName[0] == '\0') {
            GenerateRandomCodebook(TS, CB);
        }
    } else if (Value(ClampBookSize)) {
        /* At most as many data vectors as clusters: shrink the codebook
           to the data size if needed, copy the data into it, write the
           result and stop. */
        if (BookSize(TS) < BookSize(CB)) {
            DecreaseCodebookSize(CB, BookSize(TS));
        }
        CopyCodebook(TS, CB);
        CreateNewPartitioning(PA, TS, BookSize(CB));
        if (OutPAName[0] != '\0') {
            PutAllInOwnPartition(TS, PA);
            GenerateOptimalPartitioning(TS, CB, PA);
        }
        Finalize(TS, CB, PA);
        ExitProcessing(0);
    } else {
        /* NOTE(review): this branch is also taken when the two counts are
           equal, although the message says "more". */
        ErrorMessage("More clusters than data vectors.\n");
        ExitProcessing(10);
    }

    if (InPAName[0] != '\0') {
        if (ReadGenericMapping(InPAName, TS, PA, &unusedFormat)) {
            ErrorMessage("Failed to read mapping.\n");
            ExitProcessing(14);
        }
    } else {
        CreateNewPartitioning(PA, TS, BookSize(CB));
        GenerateOptimalPartitioning(TS, CB, PA);
        /* Centroids read from a file are kept as they are; only a
           generated codebook is recomputed from the new partitioning. */
        if (InCBName[0] == '\0') {
            GenerateOptimalCodebook(TS, CB, PA);
        }
    }

    if (PartitionCount(PA) != BookSize(CB)) {
        ErrorMessage("Partition and codebook sizes do not match.\n");
        ExitProcessing(11);
    }

    /* Copy the size of every partition into the matching codebook entry. */
    for (i = 0; i < BookSize(CB); ++i) {
        VectorFreq(CB, i) = CCFreq(PA, i);
    }
}

/* Write the results and release the three data structures.

   When an output partition name was given, the mapping is written to it
   and the codebook is written in its current order; otherwise the codebook
   is first sorted with DATA_ASCENDING. The codebook is always written to
   OutCBName using the file format and value range recorded when the data
   was read. */
static void Finalize(TRAININGSET* TS, CODEBOOK* CB, PARTITIONING* PA) {
    if (OutPAName[0] != '\0') {
        WriteGenericMapping(OutPAName, TS, PA,
            FileFormat, Value(Overwrite));
    } else {
        /* No mapping refers to the codebook order, so it can be sorted. */
        SortCodebook(CB, DATA_ASCENDING);
    }

    WriteGenericData(OutCBName, CB,
        FileFormat, MinimumValue, MaximumValue, Value(Overwrite));

    FreeCodebook(TS);
    FreeCodebook(CB);
    FreePartitioning(PA);
}


/* Program entry point: parse and check the parameters, build the initial
   solution, run RandomizedLocalSearch() on it and write out the result.
   Returns whatever RandomizedLocalSearch() returned. */
int main(int argc, char** argv) {
    TRAININGSET TS;
    CODEBOOK CB;
    PARTITIONING PA;
    DistanceInfo* distInfo;
    CriterionInfo* critInfo;
    Iterations* haltRule;
    int searchStatus;
    int searchVerbosity;
    float finalError;

    ParseParameters(argc, argv, PARAMETER_FILENAME_COUNT, paraminfo);
    initrandom(Value(RandomNumberSeed));
    CheckParameters();
    PrintOperatingInfo();
    Initialize(&TS, &CB, &PA);

    distInfo = diNew(&TS, Value(Distance), 0, 0, 0, 0);
    critInfo = ciNew(&TS, distInfo, Value(EvaluationFunction),
        0, 0, 0, 0, 0, 0);
    /* HaltingLimit is stored as an integer number of millionths. */
    haltRule = itNew(Value(HaltingCriterion) + 1, Value(HaltingIterations),
        Value(HaltingLimit) / 1000000.0f, 0, 0);

    /* The search runs two verbosity levels below QuietLevel, never
       below zero. */
    searchVerbosity = Value(QuietLevel) - 2;
    if (searchVerbosity < 0) {
        searchVerbosity = 0;
    }

    searchStatus = RandomizedLocalSearch(&TS, &CB, &PA, critInfo,
        &finalError, haltRule, Value(GLAIterations), Value(SwapMethod),
        searchVerbosity, Value(TraceData), Value(TraceDataID));

    /* Report the final error here only for non-zero QuietLevel values
       below 3. */
    if (Value(QuietLevel) != 0 && Value(QuietLevel) < 3) {
        PrintMessage("\nError = %.6g\n\n", finalError);
    }

    itDelete(haltRule);
    ciDelete(critInfo);
    diDelete(distInfo);
    Finalize(&TS, &CB, &PA);

    return searchStatus;
}

