/******************************************************************

    CBGLA.C

    Ismo Kärkkäinen

    Randomized GLA for a fixed number of clusters. If an input
    codebook is given, the program only improves that codebook.

******************************************************************/

#define ProgName       "CBGLA"
#define VersionNumber  "Version 0.1.3"
#define LastUpdated    "9.10.2002" /* iak */

#define  FACTFILE  "cbgla.fac"

#include "parametr.c"
#include "gla.h"
#include "cb.h"
#include "file.h"
#include "interfc.h"
#include "memctrl.h"
#include "random.h"
#include "cb_util.h"
#include "DistCrit/distance.h"
#include "DistCrit/criteria.h"
#include "sortcb.h"

#define MAXFILENAME 512

#ifndef max
#define max(a,b) ((a) > (b) ? (a) : (b))
#endif

/*
 * Prints the program banner (name, version, last-modified date and the
 * compile date/time), a one-line description, the command-line usage
 * line, and then the option list via PrintOptions().
 */
void PrintInfo() {
    PrintMessage(
        /* One literal per output line; the compiler concatenates them. */
        "%s\t%s\n"
        "(last modified %s, compiled %s, %s)\n\n"
        "Randomized GLA for fixed number of clusters\n"
        "Usage: %s data.ts out.cb [out.pa [in.cb [in.pa]]] %coptions\n\n",
        ProgName,           /* banner: program name        */
        VersionNumber,      /* banner: version string      */
        LastUpdated,        /* banner: last modified       */
        __DATE__,           /* banner: compile date        */
        __TIME__,           /* banner: compile time        */
        ProgName,           /* usage line: program name    */
        OPTION_SYMBOL);     /* usage line: option prefix   */

    PrintOptions();
    PrintMessage("\n");
}

/*
 * Prints a short run header (program name, version, last-modified date)
 * followed by the selected options.  Nothing is printed when the
 * QuietLevel parameter is zero.
 */
void PrintOperatingInfo() {
    if (!Value(QuietLevel)) {
        return;
    }

    PrintMessage("\n%s %s %s\n\n", ProgName, VersionNumber, LastUpdated);
    PrintSelectedOptions();
}

/*
 * File-name buffers for the positional command-line arguments
 *     data.ts out.cb [out.pa [in.cb [in.pa]]]
 * Each buffer starts out as an empty string; the rest of this file treats
 * an empty name (first character '\0') as "argument not given".
 * Presumably ParseParameters() fills them in through paraminfo below --
 * TODO confirm against parametr.c.
 */
/* Input data set. */
static char DataName[MAXFILENAME] = "\0";
/* Output codebook. */
static char OutCBName[MAXFILENAME] = "\0";
/* Output partitioning (optional). */
static char OutPAName[MAXFILENAME] = "\0";
/* Input codebook to improve (optional). */
static char InCBName[MAXFILENAME] = "\0";
/* Input partitioning (optional). */
static char InPAName[MAXFILENAME] = "\0";

/* Number of entries in paraminfo; passed to ParseParameters() in main(). */
#define PARAMETER_FILENAME_COUNT 5

/*
 * One descriptor per positional file-name argument, in command-line order.
 * NOTE(review): the third field (0, 0, 1, 2, 3) looks like an
 * "optionality level" matching the nesting of the brackets in the usage
 * line, and the last field marks the file as input or output -- the
 * ParameterInfo definition is not visible here, so confirm in parametr.c.
 */
static ParameterInfo paraminfo[PARAMETER_FILENAME_COUNT] = { 
    { DataName, "", 0, INFILE },
    { OutCBName, "", 0, OUTFILE },
    { OutPAName, "", 1, OUTFILE },
    { InCBName, "", 2, INFILE },
    { InPAName, "", 3, INFILE } };

/*
 * Validates the parsed parameters before any data is read.
 *
 * First, if the Distance parameter is still EvaluationFunctionDefault, it
 * is replaced by ciCriterionDefaultDistance() of the chosen evaluation
 * function.  Then the file arguments are checked in a fixed order; the
 * first failing check reports an error and calls ExitProcessing() with
 * the code shown:
 *
 *   3  output codebook exists and Overwrite is not set
 *   4  output partition exists and Overwrite is not set
 *   5  an input codebook was named but does not exist
 *   6  an input partition was named but does not exist
 *   7  the data file name is empty or the file does not exist
 */
void CheckParameters() {
    if (Value(Distance) == EvaluationFunctionDefault) {
        SetValue(Distance,
            ciCriterionDefaultDistance(Value(EvaluationFunction)));
    }

    /* Outputs must not silently clobber existing files. */
    if (ExistFile(OutCBName) && !Value(Overwrite)) {
        ErrorMessage("Output codebook exists and no overwrite specified.\n");
        ExitProcessing(3);
    }

    if (ExistFile(OutPAName) && !Value(Overwrite)) {
        ErrorMessage("Output partition exists and no overwrite specified.\n");
        ExitProcessing(4);
    }

    /* Optional inputs: only checked when a name was actually supplied. */
    if (InCBName[0] != '\0' && !ExistFile(InCBName)) {
        ErrorMessage("Input codebook not found.\n");
        ExitProcessing(5);
    }

    if (InPAName[0] != '\0' && !ExistFile(InPAName)) {
        ErrorMessage("Input partition not found.\n");
        ExitProcessing(6);
    }

    /* The data file is mandatory: it must be named and must exist. */
    if (!(DataName[0] != '\0' && ExistFile(DataName))) {
        ErrorMessage("Input data file not found.\n");
        ExitProcessing(7);
    }
}

/* Forward declaration: Initialize() calls Finalize() on its early-exit path. */
static void Finalize(TRAININGSET* TS, CODEBOOK* CB, PARTITIONING* PA);

/*
 * Properties of the input data file.  Initialize() passes their addresses
 * to ReadGenericData() when reading the data set, and Finalize() hands the
 * values back to WriteGenericData() / WriteGenericMapping() when writing
 * the results.
 */
static float MinimumValue = 0;
static float MaximumValue = 0;
static int FileFormat = 0;

/*
 * Reads the training data and prepares the initial codebook and
 * partitioning.
 *
 * TS  receives the data read from DataName.
 * CB  receives the codebook read from InCBName, or, when no input
 *     codebook was named, a new codebook of Value(NumberOfClusters)
 *     entries created with CreateNewCodebook().
 * PA  receives the mapping read from InPAName, or a new partitioning
 *     with one partition per codebook entry.
 *
 * Failures report an error and call ExitProcessing() with:
 *   12  data file could not be read
 *   13  input codebook could not be read
 *   14  input partitioning could not be read
 *   10  more clusters than data vectors and ClampBookSize is not set
 *   11  partition count differs from the codebook size
 *
 * When there are more clusters than data vectors and ClampBookSize is
 * set, the data vectors themselves become the codebook, the result is
 * written with Finalize() and ExitProcessing(0) is called.
 */
static void Initialize(TRAININGSET* TS, CODEBOOK* CB, PARTITIONING* PA) {
    /* Out-parameters required by the readers; the values are not used. */
    float ignoredMin, ignoredMax;
    int ignoredFormat;

    if (ReadGenericData(DataName, TS,
        &FileFormat, &MinimumValue, &MaximumValue))
    {
        ErrorMessage("Failed to read data.\n");
        ExitProcessing(12);
    }

    if (*InCBName) {
        if (ReadGenericData(InCBName, CB,
            &ignoredFormat, &ignoredMin, &ignoredMax))
        {
            ErrorMessage("Failed to read centroids.\n");
            ExitProcessing(13);
        }
    } else { /* do random solution */
        /* if no input codebook given, no partitioning either */
        /*
         * Duplicates are removed only when the data file is not itself a
         * codebook and no partition output was requested (presumably
         * because dropping vectors would change the vector-to-cluster
         * mapping -- confirm).
         */
        if (FileFormat != RGD_CB && !*OutPAName) {
            if (Value(QuietLevel) > 1) {
                PrintMessage(
                    "Data vectors before duplicate removal: %i,",
                    BookSize(TS));
            }
            RemoveDuplicatesFromCodebook(TS);
            if (Value(QuietLevel) > 1) {
                PrintMessage(" after: %i.\n", BookSize(TS));
            }
        }
        CreateNewCodebook(CB, Value(NumberOfClusters), TS);
    }

    if (BookSize(TS) < BookSize(CB)) {
        if (Value(ClampBookSize)) {
            /*
             * CB already owns a codebook (read or created above).
             * Release it before re-creating, otherwise that allocation
             * is overwritten and leaked.
             */
            FreeCodebook(CB);
            CreateNewCodebook(CB, BookSize(TS), TS);
            CopyCodebook(TS, CB);
            CreateNewPartitioning(PA, TS, BookSize(CB));
            if (*OutPAName) {
                PutAllInOwnPartition(TS, PA);
                GenerateOptimalPartitioning(TS, CB, PA);
            }
            Finalize(TS, CB, PA);
            ExitProcessing(0);
        } else {
            ErrorMessage("More clusters than data vectors.\n");
            ExitProcessing(10);
        }
    }

    if (*InPAName) {
        if (ReadGenericMapping(InPAName, TS, PA, &ignoredFormat)) {
            ErrorMessage("Failed to read mapping.\n");
            ExitProcessing(14);
        }
    } else {
        CreateNewPartitioning(PA, TS, BookSize(CB));
    }

    if (PartitionCount(PA) != BookSize(CB)) {
        ErrorMessage("Partition and codebook sizes do not match.\n");
        ExitProcessing(11);
    }
}

/*
 * Writes the results and releases the three data structures.
 *
 * When an output partition file was named, the partitioning is written
 * to it and the codebook is left in its current order; otherwise the
 * codebook is sorted with DATA_ASCENDING before being written
 * (presumably sorting is skipped in the first case so the written
 * mapping stays consistent with the codebook order -- confirm).
 * The codebook is always written to OutCBName, using the file format
 * and value range recorded for the input data.
 */
static void Finalize(TRAININGSET* TS, CODEBOOK* CB, PARTITIONING* PA) {
    if (*OutPAName) {
        WriteGenericMapping(OutPAName, TS, PA,
            FileFormat, Value(Overwrite));
    } else {
        SortCodebook(CB, DATA_ASCENDING);
    }

    WriteGenericData(OutCBName, CB,
        FileFormat, MinimumValue, MaximumValue,
        Value(Overwrite));

    /* Release everything the caller handed over. */
    FreeCodebook(TS);
    FreeCodebook(CB);
    FreePartitioning(PA);
}

/*
 * Program entry point.
 *
 * Parses the command line, seeds the random number generator, validates
 * the parameters, loads the data, and then either
 *   - runs GeneralizedLloydAlgorithm() on the given input codebook, or
 *   - runs RandomizedGeneralizedLloydAlgorithm() when no input codebook
 *     was named.
 * The resulting error value is optionally printed and the results are
 * written by Finalize().  Returns 0.
 */
int main(int argc, char** argv) {
    TRAININGSET trainingSet;
    CODEBOOK codebook;
    PARTITIONING partitioning;
    DistanceInfo* distInfo;
    CriterionInfo* critInfo;
    float finalError;
    int improveGivenCodebook;

    ParseParameters(argc, argv, PARAMETER_FILENAME_COUNT, paraminfo);
    initrandom(Value(RandomNumberSeed));
    CheckParameters();
    PrintOperatingInfo();
    Initialize(&trainingSet, &codebook, &partitioning);

    distInfo = diNew(&trainingSet, Value(Distance), 0, 0, 0, 0);
    critInfo = ciNew(&trainingSet, distInfo, Value(EvaluationFunction),
        0, 0, 0, 0, 0, 0);

    /*
     * NOTE: a disabled debugging cross-check used to sit here as
     * commented-out code.  It compared the output of
     * ciPartitionOptimally() / ciCalculateOptimalCentroids() with that
     * of GenerateOptimalPartitioningGeneral() /
     * GenerateOptimalCodebookGeneral() under MSE and reported any
     * differing mappings, frequencies or vector values.
     */

    /* A non-empty input codebook name selects plain GLA refinement. */
    improveGivenCodebook = (*InCBName != '\0');

    if (improveGivenCodebook) {
        GeneralizedLloydAlgorithm(&trainingSet, &codebook, &partitioning,
            Value(GLAIterations), critInfo);
        finalError = ciEvaluate(critInfo, &codebook, &partitioning);
    } else {
        RandomizedGeneralizedLloydAlgorithm(
            &trainingSet, &codebook, &partitioning, critInfo,
            &finalError,
            Value(InitialSolutions),
            Value(GLAIterations),
            max(0, Value(QuietLevel) - 2),
            Value(TraceData),
            Value(TraceDataID));
    }

    /*
     * Print the error unless quiet; in the randomized case it is only
     * printed here for quiet levels below 3.
     */
    if (Value(QuietLevel)
        && (improveGivenCodebook || (Value(QuietLevel) < 3)))
    {
        PrintMessage("\nError = %.6g\n\n", finalError);
    }

    ciDelete(critInfo);
    diDelete(distInfo);
    Finalize(&trainingSet, &codebook, &partitioning);
    return 0;
}

