00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #include <iostream>
00029 #include <algorithm>
00030 #include <functional>
00031
00032 #include "Utils/Utils.h"
00033 #include "IO/InputData.h"
00034
00035 #include "Defaults.h"
00036
00037 #include "IO/ClassMappings.h"
00038
00039 namespace MultiBoost {
00040
00041
00042
00050 InputData::~InputData()
00051 {
00052 for (int i = 0; i < _numExamples; ++i)
00053 delete [] _data[i].pValues;
00054 }
00055
00056 void InputData::initOptions(nor_utils::Args& args)
00057 {
00059
00060 if ( args.hasArgument("-hasfilename") )
00061 _hasFileName = true;
00062
00064
00065 if ( args.hasArgument("-classend") )
00066 _classInLastColumn = true;
00067 }
00068
00069
00070
00071 void InputData::load(const string& fileName, const eInputType inputType, const int verboseLevel)
00072 {
00073 if (verboseLevel > 0)
00074 {
00075 cout << "Loading file " << fileName << "...";
00076 cout.flush();
00077 }
00078
00079 ifstream inFile(fileName.c_str());
00080 if ( !inFile.is_open() )
00081 {
00082 cerr << "\nERROR: Cannot open file <" << fileName << ">!!" << endl;
00083 exit(1);
00084 }
00085
00086
00087
00088 inFile.imbue( locale(locale(), new nor_utils::white_tab) );
00089
00090 _numColumns = (int)nor_utils::count_columns(inFile);
00091 inFile.clear();
00092 inFile.seekg(0);
00093
00094
00095 if (_hasFileName)
00096 --_numColumns;
00097
00098
00099 --_numColumns;
00100
00101
00102
00103
00104 double* pDataArray = NULL;
00105
00106 string tmpFileName;
00107 string tmpClassName;
00108
00109 int classIndex = 0;
00110 _numExamples = 0;
00111
00112 map<int, int> tmpPointsPerClass;
00113
00115 while( !inFile.eof() )
00116 {
00117 if (_hasFileName)
00118 inFile >> tmpFileName;
00119
00120 if (!_classInLastColumn)
00121 inFile >> tmpClassName;
00122
00123 pDataArray = new double[_numColumns];
00124 if (!pDataArray)
00125 {
00126 cerr << "ERROR: Cannot allocate memory for storage!" << endl;
00127 exit(1);
00128 }
00129
00130 for (int j = 0; j < _numColumns; ++j)
00131 inFile >> pDataArray[j];
00132
00133
00134
00135 if ( inFile.eof() )
00136 {
00137 delete [] pDataArray;
00138 break;
00139 }
00140
00141 if (_classInLastColumn)
00142 inFile >> tmpClassName;
00143
00144 int classIdx = ClassMappings::addClassName(tmpClassName);
00145 tmpPointsPerClass[ classIdx ]++;
00146
00147 _data.push_back(Example(pDataArray, classIdx, tmpFileName));
00148 ++_numExamples;
00149 }
00150
00152
00153 const int numClasses = ClassMappings::getNumClasses();
00154
00155 for (int l = 0; l < numClasses; ++l)
00156 _nExamplesPerClass.push_back( tmpPointsPerClass[l] );
00157
00158
00159 initWeights();
00160
00161 #if MB_DEBUG
00162
00163 checkVariances();
00164 #endif
00165
00166 if (verboseLevel > 0)
00167 {
00168 cout << "Done!" << endl;
00169
00170 if (verboseLevel > 1)
00171 {
00172 cout << "Num Columns = " << _numColumns << endl;
00173
00174 for (int l = 0; l < numClasses; ++l)
00175 cout << "Of class '" << ClassMappings::getClassNameFromIdx(l) << "': "
00176 << _nExamplesPerClass[l] << endl;
00177
00178 cout << "Total: " << _numExamples << " examples read." << endl;
00179 }
00180 }
00181
00182 }
00183
00184
00185
00186
00187 void InputData::initWeights()
00188 {
00189 const int numClasses = ClassMappings::getNumClasses();
00190
00191 for (int i = 0; i < _numExamples; ++i)
00192 {
00193 _data[i].weights.resize(numClasses);
00194
00195 for (int l = 0; l < numClasses; ++l)
00196 {
00197
00198
00199
00200 if (l == _data[i].classIdx)
00201 _data[i].weights[l] = 1 / (2*(double)_numExamples);
00202 else
00203 _data[i].weights[l] = 1 / (2*(double)_numExamples*(double)(numClasses-1));
00204 }
00205 }
00206 }
00207
00208
00209
00210 #if MB_DEBUG
00211
00212 void InputData::checkVariances()
00213 {
00214
00215 for (int j = 0; j < _numColumns; ++j)
00216 {
00217 double valueChk = getValue(0, j);
00218 bool hasVariance = false;
00219
00220 for (int i = 1; i < _numExamples; ++i)
00221 {
00222 if ( getValue(i, j) != valueChk)
00223 {
00224 hasVariance = true;
00225 break;
00226 }
00227 }
00228
00229 if (!hasVariance)
00230 cerr << "WARNING!! Column " << j << " has no variance!" << endl;
00231 }
00232 }
00233 #endif // MB_DEBUG
00234
00235 }