src/IO/InputData.h

Go to the documentation of this file.
00001 /*
00002 * This file is part of MultiBoost, a multi-class 
00003 * AdaBoost learner/classifier
00004 *
00005 * Copyright (C) 2005 Norman Casagrande
00006 * For information, write to nova77@gmail.com
00007 *
00008 * This library is free software; you can redistribute it and/or
00009 * modify it under the terms of the GNU Lesser General Public
00010 * License as published by the Free Software Foundation; either
00011 * version 2.1 of the License, or (at your option) any later version.
00012 *
00013 * This library is distributed in the hope that it will be useful,
00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016 * Lesser General Public License for more details.
00017 *
00018 * You should have received a copy of the GNU Lesser General Public
00019 * License along with this library; if not, write to the Free Software
00020 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00021 *
00022 */
00023 
00028 #ifndef __INPUT_DATA_H
00029 #define __INPUT_DATA_H
00030 
00031 #include <vector>
00032 #include <map> // for class mappings
00033 #include <utility> // for pair
00034 #include <fstream> // for I/O
00035 
00036 #include "IO/ClassMappings.h"
00037 #include "Utils/Args.h"
00038 
00039 using namespace std;
00040 
00041 namespace MultiBoost {
00042 
00045 
00046 // A couple of useful typedefs
00047 typedef vector< pair<int, double> >::iterator       vpIterator; // mutable iterator over a vector of (int, double) pairs
00048 typedef vector< pair<int, double> >::const_iterator cvpIterator; // read-only iterator over the same vector type
00049 
00055 enum eInputType
00056 {
         // Tag describing what kind of input is being read. It is the type of
         // the `inputType` parameter of InputData::load(), where IT_TRAIN is
         // the default value.
00057    IT_TRAIN, // presumably: the input is training data -- confirm against load()
00058    IT_TEST // presumably: the input is test data -- confirm against load()
00059 };
00060 
00081 class InputData
00082 {
         // Container for a set of examples. Each stored example carries a
         // pointer to its values, a class index, a vector of weights indexed by
         // class, and an optional file name (see the nested Example struct).
         //
         // Most accessors are virtual, so the class is meant to be subclassed.
         // load(), initOptions(), initWeights() and the destructor are only
         // declared here; their implementations are not visible in this header.
         //
         // None of the inline accessors performs bounds checking: `idx`,
         // `classIdx` and `columnIdx` are used directly with operator[].
00083 public:
00084 
         // Creates an empty data set: both counters are zero, both option
         // flags are false, and the containers are default-constructed (empty).
00089    InputData() : _numColumns(0), _numExamples(0),
00090                  _hasFileName(false), _classInLastColumn(false) {}
00091 
         // Virtual destructor; defined out of line.
         // NOTE(review): Example::pValues is a raw pointer. Whether the
         // destructor releases that memory cannot be told from this header --
         // confirm in the implementation file.
00097    virtual ~InputData();
00098 
         // Configures this object from the given argument set. Implementation
         // not visible here; presumably this is where _hasFileName and
         // _classInLastColumn get their non-default values -- TODO confirm.
00106    virtual void initOptions(nor_utils::Args& args);
00107 
         // Loads the data from `fileName`.
         //   fileName     : name of the input file.
         //   inputType    : kind of input; defaults to IT_TRAIN.
         //   verboseLevel : defaults to 1; presumably controls how much is
         //                  reported while loading -- confirm in the
         //                  implementation.
00116    virtual void load(const string& fileName, const eInputType inputType = IT_TRAIN, 
00117                      const int verboseLevel = 1);
00118 
         // Returns the class index stored for example `idx`.
         // The `const` on the by-value return type is part of the signature
         // that overriding classes have to repeat.
00125    virtual const int  getClass(const int idx) const { return _data[idx].classIdx; }
00126 
         // Returns element `columnIdx` of the value array of example `idx`.
         // The array is reached through a raw pointer, so `columnIdx` is not
         // checked against any size.
00133    virtual const double getValue(const int idx, const int columnIdx) const {return _data[idx].pValues[columnIdx]; }
00134 
         // Two-valued view of the label of example `idx`: returns 1 when the
         // example's class index equals `classIdx`, and -1 otherwise.
00147    virtual const int  getBinaryClass(const int idx, const int classIdx) const 
00148                                     { return _data[idx].classIdx == classIdx ? 1: -1;}
00149 
         // Returns the weight stored for example `idx` and class `classIdx`.
00156    virtual double   getWeight(const int idx, const int classIdx) const { return _data[idx].weights[classIdx]; }
00157 
         // Overwrites the weight of example `idx` for class `classIdx` with
         // `value`. The weights vector must already hold an entry for
         // `classIdx`; this setter does not resize it.
00165    virtual void setWeight(const int idx, const int classIdx, const double value) 
00166                           { _data[idx].weights[classIdx] = value; }
00167 
00168    int      getNumColumns() const { return _numColumns; }   // current value of _numColumns
00169    int      getNumExamples() const { return _numExamples; } // current value of _numExamples
00170 
         // Returns the entry of _nExamplesPerClass at position `classIdx`.
00176    int      getNumExamplesPerClass(const int classIdx) const { return _nExamplesPerClass[classIdx]; }
00177 
00178 protected:
00179 
00180    int      _numColumns;   // 0 after construction; presumably the number of values per example -- confirm in load()
00181    int      _numExamples;  // 0 after construction; presumably the number of loaded examples -- confirm in load()
00182    vector<int>   _nExamplesPerClass;   // per-class counts, indexed by class index (see getNumExamplesPerClass)
00183 
00184    bool     _hasFileName;         // false after construction; presumably: each input row carries a file name -- confirm
00185    bool     _classInLastColumn;   // false after construction; presumably: the class label is the last column -- confirm
00186 
         // Sets up the per-example weights. Implementation not visible here;
         // virtual so that subclasses can supply their own scheme.
00201    virtual void  initWeights();
00202 
         // checkVariances() is declared only when MB_DEBUG evaluates to non-zero.
00203 #if MB_DEBUG
00204    void checkVariances(); 
00205 #endif
00206 
00207    // --------------------------------------------------------------------
00208 
         // One stored example.
00213    struct Example
00214    {
            // Stores the given pointer (the values are not copied), the
            // class index and the file name (empty string by default).
            // `weights` is left empty by this constructor.
00224       Example(double* pValues, const int classIdx, const string& fileName = "") : 
00225               pValues(pValues), classIdx(classIdx), fileName(fileName) {}
00226 
00227       double*        pValues; // pointer to the example's values; ownership is not visible in this header
00228       vector<double> weights; // weights of this example, indexed by class index
00229       int            classIdx; // class index of this example
00230       string         fileName; // file name attached to this example; "" when none was given
00231    };
00232 
00233 
00234    vector<Example>   _data;       // the stored examples, indexed by example index
00235 
00236 };
00237 
00238 } // end of namespace MultiBoost
00239 
00240 #endif // __INPUT_DATA_H

Generated on Mon Nov 28 21:43:46 2005 for MultiBoost by  doxygen 1.4.5