src/IO/Serialization.cpp

00001 /*
00002 * This file is part of MultiBoost, a multi-class 
00003 * AdaBoost learner/classifier
00004 *
00005 * Copyright (C) 2005 Norman Casagrande
00006 * For information, write to nova77@gmail.com
00007 *
00008 * This library is free software; you can redistribute it and/or
00009 * modify it under the terms of the GNU Lesser General Public
00010 * License as published by the Free Software Foundation; either
00011 * version 2.1 of the License, or (at your option) any later version.
00012 *
00013 * This library is distributed in the hope that it will be useful,
00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016 * Lesser General Public License for more details.
00017 *
00018 * You should have received a copy of the GNU Lesser General Public
00019 * License along with this library; if not, write to the Free Software
00020 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00021 *
00022 */
00023 
00024 #include "IO/Serialization.h"
00025 #include "Utils/Utils.h" // for cmp_nocase
00026 
00027 #include <fstream> // input/output on file
00028 #include <cctype> // for isspace
00029 
00030 namespace MultiBoost {
00031 
00032 // -----------------------------------------------------------------------
00033 
00034 Serialization::Serialization(const string& shypFileName, const string& weakLearnerName)
00035 : _shypFileName(shypFileName)
00036 {
00037    // Clear file
00038    ofstream shypFile(shypFileName.c_str());
00039 
00040    // print the header
00041    shypFile << "<?xml version=\"1.0\"?>" << endl;
00042    shypFile << "<multiboost>" << endl;
00043    shypFile << standardTag("algo", weakLearnerName, 1) << endl;
00044 
00045 }
00046 
00047 // -----------------------------------------------------------------------
00048 
00049 Serialization::~Serialization()
00050 {
00051    // close tag
00052    ofstream shypFile(_shypFileName.c_str(), ios_base::app);
00053    shypFile << "</multiboost>" << endl;
00054 }
00055 
00056 // -----------------------------------------------------------------------
00057 
00058 void Serialization::saveHypotheses(vector<BaseLearner*>& weakHypotheses)
00059 {
00060    // save the weak hypotheses one by one.
00061    for (int i = 0; i < (int)weakHypotheses.size(); ++i)
00062       appendHypothesis(i, weakHypotheses[i]);
00063 }
00064 
00065 // -----------------------------------------------------------------------
00066 
00067 void  Serialization::appendHypothesis(const int iteration, BaseLearner* pWeakHypothesis)
00068 {
00069    // open in append mode
00070    ofstream shypFile(_shypFileName.c_str(), ios_base::app);
00071 
00072    // open the hypothesis tag
00073    shypFile << "\t<weakhyp iter=\"" << iteration << "\">" << endl;
00074    // save the hypothesis
00075    pWeakHypothesis->save(shypFile, 2); 
00076    // close the hypothesis tag
00077    shypFile << "\t</weakhyp>"<< endl;
00078 
00079    // add a separation "comment"
00080    shypFile << "\t<!-- ################################## -->" << endl;
00081 }
00082 
00083 // -----------------------------------------------------------------------
00084 // -----------------------------------------------------------------------
00085 // -----------------------------------------------------------------------
00086 
00087 void UnSerialization::loadHypotheses(const string& shypFileName, 
00088                                      vector<BaseLearner*>& weakHypotheses)
00089 {
00090    // open file
00091    ifstream inFile(shypFileName.c_str());
00092    if (!inFile.is_open())
00093    {
00094       cerr << "ERROR: Cannot open strong hypothesis file <" << shypFileName << ">!" << endl;
00095       exit(1);
00096    }
00097 
00098    // Declares the stream tokenizer
00099    nor_utils::StreamTokenizer st(inFile, "<>\n\r\t");
00100 
00101    // Move until it finds the multiboost tag
00102    if ( !seekSimpleTag(st, "multiboost") )
00103    {
00104       // no multiboost tag found: this is not the correct file!
00105       cerr << "ERROR: Not a valid MultiBoost Strong Hypothesis file!!" << endl;
00106       exit(1);
00107    }
00108 
00109    // Move until it finds the algo tag
00110    string basicLearnerName = seekAndParseEnclosedValue<string>(st, "algo");
00111 
00112    // Check if the weak learner exists
00113    if ( !BaseLearner::RegisteredLearners().hasLearner(basicLearnerName) )
00114    {
00115       cerr << "ERROR: Weak learner <" << basicLearnerName << "> not registered!!" << endl;
00116       exit(1);
00117    }
00118 
00119    string rawTag;
00120    string tag, tagParam, tagValue;
00121 
00122    while (true)
00123    {
00124       // move until the next weak hypothesis
00125       if ( seekParamTag(st, "weakhyp") )
00126       {
00127          // allocate the weak learner object
00128          BaseLearner* pWeakHypothesis = 
00129             BaseLearner::RegisteredLearners().getLearner(basicLearnerName)->create();
00130 
00131          // load it
00132          pWeakHypothesis->load(st);
00133 
00134          // store it in the vector
00135          weakHypotheses.push_back(pWeakHypothesis);
00136       }
00137       else
00138          break;
00139    }
00140 
00141 }
00142 
00143 // -----------------------------------------------------------------------
00144 
00145 bool UnSerialization::seekSimpleTag(nor_utils::StreamTokenizer& st, const string& tag)
00146 {
00147    do{
00148       if ( nor_utils::cmp_nocase( st.next_token(), tag ) )
00149          return true;
00150    } while( st.has_token() );
00151    
00152    return false;   
00153 }
00154 
00155 // -----------------------------------------------------------------------
00156 
00157 bool UnSerialization::seekParamTag(nor_utils::StreamTokenizer& st, const string& tag)
00158 {
00159 
00160    do {
00161       // the full tag. I.e. <tag param="val">
00162       string rawTag = st.next_token();
00163       string tagOnly;
00164       string::const_iterator p = rawTag.begin();
00165 
00166       // get tag name
00167       insert_iterator<string> tagIt(tagOnly, tagOnly.begin());
00168       for ( ; p != rawTag.end(); ++p)
00169       {
00170          if ( isspace(*p) )
00171             break;
00172          *tagIt = *p;
00173       }
00174 
00175       // check if it is the one we are looking for
00176       if ( nor_utils::cmp_nocase( tagOnly, tag ) )
00177          return true;
00178       
00179    } while( st.has_token() );
00180 
00181    return false;
00182 }
00183 
00184 // -----------------------------------------------------------------------
00185 
00186 void UnSerialization::parseParamTag(const string& str, string& tag, string& tagParam, string& paramValue)
00187 {
00188    // simple tag. Return just the string
00189    if ( str.find('=') == string::npos )
00190    {
00191       tag = str;
00192       return;
00193    } 
00194 
00195    tag = "";
00196    tagParam = "";
00197    paramValue = "";
00198 
00199    string::const_iterator p = str.begin();
00200 
00201    // get tag name
00202    insert_iterator<string> tagIt(tag, tag.begin());
00203    for ( ; p != str.end(); ++p)
00204    {
00205       if ( isspace(*p) )
00206          break;
00207       *tagIt = *p;
00208    }
00209 
00210    // skip white spaces
00211    for ( ; isspace(*p) && p != str.end() ; ++p );
00212 
00213    // get param name
00214    insert_iterator<string> paramIt(tagParam, tagParam.begin());
00215    for ( ; p != str.end(); ++p)
00216    {
00217       if (*p == '=')
00218          break;
00219       *paramIt = *p;
00220    }
00221 
00222    // skip white spaces
00223    for ( ; p != str.end() && isspace(*p); ++p );
00224    // skip =
00225    for ( ; p != str.end() && *p == '='; ++p );
00226    // skip white spaces
00227    for ( ; p != str.end() && isspace(*p); ++p );
00228 
00229    // skip opening "
00230    for ( ; *p == '"' && p != str.end() ; ++p );
00231 
00232    // get param value
00233    insert_iterator<string> valueIt(paramValue, paramValue.begin());
00234    for ( ; p != str.end(); ++p)
00235    {
00236       if (*p == '\"')
00237          break;
00238       *valueIt = *p;
00239    }
00240 
00241 }
00242 
00243 // -----------------------------------------------------------------------
00244 
00245 bool UnSerialization::seekAndParseParamTag(nor_utils::StreamTokenizer& st, const string& tag, string& tagParam, string& paramValue)
00246 {
00247    bool tagFound = false;
00248    string rawTag;
00249    string foundTag;
00250    string::const_iterator p;
00251 
00252    tagParam = "";
00253    paramValue = "";
00254 
00255    do {
00256 
00257       // the full tag. I.e. <tag param="val">
00258       rawTag = st.next_token();
00259       foundTag = "";
00260 
00261       p = rawTag.begin();
00262 
00263       // get tag name
00264       insert_iterator<string> tagIt(foundTag, foundTag.begin());
00265       for ( ; p != rawTag.end(); ++p)
00266       {
00267          if ( isspace(*p) )
00268             break;
00269          *tagIt = *p;
00270       }
00271 
00272       if ( nor_utils::cmp_nocase(tag, foundTag) )
00273       {
00274          tagFound = true;
00275          break;
00276       }
00277 
00278    } while( st.has_token() );
00279 
00280    if ( !tagFound )
00281       return false;
00282 
00283    // skip white spaces
00284    for ( ; isspace(*p) && p != rawTag.end() ; ++p );
00285 
00286    // get param name
00287    insert_iterator<string> paramIt(tagParam, tagParam.begin());
00288    for ( ; p != rawTag.end(); ++p)
00289    {
00290       if (*p == '=')
00291          break;
00292       *paramIt = *p;
00293    }
00294 
00295    // skip white spaces
00296    for ( ; p != rawTag.end() && isspace(*p); ++p );
00297    // skip =
00298    for ( ; p != rawTag.end() && *p == '='; ++p );
00299    // skip white spaces
00300    for ( ; p != rawTag.end() && isspace(*p); ++p );
00301 
00302    // skip opening "
00303    for ( ; p != rawTag.end() && *p == '"'; ++p );
00304 
00305    // get param value
00306    insert_iterator<string> valueIt(paramValue, paramValue.begin());
00307    for ( ; p != rawTag.end(); ++p)
00308    {
00309       if (*p == '\"')
00310          break;
00311       *valueIt = *p;
00312    }
00313 
00314    return true;
00315 }
00316 
00317 // -----------------------------------------------------------------------
00318 
00319 } // end of namespace MultiBoost

Generated on Mon Nov 28 21:43:46 2005 for MultiBoost by  doxygen 1.4.5