Homepage Demos Overview Downloads Tutorials Reference
Credits

karmedbandit.h

Go to the documentation of this file.
00001 //-*-c++-*-
00002 #ifndef INCLUDED_karmedbandit_h_
00003 #define INCLUDED_karmedbandit_h_
00004 
00005 #include <vector>
00006 #include <iostream>
00007 #include <stdlib.h>
00008 #include <math.h>
00009 
//!Makes decisions regarding an adversarial k-armed bandit (Exp3 with a fixed gamma)
/*! Uses algorithms described in:
 *  The non-stochastic multi-armed bandit problem
 *  Auer, Cesa-Bianchi, Freund, and Schapire
 *  October 14, 2002
 *
 *  Usage: call decide() to pick an arm, then reward() to report whether that
 *  pick paid off.  Each arm carries a weight; decide() samples an arm from a
 *  mix of the normalized weights and a uniform distribution (mixing factor
 *  gamma), and reward(true) multiplies the weight of the arm picked last.
 *
 *  decide() and reward() write a debugging trace to std::cout.  Random numbers
 *  come from rand(); this class never calls srand().
 */
class karmedbanditExp3 {
 public:
  //!constructor, pass the number of arms (@a k) and the gamma mixing parameter (@a gammap)
  karmedbanditExp3(unsigned int k,double gammap)
    : w(k,1),lastp(0),last(-1U),g(gammap)
  {}

  //!returns the next choice, [0:k-1], or -1U if there are no arms at all
  unsigned int decide() {
    const unsigned int k=static_cast<unsigned int>(w.size());

    // dump the weights while accumulating their sum
    double wsum=0;
    std::cout << "w =";
    for(unsigned int i=0; i<k; i++) {
      std::cout << ' ' << w[i];
      wsum+=w[i];
    }
    std::cout << std::endl;

    // probability of each arm: (1-g) share by weight, g share spread uniformly
    std::vector<double> p(k);
    double psum=0;
    std::cout << "p =";
    for(unsigned int i=0; i<k; i++) {
      p[i]=(1-g)*w[i]/wsum+g/k;
      std::cout << ' ' << p[i];
      psum+=p[i];
    }
    std::cout << std::endl;

    // roulette-wheel selection; scaled by psum so rounding in p cannot leave a gap
    double pick=(rand()/static_cast<double>(RAND_MAX))*psum;
    for(unsigned int i=0; i<k; i++) {
      pick-=p[i];
      if(pick<=0) {
        lastp=p[i];
        return last=i;
      }
    }

    if(k==0)
      return -1U; // nothing to choose from

    // Accumulated rounding error can leave pick marginally above zero after the
    // final subtraction; that draw belongs to the last arm, not to "no choice".
    lastp=p[k-1];
    return last=k-1;
  }
  //!call this if you want to reward (r==true) or penalize (r==false) the previous decision
  /*! A penalty leaves the weights untouched.  A reward is ignored (apart from
   *  the trace message) if there is no valid previous decision to apply it to. */
  void reward(bool r) {
    if(r) {
      // last is -1U and lastp is 0 until decide() has succeeded; lastp>0 also rejects NaN
      if(last<w.size() && lastp>0) {
        w[last]*=exp(g/lastp/w.size());
        // Weights only ever grow, so a long run of rewards would overflow
        // them to infinity and turn the probabilities into NaN.  decide()
        // only uses the ratios w[i]/wsum, so rescaling every weight by the
        // same factor keeps the probabilities while keeping values finite.
        if(!(w[last]<=1e100)) {
          const double top=w[last];
          for(unsigned int i=0; i<w.size(); i++)
            w[i]/=top;
          w[last]=1; // exact, and well defined even if top reached infinity
        }
      }
      std::cout << "REWARD! :)" << std::endl;
    } else
      std::cout << "no reward. :(" << std::endl;
  }
  //!resets weights (every arm back to 1); gamma and the last decision are left as they are
  void reset() {
    for(unsigned int i=0; i<w.size(); i++)
      w[i]=1;
  }
  //!gets gamma parameter
  double getGamma() const { return g; }
  //!sets gamma parameter
  void setGamma(double gammap) { g=gammap; }
  //!gets k parameter (the number of arms)
  unsigned int getK() const { return static_cast<unsigned int>(w.size()); }
 protected:
  std::vector<double> w; //!< the weights
  double lastp; //!< prob of last choice (0 until the first decide())
  unsigned int last; //!< the last choice (-1U until the first decide())
  double g; //!< gamma
};
00077 
00078 //!Makes decisions regarding an adversarial k-armed bandit
00079 /*! Uses algorithms described in:
00080  *  The non-stochastic multi-armed bandit problem
00081  *  Auer, Cesa-Bianchi, Freund, and Schapire
00082  *  October 14, 2002
00083  */
00084 class karmedbanditExp3_1 {
00085  public:
00086   //!constructor, pass the number of arms
00087   karmedbanditExp3_1(unsigned int k)
00088     : r(0), gr(0), last(0), G(k,0), exp3(k,0)
00089   {
00090     restart();
00091   }
00092 
00093   //!returns the next choice, [0:k-1]
00094   unsigned int decide() {
00095     double maxG=G[0];
00096     for(unsigned int i=1;i<G.size();i++)
00097       if(G[i]>maxG)
00098         maxG=G[i];
00099     if(maxG>gr-exp3.getK()/exp3.getGamma()) {
00100       restart();
00101       return last=decide();
00102     }
00103     return last=exp3.decide();
00104   }
00105   //!call this if you want to reward (r==true) or penalize (r==false) the previous decision
00106   void reward(bool rew) {
00107     if(rew)
00108       G[last]+=1;
00109     exp3.reward(rew);
00110   }
00111  protected:
00112   //!restarts exp3
00113   void restart() {
00114     std::cout << "Exp3 restart, g=" << std::flush;
00115     unsigned int k=exp3.getK();
00116     gr=(k*log((double)k))/(M_E-1)*pow(4.0,(double)r);
00117     double gammap=sqrt(k*log((double)k)/(M_E-1)/gr);
00118     //    exp3.reset(); //not sure if we're supposed to do this
00119     exp3.setGamma(gammap<1?gammap:1);
00120     std::cout << (gammap<1?gammap:1) << std::endl;
00121     r++;
00122   }
00123   unsigned int r; //!< the number of restarts
00124   double gr; //!< the gamma_r parameter
00125   unsigned int last; //!< the last choice
00126   std::vector<double> G; //!< the G-hat's
00127   karmedbanditExp3 exp3; //!< runs exp3 within this
00128 };
00129 
00130 /*! @file
00131  * @brief Defines karmedbandit - implements an algorithm which makes decisions regarding an adversarial k-armed bandit
00132  * @author ejt (Creator)
00133  *
00134  * $Author: neilh $
00135  * $Name: tekkotsu-2_1 $
00136  * $Revision: 1.3 $
00137  * $State: Rel $
00138  * $Date: 2003/09/18 22:40:27 $
00139  */
00140 
00141 #endif

Tekkotsu v2.1
Generated Tue Mar 16 23:19:13 2004 by Doxygen 1.3.5