karmedbandit.h
Go to the documentation of this file.
00001
00002 #ifndef INCLUDED_karmedbandit_h_
00003 #define INCLUDED_karmedbandit_h_
00004
00005 #include <vector>
00006 #include <iostream>
00007 #include <stdlib.h>
00008 #include <math.h>
00009
00010
00011
00012
00013
00014
00015
//! Exp3-style k-armed bandit: keeps one weight per arm, samples an arm from a
//! mixture of the normalised weights and a uniform distribution, and boosts the
//! weight of the sampled arm when it is rewarded.
/*! Progress is logged to std::cout on every decide() and reward() call.
 *  Random numbers come from rand(); seeding is left to the caller. */
class karmedbanditExp3 {
public:
    //! Creates a bandit with @a k arms (all weights 1) and mixing rate @a gammap.
    karmedbanditExp3(unsigned int k, double gammap)
        : w(k, 1), lastp(0), last(-1U), g(gammap)
    {}

    //! Samples an arm and remembers it (and its probability) for the next reward().
    /*! @return the chosen arm index in [0, getK()), or -1U if no arm could be
     *  chosen (no arms, or the probabilities are not numbers). */
    unsigned int decide() {
        const unsigned int k = getK();
        std::vector<double> p(k);

        std::cout << "w =";
        for (unsigned int i = 0; i < k; i++)
            std::cout << ' ' << w[i];
        std::cout << std::endl;

        double wsum = 0;
        for (unsigned int i = 0; i < k; i++)
            wsum += w[i];
        // Mixture: (1-g) of the weight-proportional distribution plus g of uniform.
        for (unsigned int i = 0; i < k; i++)
            p[i] = (1 - g) * w[i] / wsum + g / k;

        std::cout << "p =";
        for (unsigned int i = 0; i < k; i++)
            std::cout << ' ' << p[i];
        std::cout << std::endl;

        // Scale the draw by the actual sum so rounding in p[] cannot bias the pick.
        double psum = 0;
        for (unsigned int i = 0; i < k; i++)
            psum += p[i];
        double pick = (rand() / static_cast<double>(RAND_MAX)) * psum;

        // Roulette-wheel selection.
        for (unsigned int i = 0; i < k; i++) {
            pick -= p[i];
            if (pick <= 0) {
                lastp = p[i];
                return last = i;
            }
        }

        // Rounding can leave a tiny positive remainder after the last arm; that
        // draw belongs to the last arm. (pick==pick is false only for NaN.)
        if (k > 0 && pick == pick) {
            lastp = p[k - 1];
            return last = k - 1;
        }

        // Nothing selectable: invalidate 'last' so a following reward() is a no-op
        // instead of indexing the weight vector with a stale or invalid arm.
        last = -1U;
        return last;
    }

    //! Reports the outcome of the most recent decide(); a reward boosts that arm's weight.
    void reward(bool r) {
        if (r) {
            // Only update when there is a valid previous decision with a usable
            // probability; otherwise w[last] would be out of bounds or the
            // exponent would divide by zero.
            if (last < w.size() && lastp > 0) {
                w[last] *= exp(g / lastp / w.size());
                // decide() only uses weight ratios, so rescaling every weight by
                // the same factor leaves the probabilities unchanged while
                // preventing overflow to infinity (which would make them NaN).
                const double rescaleAbove = 1e100;
                if (w[last] > rescaleAbove) {
                    double wmax = w[0];
                    for (unsigned int i = 1; i < w.size(); i++)
                        if (w[i] > wmax)
                            wmax = w[i];
                    for (unsigned int i = 0; i < w.size(); i++)
                        w[i] /= wmax;
                }
            }
            std::cout << "REWARD! :)" << std::endl;
        } else
            std::cout << "no reward. :(" << std::endl;
    }

    //! Sets every arm weight back to 1 (the remembered last decision is kept).
    void reset() {
        for (unsigned int i = 0; i < w.size(); i++)
            w[i] = 1;
    }

    //! Returns the current mixing rate.
    double getGamma() const { return g; }

    //! Replaces the mixing rate; weights are left untouched.
    void setGamma(double gammap) { g = gammap; }

    //! Returns the number of arms.
    unsigned int getK() const { return static_cast<unsigned int>(w.size()); }

protected:
    std::vector<double> w; //!< one weight per arm
    double lastp;          //!< probability with which the last arm was chosen
    unsigned int last;     //!< last arm returned by decide(), -1U if none
    double g;              //!< mixing rate between weighted and uniform choice
};
00077
00078
00079
00080
00081
00082
00083
00084 class karmedbanditExp3_1 {
00085 public:
00086
00087 karmedbanditExp3_1(unsigned int k)
00088 : r(0), gr(0), last(0), G(k,0), exp3(k,0)
00089 {
00090 restart();
00091 }
00092
00093
00094 unsigned int decide() {
00095 double maxG=G[0];
00096 for(unsigned int i=1;i<G.size();i++)
00097 if(G[i]>maxG)
00098 maxG=G[i];
00099 if(maxG>gr-exp3.getK()/exp3.getGamma()) {
00100 restart();
00101 return last=decide();
00102 }
00103 return last=exp3.decide();
00104 }
00105
00106 void reward(bool rew) {
00107 if(rew)
00108 G[last]+=1;
00109 exp3.reward(rew);
00110 }
00111 protected:
00112
00113 void restart() {
00114 std::cout << "Exp3 restart, g=" << std::flush;
00115 unsigned int k=exp3.getK();
00116 gr=(k*log((double)k))/(M_E-1)*pow(4.0,(double)r);
00117 double gammap=sqrt(k*log((double)k)/(M_E-1)/gr);
00118
00119 exp3.setGamma(gammap<1?gammap:1);
00120 std::cout << (gammap<1?gammap:1) << std::endl;
00121 r++;
00122 }
00123 unsigned int r;
00124 double gr;
00125 unsigned int last;
00126 std::vector<double> G;
00127 karmedbanditExp3 exp3;
00128 };
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141 #endif
|