hops
ThompsonSampling.hpp
Go to the documentation of this file.
1 #ifndef HOPS_THOMPSONSAMPLING_HPP
2 #define HOPS_THOMPSONSAMPLING_HPP
3 
8 
9 #include <cmath>
10 #include <limits>
11 #include <memory>
12 #include <chrono>
13 
14 namespace hops {
15  namespace internal {
16  template<typename ReturnType, typename InputType>
18  virtual std::tuple<ReturnType, ReturnType> operator()(const InputType&) = 0;
19  };
20  }
21 
22  template<typename MatrixType, typename VectorType, typename GaussianProcessType>
24  public:
26 
27  static bool optimize (const size_t numberOfPosteriorUpdates,
28  const size_t numberOfSamplingRounds,
29  const size_t numberOfRoundsForConvergence,
30  GaussianProcessType& initialGP,
31  std::shared_ptr<ThompsonSamplgTargetType> targetFunction,
32  const MatrixType& inputSpaceGrid,
33  RandomNumberGenerator randomNumberGenerator,
34  size_t* numberOfPosteriorUpdatesNeeded = nullptr,
35  double smoothingLength = 0) {
36  size_t maxElementIndex;
37  bool isConverged = false;
38 
39  size_t newMaximumPosteriorMeanIndex = 0, oldMaximumPosteriorMeanIndex = 0, sameMaximumCounter = 0;
40  GaussianProcess gp = initialGP.getPriorCopy();
41 
42  UniformBallKernel smoothingKernel = UniformBallKernel<MatrixType, VectorType>(smoothingLength);
44 
45  std::unordered_map<long, long> observedInputIndex;
46  std::vector<long> observedInputCount;
47  MatrixType observedInput(0, inputSpaceGrid.cols());
48  VectorType observedValueMean(0);
49  VectorType observedValueErrorMean(0);
50 
51  size_t i = 0;
52  for (; i < numberOfPosteriorUpdates; ++i) {
53  // if the posterior mean hasn't change for the last sameMaximumCounter of rounds,
54  // then we assume convergence and break
55  if (sameMaximumCounter == numberOfRoundsForConvergence) {
56  isConverged = true;
57  break;
58  }
59 
60  for (size_t j = 0; j < numberOfSamplingRounds; ++j) {
61  // sample the acquisition function and obtain its maximum
62  gp.sample(inputSpaceGrid, randomNumberGenerator, maxElementIndex);
63  VectorType testInput = inputSpaceGrid.row(maxElementIndex);
64 
65  // evaluate stepsize which maximized the sampled acquisition function
66  auto[newObservedValue, newObservedValueError] = (*targetFunction)(testInput);
67 
68  // aggregate data if this stepsize has been tested before
69  if (observedInputIndex.count(maxElementIndex)) {
70  size_t k = observedInputIndex[maxElementIndex];
71 
72  // increment counter
73  size_t m = observedInputCount[k];
74  observedInputCount[k] += 1;
75 
76  double oldObservedValueMean = observedValueMean(k); // store for variance update
77  observedValueMean(k) = (m * observedValueMean(k) + newObservedValue) / (m+1);
78  observedValueErrorMean(k) =
79  (m * (observedValueErrorMean(k) + std::pow(oldObservedValueMean, 2)) +
80  1 * (newObservedValueError + std::pow(newObservedValue, 2))) / (m+1) - std::pow(observedValueMean(k), 2);
81  } else {
82  size_t n = observedInput.rows();
83  observedInputIndex[maxElementIndex] = n;
84  observedInputCount.push_back(1);
85 
86  observedInput = internal::append(observedInput, testInput);
87  observedValueMean = internal::append(observedValueMean, newObservedValue);
88  observedValueErrorMean = internal::append(observedValueErrorMean, newObservedValueError);
89  }
90  }
91 
92  MatrixType newObservedInput;
93  VectorType newObservedValueMean;
94  VectorType newObservedValueErrorMean;
95 
96  // update the data to have the combined observed inputs, values and errors
97  std::tie(newObservedInput, newObservedValueMean, newObservedValueErrorMean) = data.updateObservations(
98  observedInput, observedValueMean, observedValueErrorMean);
99  data.addObservations(newObservedInput, newObservedValueMean, newObservedValueErrorMean);
100 
101  // compute smoothing on unsmoothed errors
102  //MatrixType weights = smoothingKernel(data.getObservedInputs(), data.getObservedInputs());
103  const MatrixType& weights = data.getObservedCovariance();
104  VectorType smoothedObservedValueErrors = weights * data.getObservedValueErrors();
105  //VectorType normalizer = weights * VectorType::Ones(weights.cols());
106  VectorType normalizer = weights.rowwise().sum(); // * VectorType::Ones(weights.cols());
107  smoothedObservedValueErrors = smoothedObservedValueErrors.array() / normalizer.array();
108 
109  //MatrixType print(smoothedObservedValueErrors.rows(), 2);
110  //print << data.getObservedValueErrors(), smoothedObservedValueErrors;
111  //std::cout << print << std::endl << std::endl;
112 
113  std::tie(newObservedInput, newObservedValueMean, smoothedObservedValueErrors) = gp.updateObservations(
114  data.getObservedInputs(), data.getObservedValues(), smoothedObservedValueErrors);
115  gp.addObservations(newObservedInput, newObservedValueMean, smoothedObservedValueErrors);
116 
117  //double newKernelSigma = 2*(data.getObservedValues().array() + data.getObservedValueErrors().array().sqrt()).maxCoeff();
118  //double newKernelSigma = data.getObservedValues().array().maxCoeff();
119  //newKernelSigma = ( newKernelSigma == 0 ? initialGP.getKernel().m_sigma : newKernelSigma );
120  //gp.setKernelSigma(newKernelSigma);
121 
122  // check maximum of posterior mean and increment counter, if the index didnt change
123  // or reset counter, if we have a new maximum
124  // also, as long as the maximum is zero, we keep exploring
125  auto max = gp.getPosteriorMean().maxCoeff(&newMaximumPosteriorMeanIndex);
126  if (newMaximumPosteriorMeanIndex != oldMaximumPosteriorMeanIndex || max == 0) {
127  sameMaximumCounter = 0;
128  } else {
129  ++sameMaximumCounter;
130  }
131  oldMaximumPosteriorMeanIndex = newMaximumPosteriorMeanIndex;
132  }
133 
134  if (numberOfPosteriorUpdatesNeeded) {
135  *numberOfPosteriorUpdatesNeeded = i;
136  }
137 
138  gp.sample(inputSpaceGrid, randomNumberGenerator, maxElementIndex);
139  initialGP = gp.getPosteriorCopy();
140 
141  return isConverged;
142  }
143  };
144 }
145 
146 #endif // HOPS_THOMPSONSAMPLING_HPP
hops::GaussianProcess::getPriorCopy
GaussianProcess getPriorCopy()
Definition: GaussianProcess.hpp:67
hops::GaussianProcess::getObservedValues
const VectorType & getObservedValues() const
Definition: GaussianProcess.hpp:357
hops::GaussianProcess::getObservedInputs
const MatrixType & getObservedInputs() const
Definition: GaussianProcess.hpp:356
hops::ThompsonSampling::optimize
static bool optimize(const size_t numberOfPosteriorUpdates, const size_t numberOfSamplingRounds, const size_t numberOfRoundsForConvergence, GaussianProcessType &initialGP, std::shared_ptr< ThompsonSamplgTargetType > targetFunction, const MatrixType &inputSpaceGrid, RandomNumberGenerator randomNumberGenerator, size_t *numberOfPosteriorUpdatesNeeded=nullptr, double smoothingLength=0)
Definition: ThompsonSampling.hpp:27
hops::MatrixType
Eigen::MatrixXd MatrixType
Definition: MatrixType.hpp:7
hops::internal::ThompsonSamplingTarget
Definition: ThompsonSampling.hpp:17
hops::GaussianProcess::getPosteriorMean
const VectorType & getPosteriorMean()
Definition: GaussianProcess.hpp:341
hops::internal::append
MatrixType append(const MatrixType &rhs, const MatrixType &lhs)
Definition: GaussianProcess.hpp:19
hops::GaussianProcess::updateObservations
std::tuple< MatrixType, MatrixType, VectorType > updateObservations(const MatrixType &x, const VectorType &y, const VectorType &error, bool isUnique=false)
Definition: GaussianProcess.hpp:199
pcg_detail::engine
Definition: pcg_random.hpp:364
SquaredExponentialKernel.hpp
UniformBallKernel.hpp
hops::internal::ThompsonSamplingTarget::operator()
virtual std::tuple< ReturnType, ReturnType > operator()(const InputType &)=0
hops::GaussianProcess::getPosteriorCopy
GaussianProcess getPosteriorCopy()
Definition: GaussianProcess.hpp:71
GaussianProcess.hpp
hops::GaussianProcess::getObservedCovariance
const MatrixType & getObservedCovariance() const
Definition: GaussianProcess.hpp:360
hops::GaussianProcess
Definition: GaussianProcess.hpp:53
hops
Definition: CsvReader.hpp:8
hops::GaussianProcess::addObservations
void addObservations(const MatrixType &x, VectorType &y, VectorType &error)
Definition: GaussianProcess.hpp:287
hops::VectorType
Eigen::VectorXd VectorType
Definition: VectorType.hpp:7
ZeroKernel.hpp
hops::ThompsonSampling
Definition: ThompsonSampling.hpp:23
hops::UniformBallKernel
Definition: UniformBallKernel.hpp:8
hops::GaussianProcess::sample
Eigen::VectorXd sample(const MatrixType &input, hops::RandomNumberGenerator &randomNumberGenerator, size_t &maxElement)
Definition: GaussianProcess.hpp:154
hops::GaussianProcess::getObservedValueErrors
const VectorType & getObservedValueErrors() const
Definition: GaussianProcess.hpp:358