1 #ifndef HOPS_THOMPSONSAMPLING_HPP
2 #define HOPS_THOMPSONSAMPLING_HPP
16 template<
typename ReturnType,
typename InputType>
18 virtual std::tuple<ReturnType, ReturnType>
operator()(
const InputType&) = 0;
22 template<
typename MatrixType,
typename VectorType,
typename GaussianProcessType>
27 static bool optimize (
const size_t numberOfPosteriorUpdates,
28 const size_t numberOfSamplingRounds,
29 const size_t numberOfRoundsForConvergence,
30 GaussianProcessType& initialGP,
31 std::shared_ptr<ThompsonSamplgTargetType> targetFunction,
34 size_t* numberOfPosteriorUpdatesNeeded =
nullptr,
35 double smoothingLength = 0) {
36 size_t maxElementIndex;
37 bool isConverged =
false;
39 size_t newMaximumPosteriorMeanIndex = 0, oldMaximumPosteriorMeanIndex = 0, sameMaximumCounter = 0;
45 std::unordered_map<long, long> observedInputIndex;
46 std::vector<long> observedInputCount;
47 MatrixType observedInput(0, inputSpaceGrid.cols());
52 for (; i < numberOfPosteriorUpdates; ++i) {
55 if (sameMaximumCounter == numberOfRoundsForConvergence) {
60 for (
size_t j = 0; j < numberOfSamplingRounds; ++j) {
62 gp.
sample(inputSpaceGrid, randomNumberGenerator, maxElementIndex);
63 VectorType testInput = inputSpaceGrid.row(maxElementIndex);
66 auto[newObservedValue, newObservedValueError] = (*targetFunction)(testInput);
69 if (observedInputIndex.count(maxElementIndex)) {
70 size_t k = observedInputIndex[maxElementIndex];
73 size_t m = observedInputCount[k];
74 observedInputCount[k] += 1;
76 double oldObservedValueMean = observedValueMean(k);
77 observedValueMean(k) = (m * observedValueMean(k) + newObservedValue) / (m+1);
78 observedValueErrorMean(k) =
79 (m * (observedValueErrorMean(k) + std::pow(oldObservedValueMean, 2)) +
80 1 * (newObservedValueError + std::pow(newObservedValue, 2))) / (m+1) - std::pow(observedValueMean(k), 2);
82 size_t n = observedInput.rows();
83 observedInputIndex[maxElementIndex] = n;
84 observedInputCount.push_back(1);
88 observedValueErrorMean =
internal::append(observedValueErrorMean, newObservedValueError);
97 std::tie(newObservedInput, newObservedValueMean, newObservedValueErrorMean) = data.
updateObservations(
98 observedInput, observedValueMean, observedValueErrorMean);
99 data.
addObservations(newObservedInput, newObservedValueMean, newObservedValueErrorMean);
106 VectorType normalizer = weights.rowwise().sum();
107 smoothedObservedValueErrors = smoothedObservedValueErrors.array() / normalizer.array();
113 std::tie(newObservedInput, newObservedValueMean, smoothedObservedValueErrors) = gp.
updateObservations(
115 gp.
addObservations(newObservedInput, newObservedValueMean, smoothedObservedValueErrors);
126 if (newMaximumPosteriorMeanIndex != oldMaximumPosteriorMeanIndex || max == 0) {
127 sameMaximumCounter = 0;
129 ++sameMaximumCounter;
131 oldMaximumPosteriorMeanIndex = newMaximumPosteriorMeanIndex;
134 if (numberOfPosteriorUpdatesNeeded) {
135 *numberOfPosteriorUpdatesNeeded = i;
138 gp.
sample(inputSpaceGrid, randomNumberGenerator, maxElementIndex);
146 #endif // HOPS_THOMPSONSAMPLING_HPP