TinyTrainable
 
Loading...
Searching...
No Matches
recognize_commands.h
Go to the documentation of this file.
1/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
2
3Licensed under the Apache License, Version 2.0 (the "License");
4you may not use this file except in compliance with the License.
5You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9Unless required by applicable law or agreed to in writing, software
10distributed under the License is distributed on an "AS IS" BASIS,
11WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12See the License for the specific language governing permissions and
13limitations under the License.
14==============================================================================*/
15
16#ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_RECOGNIZE_COMMANDS_H_
17#define TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_RECOGNIZE_COMMANDS_H_
18
19#include <cstdint>
20
22#include "tensorflow/lite/c/common.h"
23#include "tensorflow/lite/micro/micro_log.h"
24
25// Partial implementation of std::deque, just providing the functionality
26// that's needed to keep a record of previous neural network results over a
27// short time period, so they can be averaged together to produce a more
28// accurate overall prediction. This doesn't use any dynamic memory allocation
29// so it's a better fit for microcontroller applications, but this does mean
30// there are hard limits on the number of results it can store.
32 public:
34
35 // Data structure that holds an inference result, and the time when it
36 // was recorded.
37 struct Result {
38 Result() : time_(0), scores() {}
39 Result(int32_t time, int8_t* input_scores) : time_(time) {
40 for (int i = 0; i < kCategoryCount; ++i) {
41 scores[i] = input_scores[i];
42 }
43 }
44 int32_t time_;
46 };
47
48 int size() { return size_; }
49 bool empty() { return size_ == 0; }
52 int back_index = front_index_ + (size_ - 1);
53 if (back_index >= kMaxResults) {
54 back_index -= kMaxResults;
55 }
56 return results_[back_index];
57 }
58
59 void push_back(const Result& entry) {
60 if (size() >= kMaxResults) {
61 MicroPrintf("Couldn't push_back latest result, too many already!");
62 return;
63 }
64 size_ += 1;
65 back() = entry;
66 }
67
69 if (size() <= 0) {
70 MicroPrintf("Couldn't pop_front result, none present!");
71 return Result();
72 }
73 Result result = front();
74 front_index_ += 1;
76 front_index_ = 0;
77 }
78 size_ -= 1;
79 return result;
80 }
81
82 // Most of the functions mirror those of std::deque, but this
83 // is a helper that makes it easy to iterate through the contents of the
84 // queue.
85 Result& from_front(int offset) {
86 if ((offset < 0) || (offset >= size_)) {
87 MicroPrintf("Attempt to read beyond the end of the queue!");
88 offset = size_ - 1;
89 }
90 int index = front_index_ + offset;
91 if (index >= kMaxResults) {
92 index -= kMaxResults;
93 }
94 return results_[index];
95 }
96
97 private:
98 static constexpr int kMaxResults = 50;
100
102 int size_;
103};
104
105// This class is designed to apply a very primitive decoding model on top of the
106// instantaneous results from running an audio recognition model on a single
107// window of samples. It applies smoothing over time so that noisy individual
108// label scores are averaged, increasing the confidence that apparent matches
109// are real.
110// To use it, you should create a class object with the configuration you
111// want, and then feed results from running a TensorFlow model into the
112// processing method. The timestamp for each subsequent call should be
113// increasing from the previous, since the class is designed to process a stream
114// of data over time.
116 public:
117 // labels should be a list of the strings associated with each one-hot score.
118 // The window duration controls the smoothing. Longer durations will give a
119 // higher confidence that the results are correct, but may miss some commands.
120 // The detection threshold has a similar effect, with high values increasing
121 // the precision at the cost of recall. The minimum count controls how many
122 // results need to be in the averaging window before it's seen as a reliable
123 // average. This prevents erroneous results when the averaging window is
124 // initially being populated for example. The suppression argument disables
125 // further recognitions for a set time after one has been triggered, which can
126 // help reduce spurious recognitions.
127 explicit RecognizeCommands(int32_t average_window_duration_ms = 1000,
128 uint8_t detection_threshold = 200,
129 int32_t suppression_ms = 1500,
130 int32_t minimum_count = 3);
131
132 // Call this with the results of running a model on sample data.
133 TfLiteStatus ProcessLatestResults(const TfLiteTensor* latest_results,
134 const int32_t current_time_ms,
135 const char** found_command, uint8_t* score,
136 bool* is_new_command);
137
138 private:
139 // Configuration
144
145 // Working variables
149};
150
151#endif // TENSORFLOW_LITE_MICRO_EXAMPLES_MICRO_SPEECH_RECOGNIZE_COMMANDS_H_
Definition: recognize_commands.h:31
Result & back()
Definition: recognize_commands.h:51
int size()
Definition: recognize_commands.h:48
static constexpr int kMaxResults
Definition: recognize_commands.h:98
PreviousResultsQueue()
Definition: recognize_commands.h:33
void push_back(const Result &entry)
Definition: recognize_commands.h:59
Result results_[kMaxResults]
Definition: recognize_commands.h:99
int size_
Definition: recognize_commands.h:102
Result & front()
Definition: recognize_commands.h:50
int front_index_
Definition: recognize_commands.h:101
Result & from_front(int offset)
Definition: recognize_commands.h:85
Result pop_front()
Definition: recognize_commands.h:68
bool empty()
Definition: recognize_commands.h:49
Definition: recognize_commands.h:115
PreviousResultsQueue previous_results_
Definition: recognize_commands.h:146
TfLiteStatus ProcessLatestResults(const TfLiteTensor *latest_results, const int32_t current_time_ms, const char **found_command, uint8_t *score, bool *is_new_command)
Definition: recognize_commands.cpp:35
int32_t suppression_ms_
Definition: recognize_commands.h:142
const char * previous_top_label_
Definition: recognize_commands.h:147
int32_t minimum_count_
Definition: recognize_commands.h:143
int32_t average_window_duration_ms_
Definition: recognize_commands.h:140
uint8_t detection_threshold_
Definition: recognize_commands.h:141
int32_t previous_top_label_time_
Definition: recognize_commands.h:148
constexpr int kCategoryCount
Definition: micro_features_micro_model_settings.h:40
Definition: recognize_commands.h:37
Result()
Definition: recognize_commands.h:38
int8_t scores[kCategoryCount]
Definition: recognize_commands.h:45
Result(int32_t time, int8_t *input_scores)
Definition: recognize_commands.h:39
int32_t time_
Definition: recognize_commands.h:44