presage  0.8.8
contextTracker.cpp
Go to the documentation of this file.
1 
2 /******************************************************
3  * Presage, an extensible predictive text entry system
4  * ---------------------------------------------------
5  *
6  * Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
7 
8  This program is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation; either version 2 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License along
19  with this program; if not, write to the Free Software Foundation, Inc.,
20  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21  *
22  **********(*)*/
23 
24 
25 #include "contextTracker.h"
26 #include "../utility.h"
27 #include "../predictorRegistry.h"
28 #include "../tokenizer/forwardTokenizer.h"
29 
30 #include <stdlib.h> // for atoi()
31 
// Fully qualified names of the ContextTracker configuration variables.
// NOTE(review): presumably these are the variable names that the
// set_logger(), set_sliding_window_size() and set_lowercase_mode()
// handlers are registered against - confirm against the dispatch map
// built in the constructor.
32 const char* ContextTracker::LOGGER = "Presage.ContextTracker.LOGGER";
33 const char* ContextTracker::SLIDING_WINDOW_SIZE = "Presage.ContextTracker.SLIDING_WINDOW_SIZE";
34 const char* ContextTracker::LOWERCASE_MODE = "Presage.ContextTracker.LOWERCASE_MODE";
35 
37  PredictorRegistry* registry,
38  PresageCallback* callback,
39  const char wChars[],
40  const char tChars[],
41  const char bChars[],
42  const char cChars[])
43  : wordChars (wChars),
44  separatorChars (tChars),
45  blankspaceChars(bChars),
46  controlChars (cChars),
47  predictorRegistry (registry),
48  logger ("ContextTracker", std::cerr),
49  //tokenizer (pastStream, blankspaceChars, separatorChars),
50  lowercase_mode (true),
51  dispatcher (this)
52 {
53  if (callback) {
55  } else {
56  throw new PresageException(PRESAGE_INVALID_CALLBACK_ERROR, "Invalid callback object");
57  }
58 
64 
65  // set pointer to this context tracker in predictor registry so that
66  // predictors can be constructed when next iterator is requested
67  //
68  if (predictorRegistry) {
70  }
71 
72  // build dispatch map
76 }
77 
79 {
80  delete contextChangeDetector;
81 }
82 
83 void ContextTracker::set_logger (const std::string& value)
84 {
85  logger << setlevel (value);
86  logger << INFO << "LOGGER: " << value << endl;
87 }
88 
89 void ContextTracker::set_sliding_window_size (const std::string& value)
90 {
92  logger << INFO << "SLIDING_WINDOWS_SIZE: " << value << endl;
93 }
94 
95 void ContextTracker::set_lowercase_mode (const std::string& value)
96 {
98  logger << INFO << "LOWERCASE_MODE: " << value << endl;
99 }
100 
102 {
104  if (new_callback) {
105  context_tracker_callback = new_callback;
106  }
107  return result;
108 }
109 
114 {
116 }
117 
119 {
120  // the first step in the update procedure is to learn from the
121  // newly entered text.
122 
123  std::stringstream change;
124 
125  // detect change that needs to be learned
127  logger << INFO << "update(): change: " << change.str() << endl;
128 
129  // split change up into tokens
130  std::vector<std::string> change_tokens;
131  ForwardTokenizer tok(change,
135  logger << INFO << "update(): tokenized change: ";
136  while (tok.hasMoreTokens()) {
137  std::string token = tok.nextToken();
138  change_tokens.push_back(token);
139  logger << INFO << token << '|';
140  }
141  logger << INFO << endl;
142 
143  if (! change_tokens.empty()) {
144  // remove prefix (partially entered token or empty token)
145  change_tokens.pop_back();
146  }
147 
148  logger << INFO << "update(): sanitized change tokens: ";
149  for (std::vector<std::string>::const_iterator it = change_tokens.begin();
150  it != change_tokens.end();
151  it++) {
152  logger << INFO << *it << '|';
153  }
154  logger << INFO << endl;
155 
156  // time to learn
158  Predictor* predictor = 0;
159 
160  while (it.hasNext()) {
161  predictor = it.next();
162  predictor->learn(change_tokens);
163  }
164 
165  // update sliding window
167 }
168 
169 std::string ContextTracker::getPrefix() const
170 {
171  return getToken(0);
172 }
173 
174 std::string ContextTracker::getToken(const int index) const
175 {
176  std::stringstream pastStringStream(context_tracker_callback->get_past_stream());
177  ReverseTokenizer tokenizer(pastStringStream, blankspaceChars, separatorChars);
178  tokenizer.lowercaseMode(lowercase_mode);
179 
180  std::string token;
181  int i = 0;
182  while (tokenizer.hasMoreTokens() && i <= index) {
183  token = tokenizer.nextToken();
184  i++;
185  }
186  if (i <= index) {
187  // in case the index points too far back
188  token = "";
189  }
190  return token;
191 
193 // "a b c"
194 // 2 1 0
195 // 0 1 2
196 // 1 2 3
197 //
198 // ForwardTokenizer tokenizer(pastStream, blankspaceChars, separatorChars);
199 // tokenizer.lowercaseMode(lowercase_mode);
200 // std::string result;
201 // int tokens = tokenizer.countTokens();
202 // // why oh why is this clear() required to get it to work???
203 // pastStream.clear();
204 // int j = 0;
205 // while (tokenizer.hasMoreTokens() && j < tokens - index) {
206 // result = tokenizer.nextToken();
207 // j++;
208 //
209 // std::cerr << "ContextTracker::getToken() current token: " << result << std::endl;
210 // }
211 // return result;
212 }
213 
214 std::string ContextTracker::getExtraTokenToLearn(const int index, const std::vector<std::string>& change) const
215 {
216  //logger << DEBUG
217  // << "past_stream : " << getPastStream() << endl
218  // << "change : " << contextChangeDetector->change(getPastStream()) << endl
219  // << "sliding_window: " << contextChangeDetector->get_sliding_window() + "\n" << endl;
220 
221 
222  // Extra tokens to learn are to be found in (past_stream - change)
223  //
224  // The change tokens are tokens that have not been seen or learnt
225  // before.
226  //
227  // The extra tokens to learn are tokens that have been seen and
228  // learn before, but that we need to reuse to fill out the n-gram
229  // of required cardinality that we are about to learn.
230  //
231  // To find the extra tokens to learn, we use the size of tokenized
232  // change vector to offset the index and extract the extra tokens
233  // to learn from the past stream.
234  //
235  // For example:
236  // past_stream : "The quick brown fox jumped over the "
237  // change : |over|the|
238  // extra_tokens: |The|quick|brown|fox|jumped|
239  //
240  return getToken(index + change.size());
241 }
242 
244 {
246 }
247 
248 std::string ContextTracker::getPastStream() const
249 {
250  std::string result = context_tracker_callback->get_past_stream();
251  return result;
252 }
253 
254 bool ContextTracker::isCompletionValid(const std::string& completion) const
255 {
256  bool result = false;
257 
258  std::string prefix = getPrefix();
259  prefix = Utility::strtolower(prefix); // no need to be case sensitive
260  if (completion.find(prefix) == 0) {
261  result = true;
262  }
263 
264  return result;
265 }
266 
267 bool ContextTracker::isWordChar(const char c) const
268 {
269  if(wordChars.find(c, 0) != std::string::npos)
270  return true;
271  else
272  return false;
273 }
274 
275 bool ContextTracker::isSeparatorChar(const char c) const
276 {
277  if(separatorChars.find(c, 0) != std::string::npos)
278  return true;
279  else
280  return false;
281 }
282 
283 bool ContextTracker::isBlankspaceChar(const char c) const
284 {
285  if(blankspaceChars.find(c, 0) != std::string::npos)
286  return true;
287  else
288  return false;
289 }
290 
291 bool ContextTracker::isControlChar(const char c) const
292 {
293  if(controlChars.find(c, 0) != std::string::npos)
294  return true;
295  else
296  return false;
297 }
298 
299 std::string ContextTracker::getWordChars() const
300 {
301  return wordChars;
302 }
303 
305 {
306  return separatorChars;
307 }
308 
310 {
311  return blankspaceChars;
312 }
313 
315 {
316  return controlChars;
317 }
318 
319 std::string ContextTracker::toString() const
320 {
322 }
323 
324 void ContextTracker::update (const Observable* variable)
325 {
326  logger << DEBUG << "Notification received: "
327  << variable->get_name () << " - " << variable->get_value () << endl;
328 
329  dispatcher.dispatch (variable);
330 }