presage
0.8.8
Main Page
Classes
Files
File List
File Members
src
lib
core
tokenizer
forwardTokenizer.cpp
Go to the documentation of this file.
1
2
/******************************************************
3
* Presage, an extensible predictive text entry system
4
* ---------------------------------------------------
5
*
6
* Copyright (C) 2008 Matteo Vescovi <matteo.vescovi@yahoo.co.uk>
7
8
This program is free software; you can redistribute it and/or modify
9
it under the terms of the GNU General Public License as published by
10
the Free Software Foundation; either version 2 of the License, or
11
(at your option) any later version.
12
13
This program is distributed in the hope that it will be useful,
14
but WITHOUT ANY WARRANTY; without even the implied warranty of
15
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
GNU General Public License for more details.
17
18
You should have received a copy of the GNU General Public License along
19
with this program; if not, write to the Free Software Foundation, Inc.,
20
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21
*
22
**********(*)*/
23
24
25
#include "
forwardTokenizer.h
"
26
27
/**
 * Constructs a ForwardTokenizer over the given stream.
 *
 * All three arguments are handed unchanged to the Tokenizer base-class
 * constructor; the only work done here is to set the current offset to
 * offbeg.
 *
 * @param stream       input stream passed on to Tokenizer
 * @param blankspaces  blankspace character set passed on to Tokenizer
 * @param separators   separator character set passed on to Tokenizer
 */
ForwardTokenizer::ForwardTokenizer(std::istream& stream,
                                   const std::string blankspaces,
                                   const std::string separators)
    : Tokenizer(stream, blankspaces, separators)
{
    // Begin at offbeg.
    offset = offbeg;
}
35
36
/**
 * Destructor. The body is intentionally empty: this class performs no
 * clean-up of its own here.
 */
ForwardTokenizer::~ForwardTokenizer()
{
    // nothing to do
}
38
39
int
ForwardTokenizer::countTokens
()
40
{
41
StreamGuard
guard(
stream
,
offset
);
42
43
// store current seek pointer position
44
std::streamoff curroff =
offset
;
45
46
// position get pointer at beginning of stream
47
offset
=
offbeg
;
48
49
int
count = 0;
50
while
(
hasMoreTokens
()) {
51
count++;
52
nextToken
();
53
}
54
55
// reposition seek get pointer to original position
56
offset
= curroff;
57
58
return
count;
59
}
60
61
bool
ForwardTokenizer::hasMoreTokens
()
const
62
{
63
//StreamGuard guard(stream, offset);
64
65
if
(
offset
>=
offend
) {
66
return
false
;
67
}
else
{
68
return
true
;
69
}
70
}
71
72
std::string
ForwardTokenizer::nextToken
()
73
{
74
StreamGuard
guard(
stream
,
offset
);
75
76
int
current;
77
std::string str;
78
79
if
(
stream
.good()) {
// good() if bad,fail and eof bit are not set
80
current =
stream
.peek();
81
if
(
offset
<
offend
) {
82
83
while
(
isBlankspace
(current)
84
||
isSeparator
(current)) {
85
offset
++;
86
stream
.seekg(
offset
);
87
current =
stream
.peek();
88
}
89
90
while
(!
isBlankspace
(current)
91
&& !
isSeparator
(current)
92
&&
offset
<
offend
) {
93
94
//std::cerr << "[DEBUG] read: "
95
// << static_cast<char>(current)
96
// << std::endl;
97
98
if
(
lowercaseMode
() ) {
99
current = tolower( current );
100
}
101
102
str.push_back(current);
103
104
//std::cerr << "[DEBUG] pushed: "
105
// << static_cast<char>(current)
106
// << std::endl;
107
108
offset
++;
109
stream
.seekg(
offset
);
110
current =
stream
.peek();
111
}
112
}
113
114
// do {
115
// do {
116
// current = stream.peek();
117
// offset++;
118
// stream.seekg(offset);
119
//
120
// //std::cerr << "[DEBUG] read: "
121
// // << static_cast<char>(current)
122
// // << std::endl;
123
//
124
// if ( !isBlankspace(current)
125
// && !isSeparator(current)
126
// && offset <= offend) {
127
//
128
// if( lowercaseMode() ) {
129
// current = tolower( current );
130
// }
131
//
132
// str.push_back(current);
133
//
134
// //std::cerr << "[DEBUG] pushed: "
135
// // << static_cast<char>(current)
136
// // << std::endl;
137
// }
138
// } while ( !isBlankspace(current)
139
// && !isSeparator(current)
140
// && offset < offend);
141
// } while (str.empty() && (offset < offend));
142
}
else
{
143
std::cerr <<
"stream is NOT good!"
<<
std::endl
;
144
}
145
146
//std::cerr << "[DEBUG] token: " << str << std::endl;
147
148
return
str;
149
}
150
151
double
ForwardTokenizer::progress
()
const
152
{
153
return
static_cast<
double
>
(
offset
) /
offend
;
154
}
155
Generated on Tue Apr 8 2014 13:23:50 for presage by
1.8.1.2