Edinburgh Speech Tools 2.4-release
token_regression.cc
1/************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1996,1997 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/************************************************************************/
33/* Author: Alan W Black */
34/* Date: May 1997 */
35/************************************************************************/
36/* */
37/* Lets see if we can break the TokenStream class */
38/* */
39/************************************************************************/
40
41#include <cstdlib>
42#include "EST_Token.h"
43
44static void binary_read_test();
45
46static void find_tokens(EST_TokenStream &ts)
47{
48 // Count and display the tokens in this stream
49 int tokens;
50
51 for (tokens=0; !ts.eof(); tokens++)
52 cout << ts.get().string() << endl;
53 cout << "Total: " << tokens << endl << endl;;
54
55}
56
57int main(int argc,char **argv)
58{
59 // Simple program to read all the tokens in the named file
60 // a print a summary of them
61 (void)argc;
62 (void)argv;
64 EST_String s;
65
66 // Basic tokenizing tasks changing punctuation, whitespace and
67 // single character symbols etc.
68 s = "This is a test.";
69 cout << "Test 1: " << quote_string(s) << endl;
70 ts.open_string(s);
71 find_tokens(ts);
72 ts.close();
73
74 s = "This (is) a test.";
75 cout << "Test 2: " << quote_string(s) << endl;
76 ts.open_string(s);
77 find_tokens(ts);
78 ts.close();
79
80 s = "This (is) a test.";
81 cout << "Test 3: " << quote_string(s) << endl;
82 ts.open_string("This (is) a test.");
84 ts.set_PunctuationSymbols(EST_Token_Default_PunctuationSymbols);
85 find_tokens(ts);
86 ts.close();
87
88 s = "This (is) a test.";
89 cout << "Test 4: " << quote_string(s) << endl;
90 ts.open_string(s);
91 ts.set_SingleCharSymbols("()");
92 ts.set_PunctuationSymbols(EST_Token_Default_PunctuationSymbols);
93 find_tokens(ts);
94 ts.close();
95
96 s = "This \"is a\" te\\\"st.";
97 cout << "Test 5: " << quote_string(s) << endl;
98 ts.open_string(s);
99 ts.set_PrePunctuationSymbols(EST_Token_Default_PrePunctuationSymbols);
100 ts.set_PunctuationSymbols(EST_Token_Default_PunctuationSymbols);
101 find_tokens(ts);
102 ts.close();
103
104 s = "This \"is a\" te\\\"st.";
105 cout << "Test 6: " << quote_string(s) << endl;
106 ts.open_string(s);
107 ts.set_quotes('"','\\');
108 find_tokens(ts);
109 ts.close();
110
111 s = "This \"is \n\
112a\" te\\\"st.";
113 cout << "Test 7: " << quote_string(s) << endl;
114 ts.open_string(s);
115 ts.set_quotes('"','\\');
116 find_tokens(ts);
117 ts.close();
118
119 // test of reading binary data
120 binary_read_test();
121
122 return 0;
123}
124
125EST_String make_tokbins(const EST_String& filename)
126{
127 FILE *fd;
128 char buff[64];
129 int a[2];
130 int numbytes;
131 // Make a buffer with both tokens and binary data
132 sprintf(buff,"a buffer BINARY ");
133 a[0] = 7;
134 a[1] = -34;
135 memmove(buff+16,a,sizeof(int)*2);
136 sprintf(buff+16+(sizeof(int)*2)," and tokens");
137
138 if ((fd=fopen(filename,"w")) == NULL)
139 {
140 cerr << "Token_regression: failed to open " << filename << endl;
141 exit(-1);
142 }
143
144 numbytes = fwrite(buff,1,16+(sizeof(int)*2)+11,fd);
145 fclose(fd);
146
147 // Special constructions as the string contains nulls
148 return EST_String(buff,numbytes,0,numbytes);
149}
150
151static void binary_read_test()
152{
153 // You can use fread to read directly from a token stream
154 // but care should be take at the boundaries. Reading a
155 // token will always read the character following it. By
156 // convention it is recommended you include the single token
157 // BINARY follow by a single space in the stream before each
158 // binary section.
159 int b[2];
160 EST_String tokbinbuf;
162
163 tokbinbuf = make_tokbins("tmp/tokbin.dat");
164
165 // Do the reading
166
167 cout << "Reading tokens and binary from string\n";
168
169 ts.open_string(tokbinbuf);
170
171 cout << ts.get() << endl;
172 cout << ts.get() << endl;
173 if (ts.get() != "BINARY")
174 {
175 cout << "failed to read binary data, missing BINARY token." << endl;
176 exit(-1);
177 }
178 ts.fread(b,sizeof(int),2);
179 cout << b[0] << endl;
180 cout << b[1] << endl;
181 cout << ts.get() << endl;
182 cout << ts.get() << endl;
183 ts.close();
184
185 cout << "Reading tokens and binary from file\n";
186
187 ts.open("tmp/tokbin.dat");
188
189 cout << ts.get() << endl;
190 cout << ts.get() << endl;
191 if (ts.get() != "BINARY")
192 {
193 cout << "failed to read binary data, missing BINARY token." << endl;
194 exit(-1);
195 }
196 ts.fread(b,sizeof(int),2);
197 cout << b[0] << endl;
198 cout << b[1] << endl;
199 cout << ts.get() << endl;
200 cout << ts.get() << endl;
201 ts.close();
202
203}
204
205
int eof()
end of file
Definition: EST_Token.h:356
void set_SingleCharSymbols(const EST_String &sc)
set which characters are to be treated as single character symbols
Definition: EST_Token.h:338
int fread(void *buff, int size, int nitems) EST_WARN_UNUSED_RESULT
Reading binary data, (don't use peek() immediately beforehand)
Definition: EST_Token.cc:355
void set_PrePunctuationSymbols(const EST_String &ps)
set which characters are to be treated as pre-punctuation
Definition: EST_Token.h:344
int open_string(const EST_String &newbuffer)
open an EST_TokenStream on a string rather than a file
Definition: EST_Token.cc:251
void set_quotes(char q, char e)
set characters to be used as quotes and escape, and set quote mode
Definition: EST_Token.h:347
void set_PunctuationSymbols(const EST_String &ps)
set which characters are to be treated as (post) punctuation
Definition: EST_Token.h:341
void close(void)
Close stream.
Definition: EST_Token.cc:406
int open(const EST_String &filename)
open an EST_TokenStream on a file.
Definition: EST_Token.cc:200
EST_TokenStream & get(EST_Token &t)
get next token in stream
Definition: EST_Token.cc:486