c++-gtk-utils
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
c++-gtk-utils
reassembler.h
Go to the documentation of this file.
1
/* Copyright (C) 2005 to 2010 Chris Vine
2
3
The library comprised in this file or of which this file is part is
4
distributed by Chris Vine under the GNU Lesser General Public
5
License as follows:
6
7
This library is free software; you can redistribute it and/or
8
modify it under the terms of the GNU Lesser General Public License
9
as published by the Free Software Foundation; either version 2.1 of
10
the License, or (at your option) any later version.
11
12
This library is distributed in the hope that it will be useful, but
13
WITHOUT ANY WARRANTY; without even the implied warranty of
14
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15
Lesser General Public License, version 2.1, for more details.
16
17
You should have received a copy of the GNU Lesser General Public
18
License, version 2.1, along with this library (see the file LGPL.TXT
19
which came with this source code package in the c++-gtk-utils
20
sub-directory); if not, write to the Free Software Foundation, Inc.,
21
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22
23
*/
24
25
#ifndef CGU_REASSEMBLER_H
26
#define CGU_REASSEMBLER_H
27
28
#include <
c++-gtk-utils/shared_handle.h
>
29
#include <
c++-gtk-utils/cgu_config.h
>
30
31
namespace
Cgu {
32
33
namespace
Utf8 {
34
35
36
/**
37
* @class Reassembler reassembler.h c++-gtk-utils/reassembler.h
38
* @brief A class for reassembling UTF-8 strings sent over pipes and
39
* sockets so they form complete valid UTF-8 characters.
40
*
41
* Utf8::Reassembler is a functor class which takes in a partially
42
* formed UTF-8 string and returns a nul-terminated string comprising
43
* such of the input string (after inserting, at the beginning, any
44
* partially formed UTF-8 character which was at the end of the input
45
* string passed in previous calls to the functor) as forms complete
46
* UTF-8 characters (storing any partial character at the end for the
47
* next call to the functor). If the input string contains invalid
48
* UTF-8 after adding any stored previous part character (apart from
49
* any partially formed character at the end of the input string) then
50
* operator() will return a null Cgu::SharedHandle<char*> object (that
51
* is, Cgu::SharedHandle<char*>::get() will return 0). Such input
52
* will not be treated as invalid if it consists only of a single
53
* partly formed UTF-8 character which could be valid if further bytes
54
* were received and added to it. In that case the returned
55
* SharedHandle<char*> object will contain an allocated string of zero
56
* length, comprising only a terminating \0 character, rather than a
57
* NULL pointer.
58
*
59
* This enables UTF-8 strings to be sent over pipes, sockets, etc and
60
* displayed in a GTK+ object at the receiving end
61
*
62
* Note that for efficiency reasons the memory held in the returned
63
* Cgu::SharedHandle<char*> object may be greater than the length of
64
* the nul-terminated string that is contained in that memory: just
65
* let the Cgu::SharedHandle<char*> object manage the memory, and use
66
* the contents like any other nul-terminated string.
67
*
68
* This class is not needed if std::getline(), with its default '\\n'
69
* delimiter, is used to read UTF-8 characters using, say,
70
* Cgu::fdistream, because a whole '\\n' delimited line of UTF-8
71
* characters will always be complete.
72
*
73
* This is an example of its use, reading from a pipe until it is
74
* closed by the writer and putting the received text in a
75
* GtkTextBuffer object:
76
* @code
77
* using namespace Cgu;
78
*
79
* GtkTextIter end;
80
* GtkTextBuffer* text_buffer = gtk_text_view_get_buffer(GTK_TEXT_VIEW(text_view));
81
* gtk_text_buffer_get_end_iter(text_buffer, &end);
82
*
83
* Utf8::Reassembler reassembler;
84
* const int BSIZE = 1024;
85
* char read_buffer[BSIZE];
86
* ssize_t res;
87
* do {
88
* res = ::read(fd, read_buffer, BSIZE);
89
* if (res > 0) {
90
* SharedHandle<char*> utf8(reassembler(read_buffer, res));
91
* if (utf8.get()) {
92
* gtk_text_buffer_insert(text_buffer, &end,
93
* utf8.get(), std::strlen(utf8));
94
* }
95
* else std::cerr << "Invalid utf8 text sent over pipe\n";
96
* }
97
* } while (res && (res != -1 || errno == EINTR));
98
* @endcode
99
*/
100
101
class
Reassembler
{
102
size_t
stored;
103
const
static
size_t
buff_size = 6;
104
char
buffer[buff_size];
105
char
* join_buffer(
const
char
*,
size_t
);
106
public
:
107
/**
108
* Takes a byte array of wholly or partly formed UTF-8 characters to
109
* be converted (after taking account of previous calls to the method)
110
* to a valid string of wholly formed characters.
111
* @param input The input array.
112
* @param size The number of bytes in the input (not the number of
113
* UTF-8 characters).
114
* @return A Cgu::SharedHandle<char*> object holding a nul-terminated
115
* string comprising such of the input (after inserting, at the
116
* beginning, any partially formed UTF-8 character which was at the
117
* end of the input passed in previous calls to the functor) as forms
118
* complete UTF-8 characters (storing any partial character at the end
119
* for the next call to the functor). If the input is invalid after
120
* such recombination, then a null Cgu::SharedHandle<char*> object is
121
* returned (that is, Cgu::SharedHandle<char*>::get() will return 0).
122
* Such input will not be treated as invalid if it consists only of a
123
* single partly formed UTF-8 character which could be valid if
124
* further bytes were received and added to it. In that case the
125
* returned Cgu::SharedHandle<char*> object will contain an allocated
126
* string of zero length, comprising only a terminating \0 character,
127
* rather than a NULL pointer.
128
* @exception std::bad_alloc The method might throw std::bad_alloc if
129
* memory is exhausted and the system throws in that case. It will
130
* not throw any other exception.
131
*/
132
Cgu::SharedHandle<char*>
operator()
(
const
char
* input,
size_t
size);
133
134
/**
135
* Gets the number of bytes of a partially formed UTF-8 character
136
* stored for the next call to operator()(). It will not throw.
137
* @return The number of bytes.
138
*/
139
size_t
get_stored
() const noexcept {
return
stored;}
140
141
/**
142
* Resets the Reassembler, by discarding any partially formed UTF-8
143
* character from previous calls to operator()(). It will not throw.
144
*/
145
void
reset
() noexcept {stored = 0;}
146
147
/**
148
* The constructor will not throw.
149
*/
150
Reassembler
() noexcept: stored(0) {}
151
152
/* Only has effect if --with-glib-memory-slices-compat or
153
* --with-glib-memory-slices-no-compat option picked */
154
CGU_GLIB_MEMORY_SLICES_FUNCS
155
};
156
157
}
// namespace Utf8
158
159
}
// namespace Cgu
160
161
#endif
Generated on Mon Apr 6 2015 11:37:41 for c++-gtk-utils by
1.8.4