libdap  Updated for version 3.20.3
libdap4 is an implementation of OPeNDAP's DAP protocol.
chunked_istream.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
4 // Access Protocol.
5 
6 // Copyright (c) 2009 OPeNDAP, Inc.
7 // Author: James Gallagher <jgallagher@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 //
25 // Portions of this code were taken verbatim from Josuttis,
26 // "The C++ Standard Library," p.672
27 
28 #include "config.h"
29 
30 #include <stdint.h>
31 #include <byteswap.h>
32 #include <arpa/inet.h>
33 
34 #include <cstring>
35 #include <vector>
36 
37 #include "chunked_stream.h"
38 #include "chunked_istream.h"
39 
40 #include "Error.h"
41 
42 //#define DODS_DEBUG
43 //#define DODS_DEBUG2
44 #ifdef DODS_DEBUG
45 #include <iostream>
46 #endif
47 
48 #include "util.h"
49 #include "debug.h"
50 
51 namespace libdap {
52 
53 /*
54  This code does not use a 'put back' buffer, but here's a picture of the
55  d_buffer pointer, eback(), gptr() and egptr() that can be used to see how
56  the I/O Stream library's streambuf class works. For the case with no
57  putback, just imagine it as zero and eliminate the leftmost extension. This
58  might also come in useful if the code was extended to support put back. I
59  removed that feature because I don't see it being used with our chunked
60  transmission protocol and it requires an extra call to memcopy() when data
61  are added to the internal buffer.
62 
63  d_buffer  d_buffer + putBack
64  |         |
65  v         v
66  |---------|--------------------------------------------|....
67  |         |                                            |    .
68  |---------|--------------------------------------------|....
69            ^                         ^                   ^
70            |                         |                   |
71            eback()                   gptr()              egptr()
72 
73  */
74 
84 std::streambuf::int_type
86 {
87  DBG(cerr << "underflow..." << endl);
88  DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
89 
90  // return the next character; uflow() increments the puffer pointer.
91  if (gptr() < egptr())
92  return traits_type::to_int_type(*gptr());
93 
94  // gptr() == egptr() so read more data from the underlying input source.
95 
96  // To read data from the chunked stream, first read the header
97  uint32_t header;
98  d_is.read((char *) &header, 4);
99 #if !BYTE_ORDER_PREFIX
100  // When the endian nature of the server is encoded in the chunk header, the header is
101  // sent using network byte order
102  ntohl(header);
103 #endif
104 
105  // There are two 'EOF' cases: One where the END chunk is zero bytes and one where
106  // it holds data. In the latter case, bytes those will be read and moved into the
107  // buffer. Once those data are consumed, we'll be back here again and this read()
108  // will return EOF. See below for the other case...
109  if (d_is.eof()) return traits_type::eof();
110 #if BYTE_ORDER_PREFIX
111  if (d_twiddle_bytes) header = bswap_32(header);
112 #else
113  // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
114  if (!d_set_twiddle) {
115  d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
116  d_set_twiddle = true;
117  }
118 #endif
119  uint32_t chunk_size = header & CHUNK_SIZE_MASK;
120 
121  DBG(cerr << "underflow: chunk size from header: " << chunk_size << endl);
122  DBG(cerr << "underflow: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
123  DBG(cerr << "underflow: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
124 
125  // Handle the case where the buffer is not big enough to hold the incoming chunk
126  if (chunk_size > d_buf_size) {
127  d_buf_size = chunk_size;
128  m_buffer_alloc();
129  }
130 
131  // If the END chunk has zero bytes, return EOF. See above for more information
132  if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) return traits_type::eof();
133 
134  // Read the chunk's data
135  d_is.read(d_buffer, chunk_size);
136  DBG2(cerr << "underflow: size read: " << d_is.gcount() << ", eof: " << d_is.eof() << ", bad: " << d_is.bad() << endl);
137  if (d_is.bad()) return traits_type::eof();
138 
139  DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
140  setg(d_buffer, // beginning of put back area
141  d_buffer, // read position (gptr() == eback())
142  d_buffer + chunk_size); // end of buffer (egptr()) chunk_size == d_is.gcount() unless there's an error
143 
144  DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
145 
146  switch (header & CHUNK_TYPE_MASK) {
147  case CHUNK_END:
148  DBG2(cerr << "Found end chunk" << endl);
149  return traits_type::to_int_type(*gptr());
150  case CHUNK_DATA:
151  return traits_type::to_int_type(*gptr());
152 
153  case CHUNK_ERR:
154  // this is pretty much the end of the show... Assume the buffer/chunk holds
155  // the error message text.
156  d_error = true;
157  d_error_message = string(d_buffer, chunk_size);
158  return traits_type::eof();
159  default:
160  d_error = true;
161  d_error_message = "Failed to read known chunk header type.";
162  return traits_type::eof();
163  }
164 }
165 
182 std::streamsize
183 chunked_inbuf::xsgetn(char* s, std::streamsize num)
184 {
185  DBG(cerr << "xsgetn... num: " << num << endl);
186 
187  // if num is <= the chars currently in the buffer
188  if (num <= (egptr() - gptr())) {
189  memcpy(s, gptr(), num);
190  gbump(num);
191 
192  return traits_type::not_eof(num);
193  }
194 
195  // else they asked for more
196  uint32_t bytes_left_to_read = num;
197 
198  // are there any bytes in the buffer? if so grab them first
199  if (gptr() < egptr()) {
200  int bytes_to_transfer = egptr() - gptr();
201  memcpy(s, gptr(), bytes_to_transfer);
202  gbump(bytes_to_transfer);
203  s += bytes_to_transfer;
204  bytes_left_to_read -= bytes_to_transfer;
205  }
206 
207  // We need to get more bytes from the underlying stream; at this
208  // point the internal buffer is empty.
209 
210  // read the remaining bytes to transfer, a chunk at a time,
211  // and put any leftover stuff in the buffer.
212 
213  // note that when the code is here, gptr() == egptr(), so the
214  // next call to read() will fall through the previous tests and
215  // read at least one chunk here.
216  bool done = false;
217  while (!done) {
218  // Get a chunk header
219  uint32_t header;
220  d_is.read((char *) &header, 4);
221 #if !BYTE_ORDER_PREFIX
222  ntohl(header);
223 #endif
224 
225  // There are two EOF cases: One where the END chunk is zero bytes and one where
226  // it holds data. In the latter case, those will be read and moved into the
227  // buffer. Once those data are consumed, we'll be back here again and this read()
228  // will return EOF. See below for the other case...
229  if (d_is.eof()) return traits_type::eof();
230 #if BYTE_ORDER_PREFIX
231  if (d_twiddle_bytes) header = bswap_32(header);
232 #else
233  // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
234  if (!d_set_twiddle) {
235  d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
236  d_set_twiddle = true;
237  }
238 #endif
239 
240  uint32_t chunk_size = header & CHUNK_SIZE_MASK;
241  DBG(cerr << "xsgetn: chunk size from header: " << chunk_size << endl);
242  DBG(cerr << "xsgetn: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
243  DBG(cerr << "xsgetn: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
244 
245  // handle error chunks here
246  if ((header & CHUNK_TYPE_MASK) == CHUNK_ERR) {
247  d_error = true;
248  // Note that d_buffer is not used to avoid calling resize if it is too
249  // small to hold the error message. At this point, there's not much reason
250  // to optimize transport efficiency, however.
251  std::vector<char> message(chunk_size);
252  d_is.read(&message[0], chunk_size);
253  d_error_message = string(&message[0], chunk_size);
254  // leave the buffer and gptr(), ..., in a consistent state (empty)
255  setg(d_buffer, d_buffer, d_buffer);
256  }
257  // And zero-length END chunks here.
258  else if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) {
259  return traits_type::not_eof(num-bytes_left_to_read);
260  }
261  // The next case is complicated because we read some data from the current
262  // chunk into 's' an some into the internal buffer.
263  else if (chunk_size > bytes_left_to_read) {
264  d_is.read(s, bytes_left_to_read);
265  if (d_is.bad()) return traits_type::eof();
266 
267  // Now slurp up the remain part of the chunk and store it in the buffer
268  uint32_t bytes_leftover = chunk_size - bytes_left_to_read;
269  // expand the internal buffer if needed
270  if (bytes_leftover > d_buf_size) {
271  d_buf_size = chunk_size;
272  m_buffer_alloc();
273  }
274  // read the remain stuff in to d_buffer
275  d_is.read(d_buffer, bytes_leftover);
276  if (d_is.bad()) return traits_type::eof();
277 
278  setg(d_buffer, // beginning of put back area
279  d_buffer, // read position (gptr() == eback())
280  d_buffer + bytes_leftover /*d_is.gcount()*/); // end of buffer (egptr())
281 
282  bytes_left_to_read = 0 /* -= d_is.gcount()*/;
283  }
284  else {
285  // expand the internal buffer if needed
286  if (chunk_size > d_buf_size) {
287  d_buf_size = chunk_size;
288  m_buffer_alloc();
289  }
290  // If we get a chunk that's zero bytes, Don't call read()
291  // to save the kernel context switch overhead.
292  if (chunk_size > 0) {
293  d_is.read(s, chunk_size);
294  if (d_is.bad()) return traits_type::eof();
295  bytes_left_to_read -= chunk_size /*d_is.gcount()*/;
296  s += chunk_size;
297  }
298  }
299 
300  switch (header & CHUNK_TYPE_MASK) {
301  case CHUNK_END:
302  DBG(cerr << "Found end chunk" << endl);
303  // in this case bytes_left_to_read can be > 0 because we ran out of data
304  // before reading all the requested bytes. The next read() call will return
305  // eof; this call returns the number of bytes read and transferred to 's'.
306  done = true;
307  break;
308 
309  case CHUNK_DATA:
310  done = bytes_left_to_read == 0;
311  break;
312 
313  case CHUNK_ERR:
314  // this is pretty much the end of the show... The error message has
315  // already been read above
316  return traits_type::eof();
317 
318  default:
319  d_error = true;
320  d_error_message = "Failed to read known chunk header type.";
321  return traits_type::eof();
322  }
323  }
324 
325  return traits_type::not_eof(num-bytes_left_to_read);
326 }
327 
340 std::streambuf::int_type
342 {
343  // To read data from the chunked stream, first read the header
344  uint32_t header;
345  d_is.read((char *) &header, 4);
346 #if !BYTE_ORDER_PREFIX
347  ntohl(header);
348 #endif
349 
350  // There are two 'EOF' cases: One where the END chunk is zero bytes and one where
351  // it holds data. In the latter case, bytes those will be read and moved into the
352  // buffer. Once those data are consumed, we'll be back here again and this read()
353  // will return EOF. See below for the other case...
354  if (d_is.eof()) return traits_type::eof();
355 #if BYTE_ORDER_PREFIX
356  if (d_twiddle_bytes) header = bswap_32(header);
357 #else
358  // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
359  if (!d_set_twiddle) {
360  d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
361  d_set_twiddle = true;
362  }
363 #endif
364 
365  uint32_t chunk_size = header & CHUNK_SIZE_MASK;
366 
367  DBG(cerr << "read_next_chunk: chunk size from header: " << chunk_size << endl);
368  DBG(cerr << "read_next_chunk: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
369  DBG(cerr << "read_next_chunk: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
370 
371  // Handle the case where the buffer is not big enough to hold the incoming chunk
372  if (chunk_size > d_buf_size) {
373  d_buf_size = chunk_size;
374  m_buffer_alloc();
375  }
376 
377  // If the END chunk has zero bytes, return EOF. See above for more information
378  if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) return traits_type::eof();
379 
380  // Read the chunk's data
381  d_is.read(d_buffer, chunk_size);
382  DBG2(cerr << "read_next_chunk: size read: " << d_is.gcount() << ", eof: " << d_is.eof() << ", bad: " << d_is.bad() << endl);
383  if (d_is.bad()) return traits_type::eof();
384 
385  DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
386  setg(d_buffer, // beginning of put back area
387  d_buffer, // read position (gptr() == eback())
388  d_buffer + chunk_size); // end of buffer (egptr()) chunk_size == d_is.gcount() unless there's an error
389 
390  DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
391 
392  switch (header & CHUNK_TYPE_MASK) {
393  case CHUNK_END:
394  DBG(cerr << "Found end chunk" << endl);
395  return traits_type::not_eof(chunk_size);
396 
397  case CHUNK_DATA:
398  return traits_type::not_eof(chunk_size);
399 
400  case CHUNK_ERR:
401  // this is pretty much the end of the show... Assume the buffer/chunk holds
402  // the error message text.
403  d_error = true;
404  d_error_message = string(d_buffer, chunk_size);
405  return traits_type::eof();
406 
407  default:
408  d_error = true;
409  d_error_message = "Failed to read known chunk header type.";
410  return traits_type::eof();
411  }
412 }
413 
414 }
int_type read_next_chunk()
Read a chunk Normally the chunked nature of a chunked_istream/chunked_inbuf is hidden from the caller...
top level DAP object to house generic methods
Definition: AlarmHandler.h:35
virtual int_type underflow()
Insert new characters into the buffer This specialization of underflow is called when the gptr() is a...
virtual std::streamsize xsgetn(char *s, std::streamsize num)
Read a block of data This specialization of xsgetn() reads num bytes and puts them in s first reading...
bool is_host_big_endian()
Does this host use big-endian byte order?
Definition: util.cc:94