libpqxx
The C++ client library for PostgreSQL
 
Loading...
Searching...
No Matches
array.hxx
1/* Handling of SQL arrays.
2 *
3 * DO NOT INCLUDE THIS FILE DIRECTLY; include pqxx/field instead.
4 *
5 * Copyright (c) 2000-2025, Jeroen T. Vermeulen.
6 *
7 * See COPYING for copyright license. If you did not receive a file called
8 * COPYING with this source code, please notify the distributor of this
9 * mistake, or contact the author.
10 */
11#ifndef PQXX_H_ARRAY
12#define PQXX_H_ARRAY
13
14#if !defined(PQXX_HEADER_PRE)
15# error "Include libpqxx headers as <pqxx/header>, not <pqxx/header.hxx>."
16#endif
17
18#include <algorithm>
19#include <cassert>
20#include <stdexcept>
21#include <string>
22#include <type_traits>
23#include <utility>
24#include <vector>
25
26#include "pqxx/connection.hxx"
27#include "pqxx/internal/array-composite.hxx"
28#include "pqxx/internal/encoding_group.hxx"
29#include "pqxx/internal/encodings.hxx"
30
31
32namespace pqxx
33{
34// TODO: Specialise for string_view/zview, allocate all strings in one buffer.
35
37
52template<
53 typename ELEMENT, std::size_t DIMENSIONS = 1u,
54 char SEPARATOR = array_separator<ELEMENT>>
55class array final
56{
57public:
59
// Parse an SQL array, read as text from a pqxx::result or stream.
//
// `data` is the array's text representation.  The connection `cx` is only
// used to find the encoding group of its current client encoding (through
// pqxx::internal::enc_group(cx.encoding_id())); the actual work is delegated
// to the constructor that takes an encoding group.
68 array(std::string_view data, connection const &cx) :
69 array{data, pqxx::internal::enc_group(cx.encoding_id())}
70 {}
71
73
75 constexpr std::size_t dimensions() noexcept { return DIMENSIONS; }
76
78
82 std::array<std::size_t, DIMENSIONS> const &sizes() noexcept
83 {
84 return m_extents;
85 }
86
87 template<typename... INDEX> ELEMENT const &at(INDEX... index) const
88 {
89 static_assert(sizeof...(index) == DIMENSIONS);
90 check_bounds(index...);
91 return m_elts.at(locate(index...));
92 }
93
95
103 template<typename... INDEX> ELEMENT const &operator[](INDEX... index) const
104 {
105 static_assert(sizeof...(index) == DIMENSIONS);
106 return m_elts[locate(index...)];
107 }
108
110
115 constexpr auto cbegin() const noexcept { return m_elts.cbegin(); }
117 constexpr auto cend() const noexcept { return m_elts.cend(); }
119 constexpr auto crbegin() const noexcept { return m_elts.crbegin(); }
121 constexpr auto crend() const noexcept { return m_elts.crend(); }
122
124
127 constexpr std::size_t size() const noexcept { return m_elts.size(); }
128
130
145 constexpr auto ssize() const noexcept
146 {
147 return static_cast<std::ptrdiff_t>(size());
148 }
149
151
153 constexpr auto front() const noexcept { return m_elts.front(); }
154
156
158 constexpr auto back() const noexcept { return m_elts.back(); }
159
160private:
162
170 void check_dims(std::string_view data)
171 {
172 auto sz{std::size(data)};
173 if (sz < DIMENSIONS * 2)
174 throw conversion_error{pqxx::internal::concat(
175 "Trying to parse a ", DIMENSIONS, "-dimensional array out of '", data,
176 "'.")};
177
178 // Making some assumptions here:
179 // * The array holds no extraneous whitespace.
180 // * None of the sub-arrays can be null.
181 // * Only ASCII characters start off with a byte in the 0-127 range.
182 //
183 // Given those, the input must start with a sequence of DIMENSIONS bytes
184 // with the ASCII value for '{'; and likewise it must end with a sequence
185 // of DIMENSIONS bytes with the ASCII value for '}'.
186
187 if (data[0] != '{')
188 throw conversion_error{"Malformed array: does not start with '{'."};
189 for (std::size_t i{0}; i < DIMENSIONS; ++i)
190 if (data[i] != '{')
191 throw conversion_error{pqxx::internal::concat(
192 "Expecting ", DIMENSIONS, "-dimensional array, but found ", i, ".")};
193 if (data[DIMENSIONS] == '{')
194 throw conversion_error{pqxx::internal::concat(
195 "Tried to parse ", DIMENSIONS,
196 "-dimensional array from array data that has more dimensions.")};
197 for (std::size_t i{0}; i < DIMENSIONS; ++i)
198 if (data[sz - 1 - i] != '}')
199 throw conversion_error{
200 "Malformed array: does not end in the right number of '}'."};
201 }
202
203 // Allow fields to construct arrays passing the encoding group.
204 // Couldn't make this work through a call gate, thanks to the templating.
205 friend class ::pqxx::field;
206
// Parse the array text `data`, given the encoding group `enc` of the text.
//
// The parser is a template on the encoding group, so this constructor's only
// job is to turn the run-time value `enc` into the matching compile-time
// instantiation of parse().  Any value outside the listed groups is treated
// as impossible (PQXX_UNREACHABLE).
207 array(std::string_view data, pqxx::internal::encoding_group enc)
208 {
209 using group = pqxx::internal::encoding_group;
210 switch (enc)
211 {
212 case group::MONOBYTE: parse<group::MONOBYTE>(data); break;
213 case group::BIG5: parse<group::BIG5>(data); break;
214 case group::EUC_CN: parse<group::EUC_CN>(data); break;
215 case group::EUC_JP: parse<group::EUC_JP>(data); break;
216 case group::EUC_KR: parse<group::EUC_KR>(data); break;
217 case group::EUC_TW: parse<group::EUC_TW>(data); break;
218 case group::GB18030: parse<group::GB18030>(data); break;
219 case group::GBK: parse<group::GBK>(data); break;
220 case group::JOHAB: parse<group::JOHAB>(data); break;
221 case group::MULE_INTERNAL: parse<group::MULE_INTERNAL>(data); break;
222 case group::SJIS: parse<group::SJIS>(data); break;
223 case group::UHC: parse<group::UHC>(data); break;
224 case group::UTF8: parse<group::UTF8>(data); break;
225 default: PQXX_UNREACHABLE; break;
226 }
227 }
228
230
233 std::size_t parse_field_end(std::string_view data, std::size_t here) const
234 {
235 auto const sz{std::size(data)};
236 if (here < sz)
237 switch (data[here])
238 {
239 case SEPARATOR:
240 ++here;
241 if (here >= sz)
242 throw conversion_error{"Array looks truncated."};
243 switch (data[here])
244 {
245 case SEPARATOR:
246 throw conversion_error{"Array contains double separator."};
247 case '}': throw conversion_error{"Array contains trailing separator."};
248 default: break;
249 }
250 break;
251 case '}': break;
252 default:
253 throw conversion_error{pqxx::internal::concat(
254 "Unexpected character in array: ",
255 static_cast<unsigned>(static_cast<unsigned char>(data[here])),
256 " where separator or closing brace expected.")};
257 }
258 return here;
259 }
260
262
267 constexpr std::size_t estimate_elements(std::string_view data) const noexcept
268 {
269 // Dirty trick: just count the number of bytes that look as if they may be
270 // separators. At the very worst we may overestimate by a factor of two or
271 // so, in exceedingly rare cases, on some encodings.
272 auto const separators{
273 std::count(std::begin(data), std::end(data), SEPARATOR)};
274 // The number of dimensions makes no difference here. It's still one
275 // separator between consecutive elements, just possibly with some extra
276 // braces as well.
277 return static_cast<std::size_t>(separators + 1);
278 }
279
// Parse the array's text representation `data`, which is in an encoding that
// belongs to encoding group ENC.
//
// On success, m_elts holds the elements in the order in which they occur in
// the text, m_extents holds the size of each dimension, and init_factors()
// has been called.
//
// Throws conversion_error if the text is malformed (see also check_dims() and
// parse_field_end()), and unexpected_null if the text contains a NULL element
// while ELEMENT has no null value.
280 template<pqxx::internal::encoding_group ENC>
281 void parse(std::string_view data)
282 {
283 static_assert(DIMENSIONS > 0u, "Can't create a zero-dimensional array.");
284 auto const sz{std::size(data)};
285 check_dims(data);
286
// Reserve up front, based on a rough count of separators.
287 m_elts.reserve(estimate_elements(data));
288
289 // We discover the array's extents along each of the dimensions, starting
290 // with the final dimension and working our way towards the first. At any
291 // given point during parsing, we know the extents starting at this
292 // dimension.
293 std::size_t know_extents_from{DIMENSIONS};
294
295 // Currently parsing this dimension. We start off at -1, relying on C++'s
296 // well-defined rollover for unsigned numbers.
297 // The actual outermost dimension of the array is 0, and the innermost is
298 // at the end. But, the array as a whole is enclosed in braces just like
299 // each row. So we act like there's an anomalous "outer" dimension holding
300 // the entire array.
301 constexpr std::size_t outer{std::size_t{0u} - std::size_t{1u}};
302
303 // We start parsing at the fictional outer dimension. The input begins
304 // with opening braces, one for each dimension, so we'll start off by
305 // bumping all the way to the innermost dimension.
306 std::size_t dim{outer};
307
308 // Extent counters, one per "real" dimension.
309 // Note initialiser syntax; this zero-initialises all elements.
310 std::array<std::size_t, DIMENSIONS> extents{};
311
312 // Current parsing position.
313 std::size_t here{0};
314 PQXX_ASSUME(here <= sz);
315 while (here < sz)
316 {
317 if (data[here] == '{')
318 {
// Opening brace: descend one dimension.
319 if (dim == outer)
320 {
321 // This must be the initial opening brace.
322 if (know_extents_from != DIMENSIONS)
323 throw conversion_error{
324 "Array text representation closed and reopened its outside "
325 "brace pair."};
326 assert(here == 0);
327 PQXX_ASSUME(here == 0);
328 }
329 else
330 {
// A sub-array counts as one more entry in the dimension that holds it.
331 if (dim >= (DIMENSIONS - 1))
332 throw conversion_error{
333 "Array seems to have inconsistent number of dimensions."};
334 ++extents[dim];
335 }
336 // (Rolls over to zero if we're coming from the outer dimension.)
337 ++dim;
338 extents[dim] = 0u;
339 ++here;
340 }
341 else if (data[here] == '}')
342 {
// Closing brace: a row in dimension `dim` is complete.
343 if (dim == outer)
344 throw conversion_error{"Array has spurious '}'."};
345 if (dim < know_extents_from)
346 {
347 // We just finished parsing our first row in this dimension.
348 // Now we know the array dimension's extent.
349 m_extents[dim] = extents[dim];
350 know_extents_from = dim;
351 }
352 else
353 {
// Every later row must have the same size as the first one.
354 if (extents[dim] != m_extents[dim])
355 throw conversion_error{"Rows in array have inconsistent sizes."};
356 }
357 // Bump back down to the next-lower dimension. Which may be the outer
358 // dimension, through underflow.
359 --dim;
360 ++here;
361 here = parse_field_end(data, here);
362 }
363 else
364 {
365 // Found an array element. The actual elements always live in the
366 // "inner" dimension.
367 if (dim != DIMENSIONS - 1)
368 throw conversion_error{
369 "Malformed array: found element where sub-array was expected."};
370 assert(dim != outer);
371 ++extents[dim];
// Offset just past the element; set by the scanning step below.
372 std::size_t end;
373 switch (data[here])
374 {
375 case '\0': throw conversion_error{"Unexpected zero byte in array."};
// NOTE(review): this compares against a literal ',' whereas
// parse_field_end() and estimate_elements() use SEPARATOR.  Confirm whether
// that is intended for element types with a different separator.
376 case ',': throw conversion_error{"Array contains empty field."};
377 case '"': {
378 // Double-quoted string. We parse it into a buffer before parsing
379 // the resulting string as an element. This seems wasteful: the
380 // string might not contain any special characters. So it's
381 // tempting to check, and try to use a string_view and avoid a
382 // useless copy step. But. Even besides the branch prediction
383 // risk, the very fact that the back-end chose to quote the string
384 // indicates that there is some kind of special character in there.
385 // So in practice, this optimisation would only apply if the only
386 // special characters in the string were commas.
// NOTE(review): the first line of the next statement (listing line 387,
// which presumably assigns `end`) is absent from this copy of the source.
388 std::data(data), std::size(data), here);
389 // TODO: scan_double_quoted_string() with reusable buffer.
390 std::string const buf{
// NOTE(review): listing line 391 (the call that produces the un-quoted
// string) is likewise absent from this copy.
392 std::data(data), end, here)};
393 m_elts.emplace_back(from_string<ELEMENT>(buf));
394 }
395 break;
396 default: {
397 // Unquoted string. An unquoted string is always literal, no
398 // escaping or encoding, so we don't need to parse it into a
399 // buffer. We can just read it as a string_view.
// NOTE(review): listing line 400 (presumably the assignment to `end`) is
// absent from this copy of the source.
401 std::data(data), std::size(data), here);
402 std::string_view const field{
403 std::string_view{std::data(data) + here, end - here}};
// An unquoted NULL denotes an SQL null, which needs a type that can hold one.
404 if (field == "NULL")
405 {
406 if constexpr (nullness<ELEMENT>::has_null)
407 m_elts.emplace_back(nullness<ELEMENT>::null());
408 else
409 throw unexpected_null{pqxx::internal::concat(
410 "Array contains a null ", type_name<ELEMENT>,
411 ". Consider making it an array of std::optional<",
412 type_name<ELEMENT>, "> instead.")};
413 }
414 else
415 m_elts.emplace_back(from_string<ELEMENT>(field));
416 }
417 }
// Continue right after the element, and deal with whatever follows it.
418 here = end;
419 PQXX_ASSUME(here <= sz);
420 here = parse_field_end(data, here);
421 }
422 }
423
// All braces must have been closed again by the time the input runs out.
424 if (dim != outer)
425 throw conversion_error{"Malformed array; may be truncated."};
426 assert(know_extents_from == 0);
427 PQXX_ASSUME(know_extents_from == 0);
428
429 init_factors();
430 }
431
433 void init_factors() noexcept
434 {
435 std::size_t factor{1};
436 for (std::size_t dim{DIMENSIONS - 1}; dim > 0; --dim)
437 {
438 factor *= m_extents[dim];
439 m_factors[dim - 1] = factor;
440 }
441 }
442
// Map a full set of indexes (one per dimension) to an offset in m_elts.
//
// Checks at compile time that the number of indexes equals DIMENSIONS, then
// lets add_index() do the arithmetic.  Does not check the indexes against the
// array's extents.
444 template<typename... INDEX> std::size_t locate(INDEX... index) const noexcept
445 {
446 static_assert(
447 sizeof...(index) == DIMENSIONS,
448 "Indexing array with wrong number of dimensions.");
449 return add_index(index...);
450 }
451
452 template<typename OUTER, typename... INDEX>
453 constexpr std::size_t add_index(OUTER outer, INDEX... indexes) const noexcept
454 {
455 std::size_t const first{check_cast<std::size_t>(outer, "array index"sv)};
456 if constexpr (sizeof...(indexes) == 0)
457 {
458 return first;
459 }
460 else
461 {
462 static_assert(sizeof...(indexes) < DIMENSIONS);
463 // (Offset by 1 here because the outer dimension is not in there.)
464 constexpr auto dimension{DIMENSIONS - (sizeof...(indexes) + 1)};
465 static_assert(dimension < DIMENSIONS);
466 return first * m_factors[dimension] + add_index(indexes...);
467 }
468 }
469
471
473 template<typename OUTER, typename... INDEX>
474 constexpr void check_bounds(OUTER outer, INDEX... indexes) const
475 {
476 std::size_t const first{check_cast<std::size_t>(outer, "array index"sv)};
477 static_assert(sizeof...(indexes) < DIMENSIONS);
478 // (Offset by 1 here because the outer dimension is not in there.)
479 constexpr auto dimension{DIMENSIONS - (sizeof...(indexes) + 1)};
480 static_assert(dimension < DIMENSIONS);
481 if (first >= m_extents[dimension])
482 throw range_error{pqxx::internal::concat(
483 "Array index for dimension ", dimension, " is out of bounds: ", first,
484 " >= ", m_extents[dimension])};
485
486 // Now check the rest of the indexes, if any.
487 if constexpr (sizeof...(indexes) > 0)
488 check_bounds(indexes...);
489 }
490
// The elements, in the order in which parse() found them in the array text.
// Multi-dimensional indexes are translated to offsets in this flat storage
// by locate().
492 std::vector<ELEMENT> m_elts;
493
// Size of the array in each of its dimensions, as discovered by parse().
495 std::array<std::size_t, DIMENSIONS> m_extents;
496
498
// Multipliers for computing an element's offset in m_elts, one for every
// dimension except the last.  Filled in by init_factors(); used by
// add_index().
505 std::array<std::size_t, DIMENSIONS - 1> m_factors;
506};
507
508
510
// Step-by-step parser for the text representation of an SQL array.
//
// Construct it on the array text, then call get_next() repeatedly.  Each call
// returns a pair of a `juncture` and a string.  Only the declarations are
// visible here; the member function definitions live elsewhere.
530class PQXX_LIBEXPORT array_parser
531{
532public:
// What kind of thing get_next() has just come across.
// NOTE(review): the meanings below are inferred from the enumerator names
// only -- confirm against the implementation.
534 enum class juncture
535 {
// Presumably: start of a row (opening brace).
537 row_start,
// Presumably: end of a row (closing brace).
539 row_end,
// Presumably: an element that is an SQL null.
541 null_value,
// Presumably: an element with a string value.
543 string_value,
// Presumably: the input has been parsed to the end.
545 done,
546 };
547
549
// Set up a parser for `input`.  The encoding group defaults to MONOBYTE.
// The parser keeps `input` as a string_view (see m_input), so presumably the
// underlying text must outlive the parser -- verify.
553 explicit array_parser(
554 std::string_view input,
555 internal::encoding_group = internal::encoding_group::MONOBYTE);
556
558
// Perform one parsing step by calling the member function that m_impl
// points to, and return its result.
564 std::pair<juncture, std::string> get_next() { return (this->*m_impl)(); }
565
566private:
// The text being parsed.
567 std::string_view m_input;
568
// Presumably the current parsing position within m_input; starts at zero.
570 std::size_t m_pos = 0u;
571
573
// Pointer to a member function with the same signature as get_next().
578 using implementation = std::pair<juncture, std::string> (array_parser::*)();
579
// Returns an `implementation` for the given encoding group; presumably the
// matching instantiation of parse_array_step() -- definition not visible.
581 static implementation
582 specialize_for_encoding(pqxx::internal::encoding_group enc);
583
// The function that get_next() forwards to.
585 implementation m_impl;
586
// The helpers below are all templates on the encoding group; their
// definitions are not part of this header.
588 template<pqxx::internal::encoding_group>
589 std::pair<juncture, std::string> parse_array_step();
590
591 template<pqxx::internal::encoding_group>
592 std::string::size_type scan_double_quoted_string() const;
593 template<pqxx::internal::encoding_group>
594 std::string parse_double_quoted_string(std::string::size_type end) const;
595 template<pqxx::internal::encoding_group>
596 std::string::size_type scan_unquoted_string() const;
597 template<pqxx::internal::encoding_group>
598 std::string_view parse_unquoted_string(std::string::size_type end) const;
599
600 template<pqxx::internal::encoding_group>
601 std::string::size_type scan_glyph(std::string::size_type pos) const;
602 template<pqxx::internal::encoding_group>
603 std::string::size_type
604 scan_glyph(std::string::size_type pos, std::string::size_type end) const;
605};
606} // namespace pqxx
607#endif
constexpr auto back() const noexcept
Refer to the last element, if any.
Definition array.hxx:158
constexpr auto cend() const noexcept
Return end point of iteration.
Definition array.hxx:117
constexpr auto crbegin() const noexcept
Begin reverse iteration.
Definition array.hxx:119
ELEMENT const & operator[](INDEX... index) const
Access element (without bounds check).
Definition array.hxx:103
constexpr std::size_t size() const noexcept
Number of elements in the array.
Definition array.hxx:127
constexpr auto ssize() const noexcept
Number of elements in the array (as a signed number).
Definition array.hxx:145
constexpr std::size_t dimensions() noexcept
How many dimensions does this array have?
Definition array.hxx:75
constexpr auto cbegin() const noexcept
Begin iteration of individual elements.
Definition array.hxx:115
constexpr auto crend() const noexcept
Return end point of reverse iteration.
Definition array.hxx:121
std::array< std::size_t, DIMENSIONS > const & sizes() noexcept
Return the sizes of this array in each of its dimensions.
Definition array.hxx:82
array(std::string_view data, connection const &cx)
Parse an SQL array, read as text from a pqxx::result or stream.
Definition array.hxx:68
constexpr auto front() const noexcept
Refer to the first element, if any.
Definition array.hxx:153
Internal items for libpqxx' own use. Do not use these yourself.
Definition encodings.cxx:33
std::string concat(TYPE... item)
Efficiently combine a bunch of items into one big string.
Definition concat.hxx:31
std::string_view parse_unquoted_string(char const input[], std::size_t end, std::size_t pos)
Parse an unquoted array entry or field of a composite-type value.
Definition array-composite.hxx:149
std::size_t scan_double_quoted_string(char const input[], std::size_t size, std::size_t pos)
Definition array-composite.hxx:20
std::size_t scan_unquoted_string(char const input[], std::size_t size, std::size_t pos)
Find the end of an unquoted string in an array or composite-type value.
Definition array-composite.hxx:131
std::string parse_double_quoted_string(char const input[], std::size_t end, std::size_t pos)
Un-quote and un-escape a double-quoted SQL string.
Definition array-composite.hxx:84
The home of all libpqxx classes, functions, templates, etc.
Definition array.cxx:27
std::string const type_name
A human-readable name for a type, used in error messages and such.
Definition strconv.hxx:80
constexpr char array_separator
Element separator between SQL array elements of this type.
Definition strconv.hxx:559
T from_string(field const &value)
Convert a field's value to type T.
Definition field.hxx:548
TO check_cast(FROM value, std::string_view description)
Cast a numeric value to another type, or throw if it underflows/overflows.
Definition util.hxx:153
static TYPE null()
Return a null value.
static bool has_null
Does this type have a null value?
Definition strconv.hxx:93