LCOV - code coverage report
Current view: top level - common/stream - iconv_stream.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 91 187 48.7 %
Date: 2024-04-25 21:43:30 Functions: 6 8 75.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /* Stream wrapper that converts between character sets using iconv */
      14             : 
      15             : #include "monetdb_config.h"
      16             : #include "stream.h"
      17             : #include "stream_internal.h"
      18             : 
      19             : 
      20             : /* ------------------------------------------------------------------ */
      21             : /* streams working on a substream, converting character sets using iconv */
      22             : 
      23             : #ifdef HAVE_ICONV
      24             : /* Per-stream state of an iconv conversion stream; ic_open stores it in s->stream_data.p. */
      25             : struct icstream {
      26             :         iconv_t cd;             /* conversion descriptor handed to iconv() */
      27             :         char buffer[BUFSIZ];    /* scratch space: iconv output and leftover input when writing, raw input when reading */
      28             :         size_t buflen;          /* number of not-yet-converted bytes currently held in buffer */
      29             :         bool eof;               /* set by ic_read once the inner stream returned 0 bytes */
      30             : };
      31             : /* Write callback: convert elmsize*cnt bytes from buf with iconv and write the result to s->inner; returns cnt, or -1 on error. */
      32             : static ssize_t
      33         334 : ic_write(stream *restrict s, const void *restrict buf, size_t elmsize, size_t cnt)
      34             : {
      35         334 :         struct icstream *ic = (struct icstream *) s->stream_data.p;
      36         334 :         char *inbuf;
      37         334 :         size_t inbytesleft = elmsize * cnt;     /* total number of input bytes */
      38         334 :         char *bf = NULL;        /* temporary buffer, only allocated when leftover input must be prepended */
      39             : 
      40         334 :         if (ic == NULL) {       /* ic_close frees and clears stream_data.p */
      41           0 :                 mnstr_set_error(s, MNSTR_WRITE_ERROR, "stream already ended");
      42           0 :                 goto bailout;
      43             :         }
      44             : 
      45             :         /* if unconverted data from a previous call remains, add it to
      46             :          * the start of the new data, using temporary space */
      47         334 :         if (ic->buflen > 0) {
      48           7 :                 bf = malloc(ic->buflen + inbytesleft);
      49           7 :                 if (bf == NULL) {
      50             :                         /* cannot allocate memory */
      51           0 :                         mnstr_set_error(s, MNSTR_WRITE_ERROR, "out of memory");
      52           0 :                         goto bailout;
      53             :                 }
      54           7 :                 memcpy(bf, ic->buffer, ic->buflen);
      55           7 :                 memcpy(bf + ic->buflen, buf, inbytesleft);
      56           7 :                 buf = bf;
      57           7 :                 inbytesleft += ic->buflen;
      58           7 :                 ic->buflen = 0; /* leftover now lives in bf; ic->buffer is free to serve as output space */
      59             :         }
      60         334 :         inbuf = (char *) buf;   /* iconv() takes a non-const char **, hence the cast */
      61         659 :         while (inbytesleft > 0) {
      62         334 :                 char *outbuf = ic->buffer;
      63         334 :                 size_t outbytesleft = sizeof(ic->buffer);
      64             : 
      65         334 :                 if (iconv(ic->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == (size_t) -1) {
      66           9 :                         switch (errno) {
      67           2 :                         case EILSEQ:
      68             :                                 /* invalid multibyte sequence encountered */
      69           2 :                                 mnstr_set_error(s, MNSTR_WRITE_ERROR, "invalid multibyte sequence");
      70           2 :                                 goto bailout;
      71           7 :                         case EINVAL:
      72             :                                 /* incomplete multibyte sequence
      73             :                                  * encountered: flush what has been
      74             :                                  * converted */
      75           7 :                                 if (outbytesleft < sizeof(ic->buffer) &&
      76           0 :                                     mnstr_write(s->inner, ic->buffer, 1, sizeof(ic->buffer) - outbytesleft) < 0) {
      77           0 :                                         mnstr_set_error(s, MNSTR_WRITE_ERROR, "incomplete multibyte sequence"); /* NOTE(review): reached when the inner write failed; the other write failure below copies the inner error instead -- confirm intent */
      78           0 :                                         goto bailout;
      79             :                                 }
      80             :                                 /* remember what hasn't been converted */
      81           7 :                                 if (inbytesleft > sizeof(ic->buffer)) {
      82             :                                         /* ridiculously long multibyte
      83             :                                          * sequence, so return
      84             :                                          * error */
      85           0 :                                         mnstr_set_error(s, MNSTR_WRITE_ERROR, "multibyte sequence too long");
      86           0 :                                         goto bailout;
      87             :                                 }
      88           7 :                                 memcpy(ic->buffer, inbuf, inbytesleft);
      89           7 :                                 ic->buflen = inbytesleft;
      90           7 :                                 if (bf)
      91           0 :                                         free(bf);
      92           7 :                                 return (ssize_t) cnt;   /* the saved tail is picked up by the next call (or rejected by ic_flush) */
      93             :                         case E2BIG:
      94             :                                 /* not enough space in output buffer */
      95             :                                 break;  /* write out the filled buffer below and loop for the rest */
      96           0 :                         default:
      97           0 :                                 mnstr_set_error_errno(s, MNSTR_WRITE_ERROR, "iconv reported an error");
      98           0 :                                 goto bailout;
      99             :                         }
     100             :                 }
     101         325 :                 if (mnstr_write(s->inner, ic->buffer, 1, sizeof(ic->buffer) - outbytesleft) < 0) {
     102           0 :                         mnstr_copy_error(s, s->inner);
     103           0 :                         goto bailout;
     104             :                 }
     105             :         }
     106         325 :         if (bf)
     107           6 :                 free(bf);
     108         325 :         return (ssize_t) cnt;
     109             : 
     110           2 :         bailout:        /* common error exit: an error has been set on s; release the temporary buffer */
     111           2 :         assert(s->errkind != MNSTR_NO__ERROR);
     112           2 :         if (bf)
     113           1 :                 free(bf);
     114             :         return -1;
     115             : }
     116             : /* Read callback: read s->inner one byte at a time, convert with iconv into buf; returns the number of whole elements produced, or -1 on error. */
     117             : static ssize_t
     118           0 : ic_read(stream *restrict s, void *restrict buf, size_t elmsize, size_t cnt)
     119             : {
     120           0 :         struct icstream *ic = (struct icstream *) s->stream_data.p;
     121           0 :         char *inbuf;
     122           0 :         size_t inbytesleft;
     123           0 :         char *outbuf;
     124           0 :         size_t outbytesleft;
     125             : 
     126           0 :         if (ic == NULL) {       /* ic_close frees and clears stream_data.p */
     127           0 :                 mnstr_set_error(s, MNSTR_READ_ERROR, "stream already ended");
     128           0 :                 return -1;
     129             :         }
     130           0 :         inbuf = ic->buffer;     /* start with whatever was left unconverted by the previous call */
     131           0 :         inbytesleft = ic->buflen;
     132           0 :         outbuf = (char *) buf;
     133           0 :         outbytesleft = elmsize * cnt;
     134           0 :         if (outbytesleft == 0)
     135             :                 return 0;       /* nothing requested */
     136           0 :         while (outbytesleft > 0 && !ic->eof) {
     137           0 :                 if (ic->buflen == sizeof(ic->buffer)) {
     138             :                         /* ridiculously long multibyte sequence, return error */
     139           0 :                         mnstr_set_error(s, MNSTR_READ_ERROR, "multibyte sequence too long");
     140           0 :                         return -1;
     141             :                 }
     142             : 
     143           0 :                 switch (mnstr_read(s->inner, ic->buffer + ic->buflen, 1, 1)) { /* append one byte after the pending ones */
     144           0 :                 case 1:
     145             :                         /* expected: read one byte */
     146           0 :                         ic->buflen++;
     147           0 :                         inbytesleft++;
     148           0 :                         break;
     149           0 :                 case 0:
     150             :                         /* end of file */
     151           0 :                         ic->eof = true;
     152           0 :                         if (ic->buflen > 0) {
     153             :                                 /* incomplete input */
     154           0 :                                 mnstr_set_error(s, MNSTR_READ_ERROR, "incomplete input");
     155           0 :                                 return -1;
     156             :                         }
     157           0 :                         if (iconv(ic->cd, NULL, NULL, &outbuf, &outbytesleft) == (size_t) -1) {       /* NULL input: have iconv emit any pending shift/reset sequence */
     158             :                                 /* some error occurred */
     159           0 :                                 mnstr_set_error_errno(s, MNSTR_READ_ERROR, "iconv reported an error");
     160           0 :                                 return -1;
     161             :                         }
     162           0 :                         goto exit_func; /* double break */
     163           0 :                 default:
     164             :                         /* error */
     165           0 :                         mnstr_copy_error(s, s->inner);
     166           0 :                         return -1;
     167             :                 }
     168           0 :                 if (iconv(ic->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == (size_t) -1) {
     169           0 :                         switch (errno) {
     170           0 :                         case EILSEQ:
     171           0 :                                 mnstr_set_error(s, MNSTR_READ_ERROR, "invalid multibyte sequence");
     172           0 :                                 return -1;
     173             :                         case EINVAL:
     174             :                                 /* incomplete multibyte sequence encountered */
     175             :                                 break;  /* keep the bytes and read another one */
     176           0 :                         case E2BIG:
     177             :                                 /* not enough space in output buffer,
     178             :                                  * return what we have, saving what's in
     179             :                                  * the buffer */
     180           0 :                                 goto exit_func;
     181           0 :                         default:
     182           0 :                                 mnstr_set_error_errno(s, MNSTR_READ_ERROR, "iconv reported an error");
     183           0 :                                 return -1;
     184             :                         }
     185             :                 }
     186           0 :                 if (inbytesleft == 0) {
     187             :                         /* converted complete buffer */
     188           0 :                         inbuf = ic->buffer;
     189           0 :                         ic->buflen = 0;
     190             :                 }
     191             :         }
     192           0 :       exit_func:
     193           0 :         if (inbuf > ic->buffer) /* part of the buffer was consumed: move the remainder to the front */
     194           0 :                 memmove(ic->buffer, inbuf, inbytesleft);
     195           0 :         ic->buflen = inbytesleft;
     196           0 :         if (outbytesleft == elmsize * cnt && !s->inner->eof) { /* no output was produced and the inner stream is not flagged eof */
     197             :                 /* if we're returning data, we must pass on EOF on the
     198             :                  * next call (i.e. keep ic->eof set), otherwise we
     199             :                  * must clear it so that the next call will cause the
     200             :                  * underlying stream to be read again */
     201           0 :                 ic->eof = false;
     202             :         }
     203           0 :         return (ssize_t) ((elmsize * cnt - outbytesleft) / elmsize);    /* bytes produced, expressed in whole elements */
     204             : }
     205             : /* Flush callback: write any output iconv still holds to s->inner, then flush s->inner; returns -1 on error. */
     206             : static int
     207           8 : ic_flush(stream *s, mnstr_flush_level flush_level)
     208             : {
     209           8 :         struct icstream *ic = (struct icstream *) s->stream_data.p;
     210           8 :         char *outbuf;
     211           8 :         size_t outbytesleft;
     212             : 
     213           8 :         if (ic == NULL) /* stream was already closed; note that no error is set on s here */
     214             :                 return -1;
     215           8 :         outbuf = ic->buffer;
     216           8 :         outbytesleft = sizeof(ic->buffer);
     217             :         /* if unconverted data from a previous call remains, it was an
     218             :          * incomplete multibyte sequence, so an error */
     219          16 :         if (ic->buflen > 0 ||
     220           8 :             iconv(ic->cd, NULL, NULL, &outbuf, &outbytesleft) == (size_t) -1 ||       /* NULL input: have iconv emit any pending shift/reset sequence */
     221           8 :             (outbytesleft < sizeof(ic->buffer) &&
     222           0 :              mnstr_write(s->inner, ic->buffer, 1, sizeof(ic->buffer) - outbytesleft) < 0)) {
     223           0 :                 mnstr_copy_error(s, s->inner);  /* NOTE(review): also reached for leftover input and iconv failure, where s->inner may hold no error to copy -- confirm */
     224           0 :                 return -1;
     225             :         }
     226           8 :         return mnstr_flush(s->inner, flush_level);
     227             : }
     228             : /* Close callback: flush (write streams only), release the iconv descriptor, close the inner stream and free the private state. */
     229             : static void
     230           4 : ic_close(stream *s)
     231             : {
     232           4 :         struct icstream *ic = (struct icstream *) s->stream_data.p;
     233             : 
     234           4 :         if (ic) {       /* NULL means the stream was already closed: nothing to do */
     235           4 :                 if (!s->readonly)
     236           4 :                         ic_flush(s, MNSTR_FLUSH_DATA);  /* result is ignored: closing proceeds regardless */
     237           4 :                 iconv_close(ic->cd);
     238           4 :                 close_stream(s->inner);
     239           4 :                 s->inner = NULL;
     240           4 :                 free(s->stream_data.p);
     241           4 :                 s->stream_data.p = NULL;        /* makes later read/write/flush/close calls see ic == NULL */
     242             :         }
     243           4 : }
     244             : /* Destroy callback: close the stream if still open, then release the wrapper stream itself. */
     245             : static void
     246           4 : ic_destroy(stream *s)
     247             : {
     248           4 :         ic_close(s);
     249           4 :         mnstr_destroy(s->inner);        /* NOTE(review): s->inner is NULL here whenever ic_close did its work -- presumably mnstr_destroy accepts NULL; confirm */
     250           4 :         destroy_stream(s);
     251           4 : }
     252             : /* Wrap ss in a new stream called name that converts through cd; returns ss itself if it is already UTF-8, NULL on failure (cd is then left for the caller to close). */
     253             : static stream *
     254           4 : ic_open(iconv_t cd, stream *restrict ss, const char *restrict name)
     255             : {
     256           4 :         stream *s;
     257           4 :         struct icstream *ic;
     258             : 
     259           4 :         if (ss->isutf8)
     260             :                 return ss;
     261           4 :         if ((s = create_wrapper_stream(name, ss)) == NULL)
     262             :                 return NULL;
     263           4 :         s->read = ic_read;
     264           4 :         s->write = ic_write;
     265           4 :         s->close = ic_close;
     266           4 :         s->destroy = ic_destroy;
     267           4 :         s->flush = ic_flush;
     268           4 :         ic = malloc(sizeof(struct icstream));
     269           4 :         if (ic == NULL) {
     270           0 :                 mnstr_set_open_error(s->name, errno, NULL);     /* report first: this needs s->name and the errno left by malloc */
     271           0 :                 mnstr_destroy(s);       /* s must not be touched after this; NOTE(review): presumably also destroys ss via ic_destroy -- confirm callers expect that */
     272           0 :                 return NULL;
     273             :         }
     274           4 :         s->stream_data.p = ic;
     275           4 :         *ic = (struct icstream) {       /* buffer contents are zeroed by the compound literal */
     276             :                 .cd = cd,
     277             :                 .buflen = 0,
     278             :                 .eof = false,
     279             :         };
     280           4 :         return s;
     281             : }
     282             : /* Create a read stream on top of ss that converts from charset to UTF-8; returns ss itself if it is already UTF-8, NULL on error. */
     283             : stream *
     284           0 : iconv_rstream(stream *restrict ss, const char *restrict charset, const char *restrict name)
     285             : {
     286           0 :         stream *s;
     287           0 :         iconv_t cd;
     288             : 
     289           0 :         if (ss == NULL || charset == NULL || name == NULL)
     290             :                 return NULL;
     291             : #ifdef STREAM_DEBUG
     292             :         fprintf(stderr, "iconv_rstream %s %s\n", charset, name);
     293             : #endif
     294           0 :         if (ss->isutf8)
     295             :                 return ss;      /* no conversion needed */
     296           0 :         cd = iconv_open("utf-8", charset);      /* iconv_open(tocode, fromcode) */
     297           0 :         if (cd == (iconv_t) -1) {
     298           0 :                 mnstr_set_open_error(name, errno, "iconv_open");
     299           0 :                 return NULL;
     300             :         }
     301           0 :         s = ic_open(cd, ss, name);
     302           0 :         if (s == NULL) {
     303           0 :                 iconv_close(cd);        /* ic_open did not take over the descriptor */
     304           0 :                 return NULL;
     305             :         }
     306           0 :         s->readonly = true;
     307           0 :         s->isutf8 = true;       /* what is read from s is UTF-8 */
     308           0 :         return s;
     309             : }
     310             : /* Create a write stream on top of ss that converts UTF-8 input to charset; returns ss itself if it is flagged UTF-8, NULL on error. */
     311             : stream *
     312           4 : iconv_wstream(stream *restrict ss, const char *restrict charset, const char *restrict name)
     313             : {
     314           4 :         stream *s;
     315           4 :         iconv_t cd;
     316             : 
     317           4 :         if (ss == NULL || charset == NULL || name == NULL)
     318             :                 return NULL;
     319             : #ifdef STREAM_DEBUG
     320             :         fprintf(stderr, "iconv_wstream %s %s\n", charset, name);
     321             : #endif
     322           4 :         if (ss->isutf8)
     323             :                 return ss;      /* no conversion needed */
     324           4 :         cd = iconv_open(charset, "utf-8");      /* iconv_open(tocode, fromcode) */
     325           4 :         if (cd == (iconv_t) -1) {
     326           0 :                 mnstr_set_open_error(name, errno, "iconv_open");
     327           0 :                 return NULL;
     328             :         }
     329           4 :         s = ic_open(cd, ss, name);
     330           4 :         if (s == NULL) {
     331           0 :                 iconv_close(cd);        /* ic_open did not take over the descriptor */
     332           0 :                 return NULL;
     333             :         }
     334           4 :         s->readonly = false;
     335           4 :         return s;
     336             : }
     337             : 
     338             : #else /* !HAVE_ICONV: no conversion available, only UTF-8 is accepted */
     339             : stream *
     340             : iconv_rstream(stream *restrict ss, const char *restrict charset, const char *restrict name)
     341             : {
     342             :         if (ss == NULL || charset == NULL || name == NULL)
     343             :                 return NULL;
     344             :         if (ss->isutf8 ||
     345             :             strcmp(charset, "utf-8") == 0 ||    /* only these three exact spellings are recognized */
     346             :             strcmp(charset, "UTF-8") == 0 ||
     347             :             strcmp(charset, "UTF8") == 0)
     348             :                 return ss;      /* pass the stream through unchanged */
     349             : 
     350             :         mnstr_set_open_error(name, 0, "ICONV support has been left out of this MonetDB");
     351             :         return NULL;
     352             : }
     353             : /* Fallback without iconv: return ss unchanged when it or charset is UTF-8, otherwise set an open error and return NULL. */
     354             : stream *
     355             : iconv_wstream(stream *restrict ss, const char *restrict charset, const char *restrict name)
     356             : {
     357             :         if (ss == NULL || charset == NULL || name == NULL)
     358             :                 return NULL;
     359             :         if (ss->isutf8 ||
     360             :             strcmp(charset, "utf-8") == 0 ||    /* only these three exact spellings are recognized */
     361             :             strcmp(charset, "UTF-8") == 0 ||
     362             :             strcmp(charset, "UTF8") == 0)
     363             :                 return ss;      /* pass the stream through unchanged */
     364             : 
     365             :         mnstr_set_open_error(name, 0, "ICONV support has been left out of this MonetDB");
     366             :         return NULL;
     367             : }
     368             : #endif /* HAVE_ICONV */

Generated by: LCOV version 1.14