/*
 * charconv_example.c
 *
 *  Example of a character-conversion callout for MX V5.2
 *  and V5.3.
 *
 * Copyright (c) 2008, Matthew Madison.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *     * Redistributions of source code must retain the above
 *       copyright notice, this list of conditions and the following
 *       disclaimer.
 *     * Redistributions in binary form must reproduce the above
 *       copyright notice, this list of conditions and the following
 *       disclaimer in the documentation and/or other materials provided
 *       with the distribution.
 *     * Neither the name of the copyright owner nor the names of any
 *       other contributors may be used to endorse or promote products
 *       derived from this software without specific prior written
 *       permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * MODULE DESCRIPTION:
 *
 *  This module contains routines that implement character
 *  conversion for use with MX's VMS MAIL interface.  You should
 *  use this module to provide appropriate conversion between the
 *  local native character set and a standard Internet character
 *  set as used in e-mail messages.
* * Logical names are used to specify the local and network * character sets used by this module; however, only one of each * is supported. A relatively simple extension could allow for * support of multiple network character sets, if needed. * * The code could also be modified to provide selection of * the local character set name on a per-user basis. Note, * however, that the per-user character set selection applies only * to message contents, not VMS MAIL headers. * * This module has built-in support for conversion between * the DEC Multinational Character Set (local) and ISO Latin-1 (network). * For all other character set combinations, it relies on the * iconv() routines provided in the DEC C Run-Time Library; * consult the DEC C documentation for further information on * these routines. * * See MX_EXAMPLES_DIR:CHARCONV_EXAMPLE_README.TXT for more * information. * * NOTE: This module was written for recent versions of DEC C * and OpenVMS. You may need to modify the code if you * are using the VAX C compiler and/or have an older version * of the operating system. 
*
 */
/*
 * NOTE(review): the header names of the #include directives below are
 * missing in this copy of the file (each directive is a bare "#include").
 * They must be restored from the original distribution before this
 * module can be compiled.
 */
#include
#include
#include
#include
#include
#include
#include
#define __NEW_STARLET
#include
#include
#include
#include
#include
#include

/* True when a VMS condition value indicates success. */
#define OK(x) $VMS_STATUS_SUCCESS(x)

/*
 * Local type definitions
 */
typedef u_int32_t vms_status_t;     /* VMS condition value */

/* Conversion direction selector passed (by reference) to INIT. */
typedef enum {
    CHARCONV__LOCAL_TO_NETWORK = 1,
    CHARCONV__NETWORK_TO_LOCAL = 2
} conv_code_t;

/* Selects which character set name GETCSNAME returns. */
typedef enum {
    CHARCONV__LOCAL_CHARSET = 1,
    CHARCONV__NETWORK_CHARSET = 2
} charset_code_t;

/* Boolean as returned through CONVERT's "converted" argument. */
typedef enum {
    BBOOL_FALSE = 0,
    BBOOL_TRUE = 1
} bbool_t;

#define CTX_S_CSNAME 64     /* size of a character set name buffer, including the NUL */
#define CTX_S_BUFFER 65536  /* size of the conversion buffer allocated after the context */

/*
 * Conversion context.  INIT builds one on its stack and copies it into
 * a block obtained from LIB$GET_VM; FINISH releases that block.
 */
typedef struct {
    iconv_t ctx_cd;                     /* iconv descriptor; opened only when ctx_mcs_latin1 is 0 */
    conv_code_t ctx_direction;          /* copy of the direction code given to INIT */
    int ctx_mcs_latin1;                 /* non-zero: use the built-in DEC MCS <-> Latin-1 tables */
    char *ctx_bufptr;                   /* conversion buffer; INIT points it just past this structure */
    char ctx_lcsname[CTX_S_CSNAME];     /* local character set name (NUL-terminated) */
    char ctx_ncsname[CTX_S_CSNAME];     /* network character set name (NUL-terminated) */
} context_t;

/*
 * Forward declarations
 */
vms_status_t INIT(const conv_code_t *code, void **ctxptr, u_int16_t *lclcslen, struct dsc$descriptor *lclcs, const struct dsc$descriptor *netcs, const struct dsc$descriptor *usrnam);
vms_status_t CONVERT(void **ctxptr, const struct dsc$descriptor *instr, u_int16_t *outlen, struct dsc$descriptor *outstr, bbool_t *converted, u_int16_t *remain);
vms_status_t GETCSNAME(void **ctxptr, const charset_code_t *code, u_int16_t *len, struct dsc$descriptor *name);
vms_status_t FINISH(void **ctxptr);
static int get_logical(const char *lognam, char *buf, size_t buflen);

/*
 * Local static storage
 */

/* Size of the context structure; INIT and FINISH add CTX_S_BUFFER to it. */
static int32_t context_size = sizeof(context_t);

/*
 * Built-in translation table, DEC MCS -> ISO Latin-1, for bytes with
 * the high bit set.  CONVERT indexes it with (byte & 0x7F).  Several
 * entries are 0x20 (ASCII space); presumably these mark characters with
 * no equivalent in the target set -- confirm against the character set
 * definitions.
 */
static const unsigned char mcs_to_latin1[128] = {
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
    0x20, 0xA1, 0xA2, 0xA3, 0x20, 0xA5, 0xA6, 0xA7, 0xA4, 0xA9, 0xAA, 0xAB, 0x20, 0x20, 0x20, 0x20,
    0xB0, 0xB1, 0xB2, 0xB3, 0x20, 0xB5, 0xB6, 0xB7, 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0x20, 0xBF,
    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
    0x20, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0x20, 0xDF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFF, 0x20, 0x20
};

/*
 * Built-in translation table, ISO Latin-1 -> DEC MCS, for bytes with
 * the high bit set.  Indexed the same way as mcs_to_latin1.
 */
static const unsigned char latin1_to_mcs[128] = {
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
    0xA0, 0xA1, 0xA2, 0xA3, 0xA8, 0xA5, 0x20, 0xA7, 0x20, 0xA9, 0xAA, 0xAB, 0x20, 0x20, 0x20, 0x20,
    0xB0, 0xB1, 0xB2, 0xB3, 0x20, 0xB5, 0xB6, 0xB7, 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0x20, 0xBF,
    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
    0x20, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0x20, 0xDF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0x20, 0xFD
};

/*
 * ROUTINE: INIT
 *
 * DESCRIPTION:
 *  Initializes for a character conversion.
 *
 *  This routine should fill in the local character set name even
 *  if it returns a failure status.
 *
 *  If the netcs parameter is NULL, the caller is expecting
 *  conversion between the local character set and the default
 *  network character set, or vice-versa.  If the netcs parameter
 *  is non-NULL, the conversion is between a character set
 *  explicitly named in a MIME header and the local character set.
 *
 *  In MX V5.3, the usrnam parameter points to a character string
 *  with the username of the local user, so that character set
 *  customization may be performed on a per-user basis.
* * PARAMETERS: * code: conv_code_t, read only, by reference * ctxptr: address of context, write only, by reference * lclcslen: word_unsigned, write only, by reference * lclcs: char_string, write only, by descriptor * netcs: char_string, read only, by descriptor (optional) * usrnam: char_string, read only, by descriptor (optional) * * RETURNS: VMS condition value * SS$_NORMAL: initialization successful; ready for conversion * SS$_BADPARAM: invalid parameter, or: * - local and network character sets are identical, no conversion needed * - no conversion available for the specified character set */ vms_status_t INIT (const conv_code_t *code, void **ctxptr, u_int16_t *lclcslen, struct dsc$descriptor *lclcs, const struct dsc$descriptor *netcs, const struct dsc$descriptor *usrnam_optional_arg__) { vms_status_t status; char lbuf[65], nbuf[65], *cp; const char *ptr; int32_t ctxsize; u_int16_t len; context_t ctx, *ctxp; const struct dsc$descriptor *usrnam; unsigned int argc; if (*code != CHARCONV__LOCAL_TO_NETWORK && *code != CHARCONV__NETWORK_TO_LOCAL) return SS$_BADPARAM; va_count(argc); usrnam = (argc > 5 ? 
usrnam_optional_arg__ : 0); memset(&ctx, 0, sizeof(ctx)); ctx.ctx_direction = *code; if (!get_logical("MX_SITE_LOCAL_CHARSET", ctx.ctx_lcsname, sizeof(ctx.ctx_lcsname))) strcpy(ctx.ctx_lcsname, "dec-mcs"); /* * Check for mapping to a DEC C character set name */ cp = strchr(ctx.ctx_lcsname, '='); if (0 == cp) strcpy(lbuf, ctx.ctx_lcsname); else { *cp++ = '\0'; strcpy(lbuf, cp); } if (!get_logical("MX_SITE_NETWORK_CHARSET", ctx.ctx_ncsname, sizeof(ctx.ctx_ncsname))) strcpy(ctx.ctx_ncsname, "iso-8859-1"); /* * Check for mapping to a DEC C character set name */ cp = strchr(ctx.ctx_ncsname, '='); if (0 == cp) strcpy(nbuf, ctx.ctx_ncsname); else { *cp++ = '\0'; strcpy(nbuf, cp); } /* * Always fill in the local character set name */ len = strlen(ctx.ctx_lcsname); if (lclcslen != 0) *lclcslen = len; if (lclcs != 0) LIB$SCOPY_R_DX(&len, ctx.ctx_lcsname, lclcs); /* * If a network character set name was provided, * use that instead of the default */ if (netcs != 0) { len = netcs->dsc$w_length; ptr = netcs->dsc$a_pointer; if (len > CTX_S_CSNAME-1) len = CTX_S_CSNAME-1; memcpy(ctx.ctx_ncsname, ptr, len); ctx.ctx_ncsname[len] = '\0'; strcpy(nbuf, ctx.ctx_ncsname); } /* * If the character set names are identical, no conversion * needed. Otherwise, if we're using the built-in converter, * set a flag in the context. Finally, if we aren't using * the built-in converter, initialize the DEC C RTL converter. */ if (strcasecmp(ctx.ctx_lcsname, ctx.ctx_ncsname) == 0) return SS$_BADPARAM; /* no translation needed */ if (strcasecmp(ctx.ctx_lcsname, "DEC-MCS") == 0 && strcasecmp(ctx.ctx_ncsname, "ISO-8859-1") == 0) ctx.ctx_mcs_latin1 = 1; else { if (*code == CHARCONV__LOCAL_TO_NETWORK) ctx.ctx_cd = iconv_open(nbuf, lbuf); else ctx.ctx_cd = iconv_open(lbuf, nbuf); if (ctx.ctx_cd == (iconv_t)(-1)) return SS$_BADPARAM; } /* * If we are successful to this point, allocate and * initialize a context block and conversion buffer. 
*/ ctxsize = context_size + CTX_S_BUFFER; status = LIB$GET_VM(&ctxsize, &ctxp); if (!OK(status)) { if (!ctx.ctx_mcs_latin1) iconv_close(ctx.ctx_cd); return status; } memcpy(ctxp, &ctx, context_size); ctxp->ctx_bufptr = (void *) (ctxp + 1); *ctxptr = ctxp; return SS$_NORMAL; } /* INIT */ /* * ROUTINE: CONVERT * * DESCRIPTION: * Converts a character string. * * PARAMETERS: * ctxptr: context pointer, modify, by reference * instr: char_string, read only, by descriptor * outlen: word_unsigned, write only, by reference * outstr: char_string, write only, by descriptor * converted: BLISS boolean value, write only, by reference * remain: word_unsigned, write only, by reference * * RETURNS: VMS condition value */ vms_status_t CONVERT (void **ctxptr, const struct dsc$descriptor *instr, u_int16_t *outlen, struct dsc$descriptor *outstr, bbool_t *converted, u_int16_t *remain) { context_t *ctx = *ctxptr; char *inp; char *outp; size_t inlen, outmax, count, inremain; bbool_t did_one; /* * The input and output descriptors will always be either * CLASS_S or CLASS_D. * * If the output descriptor is CLASS_D (dynamic string descriptor), * we perform the conversion into our intermediate buffer, then * copy the result to the output string using LIB$SCOPY_R_DX. * Otherwise, we can perform the conversion directly into the * static string provided by the caller. */ inp = instr->dsc$a_pointer; inremain = inlen = instr->dsc$w_length; if (outstr->dsc$b_class == DSC$K_CLASS_D) { outp = ctx->ctx_bufptr; outmax = CTX_S_BUFFER; } else { outp = outstr->dsc$a_pointer; outmax = outstr->dsc$w_length; } did_one = BBOOL_FALSE; if (ctx->ctx_mcs_latin1) { /* * Use our built-in conversion table. Note that for DEC MCS * and ISO-Latin-1, 0x00-0x7F are identical (both US-ASCII). */ for (count = 0; count < outmax; count++) { if (*inp & 0x80) { *outp++ = (ctx->ctx_direction == CHARCONV__LOCAL_TO_NETWORK ? 
mcs_to_latin1[*inp & 0x7F] : latin1_to_mcs[*inp & 0x7F]); inp += 1; did_one = BBOOL_TRUE; } else *outp++ = *inp++; inremain -= 1; } } else { /* * Use iconv() to convert. If we run into a character that cannot * be converted (EILSEQ error), just copy the input character directly * to the output string. If any other error occurs, we break out of * the loop. * * iconv() returns the count of "identity" conversions. We know there * was an actual conversion if that return count is less than the number * of characters provided in the input string. */ size_t outremain = outmax, result; while (inremain > 0 && outremain > 0) { size_t inremain_in = inremain; result = iconv(ctx->ctx_cd, &inp, &inremain, &outp, &outremain); if (result != (size_t) -1) { if (result < inremain_in) did_one = BBOOL_TRUE; } else if (errno != EILSEQ) break; if (inremain == 0 || outremain == 0) break; *outp++ = *inp++; inremain--; outremain--; } count = outmax - outremain; } /* * Return the actual length of the output string. */ if (outlen != 0) *outlen = count; /* * Let the caller know whether or not any of the characters * were actually converted. */ if (converted != 0) *converted = did_one; /* * Let the caller know how many characters in the input string * were NOT converted. */ if (remain != 0) *remain = inremain; /* * If the output string was dynamic, copy to it from our * conversion buffer. */ if (outstr->dsc$b_class == DSC$K_CLASS_D) { u_int16_t len = count; return LIB$SCOPY_R_DX (&len, ctx->ctx_bufptr, outstr); } else return SS$_NORMAL; } /* CONVERT */ /* * ROUTINE: GETCSNAME * * DESCRIPTION: * Returns a character set name. 
* * PARAMETERS: * ctxptr: address of context, write only, by reference * code: charset_code_t, read only, by reference * xlen: word_unsigned, write only, by reference * name: char_string, write only, by descriptor * * RETURNS: VMS condition value */ vms_status_t GETCSNAME (void **ctxptr, const charset_code_t *code, u_int16_t *xlen, struct dsc$descriptor *name) { context_t *ctx = *ctxptr; u_int16_t len; char *namep; vms_status_t status; switch (*code) { default: return SS$_BADPARAM; break; case CHARCONV__LOCAL_CHARSET: len = strlen(ctx->ctx_lcsname); namep = ctx->ctx_lcsname; break; case CHARCONV__NETWORK_CHARSET: len = strlen(ctx->ctx_ncsname); namep = ctx->ctx_ncsname; break; } status = LIB$SCOPY_R_DX (&len, namep, name); if (OK(status) && xlen != 0) *xlen = len; return status; } /* GETCSNAME */ /* * ROUTINE: FINISH * * DESCRIPTION: * Cleans up after a conversion sequence. * * PARAMETERS: * ctxptr: address of context, write only, by reference * * RETURNS: VMS condition value */ vms_status_t FINISH (void **ctxptr) { context_t *ctx = *ctxptr; int32_t ctxsize = context_size + CTX_S_BUFFER; if (!ctx->ctx_mcs_latin1) iconv_close(ctx->ctx_cd); LIB$FREE_VM(&ctxsize, &ctx); *ctxptr = 0; return SS$_NORMAL; } /* FINISH */ /* * ROUTINE: get_logical * * DESCRIPTION: * Translates a logical name. 
* * PARAMETERS: * lognam: char_string, read only, by reference (null-terminated) * buf: char_string, write only, by reference (null-terminated) * bufsize: size_t, read only, by value * * RETURNS: int * 0: error * non-zero: success */ static int get_logical (const char *lognam, char *buf, size_t bufsize) { struct item_list_3 { u_int16_t bufsiz; u_int16_t itmcod; void *bufadr; u_int16_t *retlen; u_int32_t terminator; } lnmlst; struct dsc$descriptor dsc; u_int16_t retlen; static $DESCRIPTOR(lnm_file_dev, "LNM$FILE_DEV"); lnmlst.bufsiz = bufsize - 1; lnmlst.itmcod = LNM$_STRING; lnmlst.bufadr = buf; lnmlst.retlen = &retlen; lnmlst.terminator = 0; dsc.dsc$b_class = DSC$K_CLASS_S; dsc.dsc$b_dtype = DSC$K_DTYPE_T; dsc.dsc$w_length = strlen(lognam); dsc.dsc$a_pointer = (void *) lognam; if (!OK(SYS$TRNLNM (0, &lnm_file_dev, &dsc, 0, &lnmlst))) return 0; if (retlen == 0) return 0; buf[retlen] = '\0'; return 1; } /* get_logical */