Viewing file:
utf7.c (8.3 KB) -rw-r--r--Select action/file-type:

(
+) |

(
+) |

(
+) |
Code (
+) |
Session (
+) |

(
+) |
SDB (
+) |

(
+) |

(
+) |

(
+) |

(
+) |

(
+) |
/*
** Copyright 2003 Double Precision, Inc.
** See COPYING for distribution information.
**
** $Id: utf7.c,v 1.3 2004/05/23 14:28:25 mrsam Exp $
*/
#include "unicode.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
/*
** The base64 alphabet.  A character's position in this string is the
** 6-bit value it stands for.
*/
static const char base64tab[]=
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789"
	"+/";
/*
** Decoder state for one shifted ("+...") run of UTF-7 input.  tou()
** initialises every counter/flag to zero before feeding the run through
** dec_b64_char().
*/
struct dec_base64_struct {
/* Values handed to dec_b64_char(), collected until four are present */
unsigned char base64buf[4];
/* Number of entries currently held in base64buf */
int cnt;
/* Decoded octets shifted in so far, eight bits at a time */
unicode_char uc1;
/* Number of octets accumulated in uc1 */
int ucnt;
/* Set by the caller once it starts padding the run with zeroes */
int flushing;
/* Set once a character completes while flushing; later octets are dropped */
int flushed;
};
/*
** Poor man's base64 decoder, second stage: collect decoded octets into
** 16-bit UTF-16 code units and turn those into Unicode characters.
**
** dc     - decoder state of the current shifted run.
** uuc    - next decoded octet.
** buffer - address of the output array pointer; when *buffer is NULL the
**          character is only counted, not stored.
** buflen - address of the output index, advanced by one for each character
**          produced.
**
** Two octets outside the surrogate range yield one character.  A high
** surrogate (0xD800-0xDBFF) is kept until two more octets arrive; the pair
** is then combined as 0x10000 + (high bits << 10 | low bits).
**
** While dc->flushing is set, the first completed character sets
** dc->flushed, after which every further octet is silently ignored.
**
** Returns 0 on success, or -1 when a low surrogate shows up without a
** preceding high surrogate or a high surrogate is not followed by a low
** one.  dc is not reset on failure; that is left to the caller.
*/
static int dec_b64_uchar(struct dec_base64_struct *dc,
			 unsigned char uuc,
			 unicode_char **buffer,
			 size_t *buflen)
{
	if (dc->flushed)
		return 0; /* Flushing trailing junk */

	dc->uc1 <<= 8;
	dc->uc1 |= uuc;
	++dc->ucnt;

	if (dc->ucnt == 2)
	{
		if (dc->uc1 < 0xD800 || dc->uc1 > 0xDFFF)
		{
			/* Not a surrogate: a complete character by itself */
			if (*buffer)
				(*buffer)[*buflen]=dc->uc1;
			++*buflen;
			dc->uc1=0;
			dc->ucnt=0;
			if (dc->flushing)
				dc->flushed=1;
			return 0;
		}

		if (dc->uc1 > 0xDBFF)
			return -1; /* Low surrogate without a high one */

		return 0; /* High surrogate: wait for the second unit */
	}

	if (dc->ucnt == 4)
	{
		unicode_char hi=(dc->uc1 >> 16) & 0xFFFF;

		dc->uc1 &= 0xFFFF;
		if (dc->uc1 < 0xDC00 || dc->uc1 > 0xDFFF)
			return -1; /* High surrogate not followed by a low one */

		if (*buffer)
		{
			/*
			** UTF-16: the 20 payload bits are an offset from
			** U+10000, not the code point itself.
			*/
			(*buffer)[*buflen]=0x10000 +
				(((hi & 0x3FF) << 10) | (dc->uc1 & 0x3FF));
		}
		++*buflen;
		dc->uc1=0;
		dc->ucnt=0;
		if (dc->flushing)
			dc->flushed=1;
		return 0;
	}

	return 0;
}
/*
** Poor man's base64 decoder, first stage: collect 6-bit values and, once
** four are available, unpack them into three octets that are passed, in
** order, to dec_b64_uchar().
**
** dc     - decoder state of the current shifted run.
** c      - the next 6-bit value (an index into the base64 alphabet, or 0
**          when the caller is padding).
** buffer - forwarded to dec_b64_uchar().
** buflen - forwarded to dec_b64_uchar().
**
** Returns 0 on success, -1 as soon as dec_b64_uchar() rejects an octet;
** the remaining octets of the group are then not delivered.
*/
static int dec_b64_char(struct dec_base64_struct *dc,
			char c,
			unicode_char **buffer,
			size_t *buflen)
{
	unsigned char octet[3];
	unsigned w, x, y, z;
	int n;

	dc->base64buf[dc->cnt]=c;
	if (++dc->cnt < 4)
		return 0; /* Need four characters to base64 decode */

	w=dc->base64buf[0];
	x=dc->base64buf[1];
	y=dc->base64buf[2];
	z=dc->base64buf[3];

	/*
	** Keep only the low eight bits of each combination explicitly,
	** rather than relying on an out-of-range conversion to plain char.
	*/
	octet[0]=(unsigned char)(((w << 2) | (x >> 4)) & 0xFF);
	octet[1]=(unsigned char)(((x << 4) | (y >> 2)) & 0xFF);
	octet[2]=(unsigned char)(((y << 6) | z) & 0xFF);

	dc->cnt=0;

	for (n=0; n<3; n++)
	{
		if (dec_b64_uchar(dc, octet[n], buffer, buflen))
			return -1;
	}
	return 0;
}
static unicode_char *tou(const struct unicode_info *foo, const char *p,
int *err)
{
int pass;
size_t i;
unicode_char *buffer=NULL;
size_t buflen=0;
/* Two passes. Count the output, alloc buffer, do it */
for (pass=0; pass<2; pass++)
{
if (pass)
{
if ((buffer=malloc((buflen+1)*sizeof(unicode_char)))
== NULL)
return NULL;
}
buflen=0;
for (i=0; p[i]; i++)
{
char *q;
struct dec_base64_struct dc;
if (p[i] != '+')
{
if (buffer)
{
buffer[buflen]=(unsigned char)p[i];
}
++buflen;
continue;
}
if (p[++i] == 0)
break;
if (p[i] == '-')
{
if (buffer)
buffer[buflen]='+';
++buflen;
continue;
}
dc.cnt=0;
dc.ucnt=0;
dc.uc1=0;
dc.flushing=0;
dc.flushed=0;
while ( p[i] && (q=strchr(base64tab, p[i])) != NULL)
{
if (dec_b64_char(&dc, (q-base64tab),
&buffer,
&buflen))
{
if (err)
{
*err=i;
errno=EINVAL;
return NULL;
}
/* Recover from decoding error */
dc.cnt=0;
dc.ucnt=0;
dc.uc1=0;
}
++i;
}
dc.flushing=1;
while (dc.cnt > 0)
{
if (dec_b64_char(&dc, 0,
&buffer,
&buflen))
{
if (err)
{
*err=i;
errno=EINVAL;
return NULL;
}
dc.cnt=0;
dc.ucnt=0;
dc.uc1=0;
}
}
if (p[i] == 0)
break;
if (p[i] != '-')
--i;
}
if (pass)
buffer[buflen]=0;
}
return buffer;
}
/* Poor man's base64 encoder */
/*
** Encoder state for one shifted ("+...") run of UTF-7 output, used by
** encode_base64_char().
*/
struct enc_base64_struct {
/* Octets collected until three are available to encode */
char base64buf[3];
/* Number of octets currently held in base64buf */
int cnt;
/*
** How many of the four output characters of the next group are omitted:
** the fourth is written only when clip < 1, the third only when clip < 2.
*/
int clip;
};
/*
** Feed one octet to the base64 encoder.  Nothing is produced until three
** octets have been collected; the group is then turned into four
** characters of the base64 alphabet.
**
** p      - encoder state.
** c      - the octet to add.
** buffer - address of the output pointer; when *buffer is NULL the
**          characters are only counted.
** buflen - address of the output index, advanced for every character
**          written or counted.
**
** The first two characters of a group are always produced.  The third is
** produced only when p->clip < 2 and the fourth only when p->clip < 1.
*/
static void encode_base64_char(struct enc_base64_struct *p,
			       char c,
			       char **buffer,
			       size_t *buflen)
{
	const unsigned char *raw;
	char quad[4];
	int keep;
	int n;

	p->base64buf[p->cnt]=c;
	if (++p->cnt < 3)
		return; /* Still waiting for a full group of three octets */

	raw=(const unsigned char *)p->base64buf;

	quad[0]=base64tab[ raw[0] >> 2 ];
	quad[1]=base64tab[ ((raw[0] & 3 ) << 4) | (raw[1] >> 4)];
	quad[2]=base64tab[ ((raw[1] & 15) << 2) | (raw[2] >> 6)];
	quad[3]=base64tab[ raw[2] & 63 ];

	p->cnt=0;

	/* Clip trailing junk, don't need it */
	if (p->clip < 1)
		keep=4;
	else if (p->clip < 2)
		keep=3;
	else
		keep=2;

	for (n=0; n < keep; n++)
	{
		if (*buffer)
			(*buffer)[*buflen]=quad[n];
		++*buflen;
	}
}
/*
** Pass the low 16 bits of uc to the base64 encoder as two octets, most
** significant octet first.  buffer and buflen are handed through to
** encode_base64_char() unchanged.
*/
static void encode_base64_u16(struct enc_base64_struct *p,
			      unicode_char uc,
			      char **buffer,
			      size_t *buflen)
{
	const char high_octet=(uc >> 8) & 255;
	const char low_octet=uc & 255;

	encode_base64_char(p, high_octet, buffer, buflen);
	encode_base64_char(p, low_octet, buffer, buflen);
}
/*
** Encode one Unicode character as UTF-16 and pass the resulting 16-bit
** code unit(s) to encode_base64_u16().
**
** p      - encoder state.
** uc     - the character to encode.
** buffer - handed through to encode_base64_u16().
** buflen - handed through to encode_base64_u16().
**
** Characters up to U+FFFF are emitted as a single code unit.  Characters
** above that are emitted as a surrogate pair built from (uc - 0x10000):
** the top ten bits go into the 0xD800 unit, the low ten bits into the
** 0xDC00 unit.
**
** A lone surrogate (0xD800-0xDFFF) or a value above 0x10FFFF cannot be
** represented in UTF-16.  Since this function has no way to report an
** error, U+FFFD (the replacement character) is emitted instead.
*/
static void encode_base64_u32(struct enc_base64_struct *p,
			      unicode_char uc,
			      char **buffer,
			      size_t *buflen)
{
	/* Compare as unsigned so that a negative value counts as too large */
	unsigned long cp=(unsigned long)uc;

	if ((cp >= 0xD800UL && cp <= 0xDFFFUL) || cp > 0x10FFFFUL)
		cp=0xFFFDUL;

	if (cp > 0xFFFFUL)
	{
		cp -= 0x10000UL;

		encode_base64_u16(p, (unicode_char)(0xD800UL | (cp >> 10)),
				  buffer, buflen);
		encode_base64_u16(p, (unicode_char)(0xDC00UL | (cp & 0x3FFUL)),
				  buffer, buflen);
		return;
	}

	encode_base64_u16(p, (unicode_char)cp, buffer, buflen);
}
/*
** Nonzero when c may be written to UTF-7 output as itself: newline,
** carriage return, tab, space, or a value from 33 through 125 other than
** 92 (the backslash in ASCII).  126 and everything above it is excluded.
** '+' (43) passes this test too, so callers must deal with it first.
**
** Every use of the argument is parenthesized so that an expression may be
** passed safely.  The argument is evaluated more than once; do not pass
** an expression with side effects.
*/
#define LITERAL(c) ( (c) == '\n' || (c) == '\r' || (c) == '\t' || (c) == ' ' \
	|| ( (c) >= 33 && (c) <= 125 && (c) != 92))
static char *fromu(const struct unicode_info *foo, const unicode_char *p,
int *err)
{
char *buffer=0;
size_t buflen=0;
int pass;
size_t i;
for (pass=0; pass<2; pass++)
{
if (pass)
{
if ((buffer=malloc(buflen+1)) == NULL)
return NULL;
}
buflen=0;
for (i=0; p[i]; i++)
{
struct enc_base64_struct eb;
if (p[i] == '+')
{
if (pass)
{
buffer[buflen]='+';
buffer[buflen+1]='-';
}
buflen += 2;
continue;
}
if (LITERAL(p[i]))
{
if (pass)
{
buffer[buflen]=(char)p[i];
}
++buflen;
continue;
}
if (pass)
buffer[buflen]='+';
++buflen;
eb.cnt=0;
eb.clip=0;
do
{
if (p[i] >= 0x10FFFF)
{
if (err)
{
*err=i;
errno=EINVAL;
return NULL;
}
encode_base64_u32(&eb, 0xFFFD,
&buffer, &buflen);
}
else
{
encode_base64_u32(&eb, p[i],
&buffer, &buflen);
}
++i;
} while ( p[i] && !LITERAL(p[i]));
switch (eb.cnt) {
case 2:
eb.clip=2;
break;
case 3:
eb.clip=1;
break;
}
while (eb.cnt)
{
encode_base64_char(&eb, 0, &buffer, &buflen);
}
if (!p[i])
{
if (pass)
{
buffer[buflen]='-';
}
++buflen;
break;
}
if (p[i] == '-')
{
if (pass)
{
buffer[buflen]='-';
}
++buflen;
}
--i;
}
if (pass)
buffer[buflen]=0;
}
return buffer;
}
/*
** UTF7.toupper/tolower/totitle is implemented by converting UTF7 to
** UCS-4, applying the unicode table lookup, then converting it back to
** UTF7
*/
/*
** Return a malloc()ed copy of the UTF-7 string cp with unicode_uc()
** applied to every decoded character.
**
** ip is passed to tou() as its error argument.  When re-encoding fails
** and ip is not NULL, *ip is set to 0.  Returns NULL when either
** conversion fails.
*/
static char *toupper_func(const struct unicode_info *u,
			  const char *cp, int *ip)
{
	unicode_char *wide;
	char *result;
	size_t n;

	wide=tou(u, cp, ip);
	if (wide == NULL)
		return NULL;

	for (n=0; wide[n] != 0; ++n)
		wide[n]=unicode_uc(wide[n]);

	result=fromu(u, wide, NULL);
	if (result == NULL && ip != NULL)
		*ip=0;

	free(wide);
	return result;
}
/*
** Return a malloc()ed copy of the UTF-7 string cp with unicode_lc()
** applied to every decoded character.
**
** ip is passed to tou() as its error argument.  When re-encoding fails
** and ip is not NULL, *ip is set to 0.  Returns NULL when either
** conversion fails.
*/
static char *tolower_func(const struct unicode_info *u,
			  const char *cp, int *ip)
{
	unicode_char *wide;
	char *result;
	size_t n;

	wide=tou(u, cp, ip);
	if (wide == NULL)
		return NULL;

	for (n=0; wide[n] != 0; ++n)
		wide[n]=unicode_lc(wide[n]);

	result=fromu(u, wide, NULL);
	free(wide);

	if (result == NULL && ip != NULL)
		*ip=0;

	return result;
}
/*
** Return a malloc()ed copy of the UTF-7 string cp with unicode_tc()
** applied to every decoded character.
**
** ip is passed to tou() as its error argument.  When re-encoding fails
** and ip is not NULL, *ip is set to 0.  Returns NULL when either
** conversion fails.
*/
static char *totitle_func(const struct unicode_info *u,
			  const char *cp, int *ip)
{
	unicode_char *wide;
	char *result;
	size_t n;

	wide=tou(u, cp, ip);
	if (wide == NULL)
		return NULL;

	for (n=0; wide[n] != 0; ++n)
		wide[n]=unicode_tc(wide[n]);

	result=fromu(u, wide, NULL);
	if (result == NULL && ip != NULL)
		*ip=0;

	free(wide);
	return result;
}
/*
** Character set descriptor for UTF-7.  The initializer lists, in order,
** the name "UTF-7", a combination of UNICODE_* flags, and the five static
** functions defined in this file: tou, fromu, toupper_func, tolower_func
** and totitle_func.  The meaning of each slot is fixed by the declaration
** of struct unicode_info (presumably in unicode.h -- not visible here).
*/
const struct unicode_info unicode_UTF7 = {
"UTF-7",
UNICODE_UTF | UNICODE_MB |
UNICODE_HEADER_QUOPRI | UNICODE_BODY_QUOPRI,
tou,
fromu,
toupper_func,
tolower_func,
totitle_func};
/*
** Manual test driver, compiled out by the "#if 0" below.  It converts
** three UTF-7 samples to UTF-8 with unicode_xconvert(), prints them, then
** converts each result back to UTF-7 and prints that as well.
** NOTE(review): the results of unicode_xconvert() are neither checked for
** NULL nor freed here -- confirm its contract before enabling this code.
*/
#if 0
extern const struct unicode_info unicode_UTF8;
int main(int argc, char **argv)
{
char *a, *b, *c;
a=unicode_xconvert("A+ImIDkQ.", &unicode_UTF7,
&unicode_UTF8);
b=unicode_xconvert("Hi Mom -+Jjo--!", &unicode_UTF7,
&unicode_UTF8);
c=unicode_xconvert("+ZeVnLIqe-", &unicode_UTF7,
&unicode_UTF8);
printf("%s\n", a);
printf("%s\n", b);
printf("%s\n", c);
printf("%s\n", unicode_xconvert(a, &unicode_UTF8, &unicode_UTF7));
printf("%s\n", unicode_xconvert(b, &unicode_UTF8, &unicode_UTF7));
printf("%s\n", unicode_xconvert(c, &unicode_UTF8, &unicode_UTF7));
return 0;
}
#endif