Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:dirkmueller:Factory
hcode
hdcode.c
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File hdcode.c of Package hcode
#ifndef lint
static char rcsid[] = "$Id: hdcode.c,v 1.6 1997/11/19 04:16:52 news Exp news $";
#endif

/*
 * $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
 *                       CLEAN_QP Copyright Notice
 *
 * Most of the following CLEAN_QP codes are stealed from a package called
 * hmailer-beta2 written by Song Woo-Geel cookie@venus.etri.re.kr.
 *
 * I am not sure if I can distribute it or not.
 * However, The following is the original copyright statement.
 *
 * ***************************************************************
 * Copyright (C) 1995 Song Woo-Geel
 * Written by cookie@venus.etri.re.kr on May. 12 '95.
 * ***************************************************************
 *
 * Now, the person who stealed cookie's code and his Copyright is
 *
 * ***************************************************************
 * Copyright (C) 1997 Sang-yong Suh <sysuh@kigam.re.kr>
 *
 * THIS CODE IS PROVIDED AS IS AND WITHOUT ANY WARRANTY.
 * USE IT AT YOUR OWN RISK AND DON'T COMAPLAIN ME OR TO COOKIE.
 *
 * $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
 */

/*
 * Revision history
 *
 * 1997/05/16 sysuh	Check charset="iso-2022-kr" (note the quotes)
 * 1997/06/14 sysuh	Replace charset=iso-8859-1 to EUC-KR
 * 1997/11/18 sysuh	Decode embedded QP texts within multipart.
 */

/*
 * hMailDecode : decode Korean "Q" or "B" encoded HANGUL news article
 *
 * SYNOPSIS
 *	1. standalone usage: compile with -D_MAIN
 *		hdcode [-n] [file]
 *
 *	2. as subroutine:
 *
 *	   hMailDecode(NULL, NULL);			# initialize
 *	   while (fgets(ibuf, sizeof(ibuf), stdin))
 *		hMailDecode(ibuf, obuf);		# pass one line at a time
 *
 *	   ibuf and obuf may be the same buffer (this is how hNewsCleanQP()
 *	   uses it).  The buffer must have room for one extra byte because a
 *	   missing trailing newline is appended.
 *
 * OPTIONS
 *
 *	decode file and write to stdout.
 *	-n	    Only filter articles posted to a han.* newsgroup.
 *	[file]	    If file arg is missing, read stdin.
 *
 * NOTE:
 *	Header encoding  : RFC-1342 ( "Q" or "B" encoding )
 *	Content encoding : Quoted-printable or ISO-2022-KR or Base64.
 */

#ifndef CLEAN_QP

/* Stub used when the CLEAN_QP filter is compiled out: does nothing. */
void hMailDecode(char *ibuf, char *obuf)
{
    (void) ibuf;
    (void) obuf;
}

#else /* CLEAN_QP */

#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>	/* strcasecmp(), strncasecmp() */
#include <ctype.h>
#include <assert.h>

/* RFC-1342 header encoding start/end sequence */
#define PREFIX		"=?"
#define POSTFIX		"?="
#define SUFFIXQP	"?Q?"
#define SUFFIXB64	"?B?"
#define OLDPREFIX	"=?B?EUC-KR?"

#define HEADER_CTE	"Content-Transfer-Encoding: "
#define HEADER_CT	"Content-Type: "
#define KOR_CHARSET	"EUC-KR"	/* KSC-5601 */

/* ISO-2022 encoding designator escape sequence */
#define INTRO_ISO	"\033$)C"
#define SHIFTOUT	'\016'	/* ASCII SO */
#define SHIFTIN		'\017'	/* ASCII SI */
#define DEL		'\177'	/* ASCII DEL */
#define OFFSET		( unsigned char ) 0200	/* or ISO encoding offset */

#define LSIZ	4096
#define TRUE	1
#define FALSE	0

enum section_t { SEC_HEADER, SEC_BODY };
enum encode_t { ENC_UNKNOWN, ENC_NONE, ENC_QP, ENC_ISO, ENC_B64 };

/*
 * Copy the NUL-terminated string src to dst.  The two may overlap or be
 * the very same buffer (in-place filtering), which plain strcpy() does
 * not allow.
 */
static void copyLine(char *dst, const char *src)
{
    if (dst != src)
	memmove(dst, src, strlen(src) + 1);
}

/*
 * Case-insensitive substring search.  Returns a pointer to the first
 * occurrence of needle in hay, or NULL.  Local replacement for the
 * non-standard strcasestr() so no extension macros are required.
 */
static char *findNoCase(const char *hay, const char *needle)
{
    size_t nlen = strlen(needle);

    for (; *hay; hay++)
	if (strncasecmp(hay, needle, nlen) == 0)
	    return (char *) hay;
    return NULL;
}

/* recognize encoding name and convert to encode_t type */
static enum encode_t encodingInfo(const char *arg)
{
    if (!arg)
	return ENC_UNKNOWN;

    /* <ctype.h> functions need an unsigned char value */
    while (isspace((unsigned char) *arg))
	arg++;

    if (strncasecmp(arg, "7bit", 4) == 0 ||
	strncasecmp(arg, "8bit", 4) == 0 ||
	strncasecmp(arg, "none", 4) == 0)
	return ENC_NONE;
    if (strncasecmp(arg, "quoted-printable", 16) == 0)
	return ENC_QP;
    if (strncasecmp(arg, "base64", 6) == 0)
	return ENC_B64;
    return ENC_UNKNOWN;
}

/*
 * Drop carriage returns from the string, in place.  A CR is kept only
 * when the previous character was also a CR.  The previous character is
 * remembered across calls in a static variable.
 */
static void uncanonize(char *iptr)
{
    static char oldch = '\0';
    char *optr = iptr;		/* overwrite input buffer */

    for (; *iptr; iptr++) {
	if (*iptr != '\r' || oldch == '\r')
	    *optr++ = *iptr;
	oldch = *iptr;
    }
    *optr = '\0';
}

/*
 * Decode an ISO-2022-KR coded line to KSC-5601 (EUC-KR).
 * Bytes between SO and SI get the high bit set; SO/SI themselves are
 * removed.  Output is never longer than input, so optr may equal iptr.
 * A line that ends while still shifted is tolerated (articles are
 * untrusted input and must not abort the filter).
 */
static void decodeISOLine(const char *iptr, char *optr)
{
    int shifted = 0;		/* Each line begins in unshifted state */

    assert(iptr != NULL && optr != NULL);

    for (; *iptr; iptr++) {
	if (*iptr == SHIFTOUT)
	    shifted = 1;
	else if (*iptr == SHIFTIN)
	    shifted = 0;
	else if (shifted && *iptr > ' ' && *iptr < DEL)
	    *optr++ = (char) ((unsigned char) *iptr + OFFSET);
	else
	    *optr++ = *iptr;
    }
    *optr = '\0';
}

/*
 * decodeB64Str()
 * Return value : actual decoded resulting string length.
 * limit is max length to decode, actual input may be shorter.
 * Input size may be not multiple of 3 or 4. returns right size.
 *
 * Adapted by from bq.c, Copyright (C) 1992 Ienup Sung.
 * @(#)bq.c: base64 encode/decode modules by is@ev.trigem.co.kr 1992.7.22
 */
#define LS6B	00077L	/* least significant 6 bits */
#define LS8B	00377L	/* least significant 8 bits */
#define PAD	'='

static const char b64_alphabet[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
 * Decode base64 encoded string.  Characters outside the alphabet
 * (including PAD) contribute zero bits.  The output is NUL-terminated
 * and is never longer than the input, so obuf may alias ibuf.
 */
static int decodeB64Str(const char *ibuf, char *obuf, int limit)
{
    unsigned long bitbuf = 0;
    const char *iptr = ibuf;
    const char *offs;
    char *optr = obuf;
    int valid = 0, mod4 = 0;

    assert(ibuf != NULL && obuf != NULL && limit >= 0);

    /*
     * strchr() would match the alphabet's own terminator for '\0', so an
     * empty input must not enter the loop at all.
     */
    if (*ibuf == '\0')
	limit = 0;

    while (limit > 0) {
	bitbuf = (bitbuf << 6) & ~LS6B;
	if ((offs = strchr(b64_alphabet, *iptr)) != NULL) {
	    bitbuf |= (unsigned long) (offs - b64_alphabet);
	    valid++;
	} else			/* PAD or non b64 alphabet: ignore */
	    bitbuf &= ~LS6B;

	iptr++;
	if (*iptr == '\0' || iptr - ibuf >= limit) {
	    /* align to 4 * 6 bit shifted position */
	    while (++mod4 % 4 != 0)
		bitbuf = (bitbuf << 6) & ~LS6B;
	    limit = 0;		/* leave the loop after flushing */
	} else
	    ++mod4;

	if (mod4 % 4 == 0) {
	    if (valid >= 2)
		*optr++ = (char) ((bitbuf >> 16) & LS8B);
	    if (valid >= 3)
		*optr++ = (char) ((bitbuf >> 8) & LS8B);
	    if (valid == 4)
		*optr++ = (char) (bitbuf & LS8B);
	    bitbuf = 0;
	    valid = 0;
	}
    }
    *optr = '\0';
    return (int) (optr - obuf);
}

/* RFC1341 BASE64 BODY section decoding; the trailing '\n' is ignored */
static void decodeB64Line(const char *ibuf, char *obuf)
{
    size_t len;

    assert(ibuf != NULL && obuf != NULL);

    len = strlen(ibuf);
    if (len > 0 && ibuf[len - 1] == '\n')
	len--;
    decodeB64Str(ibuf, obuf, (int) len);
}

/* Value of one hexadecimal digit, or -1 if ch is not a hex digit */
static int hexValue(char ch)
{
    if (ch >= '0' && ch <= '9')
	return ch - '0';
    if (ch >= 'A' && ch <= 'F')
	return 10 + ch - 'A';
    if (ch >= 'a' && ch <= 'f')
	return 10 + ch - 'a';
    return -1;
}

/* Convert two hexadecimal digit char to integer */
/* ex) h2toi('4', 'E') == 0x4e == 'N' */
/* If any char is not hexadecimal digit, return -1 */
static int h2toi(char ch1, char ch2)
{
    int d1 = hexValue(ch1);
    int d2 = hexValue(ch2);

    if (d1 < 0 || d2 < 0)
	return -1;
    return (int) (((d1 << 4) + d2) & LS8B);
}

/*
 * If p points at a "=XX" quoted-printable escape return the byte it
 * encodes, otherwise -1.  Never reads past the string terminator.
 */
static int qpEscape(const char *p)
{
    if (p[0] != '=' || p[1] == '\0')
	return -1;
    return h2toi(p[1], p[2]);
}

/*
 * Decode at most limit characters of a header "Q" encoded text:
 * "=XX" becomes the byte 0xXX and '_' becomes a blank.
 */
static void decodeQPStr(const char *ibuf, char *obuf, int limit)
{
    const char *iptr = ibuf;
    char *optr = obuf;
    int ctmp;

    assert(ibuf != NULL && obuf != NULL && limit >= 0);

    while (*iptr && (iptr - ibuf < limit)) {
	if ((ctmp = qpEscape(iptr)) >= 0) {
	    *optr++ = (char) ctmp;
	    iptr += 3;
	} else if (*iptr == '_') {	/* translate */
	    *optr++ = ' ';
	    iptr++;
	} else
	    *optr++ = *iptr++;
    }
    *optr = '\0';
}

/*
 * RFC1341 QP BODY section decoding: "=XX" becomes a byte and a '='
 * directly before the newline (soft line break) is removed together
 * with the newline.
 */
static void decodeQPLine(const char *iptr, char *obuf)
{
    char *optr = obuf;
    int ctmp;

    assert(iptr != NULL && obuf != NULL);

    while (*iptr) {
	if ((ctmp = qpEscape(iptr)) >= 0) {
	    *optr++ = (char) ctmp;
	    iptr += 3;
	} else if (iptr[0] == '=' && iptr[1] == '\n')
	    iptr += 2;		/* skip soft line break */
	else
	    *optr++ = *iptr++;
    }
    *optr = '\0';
}

/*
 * Cut off one trailing LF, one trailing CR and any trailing blanks/tabs,
 * then make sure the line ends in "\n".  May grow the string by one
 * byte when the input had no line terminator.
 */
static void fixTrailer(char *ibuf)
{
    size_t keep;

    assert(ibuf != NULL);

    /* index based so that nothing before ibuf[0] is ever examined */
    keep = strlen(ibuf);
    if (keep > 0 && ibuf[keep - 1] == '\n')
	keep--;
    if (keep > 0 && ibuf[keep - 1] == '\r')
	keep--;
    while (keep > 0 && (ibuf[keep - 1] == ' ' || ibuf[keep - 1] == '\t'))
	keep--;

    if (ibuf[keep] != '\n') {
	ibuf[keep] = '\n';
	ibuf[keep + 1] = '\0';
    }
}

/*
 * Decode header section by RFC1342 MIME "Q" or "B" header encoding rule.
 * Every "=?charset?Q?text?=" / "=?charset?B?text?=" word in iptr is
 * replaced by its decoded text; everything else is copied verbatim.
 * Returns the encoding of the last decoded word, or HeadEncoding
 * unchanged if none was decoded.
 *
 * optr may be the same buffer as iptr: the output position never gets
 * ahead of the input position and no terminator is written before the
 * whole input has been consumed.
 */
static enum encode_t decodeHeader(char *iptr, char *optr,
				  enum encode_t HeadEncoding)
{
    const size_t prelen = strlen(PREFIX);
    const size_t postlen = strlen(POSTFIX);
    const size_t suflen = strlen(SUFFIXQP);
    char *preptr, *sufptr, *txtptr, *postptr;

    assert(iptr != NULL && optr != NULL);

    while (*iptr) {
	preptr = strstr(iptr, PREFIX);
	sufptr = preptr ? strchr(preptr + prelen, '?') : NULL;
	postptr = NULL;
	/* only look for POSTFIX if "?X?" really fits inside the string */
	if (sufptr != NULL && sufptr[1] != '\0' && sufptr[2] != '\0')
	    postptr = strstr(sufptr + suflen, POSTFIX);

	if (postptr == NULL) {
	    /* missing PREFIX or POSTFIX, do not decode the rest */
	    memmove(optr, iptr, strlen(iptr) + 1);
	    return HeadEncoding;
	}
	txtptr = sufptr + suflen;

	/* (header)  =?EUC-KR?Q?=89=AB=CD=EF?=   */
	/* (ptr's)   ^pre    ^sf^txt        ^post */

	memmove(optr, iptr, (size_t) (preptr - iptr));
	optr += preptr - iptr;
	iptr = preptr;

	if (strncasecmp(sufptr, SUFFIXQP, suflen) == 0) {
	    decodeQPStr(txtptr, optr, (int) (postptr - txtptr));
	    optr += strlen(optr);
	    HeadEncoding = ENC_QP;
	} else if (strncasecmp(sufptr, SUFFIXB64, strlen(SUFFIXB64)) == 0) {
	    decodeB64Str(txtptr, optr, (int) (postptr - txtptr));
	    optr += strlen(optr);
	    HeadEncoding = ENC_B64;
	}
	/* For compatibility with old (before Dec. 94) elm2.3h or hcode2.0 */
	else if (strncasecmp(preptr, OLDPREFIX, strlen(OLDPREFIX)) == 0) {
	    txtptr = preptr + strlen(OLDPREFIX);
	    /* POSTFIX may have been found inside the old prefix itself */
	    decodeB64Str(txtptr, optr,
			 postptr > txtptr ? (int) (postptr - txtptr) : 0);
	    optr += strlen(optr);
	    HeadEncoding = ENC_B64;
	} else {
	    /* Unknown coding, copy the whole encoded word undecoded */
	    size_t rawlen = (size_t) (postptr + postlen - iptr);

	    memmove(optr, iptr, rawlen);
	    optr += rawlen;
	}
	iptr = postptr + postlen;
    }
    *optr = '\0';
    return HeadEncoding;
}

/*
 * Look for a "charset=" parameter in ibuf.  If its value is iso-2022-kr
 * or iso-8859-1 (optionally quoted) the rest of the line is replaced by
 * KOR_CHARSET and a newline.  Returns 1 if a charset parameter was
 * present (whether replaced or not), 0 otherwise.
 */
static int replace_charset(char *ibuf)
{
    char *p, *q;

    p = strstr(ibuf, "charset=");
    if (p == NULL)
	return 0;

    q = p + 8;
    if (*q == '"')
	q++;
    if (strncasecmp(q, "iso-2022-kr", 11) == 0 ||
	strncasecmp(q, "iso-8859-1", 10) == 0)
	strcpy(p + 8, KOR_CHARSET "\n");
    return 1;			/* charset detected */
}

/*
 * Extract the multipart boundary from a line containing boundary="...".
 * Returns a malloc()ed copy of the boundary text (caller frees), or NULL
 * if the line has no quoted boundary parameter, the text after the
 * opening quote is 10 characters or shorter, nothing is left after
 * trimming, or memory is exhausted.
 */
static char *get_mpb_string(const char *ibuf)
{
    static const char key[] = "boundary=\"";
    const char *p;
    char *bstr, *quote;
    size_t len;

    p = findNoCase(ibuf, key);
    if (p == NULL)
	return NULL;
    p += sizeof key - 1;

    len = strlen(p);
    if (len <= 10)		/* minimum length of the boundary string */
	return NULL;

    bstr = malloc(len + 1);
    if (bstr == NULL)
	return NULL;
    memcpy(bstr, p, len + 1);

    /* the boundary ends at the closing quote; drop trailing parameters */
    quote = strchr(bstr, '"');
    if (quote != NULL) {
	*quote = '\0';
	len = (size_t) (quote - bstr);
    }
    /* no closing quote: strip the line terminator and separators */
    while (len > 0 && (bstr[len - 1] == '\n' || bstr[len - 1] == '"' ||
		       bstr[len - 1] == ';'))
	bstr[--len] = '\0';

    if (len == 0) {		/* an empty boundary would match any line */
	free(bstr);
	return NULL;
    }
    return bstr;
}

/* True if the Content-Type value mentions "text" (case-insensitive) */
static int isContentTypeText(const char *cstr)
{
    return findNoCase(cstr, "text") != NULL;
}

/*
 * True if str looks like a uuencode header: optional leading blanks,
 * "begin ", three digits, optional blanks and at least one more
 * character.
 */
static int isUueHeader(const char *str)
{
    int i;

    while (isspace((unsigned char) *str))
	str++;
    if (strncasecmp(str, "begin ", 6) != 0)
	return 0;

    /* check three digits and a <blank>. */
    str += 6;
    if (strlen(str) < 5)	/* 3 digits, 1 blank, and 1 filename */
	return 0;
    for (i = 0; i < 3; i++)
	if (!isdigit((unsigned char) *str++))
	    return 0;
    while (isspace((unsigned char) *str))	/* skip blanks */
	str++;
    return *str != '\0';
}

/*
 * Filter one article line.
 *
 * Called with a NULL or empty ibuf it resets the per-article state
 * (section, encodings, pending Content-Type, multipart boundary) and
 * returns SEC_HEADER.  Otherwise ibuf is normalised with fixTrailer()
 * (so it is modified and may grow by one byte), the decoded line is
 * stored in obuf, and the section the line belongs to is returned.
 * obuf may be the same buffer as ibuf.
 */
static enum section_t hMailDecode(char *ibuf, char *obuf)
{
    static enum section_t section = SEC_HEADER;
    static enum encode_t HeadEncoding = ENC_UNKNOWN;
    static enum encode_t Encoding = ENC_UNKNOWN;
    static int isoMode = 0;
    static int isPendingCT = 0;	/* previous CT header is incomplete */
    static char *mpBoundary = NULL;	/* multipart boundary text holder */
    char *p, *q;

    /* initialize */
    if (!ibuf || !*ibuf) {
	section = SEC_HEADER;
	HeadEncoding = ENC_UNKNOWN;
	Encoding = ENC_UNKNOWN;
	isoMode = 0;
	isPendingCT = 0;
	free(mpBoundary);
	mpBoundary = NULL;
	return section;
    }

    fixTrailer(ibuf);

    /* an empty line terminates the header section */
    if (section == SEC_HEADER && *ibuf == '\n') {
	section = SEC_BODY;
	strcpy(obuf, "\n");
	return section;
    }

    if (section == SEC_HEADER) {
	if (isPendingCT) {
	    if (!isspace((unsigned char) *ibuf))
		isPendingCT = 0;	/* not a continuation line */
	    else if (!replace_charset(ibuf) && !mpBoundary
		     && (mpBoundary = get_mpb_string(ibuf)) != NULL) {
		copyLine(obuf, ibuf);
		uncanonize(obuf);
		return section;
	    }
	}

	/* Content-Type must be checked before Content-Transfer-Encoding */
	if (strncasecmp(ibuf, HEADER_CT, strlen(HEADER_CT)) == 0) {
	    p = ibuf + strlen(HEADER_CT);
	    if ((q = findNoCase(p, "multipart")) != NULL) {
		free(mpBoundary);	/* do not leak an earlier boundary */
		mpBoundary = get_mpb_string(q + 9);
		if (mpBoundary == NULL)
		    isPendingCT = 1;	/* continued header */
	    } else if (!isContentTypeText(p)) {
		Encoding = ENC_NONE;
	    } else if (!replace_charset(p)) {
		isPendingCT = 1;
	    }
	    copyLine(obuf, ibuf);
	} else if (Encoding != ENC_NONE
		   && strncasecmp(ibuf, HEADER_CTE, strlen(HEADER_CTE)) == 0) {
	    Encoding = encodingInfo(ibuf + strlen(HEADER_CTE));
	    /* the body will be decoded, so advertise it as 8bit */
	    if (Encoding == ENC_QP || Encoding == ENC_B64)
		sprintf(obuf, "%s%s\n", HEADER_CTE, "8bit");
	    else
		copyLine(obuf, ibuf);
	} else if (strstr(ibuf, PREFIX)) {
	    HeadEncoding = decodeHeader(ibuf, obuf, HeadEncoding);
	} else {
	    copyLine(obuf, ibuf);
	}
    } else if (section == SEC_BODY) {
	if (mpBoundary && strstr(ibuf, mpBoundary)) {
	    /* a multipart boundary starts a new part with its own headers */
	    section = SEC_HEADER;
	    HeadEncoding = ENC_UNKNOWN;
	    Encoding = ENC_UNKNOWN;
	    isoMode = 0;
	    copyLine(obuf, ibuf);
	} else if (Encoding == ENC_NONE) {
	    copyLine(obuf, ibuf);
	} else if (isUueHeader(ibuf)) {
	    Encoding = ENC_NONE;	/* leave uuencoded data alone */
	    copyLine(obuf, ibuf);
	} else if (Encoding == ENC_QP) {
	    decodeQPLine(ibuf, obuf);
	} else if (Encoding == ENC_B64) {
	    decodeB64Line(ibuf, obuf);
	} else if ((p = strstr(ibuf, INTRO_ISO)) != NULL) {
	    /* remove ISO intro sequence from ibuf (regions overlap) */
	    size_t introlen = strlen(INTRO_ISO);

	    memmove(p, p + introlen, strlen(p + introlen) + 1);
	    decodeISOLine(ibuf, obuf);
	    isoMode = 1;
	}
	/*
	 * If headers are "B" encoded AND content line has SO char without
	 * prior ISO introducer, we assume missing introducer. It's feature.
	 */
	else if ((isoMode || HeadEncoding == ENC_B64)
		 && strchr(ibuf, SHIFTOUT) != NULL) {
	    decodeISOLine(ibuf, obuf);
	} else {
	    copyLine(obuf, ibuf);
	}
    }

    uncanonize(obuf);
    return section;
}

/*
** Is it posted on a Han newsgroups?
** line is the comma separated value of a Newsgroups: header.  Returns
** TRUE if any group starts with "han." but not with "han.test".
*/
int IsHanNewsgroups(char *line)
{
    char *p = line;

    while (p != NULL) {
	while (isspace((unsigned char) *p))
	    p++;
	if (strncmp(p, "han.", 4) == 0 && strncmp(p, "han.test", 8) != 0)
	    return TRUE;
	p = strchr(p, ',');
	if (p != NULL)
	    p++;		/* step over the comma */
    }
    return FALSE;
}

/*
** Return CleanQP filtered article.  NULL indicates ERROR.
**
** article is processed line by line; it is modified temporarily while
** running but restored before returning.  If checkNewsgroups is set and
** the first Newsgroups: line names no han.* group, NULL is returned.
** Every header whose text was changed by the filter is followed by an
** "X-Orig-" copy of the original header.
** The result is malloc()ed; the caller frees it.
*/
char *
hNewsCleanQP(char *article, int checkNewsgroups)
{
    char *newart, *grown;
    size_t newlen, grownlen;
    size_t used = 0;
    size_t linelen, pending, required;
    char *p, *q, *next;
    char hold = '\0';
    char *orig = NULL;
    enum section_t section;

    newlen = strlen(article) + LSIZ;
    newart = malloc(newlen);
    if (newart == NULL) {
	fprintf(stderr, "hNewsDecode: can't malloc %lu bytes\n",
		(unsigned long) newlen);
	return NULL;
    }

    section = hMailDecode((char *) NULL, (char *) NULL);

    p = article;
    for (;;) {
	/*
	** Make the input line.  Remember the char of start of next line.
	*/
	next = strchr(p, '\n');
	if (next) {
	    hold = *++next;
	    *next = '\0';
	} else
	    hold = '\0';	/* last line: nothing can continue it */

	/*
	** Check Newsgroups line.
	*/
	if (checkNewsgroups && strncasecmp(p, "newsgroups:", 11) == 0) {
	    checkNewsgroups = FALSE;
	    if (!IsHanNewsgroups(p + 11)) {
		free(newart);
		if (next)
		    *next = hold;
		return NULL;
	    }
	}

	/*
	** Allocate output space.  Worst case for this line: the filtered
	** line (input plus an added newline) followed by an X-Orig- copy
	** of the pending original header, which may span earlier lines.
	*/
	linelen = strlen(p);
	pending = orig ? (size_t) (p - orig) : 0;
	required = used + 2 * linelen + pending + sizeof("X-Orig-") + 2;
	if (newlen - used < LSIZ + LSIZ || newlen < required) {
	    grownlen = newlen + LSIZ + LSIZ;
	    if (grownlen < required + LSIZ + LSIZ)
		grownlen = required + LSIZ + LSIZ;
	    grown = realloc(newart, grownlen);
	    if (grown == NULL) {
		fprintf(stderr, "hNewsDecode: can't realloc %lu bytes\n",
			(unsigned long) grownlen);
		free(newart);	/* realloc left the old block allocated */
		if (next)
		    *next = hold;	/* recover old char */
		return NULL;
	    }
	    newart = grown;
	    newlen = grownlen;
	}

	/*
	** Apply filter, and set up next output pointer.
	*/
	q = newart + used;
	memcpy(q, p, linelen + 1);
	section = hMailDecode(q, q);
	if (section == SEC_HEADER) {
	    /* remember the first line of a header the filter changed */
	    if (orig == NULL && !isspace((unsigned char) *p)
		&& strcmp(p, q) != 0)
		orig = p;
	    /* emit the copy once the header has no continuation line */
	    if (orig && (!isspace((unsigned char) hold) || hold == '\n')) {
		used += strlen(q);
		sprintf(newart + used, "X-Orig-%s", orig);
		orig = NULL;
	    }
	}
	used += strlen(newart + used);

	if (next == NULL)
	    break;
	p = next;
	*p = hold;		/* restore the article text */
    }
    return newart;
}

#ifdef _MAIN
/*
 * Standalone filter: hdcode [-n] [file]
 * Reads the whole article, filters it and writes the result to stdout.
 * If the filter returns NULL the article is written unchanged.
 */
int main(int argc, char **argv)
{
    char *article, *newart, *grown;
    char line[LSIZ];
    size_t len;
    size_t used = 0;
    size_t artlen = 0;
    int checkNewsgroups = FALSE;
    int i;
    char *infile = NULL;

    for (i = 1; i < argc; i++) {
	if (strcmp(argv[i], "-n") == 0)
	    checkNewsgroups = TRUE;
	else if (*argv[i] != '-' && infile == NULL)
	    infile = argv[i];
	else {
	    fprintf(stderr, "usage: %s [-n] [file]\n", argv[0]);
	    exit(1);
	}
    }

    if (infile && !freopen(infile, "r", stdin)) {
	fprintf(stderr, "can't open input %s\n", infile);
	exit(1);
    }

    artlen = 100 * LSIZ;
    article = malloc(artlen);
    if (article == NULL) {
	fprintf(stderr, "can't malloc %lu bytes\n", (unsigned long) artlen);
	exit(1);
    }

    while (fgets(line, sizeof(line), stdin)) {
	len = strlen(line);
	if (artlen <= used + len) {
	    artlen += LSIZ * 10;
	    grown = realloc(article, artlen);
	    if (grown == NULL) {
		fprintf(stderr, "can't realloc %lu bytes\n",
			(unsigned long) artlen);
		free(article);
		exit(1);
	    }
	    article = grown;
	}
	memcpy(article + used, line, len);
	used += len;
    }
    article[used] = '\0';

    newart = hNewsCleanQP(article, checkNewsgroups);
    if (newart) {
	free(article);
	article = newart;
    }
    fputs(article, stdout);
    free(article);
    return 0;
}
#endif /* _MAIN */
#endif /* CLEAN_QP */
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an
openSUSE project
.
Sign Up
Log In
Places
Places
All Projects
Status Monitor