#include	<stdio.h>
#include	<stdlib.h>
#include	<string.h>
#include	<ctype.h>
#include	<locale.h>
#include	<assert.h>
#include	"encutil.h"
#include	"encutil2.h"

/**
 *	Rewrites a multibyte string in place so that it can be used as a
 *	file name.<br>
 *	The rules are modelled on IE 6.0 for Windows XP SP2:<br>
 *	'/' becomes '-', the characters \ * ? " < > | : are removed, and
 *	control codes (below 0x20) become spaces.<br>
 *	A byte for which isleadbyte() is true is copied together with the
 *	byte that follows it, so a trail byte that happens to equal one of
 *	the characters above is left untouched.
 *
 *	@param str NUL-terminated string to convert (multibyte). It is
 *	           overwritten with the result, which is never longer than
 *	           the input.
 */
void convFilename(unsigned char *str)
{
	/* Next write position. It never gets ahead of the read position
	 * (str), so compacting in place is safe. */
	unsigned char *out = str;

	while (*str){
		unsigned char ch = *str;

		switch (ch){
			case '/':
				*out++ = '-';
				str++;
			break;
			case '\\':
			case '*':
			case '?':
			case '\"':
			case '<':
			case '>':
			case '|':
			case ':':
				/* Not allowed in file names: drop it by advancing the
				 * reader only. */
				str++;
			break;
			default:
				/* Control codes are turned into spaces. */
				if (ch < 0x20){
					ch = ' ';
				}
				*out++ = ch;
				str++;
				/* Keep the second byte of a double-byte character
				 * without inspecting it (unless the string ends here). */
				if (isleadbyte(ch) && *str){
					*out++ = *str++;
				}
			break;
		}
	}
	*out = '\0';
}

/**
 *	Rewrites a wide-character string in place so that it can be used as
 *	a file name.<br>
 *	The rules are modelled on IE 6.0 for Windows XP SP2:<br>
 *	'/' becomes '-', the characters \ * ? " < > | : are removed, and
 *	control codes (below 0x20) become spaces.
 *
 *	@param str NUL-terminated string to convert (Unicode). It is
 *	           overwritten with the result, which is never longer than
 *	           the input.
 */
void convFilenameW(wchar_t *str)
{
	/* Next write position. It never gets ahead of the read position
	 * (str), so compacting in place is safe. */
	wchar_t *out = str;

	for (; *str; str++){
		wchar_t ch = *str;

		switch (ch){
			case L'/':
				*out++ = L'-';
			break;
			case L'\\':
			case L'*':
			case L'?':
			case L'\"':
			case L'<':
			case L'>':
			case L'|':
			case L':':
				/* Not allowed in file names: dropped (nothing written). */
			break;
			default:
				/* Control codes are turned into spaces. */
				if (ch < 0x20){
					ch = L' ';
				}
				*out++ = ch;
			break;
		}
	}
	*out = L'\0';
}


/*
 * Replacement text for named character references.
 * convChRef() reads it as chMap[lang][reference]: the first index is the
 * language (0: English, 1: Japanese), the second selects the reference
 * in the order listed on the second row.
 */
static const char *chMap[2][5] = {
	{
		"&",
		" ",
		"_",
		"_",
		"_"
	},{
		"&",	// &amp; and unknown
		" ",	// &nbsp;
		"h",	// &quot; -- NOTE(review): "h" looks like the remains of a double-byte quotation mark whose lead byte was lost in an encoding conversion; verify against the original file
		"<",	// &lt;
		">"	// &gt;
	}
};

/**
 * Expands one numeric character reference into a multibyte character
 * (encoding of the current locale, via wctomb()).
 *
 * src must point just past the "&#" of the reference. Both the decimal
 * form ("1234;") and the hexadecimal form ("x4E00;" / "X4E00;") are
 * accepted. Parsing stops at the terminating ';' (which is consumed) or
 * at the first character that is not a digit of the expected base
 * (which is not consumed).
 *
 *	@param dst    receives the converted character (multibyte, not
 *	              NUL-terminated); may be the same buffer as src
 *	@param src    text following "&#" (multibyte)
 *	@param dstLen receives the number of bytes written to dst; 0 when the
 *	              reference evaluates to 0 (nothing is written)
 *	@param srcLen receives the number of characters of src that made up
 *	              the reference
 */
void convNumRef(char *dst,char *src,int *dstLen,int *srcLen)
{
	int origLen = 0;	/* characters of src consumed so far */
	unsigned long uc = 0;	/* code point accumulated so far */
	int result;

	if ((*src == 'x') || (*src == 'X')){
		/* hexadecimal form */
		origLen++;
		src++;

		for (;;){
			/* <ctype.h> functions need a value representable as
			 * unsigned char; plain char may be negative for the
			 * high bytes of multibyte text. */
			unsigned char ch = (unsigned char)*src;

			if (ch == ';'){
				origLen++;
				break;
			}
			if (!isxdigit(ch)){
				break;
			}
			if (isdigit(ch)){
				uc = (uc << 4) + (unsigned long)(ch - '0');
			}else if (isupper(ch)){
				uc = (uc << 4) + (unsigned long)(ch - 'A' + 10);
			}else{
				/* only lowercase a-f remains */
				uc = (uc << 4) + (unsigned long)(ch - 'a' + 10);
			}

			origLen++;
			src++;
		}
	}else{
		/* decimal form */
		for (;;){
			unsigned char ch = (unsigned char)*src;

			if (ch == ';'){
				origLen++;
				break;
			}
			if (!isdigit(ch)){
				break;
			}
			uc = uc * 10 + (unsigned long)(ch - '0');
			origLen++;
			src++;
		}
	}

	*srcLen = origLen;
	if (uc == 0){
		*dstLen = 0;
		return;
	}

	/* A value that does not survive the round trip through wchar_t would
	 * be silently truncated to a different character; treat it like any
	 * other unconvertible character instead. */
	if ((unsigned long)(wchar_t)uc != uc){
		*dst = '_';
		*dstLen = 1;
		return;
	}

	result = wctomb(dst,(wchar_t)uc);
	if (result == -1){
		/* cannot be represented in the current locale */
		*dst = '_';
		*dstLen = 1;
		return;
	}
	*dstLen = result;
}

/**
 * Character-reference expansion routine (deliberately simplistic).
 * Copies src to dst, replacing numeric references ("&#...") through
 * convNumRef() and the named references &amp; &nbsp; &quot; &lt; &gt;
 * with the text stored in chMap[lang]. A '&' that starts none of these
 * is replaced with the chMap entry used for &amp;.
 * The result written to dst is NUL-terminated.
 *
 * @param dst  receives the converted string (multibyte); may be the same
 *             buffer as src
 * @param src  string to convert (multibyte)
 * @param lang 0: English  1: Japanese (first index into chMap)
 */
void convChRef(char *dst,char *src,char lang)
{
	/* Recognised names, in the column order of chMap. */
	static const struct {
		const char *name;
		int len;
	} entities[5] = {
		{ "&amp;",  5 },
		{ "&nbsp;", 6 },
		{ "&quot;", 6 },
		{ "&lt;",   4 },
		{ "&gt;",   4 }
	};
	int consumed,produced;

	/* Every path leaves src on the last character it handled; the loop
	 * increment then steps past it. */
	for (; *src != '\0'; src++){
		const char *repl;
		int col,k;

		if (*src != '&'){
			*dst++ = *src;
			continue;
		}

		if (src[1] == '#'){
			/* numeric character reference */
			convNumRef(dst,src + 2,&produced,&consumed);
			dst += produced;
			src += 1 + consumed;
			continue;
		}

		/* Named reference; column 0 doubles as the fallback for an
		 * unrecognised '&'. */
		col = 0;
		for (k = 0; k < 5; k++){
			if (strncmp(src,entities[k].name,entities[k].len) == 0){
				col = k;
				src += entities[k].len - 1;
				break;
			}
		}
		for (repl = chMap[(int)lang][col]; *repl != '\0'; repl++){
			*dst++ = *repl;
		}
	}
	*dst = '\0';
}

/**
 * Expands one numeric character reference into UTF-8.
 *
 * src must point just past the "&#" of the reference. Both the decimal
 * form ("1234;") and the hexadecimal form ("x4E00;" / "X4E00;") are
 * accepted. Parsing stops at the terminating ';' (which is consumed) or
 * at the first character that is not a digit of the expected base
 * (which is not consumed).
 *
 *	@param dst    receives the converted character (UTF-8); may be the
 *	              same buffer as src
 *	@param src    text following "&#" (UTF-8)
 *	@param dstLen receives the value returned by countUTF16LEtoUTF8str()
 *	              for the character (presumably its UTF-8 byte count --
 *	              TODO confirm against encutil); 0 when the reference
 *	              evaluates to 0 (nothing is written)
 *	@param srcLen receives the number of characters of src that made up
 *	              the reference
 */
void convNumRefUTF8(char *dst,char *src,int *dstLen,int *srcLen)
{
	int origLen = 0;	/* characters of src consumed so far */
	unsigned long uc = 0;	/* code point accumulated so far */
	wchar_t buf[2];

	if ((*src == 'x') || (*src == 'X')){
		/* hexadecimal form */
		origLen++;
		src++;

		for (;;){
			/* <ctype.h> functions need a value representable as
			 * unsigned char; plain char may be negative for UTF-8
			 * lead and continuation bytes. */
			unsigned char ch = (unsigned char)*src;

			if (ch == ';'){
				origLen++;
				break;
			}
			if (!isxdigit(ch)){
				break;
			}
			if (isdigit(ch)){
				uc = (uc << 4) + (unsigned long)(ch - '0');
			}else if (isupper(ch)){
				uc = (uc << 4) + (unsigned long)(ch - 'A' + 10);
			}else{
				/* only lowercase a-f remains */
				uc = (uc << 4) + (unsigned long)(ch - 'a' + 10);
			}

			origLen++;
			src++;
		}
	}else{
		/* decimal form */
		for (;;){
			unsigned char ch = (unsigned char)*src;

			if (ch == ';'){
				origLen++;
				break;
			}
			if (!isdigit(ch)){
				break;
			}
			uc = uc * 10 + (unsigned long)(ch - '0');
			origLen++;
			src++;
		}
	}

	*srcLen = origLen;
	if (uc == 0){
		*dstLen = 0;
		return;
	}

	/* Convert the referenced character to UTF-8 through a one-character
	 * wide string.
	 * NOTE(review): the cast truncates values that do not fit in wchar_t
	 * (e.g. code points above 0xFFFF where wchar_t is 16 bits). */
	buf[0] = (wchar_t)uc;
	buf[1] = L'\0';

	*dstLen = countUTF16LEtoUTF8str(buf);
	toUTF8( (unsigned char *)dst , buf);
}

/**
 * Character-reference expansion routine (deliberately simplistic).
 * Copies src to dst, replacing numeric references ("&#...") through
 * convNumRefUTF8() and the named references &amp; &nbsp; &quot; &lt;
 * &gt; with the single characters & (space) " < >. A '&' that starts
 * none of these is copied as '&'.
 * The result written to dst is NUL-terminated.
 *
 * @param dst  receives the converted string (UTF-8); may be the same
 *             buffer as src
 * @param src  string to convert (UTF-8)
 * @param lang 0: English  1: Japanese (not used by this function)
 */
void convChRefUTF8(char *dst,char *src,char lang)
{
	/* Recognised names and the character each one expands to. */
	static const struct {
		const char *name;
		int len;
		char repl;
	} entities[5] = {
		{ "&amp;",  5, '&'  },
		{ "&nbsp;", 6, ' '  },
		{ "&quot;", 6, '\"' },
		{ "&lt;",   4, '<'  },
		{ "&gt;",   4, '>'  }
	};
	int consumed,produced;

	/* Every path leaves src on the last character it handled; the loop
	 * increment then steps past it. */
	for (; *src != '\0'; src++){
		char repl;
		int k;

		if (*src != '&'){
			*dst++ = *src;
			continue;
		}

		if (src[1] == '#'){
			/* numeric character reference */
			convNumRefUTF8(dst,src + 2,&produced,&consumed);
			dst += produced;
			src += 1 + consumed;
			continue;
		}

		/* Named reference; an unrecognised '&' stays a plain '&'. */
		repl = '&';
		for (k = 0; k < 5; k++){
			if (strncmp(src,entities[k].name,entities[k].len) == 0){
				repl = entities[k].repl;
				src += entities[k].len - 1;
				break;
			}
		}
		*dst++ = repl;
	}
	*dst = '\0';
}

/*
 * Replacement text for named character references (wide-character
 * version). convChRefW() reads it as chMapW[reference].
 */
static const wchar_t chMapW[5][5] = {
		L"&",	// &amp; and unknown
		L" ",	// &nbsp;
		L"h",	// &quot; -- NOTE(review): "h" looks like the remains of a double-byte quotation mark whose lead byte was lost in an encoding conversion; verify against the original file
		L"<",	// &lt;
		L">"	// &gt;
};

/**
 * Expands one numeric character reference into a wide character.
 *
 * src must point just past the "&#" of the reference. Both the decimal
 * form ("1234;") and the hexadecimal form ("x4E00;" / "X4E00;") are
 * accepted. Parsing stops at the terminating ';' (which is consumed) or
 * at the first character that is not a digit of the expected base
 * (which is not consumed).
 *
 *	@param dst    receives the converted character (Unicode, not
 *	              NUL-terminated); may be the same buffer as src
 *	@param src    text following "&#" (Unicode)
 *	@param dstLen receives the number of wide characters written to dst:
 *	              1, or 0 when the reference evaluates to 0
 *	@param srcLen receives the number of characters of src that made up
 *	              the reference
 */
void convNumRefW(wchar_t *dst,wchar_t *src,int *dstLen,int *srcLen)
{
	int origLen = 0;	/* characters of src consumed so far */
	unsigned long uc = 0;	/* code point accumulated so far */

	if ((*src == L'x') || (*src == L'X')){
		/* hexadecimal form */
		origLen++;
		src++;

		for (;;){
			wchar_t ch = *src;

			if (ch == L';'){
				origLen++;
				break;
			}
			if ((ch >= L'0') && (ch <= L'9')){
				uc = (uc << 4) + (unsigned long)(ch - L'0');
			}else if ((ch >= L'A') && (ch <= L'F')){
				uc = (uc << 4) + (unsigned long)(ch - L'A' + 10);
			}else if ((ch >= L'a') && (ch <= L'f')){
				uc = (uc << 4) + (unsigned long)(ch - L'a' + 10);
			}else{
				break;
			}

			origLen++;
			src++;
		}
	}else{
		/* decimal form */
		for (;;){
			wchar_t ch = *src;

			if (ch == L';'){
				origLen++;
				break;
			}
			/* Range test instead of isdigit(): isdigit() is only
			 * defined for unsigned char values and EOF, not for
			 * arbitrary wide characters. */
			if ((ch < L'0') || (ch > L'9')){
				break;
			}
			uc = uc * 10 + (unsigned long)(ch - L'0');
			origLen++;
			src++;
		}
	}

	*srcLen = origLen;
	if (uc == 0){
		*dstLen = 0;
	}else{
		/* NOTE(review): the cast truncates values that do not fit in
		 * wchar_t. */
		*dst = (wchar_t)uc;
		*dstLen = 1;
	}
}

/**
 * Character-reference expansion routine (deliberately simplistic).
 * Copies src to dst, replacing numeric references ("&#...") through
 * convNumRefW() and the named references &amp; &nbsp; &quot; &lt; &gt;
 * with the text stored in chMapW. A '&' that starts none of these is
 * replaced with the chMapW entry used for &amp;.
 * The result written to dst is NUL-terminated.
 *
 * @param dst receives the converted string (Unicode); may be the same
 *            buffer as src
 * @param src string to convert (Unicode)
 */
void convChRefW(wchar_t *dst,wchar_t *src)
{
	/* Recognised names, in the row order of chMapW. */
	static const struct {
		const wchar_t *name;
		int len;
	} entities[5] = {
		{ L"&amp;",  5 },
		{ L"&nbsp;", 6 },
		{ L"&quot;", 6 },
		{ L"&lt;",   4 },
		{ L"&gt;",   4 }
	};
	int consumed,produced;

	/* Every path leaves src on the last character it handled; the loop
	 * increment then steps past it. */
	for (; *src != L'\0'; src++){
		const wchar_t *repl;
		int row,k;

		if (*src != L'&'){
			*dst++ = *src;
			continue;
		}

		if (src[1] == L'#'){
			/* numeric character reference */
			convNumRefW(dst,src + 2,&produced,&consumed);
			dst += produced;
			src += 1 + consumed;
			continue;
		}

		/* Named reference; row 0 doubles as the fallback for an
		 * unrecognised '&'. */
		row = 0;
		for (k = 0; k < 5; k++){
			if (wcsncmp(src,entities[k].name,entities[k].len) == 0){
				row = k;
				src += entities[k].len - 1;
				break;
			}
		}
		for (repl = chMapW[row]; *repl != L'\0'; repl++){
			*dst++ = *repl;
		}
	}
	*dst = L'\0';
}

/**
 * Tells whether a character may appear unescaped in a URL.<br>
 * (The set follows the form in which browsers record URLs rather than
 * the strict URI "unreserved" set.)
 *
 * Letters, digits and the characters - . _ ~ : / , ( ) ! $ & ' = are
 * accepted. Control characters, space, every other ASCII character and
 * any value outside 0x00-0x7F are rejected.
 *
 * @param c character to test
 * @return 0: must be escaped (reserved or non-ASCII)  1: may be left as is
 */
int isUrlNoreserved(int c)
{
	/* Non-ASCII, including negative values produced by sign-extended
	 * chars; this also keeps out-of-range values away from <ctype.h>. */
	if ((c < 0) || (c > 0x7f)) {
		return 0;
	}
	/* Control characters and space. ('!' is 0x21 and must get through
	 * to the list below.) */
	if (c <= 0x20) {
		return 0;
	}

	if (isalnum(c)) {
		return 1;
	}
	switch (c) {
		case '-':
		case '.':
		case '_':
		case '~':
		case ':':	/* from here on: not escaped by browsers in URLs */
		case '/':
		case ',':
		case '(':
		case ')':
		case '!':
		case '$':
		case '&':
		case '\'':
		case '=':
			return 1;
		default:
			return 0;
	}
}

/**
 * Percent-encodes a URL.<br>
 * isUrlNoreserved() decides which characters are copied unchanged;
 * every other byte is written as "%XX" (two uppercase hex digits).
 * Spaces are therefore written as "%20", not as '+'.
 *
 * @param escapedUrl receives the encoded, NUL-terminated string; the
 *                   caller must provide room for up to three bytes per
 *                   source byte plus the terminator
 * @param srcUrl     URL to encode
 * @return always 0
 */
int encodeURL(char *escapedUrl,const char *srcUrl)
{
	const char *in;
	char *out = escapedUrl;

	for (in = srcUrl; *in != '\0'; in++) {
		if (!isUrlNoreserved(*in)) {
			/* reserved or non-ASCII: emit the byte value as %XX */
			sprintf(out,"%%%02X",((int)*in & 0xff));
			out += 3;
		} else {
			*out++ = *in;
		}
	}
	*out = '\0';
	return 0;
}

/**
 * Expands percent-encoding in a URL.
 *
 * Every "%XX" whose two characters are hexadecimal digits is replaced by
 * the byte with that value. A '%' that is not followed by two hex digits
 * is copied unchanged. '+' is not converted to a space.
 *
 * @param dest	receives the decoded, NUL-terminated URL (Mozilla
 *				notation); may be NULL to only count the output length
 * @param src	URL to decode (IE notation)
 * @return number of characters in the decoded result, not counting the
 *         terminator
 */
int decodeURL(char *dest,char *src)
{
	int charCount = 0;

	while (*src != '\0') {
		char out = *src;	/* default: copy the character as is */
		int advance = 1;

		if (*src == '%') {
			/* <ctype.h> functions need a value representable as
			 * unsigned char; plain char may be negative. src[2] is
			 * only read when src[1] is not the terminator. */
			unsigned char hi = (unsigned char)src[1];
			unsigned char lo = (hi != '\0') ? (unsigned char)src[2] : '\0';

			if (isxdigit(hi) && isxdigit(lo)) {
				int hiVal = isdigit(hi) ? (hi - '0') : (toupper(hi) - 'A' + 10);
				int loVal = isdigit(lo) ? (lo - '0') : (toupper(lo) - 'A' + 10);

				out = (char)((hiVal << 4) + loVal);
				advance = 3;
			}
		}

		if (dest != NULL) {
			*dest++ = out;
		}
		src += advance;
		charCount++;
	}
	if (dest != NULL) {
		*dest = '\0';
	}
	return charCount;
}

/**
 * Percent-encodes only the non-ASCII bytes of a URL.<br>
 * '%' is taken to belong to an already-encoded sequence and is copied
 * unchanged, like every other ASCII character except space, which is
 * written as '+'. Bytes of 0x80 and above are written as "%XX" (two
 * uppercase hex digits).
 *
 * @param escapedUrl receives the encoded, NUL-terminated string; the
 *                   caller must provide room for up to three bytes per
 *                   source byte plus the terminator
 * @param srcUrl     URL to encode
 * @return always 0
 */
int encodeNonAsciiURL(char *escapedUrl,const char *srcUrl)
{
	for (; *srcUrl != '\0'; srcUrl++) {
		/* Work on the unsigned byte value so the ASCII test does not
		 * depend on the signedness of char or on the non-standard
		 * isascii(). */
		unsigned char c = (unsigned char)*srcUrl;

		if (c == 0x20) {
			*escapedUrl++ = '+';
		} else if (c < 0x80) {
			*escapedUrl++ = (char)c;
		} else {
			/* non-ASCII: emit the byte value as %XX */
			sprintf(escapedUrl,"%%%02X",(int)c);
			escapedUrl += 3;
		}
	}
	*escapedUrl = '\0';
	return 0;
}

/**
 * Percent-encodes only the double quotes of a URL.<br>
 * Every '"' (0x22) is written as "%22"; all other characters, '%'
 * included, are copied unchanged.<br>
 * Intended for Firefox 2 / Chrome bookmarklets.
 *
 * @param escapedUrl receives the encoded, NUL-terminated string; the
 *                   caller must provide room for up to three bytes per
 *                   source byte plus the terminator
 * @param srcUrl     URL to encode; must not be NULL
 * @return always 0
 */
int encodeOnlyDoubleQuote(char *escapedUrl,const char *srcUrl)
{
	const char *in;
	char *out = escapedUrl;

	assert(srcUrl != NULL);

	for (in = srcUrl; *in != '\0'; in++) {
		if (*in != '\"') {
			*out++ = *in;
			continue;
		}
		/* double quote: emit its byte value as %XX */
		sprintf(out,"%%%02X",((int)*in & 0xff));
		out += 3;
	}
	*out = '\0';
	return 0;
}

/**
 * Expands only percent-encoded double quotes in a URL.<br>
 * Every "%22" becomes '"'; all other characters, including any other
 * "%XX" sequence, are copied unchanged.<br>
 * Intended for Firefox 2 / Chrome.
 *
 * @param dest	receives the decoded, NUL-terminated URL; may be NULL to
 *				only count the output length
 * @param src	URL to decode
 * @return number of characters in the decoded result, not counting the
 *         terminator
 */
int decodeOnlyDoubleQuote(char *dest,char *src)
{
	int charCount = 0;

	while (*src != '\0') {
		char out = *src;	/* default: copy the character as is */
		int advance = 1;

		/* 0x22 is the only value that is decoded, and "22" is the only
		 * pair of hex digits that spells it, so a literal match is
		 * enough. The short-circuit && never reads past the terminator. */
		if ((src[0] == '%') && (src[1] == '2') && (src[2] == '2')) {
			out = '\"';
			advance = 3;
		}

		if (dest != NULL) {
			*dest++ = out;
		}
		src += advance;
		charCount++;
	}
	if (dest != NULL) {
		*dest = '\0';
	}
	return charCount;
}

