/*
** Copyright (c) 2006 D. Richard Hipp
**
** This program is free software; you can redistribute it and/or
** modify it under the terms of the Simplified BSD License (also
** known as the "2-Clause License" or "FreeBSD License".)
** This program is distributed in the hope that it will be useful,
** but without any warranty; without even the implied warranty of
** merchantability or fitness for a particular purpose.
**
** Author contact information:
**   drh@hwaci.com
**   http://www.hwaci.com/drh/
**
*******************************************************************************
**
** Routines for encoding and decoding text.
*/
#include "config.h"
#include "encode.h"

/*
** Make the given string safe for HTML by converting every "<" into "&lt;",
** every ">" into "&gt;" and every "&" into "&amp;".  Return a pointer
** to a new zero-terminated string obtained from fossil_malloc().
**
** We also encode " as "&quot;" and ' as "&#39;" so they can appear as an
** argument to markup.
**
** zIn is the text to escape.  n is the maximum number of bytes of zIn to
** process; if n is negative then strlen(zIn) is used.  Processing stops
** early at the first NUL byte.
*/
char *htmlize(const char *zIn, int n){
  int c;
  int i;
  int count = 0;
  char *zOut;

  if( n<0 ) n = strlen(zIn);

  /* Pass 1: compute the size of the escaped text.  Each case must add
  ** exactly the number of bytes that the matching case in pass 2 writes. */
  for(i=0; i<n && (c = zIn[i])!=0; i++){
    switch( c ){
      case '<':   count += 4;   break;   /* &lt;   */
      case '>':   count += 4;   break;   /* &gt;   */
      case '&':   count += 5;   break;   /* &amp;  */
      case '"':   count += 6;   break;   /* &quot; */
      case '\'':  count += 5;   break;   /* &#39;  */
      default:    count++;      break;
    }
  }

  /* Pass 2: write the escaped text */
  i = 0;
  zOut = fossil_malloc( count+1 );
  while( n-->0 && (c = *zIn)!=0 ){
    switch( c ){
      case '<':
        memcpy(&zOut[i], "&lt;", 4);
        i += 4;
        break;
      case '>':
        memcpy(&zOut[i], "&gt;", 4);
        i += 4;
        break;
      case '&':
        memcpy(&zOut[i], "&amp;", 5);
        i += 5;
        break;
      case '"':
        memcpy(&zOut[i], "&quot;", 6);
        i += 6;
        break;
      case '\'':
        memcpy(&zOut[i], "&#39;", 5);
        i += 5;
        break;
      default:
        zOut[i++] = c;
        break;
    }
    zIn++;
  }
  zOut[i] = 0;
  return zOut;
}

/*
** Append HTML-escaped text to a Blob.
*/ void htmlize_to_blob(Blob *p, const char *zIn, int n){ int c, i, j; if( n<0 ) n = strlen(zIn); for(i=j=0; i': if( j0 && (c = *zIn)!=0 ){ if( IsSafeChar(c) ){ zOut[i++] = c; }else if( c==' ' ){ zOut[i++] = '+'; }else{ zOut[i++] = '%'; zOut[i++] = "0123456789ABCDEF"[(c>>4)&0xf]; zOut[i++] = "0123456789ABCDEF"[c&0xf]; } zIn++; } zOut[i] = 0; #undef IsSafeChar return zOut; } /* ** Convert the input string into a form that is suitable for use as ** a token in the HTTP protocol. Spaces are encoded as '+' and special ** characters are encoded as "%HH" where HH is a two-digit hexadecimal ** representation of the character. The "/" character is encoded ** as "%2F". */ char *httpize(const char *z, int n){ return EncodeHttp(z, n, 1); } /* ** Convert the input string into a form that is suitable for use as ** a token in the HTTP protocol. Spaces are encoded as '+' and special ** characters are encoded as "%HH" where HH is a two-digit hexidecimal ** representation of the character. The "/" character is not encoded ** by this routine. */ char *urlize(const char *z, int n){ return EncodeHttp(z, n, 0); } /* ** Convert a single HEX digit to an integer */ static int AsciiToHex(int c){ if( c>='a' && c<='f' ){ c += 10 - 'a'; }else if( c>='A' && c<='F' ){ c += 10 - 'A'; }else if( c>='0' && c<='9' ){ c -= '0'; }else{ c = 0; } return c; } /* ** Remove the HTTP encodings from a string. The conversion is done ** in-place. Return the length of the string after conversion. */ int dehttpize(char *z){ int i, j; /* Treat a null pointer as a zero-length string. */ if( !z ) return 0; i = j = 0; while( z[i] ){ switch( z[i] ){ case '%': if( z[i+1] && z[i+2] ){ z[j] = AsciiToHex(z[i+1]) << 4; z[j] |= AsciiToHex(z[i+2]); i += 2; } break; case '+': z[j] = ' '; break; default: z[j] = z[i]; break; } i++; j++; } z[j] = 0; return j; } /* ** The "fossilize" encoding is used in the headers of records ** (aka "content files") to escape special characters. 
The ** fossilize encoding passes most characters through unchanged. ** The changes are these: ** ** space -> \s ** tab -> \t ** newline -> \n ** cr -> \r ** formfeed -> \f ** vtab -> \v ** nul -> \0 ** \ -> \\ ** ** The fossilize() routine does an encoding of its input and ** returns a pointer to the encoding in space obtained from ** malloc. */ char *fossilize(const char *zIn, int nIn){ int n, i, j, c; char *zOut; if( nIn<0 ) nIn = strlen(zIn); for(i=n=0; i=0xc0 ){ c = utf8Trans1[c-0xc0]; while( (*(*pz) & 0xc0)==0x80 ){ c = (c<<6) + (0x3f & *((*pz)++)); } if( c<0x80 || (c&0xFFFFF800)==0xD800 || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } } return c; } /* ** Encode a UTF8 string for JSON. All special characters are escaped. */ void blob_append_json_string(Blob *pBlob, const char *zStr){ const unsigned char *z; char *zOut; u32 c; int n, i, j; z = (const unsigned char*)zStr; n = 0; while( (c = fossil_utf8_read(&z))!=0 ){ if( c=='\\' || c=='"' ){ n += 2; }else if( c<' ' || c>=0x7f ){ if( c=='\n' || c=='\r' ){ n += 2; }else{ n += 6; } }else{ n++; } } i = blob_size(pBlob); blob_resize(pBlob, i+n); zOut = blob_buffer(pBlob); z = (const unsigned char*)zStr; while( (c = fossil_utf8_read(&z))!=0 ){ if( c=='\\' ){ zOut[i++] = '\\'; zOut[i++] = c; }else if( c<' ' || c>=0x7f ){ zOut[i++] = '\\'; if( c=='\n' ){ zOut[i++] = 'n'; }else if( c=='\r' ){ zOut[i++] = 'r'; }else{ zOut[i++] = 'u'; for(j=3; j>=0; j--){ zOut[i+j] = "0123456789abcdef"[c&0xf]; c >>= 4; } i += 4; } }else{ zOut[i++] = c; } } zOut[i] = 0; } /* ** The characters used for HTTP base64 encoding. */ static unsigned char zBase[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; /* ** Translate nData bytes of content from zData into ** ((nData+2)/3)*4) bytes of base64 encoded content and ** put the result in z64. Add a zero-terminator at the end. 
*/ int translateBase64(const char *zData, int nData, char *z64){ int i, n; for(i=n=0; i+2>2) & 0x3f ]; z64[n++] = zBase[ ((zData[i]<<4) & 0x30) | ((zData[i+1]>>4) & 0x0f) ]; z64[n++] = zBase[ ((zData[i+1]<<2) & 0x3c) | ((zData[i+2]>>6) & 0x03) ]; z64[n++] = zBase[ zData[i+2] & 0x3f ]; } if( i+1>2) & 0x3f ]; z64[n++] = zBase[ ((zData[i]<<4) & 0x30) | ((zData[i+1]>>4) & 0x0f) ]; z64[n++] = zBase[ ((zData[i+1]<<2) & 0x3c) ]; z64[n++] = '='; }else if( i>2) & 0x3f ]; z64[n++] = zBase[ ((zData[i]<<4) & 0x30) ]; z64[n++] = '='; z64[n++] = '='; } z64[n] = 0; return n; } /* ** Encode a string using a base-64 encoding. ** The encoding can be reversed using the decode64 function. ** ** Space to hold the result comes from malloc(). */ char *encode64(const char *zData, int nData){ char *z64; if( nData<=0 ){ nData = strlen(zData); } z64 = fossil_malloc( (nData*4)/3 + 8 ); translateBase64(zData, nData, z64); return z64; } /* ** COMMAND: test-encode64 ** ** Usage: %fossil test-encode64 STRING */ void test_encode64_cmd(void){ char *z; int i; for(i=2; i=0 ){ x[k++] = v; if( k==4 ){ zData[j++] = ((x[0]<<2) & 0xfc) | ((x[1]>>4) & 0x03); zData[j++] = ((x[1]<<4) & 0xf0) | ((x[2]>>2) & 0x0f); zData[j++] = ((x[2]<<6) & 0xc0) | (x[3] & 0x3f); k = 0; } } } if( k>=2 ){ zData[j++] = ((x[0]<<2) & 0xfc) | ((x[1]>>4) & 0x03); if( k==3 ){ zData[j++] = ((x[1]<<4) & 0xf0) | ((x[2]>>2) & 0x0f); } } zData[j] = 0; *pnByte = j; } /* ** This function treats its input as a base-64 string and returns the ** decoded value of that string. Characters of input that are not ** valid base-64 characters (such as spaces and newlines) are ignored. ** ** Space to hold the decoded string is obtained from malloc(). 
**
** The number of bytes decoded is returned in *pnByte
*/
char *decode64(const char *z64, int *pnByte){
  char *zData;
  int n64 = (int)strlen(z64);
  /* Leave trailing padding characters (=) out of the size estimate */
  while( n64>0 && z64[n64-1]=='=' ) n64--;
  /* Size the output buffer from the unpadded input length */
  zData = fossil_malloc( (n64*3)/4 + 4 );
  /* decodeBase64() is handed the full original string, padding included */
  decodeBase64(z64, pnByte, zData);
  return zData;
}

/* NOTE(review): the statement text of test_decode64_cmd() below is
** truncated in this copy of the file.  It breaks off inside the for-loop
** header; what follows is the tail of a routine that writes two
** characters from zEncode[] per input byte (presumably encode16()).
** It is left exactly as found and must be restored from a pristine copy
** of the file. */
/*
** COMMAND: test-decode64
**
** Usage: %fossil test-decode64 STRING
*/
void test_decode64_cmd(void){ char *z; int i, n; for(i=2; i>4]; *(zOut++) = zEncode[pIn[i]&0xf]; } *zOut = 0; return 0; }

/*
** A lookup table, indexed by character code, that translates a single
** base-16 character into its value.  The entries for '0'..'9' hold 0..9
** and the entries for both 'A'..'F' and 'a'..'f' hold 10..15, so upper
** and lower case are treated the same.  Every other entry is 64, which
** marks a disallowed input character.
*/
static const char zDecode[] = {
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 64, 64, 64, 64, 64, 64,
  64, 10, 11, 12, 13, 14, 15, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 10, 11, 12, 13, 14, 15, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};

/*
** Decode a N-character base-16 number into base-256.  N must be a
** multiple of 2.
The output buffer must be at least N/2 characters ** in length */ int decode16(const unsigned char *zIn, unsigned char *pOut, int N){ int i, j; if( (N&1)!=0 ) return 1; for(i=j=0; i63 ){ return zIn[0]==0; } } return 1; } /* ** The input string is a base16 value. Convert it into its canonical ** form. This means that digits are all lower case and that conversions ** like "l"->"1" and "O"->"0" occur. */ void canonical16(char *z, int n){ while( *z && n-- ){ *z = zEncode[zDecode[(*z)&0x7f]&0x1f]; z++; } } /* ** Decode a string encoded using "quoted-printable". ** ** (1) "=" followed by two hex digits becomes a single ** byte specified by the two digits ** ** The decoding is done in-place. */ void decodeQuotedPrintable(char *z, int *pnByte){ int i, j, c; for(i=j=0; (c = z[i])!=0; i++){ if( c=='=' ){ if( z[i+1]!='\r' ){ decode16((unsigned char*)&z[i+1], (unsigned char*)&z[j], 2); j++; } i += 2; }else{ z[j++] = c; } } if( pnByte ) *pnByte = j; z[j] = 0; } /* Randomness used for XOR-ing by the obscure() and unobscure() routines */ static const unsigned char aObscurer[16] = { 0xa7, 0x21, 0x31, 0xe3, 0x2a, 0x50, 0x2c, 0x86, 0x4c, 0xa4, 0x52, 0x25, 0xff, 0x49, 0x35, 0x85 }; /* ** Obscure plain text so that it is not easily readable. ** ** This is used for storing sensitive information (such as passwords) in a ** way that prevents their exposure through idle browsing. This is not ** encryption. Anybody who really wants the password can still get it. ** ** The text is XOR-ed with a repeating pattern then converted to hex. ** Space to hold the returned string is obtained from malloc and should ** be freed by the caller. */ char *obscure(const char *zIn){ int n, i; unsigned char salt; char *zOut; if( zIn==0 ) return 0; n = strlen(zIn); zOut = fossil_malloc( n*2+3 ); sqlite3_randomness(1, &salt); zOut[n+1] = (char)salt; for(i=0; i %s (%s)\n", g.argv[i], z, z2); free(z); free(z2); z = unobscure(g.argv[i]); fossil_print("UNOBSCURE: %s -> %s\n", g.argv[i], z); free(z); } }