Fossil

Check-in [82ce90f9]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Experimental fix for issue [d17d6e5b17].

Should have a LOT more testing before merging it to trunk, because it is dangerous!

The method used is as described at:
http://cygwin.com/cygwin-ug-net/using-specialnames.html
The only problematic characters left are ':' and '\'; all other problematic characters are handled by translating them to characters in the range U+F000 to U+F0FF.

Feedback welcome.

Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | ticket-d17d6e5b17
Files: files | file ages | folders
SHA1:82ce90f91c6e50ab624cc08e49c7e2195dc2f0ea
User & Date: jan.nijtmans 2012-11-20 13:46:26
References
2012-11-20
15:22 Ticket [d17d6e5b] Handle file names containing brackets, interrogation mark or asterisk status still Open with 1 other change artifact: 0acc624f user: jan.nijtmans
13:47 Ticket [d17d6e5b]: 1 change artifact: 7162bc8b user: jan.nijtmans
Context
2012-11-21
09:12
Split off in separate functions

Still experimental, but starts looking better

check-in: b59dc078 user: jan.nijtmans tags: ticket-d17d6e5b17
2012-11-20
13:46
Experimental fix for issue [d17d6e5b17].

Should have a LOT more testing before merging it to trunk, because it is dangerous!

The method used is as described at:
http://cygwin.com/cygwin-ug-net/using-specialnames.html
The only problematic characters left are ':' and '\'; all other problematic characters are handled by translating them to characters in the range U+F000 to U+F0FF.

Feedback welcome.

check-in: 82ce90f9 user: jan.nijtmans tags: ticket-d17d6e5b17
08:21
If applicable, use the OPEN_ANY_SCHEMA flag in Th_FossilInit. check-in: 3c1ad1de user: mistachkin tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/file.c.

479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
....
1087
1088
1089
1090
1091
1092
1093









1094
1095
1096









1097
1098
1099
1100
1101
1102
1103
1104
1105
....
1120
1121
1122
1123
1124
1125
1126











1127
1128
1129
1130
1131

1132
1133
1134
1135








1136
1137
1138
1139
1140
1141
1142
/*
** Return true if the filename given is a valid filename for
** a file in a repository.  Valid filenames follow all of the
** following rules:
**
**     *  Does not begin with "/"
**     *  Does not contain any path element named "." or ".."
**     *  Does not contain any of these characters in the path: "\*[]?"
**     *  Does not end with "/".
**     *  Does not contain two or more "/" characters in a row.
**     *  Contains at least one character
*/
int file_is_simple_pathname(const char *z){
  int i;
  char c = z[0];
  if( c=='/' || c==0 ) return 0;
  if( c=='.' ){
    if( z[1]=='/' || z[1]==0 ) return 0;
    if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
  }
  for(i=0; (c=z[i])!=0; i++){
    if( c=='\\' || c=='*' || c=='[' || c==']' || c=='?' ){
      return 0;
    }
    if( c=='/' ){
      if( z[i+1]=='/' ) return 0;
      if( z[i+1]=='.' ){
        if( z[i+2]=='/' || z[i+2]==0 ) return 0;
        if( z[i+2]=='.' && (z[i+3]=='/' || z[i+3]==0) ) return 0;
................................................................................
#endif
}

/*
** Translate Unicode to UTF8.  Return a pointer to the translated text.
** Call fossil_mbcs_free() to deallocate any memory used to store the
** returned pointer when done.









*/
char *fossil_unicode_to_utf8(const void *zUnicode){
#ifdef _WIN32









  int nByte = WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, 0, 0, 0, 0);
  char *zUtf = sqlite3_malloc( nByte );
  if( zUtf==0 ){
    return 0;
  }
  WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, zUtf, nByte, 0, 0);
  return zUtf;
#else
  return (char *)zUnicode;  /* No-op on unix */
................................................................................
#endif
}

/*
** Translate UTF8 to unicode for use in system calls.  Return a pointer to the
** translated text.  Call fossil_mbcs_free() to deallocate any memory
** used to store the returned pointer when done.











*/
void *fossil_utf8_to_unicode(const char *zUtf8){
#ifndef _WIN32
  /* Non-Windows builds hand the UTF-8 input back unchanged: no copy is
  ** made and nothing is allocated. */
  return (void *)zUtf8;
#else
  /* The first call passes no output buffer and is used only to obtain
  ** the required length, in wide characters.  Because the input length
  ** is given as -1, that count includes the terminating NUL (see the
  ** MultiByteToWideChar documentation). */
  int nWide = MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, 0, 0);
  /* The allocation reserves two bytes per wide character. */
  wchar_t *zWide = sqlite3_malloc( nWide * 2 );

  if( zWide!=0 ){
    /* Second call performs the actual conversion into the new buffer. */
    MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, zWide, nWide);
  }
  /* A failed allocation is reported to the caller as a null pointer. */
  return zWide;
#endif
}

/*







|













|







 







>
>
>
>
>
>
>
>
>

|

>
>
>
>
>
>
>
>
>
|
|







 







>
>
>
>
>
>
>
>
>
>
>





>




>
>
>
>
>
>
>
>







479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
....
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
....
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
/*
** Return true if the filename given is a valid filename for
** a file in a repository.  Valid filenames follow all of the
** following rules:
**
**     *  Does not begin with "/"
**     *  Does not contain any path element named "." or ".."
**     *  Does not contain any of these characters in the path: "\:"
**     *  Does not end with "/".
**     *  Does not contain two or more "/" characters in a row.
**     *  Contains at least one character
*/
int file_is_simple_pathname(const char *z){
  int i;
  char c = z[0];
  if( c=='/' || c==0 ) return 0;
  if( c=='.' ){
    if( z[1]=='/' || z[1]==0 ) return 0;
    if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
  }
  for(i=0; (c=z[i])!=0; i++){
    if( c=='\\' || c==':' ){
      return 0;
    }
    if( c=='/' ){
      if( z[i+1]=='/' ) return 0;
      if( z[i+1]=='.' ){
        if( z[i+2]=='/' || z[i+2]==0 ) return 0;
        if( z[i+2]=='.' && (z[i+3]=='/' || z[i+3]==0) ) return 0;
................................................................................
#endif
}

/*
** Translate Unicode to UTF8.  Return a pointer to the translated text.
** Call fossil_mbcs_free() to deallocate any memory used to store the
** returned pointer when done.
**
** On Windows, characters in the range U+F001 to U+F07F (private use area)
** are translated to ASCII characters in the range U+0001 - U+007F. The
** only place they can come from is filenames using Cygwin's trick
** to circumvent invalid characters in filenames.
** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html>
** This way, fossil will work nicely together with the cygwin shell
** handling those filenames. On other shells, the generated filename
** might not be as expected, but apart from that nothing goes wrong.
*/
char *fossil_unicode_to_utf8(void *zUnicode){
#ifdef _WIN32
  int nByte = 0;
  char *zUtf;
  WCHAR *wUnicode = zUnicode;
  while( *wUnicode != 0 ){
    if ( (*wUnicode > 0xF000) && (*wUnicode <= 0xF07F) ){
      *wUnicode &= 0x7F;
    }
    ++wUnicode;
  }
  nByte = WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, 0, 0, 0, 0);
  zUtf = sqlite3_malloc( nByte );
  if( zUtf==0 ){
    return 0;
  }
  WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, zUtf, nByte, 0, 0);
  return zUtf;
#else
  return (char *)zUnicode;  /* No-op on unix */
................................................................................
#endif
}

/*
** Translate UTF8 to unicode for use in system calls.  Return a pointer to the
** translated text.  Call fossil_mbcs_free() to deallocate any memory
** used to store the returned pointer when done.
**
** On Windows, characters in the range U+0001 to U+001F and the
** characters '"', '*', ':', '<', '>', '?', '|' and '\\' are invalid
** in filenames. Therefore, translate them (except ':' and '\\', which
** are left unchanged) to characters in the private use area, in the
** range U+F001 - U+F07F, so those characters never arrive in any
** Windows API. The filenames might look strange in Windows explorer,
** but in the cygwin shell everything looks as expected.
**
** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html>
**
*/
void *fossil_utf8_to_unicode(const char *zUtf8){
#ifndef _WIN32
  /* Non-Windows builds hand the UTF-8 input back unchanged: no copy is
  ** made and nothing is allocated. */
  return (void *)zUtf8;
#else
  /* The first call passes no output buffer and is used only to obtain
  ** the required length, in wide characters.  Because the input length
  ** is given as -1, that count includes the terminating NUL (see the
  ** MultiByteToWideChar documentation). */
  int nWide = MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, 0, 0);
  /* The allocation reserves two bytes per wide character. */
  wchar_t *zWide = sqlite3_malloc( nWide * 2 );
  int i;

  if( zWide==0 ) return 0;
  MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, zWide, nWide);

  /* Walk every converted character except the last one counted (the
  ** terminator).  Characters below 32 and the characters "*<>?| are
  ** moved into the U+F0xx range by setting the 0xF000 bits.  ':' and
  ** '\\' are not in the set and therefore pass through unchanged. */
  for(i=0; i<nWide-1; i++){
    wchar_t ch = zWide[i];
    if( ch<32 || wcschr(L"\"*<>?|", ch)!=0 ){
      zWide[i] = ch | 0xF000;
    }
  }
  return zWide;
#endif
}

/*