Fossil

Check-in [b59dc078]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Split off in separate functions

Still experimental, but starts looking better

Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | ticket-d17d6e5b17
Files: files | file ages | folders
SHA1: b59dc078187891db8420f2dcf84f5a8728364307
User & Date: jan.nijtmans 2012-11-21 09:12:35
Context
2012-11-21
09:20
Somehow, part of previous change got lost check-in: 92725735 user: jan.nijtmans tags: ticket-d17d6e5b17
09:12
Split off in separate functions

Still experimental, but starts looking better

check-in: b59dc078 user: jan.nijtmans tags: ticket-d17d6e5b17
2012-11-20
13:46
Experimental fix for issue [d17d6e5b17].

Should have a LOT more testing before merging it to trunk, because it is dangerous!

The method used is as described at:
http://cygwin.com/cygwin-ug-net/using-specialnames.html The only problematic characters left are ':' and '\', all other problematic characters are handled by translating them to characters in the range U+F000 to U+F0FF

Feedback welcome.

check-in: 82ce90f9 user: jan.nijtmans tags: ticket-d17d6e5b17
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/file.c.

68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
...
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
...
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
...
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
...
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
...
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
....
1087
1088
1089
1090
1091
1092
1093



















1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109


1110

1111

1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
....
1138
1139
1140
1141
1142
1143
1144




















1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161


1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
....
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
  if( isWd && g.allowSymlinks ){
    return lstat(zFilename, buf);
  }else{
    return stat(zFilename, buf);
  }
#else
  int rc = 0;
  wchar_t *zMbcs = fossil_utf8_to_unicode(zFilename);
  rc = _wstati64(zMbcs, buf);
  fossil_mbcs_free(zMbcs);
  return rc;
#endif
}

/*
................................................................................


/*
** Check the accessibility of a file: a thin wrapper around access().
**
** zFilename is the path to test and flags is handed to the underlying
** call unchanged.  On Windows the name is first converted with
** fossil_utf8_to_unicode() and _waccess() is used instead.  The value
** returned is whatever the underlying call returned.
*/
int file_access(const char *zFilename, int flags){
#ifndef _WIN32
  return access(zFilename, flags);
#else
  int result;
  wchar_t *zWide = fossil_utf8_to_unicode(zFilename);
  result = _waccess(zWide, flags);
  fossil_mbcs_free(zWide);  /* release the converted copy of the name */
  return result;
#endif
}
................................................................................
  struct timeval tv[2];
  memset(tv, 0, sizeof(tv[0])*2);
  tv[0].tv_sec = newMTime;
  tv[1].tv_sec = newMTime;
  utimes(zFilename, tv);
#else
  struct _utimbuf tb;
  wchar_t *zMbcs = fossil_utf8_to_unicode(zFilename);
  tb.actime = newMTime;
  tb.modtime = newMTime;
  _wutime(zMbcs, &tb);
  fossil_mbcs_free(zMbcs);
#endif
}

................................................................................
}

/*
** Remove the file named zFilename.  The outcome of the underlying
** unlink call is not reported to the caller.
*/
void file_delete(const char *zFilename){
#ifndef _WIN32
  unlink(zFilename);
#else
  /* The wide-character API needs a converted copy of the name. */
  wchar_t *zWide = fossil_utf8_to_unicode(zFilename);
  _wunlink(zWide);
  fossil_mbcs_free(zWide);
#endif
}

................................................................................
  if( rc==2 ){
    if( !forceFlag ) return 1;
    file_delete(zName);
  }
  if( rc!=1 ){
#if defined(_WIN32)
    int rc;
    wchar_t *zMbcs = fossil_utf8_to_unicode(zName);
    rc = _wmkdir(zMbcs);
    fossil_mbcs_free(zMbcs);
    return rc;
#else
    return mkdir(zName, 0755);
#endif
  }
................................................................................
/*
** Return true if the filename given is a valid filename for
** a file in a repository.  Valid filenames follow all of the
** following rules:
**
**     *  Does not begin with "/"
**     *  Does not contain any path element named "." or ".."
**     *  Does not contain any of these characters in the path: "\:"
**     *  Does not end with "/".
**     *  Does not contain two or more "/" characters in a row.
**     *  Contains at least one character
*/
int file_is_simple_pathname(const char *z){
  int i;
  char c = z[0];
  if( c=='/' || c==0 ) return 0;
  if( c=='.' ){
    if( z[1]=='/' || z[1]==0 ) return 0;
    if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
  }
  for(i=0; (c=z[i])!=0; i++){
    if( c=='\\' || c==':' ){
      return 0;
    }
    if( c=='/' ){
      if( z[i+1]=='/' ) return 0;
      if( z[i+1]=='.' ){
        if( z[i+2]=='/' || z[i+2]==0 ) return 0;
        if( z[i+2]=='.' && (z[i+3]=='/' || z[i+3]==0) ) return 0;
................................................................................
#endif
}

/*
** Translate Unicode to UTF8.  Return a pointer to the translated text.
** Call fossil_mbcs_free() to deallocate any memory used to store the
** returned pointer when done.
**
** On Windows, characters in the range U+F001 to U+F07F (private use area)
** are first replaced, directly in the caller's buffer, by the ASCII
** characters U+0001 - U+007F. The only place they can come from are
** filenames using Cygwin's trick to circumvent invalid characters in
** filenames.
** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html>
** This way, fossil will work nicely together with the cygwin shell
** handling those filenames. On other shells, the generated filename
** might not be as expected, but apart from that nothing goes wrong.
**
** Returns 0 when the output buffer cannot be allocated.
*/
char *fossil_unicode_to_utf8(void *zUnicode){
#ifndef _WIN32
  return (char *)zUnicode;  /* No-op on unix */
#else
  WCHAR *pCh;
  char *zResult;
  int nOut;

  /* Undo the private-use-area mapping in place, before converting. */
  for(pCh = zUnicode; *pCh != 0; pCh++){
    if( *pCh >= 0xF001 && *pCh <= 0xF07F ){
      *pCh &= 0x7F;
    }
  }

  /* First call asks for the required size, second call converts. */
  nOut = WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, 0, 0, 0, 0);
  zResult = sqlite3_malloc( nOut );
  if( zResult!=0 ){
    WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, zResult, nOut, 0, 0);
  }
  return zResult;
#endif
}

/*
** Translate UTF8 to MBCS for use in system calls.  Return a pointer to the
................................................................................
#endif
}

/*
** Translate UTF8 to unicode for use in system calls.  Return a pointer to the
** translated text.  Call fossil_mbcs_free() to deallocate any memory
** used to store the returned pointer when done.




















**
** On Windows, characters in the range U+0001 to U+001F and the
** characters '"', '*', ':', '<', '>', '?', '|' and '\\' are invalid
** to be used. Therefore, translate those to characters in the
** private use area, in the range U+F001 - U+F07F, so those
** characters never arrive in any Windows API. The filenames might
** look strange in Windows explorer, but in the cygwin shell
** everything looks as expected.
**
** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html>
**
*/
void *fossil_utf8_to_unicode(const char *zUtf8){
#ifdef _WIN32
  int nByte = MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, 0, 0);
  wchar_t *zUnicode = sqlite3_malloc( nByte * 2 );
  wchar_t *wUnicode;


  if( zUnicode==0 ){
    return 0;
  }
  MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, zUnicode, nByte);
  wUnicode = zUnicode;
  while( --nByte > 0){
    if ( (*wUnicode < 32) || wcschr(L"\"*<>?|", *wUnicode) ){
      *wUnicode |= 0xF000;
    }
    ++wUnicode;
  }

  return zUnicode;
#else
................................................................................

/*
** Open a file the way fopen() does, taking zName and zMode as UTF8.
**
** On Windows both strings are converted with fossil_utf8_to_unicode()
** and handed to _wfopen(); elsewhere fopen() is called directly.  The
** FILE pointer produced by the underlying call is returned as is.
*/
FILE *fossil_fopen(const char *zName, const char *zMode){
#ifndef _WIN32
  return fopen(zName, zMode);
#else
  FILE *pFile;
  wchar_t *zWideMode = fossil_utf8_to_unicode(zMode);
  wchar_t *zWideName = fossil_utf8_to_unicode(zName);
  pFile = _wfopen(zWideName, zWideMode);
  /* The converted strings are only needed for the call itself. */
  fossil_mbcs_free(zWideName);
  fossil_mbcs_free(zWideMode);
  return pFile;
#endif
}







|







 







|







 







|







 







|







 







|







 







|













|







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

|








|

<
<


>
>
|
>
|
>



|
<
<
<
<
<
<







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>

|


|







|

|
<
|
>
>
|
<

<
|
<
|







 







|








68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
...
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
...
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
...
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
...
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
...
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
....
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124


1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136






1137
1138
1139
1140
1141
1142
1143
....
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194

1195
1196
1197
1198

1199

1200

1201
1202
1203
1204
1205
1206
1207
1208
....
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
  if( isWd && g.allowSymlinks ){
    return lstat(zFilename, buf);
  }else{
    return stat(zFilename, buf);
  }
#else
  int rc = 0;
  wchar_t *zMbcs = fossil_utf8_to_filename(zFilename);
  rc = _wstati64(zMbcs, buf);
  fossil_mbcs_free(zMbcs);
  return rc;
#endif
}

/*
................................................................................


/*
** Check the accessibility of a file: a thin wrapper around access().
**
** zFilename is the path to test and flags is handed to the underlying
** call unchanged.  On Windows the name is first converted with
** fossil_utf8_to_filename() and _waccess() is used instead.  The value
** returned is whatever the underlying call returned.
*/
int file_access(const char *zFilename, int flags){
#ifndef _WIN32
  return access(zFilename, flags);
#else
  int result;
  wchar_t *zWide = fossil_utf8_to_filename(zFilename);
  result = _waccess(zWide, flags);
  fossil_mbcs_free(zWide);  /* release the converted copy of the name */
  return result;
#endif
}
................................................................................
  struct timeval tv[2];
  memset(tv, 0, sizeof(tv[0])*2);
  tv[0].tv_sec = newMTime;
  tv[1].tv_sec = newMTime;
  utimes(zFilename, tv);
#else
  struct _utimbuf tb;
  wchar_t *zMbcs = fossil_utf8_to_filename(zFilename);
  tb.actime = newMTime;
  tb.modtime = newMTime;
  _wutime(zMbcs, &tb);
  fossil_mbcs_free(zMbcs);
#endif
}

................................................................................
}

/*
** Remove the file named zFilename.  The outcome of the underlying
** unlink call is not reported to the caller.
*/
void file_delete(const char *zFilename){
#ifndef _WIN32
  unlink(zFilename);
#else
  /* The wide-character API needs a converted copy of the name. */
  wchar_t *zWide = fossil_utf8_to_filename(zFilename);
  _wunlink(zWide);
  fossil_mbcs_free(zWide);
#endif
}

................................................................................
  if( rc==2 ){
    if( !forceFlag ) return 1;
    file_delete(zName);
  }
  if( rc!=1 ){
#if defined(_WIN32)
    int rc;
    wchar_t *zMbcs = fossil_utf8_to_filename(zName);
    rc = _wmkdir(zMbcs);
    fossil_mbcs_free(zMbcs);
    return rc;
#else
    return mkdir(zName, 0755);
#endif
  }
................................................................................
/*
** Return true if the filename given is a valid filename for
** a file in a repository.  Valid filenames follow all of the
** following rules:
**
**     *  Does not begin with "/"
**     *  Does not contain any path element named "." or ".."
**     *  Does not contain any of these characters in the path: "\*[]?"
**     *  Does not end with "/".
**     *  Does not contain two or more "/" characters in a row.
**     *  Contains at least one character
*/
int file_is_simple_pathname(const char *z){
  int i;
  char c = z[0];
  if( c=='/' || c==0 ) return 0;
  if( c=='.' ){
    if( z[1]=='/' || z[1]==0 ) return 0;
    if( z[1]=='.' && (z[2]=='/' || z[2]==0) ) return 0;
  }
  for(i=0; (c=z[i])!=0; i++){
    if( c=='\\' || c=='*' || c=='[' || c==']' || c=='?' ){
      return 0;
    }
    if( c=='/' ){
      if( z[i+1]=='/' ) return 0;
      if( z[i+1]=='.' ){
        if( z[i+2]=='/' || z[i+2]==0 ) return 0;
        if( z[i+2]=='.' && (z[i+3]=='/' || z[i+3]==0) ) return 0;
................................................................................
#endif
}

/*
** Convert a Unicode (wide character) string to UTF8 and return a
** pointer to the converted text.  Call fossil_mbcs_free() on the
** returned pointer once it is no longer needed.
**
** On Windows the result lives in a buffer obtained from sqlite3_malloc();
** 0 is returned when that allocation fails.  On unix no conversion takes
** place and the argument itself is returned.
*/
char *fossil_unicode_to_utf8(const void *zUnicode){
#ifndef _WIN32
  return (char *)zUnicode;  /* No-op on unix */
#else
  char *zResult;
  /* First call asks for the required size, second call converts. */
  int nOut = WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, 0, 0, 0, 0);
  zResult = sqlite3_malloc( nOut );
  if( zResult!=0 ){
    WideCharToMultiByte(CP_UTF8, 0, zUnicode, -1, zResult, nOut, 0, 0);
  }
  return zResult;
#endif
}

/*
** Translate Unicode (filename) to UTF8.  Return a pointer to the
** translated text.  Call fossil_mbcs_free() to deallocate any
** memory used to store the returned pointer when done.
**
** On Windows, characters in the range U+F001 to U+F07F (private use area)
** are translated back to the ASCII character in their low 7 bits, but
** only when that character is a control character (below U+0020) or one
** of '"', '*', '<', '>', '?', '|', ':'.  Any other private use character
** is left alone.  The translation is done in place, so the buffer passed
** in zUnicode is modified before it is converted to UTF8.
**
** The only place such characters can come from are filenames using
** Cygwin's trick to circumvent invalid characters in filenames.
** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html>
** This way, fossil will work nicely together with the cygwin shell
** handling those filenames. On other shells, the generated filename
** might not be as expected, but apart from that nothing goes wrong.
*/
char *fossil_filename_to_utf8(void *zUnicode){
#ifdef _WIN32
  WCHAR *wUnicode = zUnicode;
  while( *wUnicode != 0 ){
    if ( (*wUnicode & 0xFF80) == 0xF000 ){
      WCHAR converted = (*wUnicode & 0x7F);
      /* Only really convert it when the resulting char is in the given
      ** range.  U+F000 itself must be skipped: its low bits are 0, and
      ** storing a NUL here would cut the filename short. */
      if ( converted != 0
       && ((converted < 32) || wcschr(L"\"*<>?|:", converted)) ){
        *wUnicode = converted;
      }
    }
    ++wUnicode;
  }
  return fossil_unicode_to_utf8(zUnicode);
#else
  return (char *)zUnicode;  /* No-op on unix */
#endif
}

/*
** Translate UTF8 to MBCS for use in system calls.  Return a pointer to the
................................................................................
#endif
}

/*
** Convert a UTF8 string to Unicode (wide characters) for use in system
** calls and return a pointer to the converted text.  Call
** fossil_mbcs_free() on the returned pointer once it is no longer needed.
**
** On Windows the result lives in a buffer obtained from sqlite3_malloc();
** 0 is returned when that allocation fails.  On unix no conversion takes
** place and the argument itself is returned.
*/
void *fossil_utf8_to_unicode(const char *zUtf8){
#ifndef _WIN32
  return (void *)zUtf8;  /* No-op on unix */
#else
  wchar_t *zWide;
  /* First call asks for the required length, second call converts. */
  int nChar = MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, 0, 0);
  zWide = sqlite3_malloc( nChar * 2 );
  if( zWide!=0 ){
    MultiByteToWideChar(CP_UTF8, 0, zUtf8, -1, zWide, nChar);
  }
  return zWide;
#endif
}

/*
** Translate UTF8 to unicode for use in filename translations.
** Return a pointer to the translated text.  Call fossil_mbcs_free()
** to deallocate any memory used to store the returned pointer when done.
**
** On Windows, characters in the range U+0001 to U+001F and the
** characters '"', '*', ':', '<', '>', '?', '|' and '\\' are invalid
** to be used. Therefore, translate those to characters in the
** (private use area), in the range U+F001 - U+F07F, so those
** characters never arrive in any Windows API. The filenames might
** look strange in Windows explorer, but in the cygwin shell
** everything looks as expected.
**
** See: <http://cygwin.com/cygwin-ug-net/using-specialnames.html>
**
*/
void *fossil_utf8_to_filename(const char *zUtf8){
#ifdef _WIN32
  WCHAR *zUnicode = fossil_utf8_to_unicode(zUtf8);

  WCHAR *wUnicode = zUnicode;
  /* If path starts with "<drive>:/" or "<drive>:\", don't translate the ':' */
  if ( file_is_absolute_path(zUtf8) ){
    wUnicode += 3;

  }

  while( *wUnicode != '\0' ){

    if ( (*wUnicode < 32) || wcschr(L"\"*<>?|:", *wUnicode) ){
      *wUnicode |= 0xF000;
    }
    ++wUnicode;
  }

  return zUnicode;
#else
................................................................................

/*
** Open a file the way fopen() does, taking zName and zMode as UTF8.
**
** On Windows the mode is converted with fossil_utf8_to_unicode() and
** the name with fossil_utf8_to_filename() before both are handed to
** _wfopen(); elsewhere fopen() is called directly.  The FILE pointer
** produced by the underlying call is returned as is.
*/
FILE *fossil_fopen(const char *zName, const char *zMode){
#ifndef _WIN32
  return fopen(zName, zMode);
#else
  FILE *pFile;
  wchar_t *zWideMode = fossil_utf8_to_unicode(zMode);
  wchar_t *zWideName = fossil_utf8_to_filename(zName);
  pFile = _wfopen(zWideName, zWideMode);
  /* The converted strings are only needed for the call itself. */
  fossil_mbcs_free(zWideName);
  fossil_mbcs_free(zWideMode);
  return pFile;
#endif
}

Changes to src/rebuild.c.

831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
  DIR *d;
  struct dirent *pEntry;
  Blob aContent; /* content of the just read artifact */
  static int nFileRead = 0;
  void *zUnicodePath;
  char *zUtf8Name;

  zUnicodePath = fossil_utf8_to_unicode(zPath);
  d = opendir(zUnicodePath);
  if( d ){
    while( (pEntry=readdir(d))!=0 ){
      Blob path;
      char *zSubpath;

      if( pEntry->d_name[0]=='.' ){
        continue;
      }
      zUtf8Name = fossil_unicode_to_utf8(pEntry->d_name);
      zSubpath = mprintf("%s/%s", zPath, zUtf8Name);
      fossil_mbcs_free(zUtf8Name);
      if( file_isdir(zSubpath)==1 ){
        recon_read_dir(zSubpath);
      }
      blob_init(&path, 0, 0);
      blob_appendf(&path, "%s", zSubpath);







|









|







831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
  DIR *d;
  struct dirent *pEntry;
  Blob aContent; /* content of the just read artifact */
  static int nFileRead = 0;
  void *zUnicodePath;
  char *zUtf8Name;

  zUnicodePath = fossil_utf8_to_filename(zPath);
  d = opendir(zUnicodePath);
  if( d ){
    while( (pEntry=readdir(d))!=0 ){
      Blob path;
      char *zSubpath;

      if( pEntry->d_name[0]=='.' ){
        continue;
      }
      zUtf8Name = fossil_filename_to_utf8(pEntry->d_name);
      zSubpath = mprintf("%s/%s", zPath, zUtf8Name);
      fossil_mbcs_free(zUtf8Name);
      if( file_isdir(zSubpath)==1 ){
        recon_read_dir(zSubpath);
      }
      blob_init(&path, 0, 0);
      blob_appendf(&path, "%s", zSubpath);

Changes to src/vfile.c.

457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
       "INSERT OR IGNORE INTO sfile(x) SELECT :file"
       "  WHERE NOT EXISTS(SELECT 1 FROM vfile WHERE pathname=:file)"
    );
  }
  depth++;

  zDir = blob_str(pPath);
  zMbcs = fossil_utf8_to_unicode(zDir);
  d = opendir(zMbcs);
  if( d ){
    while( (pEntry=readdir(d))!=0 ){
      char *zPath;
      char *zUtf8;
      if( pEntry->d_name[0]=='.' ){
        if( (scanFlags & SCAN_ALL)==0 ) continue;
        if( pEntry->d_name[1]==0 ) continue;
        if( pEntry->d_name[1]=='.' && pEntry->d_name[2]==0 ) continue;
      }
      zUtf8 = fossil_unicode_to_utf8(pEntry->d_name);
      blob_appendf(pPath, "/%s", zUtf8);
      zPath = blob_str(pPath);
      if( glob_match(pIgnore, &zPath[nPrefix+1]) ){
        /* do nothing */
      }else if( file_wd_isdir(zPath)==1 ){
        if( !vfile_top_of_checkout(zPath) ){
          vfile_scan(pPath, nPrefix, scanFlags, pIgnore);







|










|







457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
       "INSERT OR IGNORE INTO sfile(x) SELECT :file"
       "  WHERE NOT EXISTS(SELECT 1 FROM vfile WHERE pathname=:file)"
    );
  }
  depth++;

  zDir = blob_str(pPath);
  zMbcs = fossil_utf8_to_filename(zDir);
  d = opendir(zMbcs);
  if( d ){
    while( (pEntry=readdir(d))!=0 ){
      char *zPath;
      char *zUtf8;
      if( pEntry->d_name[0]=='.' ){
        if( (scanFlags & SCAN_ALL)==0 ) continue;
        if( pEntry->d_name[1]==0 ) continue;
        if( pEntry->d_name[1]=='.' && pEntry->d_name[2]==0 ) continue;
      }
      zUtf8 = fossil_filename_to_utf8(pEntry->d_name);
      blob_appendf(pPath, "/%s", zUtf8);
      zPath = blob_str(pPath);
      if( glob_match(pIgnore, &zPath[nPrefix+1]) ){
        /* do nothing */
      }else if( file_wd_isdir(zPath)==1 ){
        if( !vfile_top_of_checkout(zPath) ){
          vfile_scan(pPath, nPrefix, scanFlags, pIgnore);