Fossil

Check-in [cd06b7d8]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment: bug-fix: Before preparing a diff, BOMs should not be removed. Keeping them might result in a BOM in the middle of UTF-8 text, which is invalid, but that's how diff works.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | use-blob_strip_bom
Files: files | file ages | folders
SHA1: cd06b7d8af012b59185ee008b5d5fb1442df6dda
User & Date: jan.nijtmans 2012-11-01 14:02:39
Context
2012-11-05
13:56
merge trunk check-in: b0e05a90 user: jan.nijtmans tags: use-blob_strip_bom
2012-11-01
14:02
bug-fix: Before preparing a diff, BOMs should not be removed. Keeping them might result in a BOM in the middle of UTF-8 text, which is invalid, but that's how diff works. check-in: cd06b7d8 user: jan.nijtmans tags: use-blob_strip_bom
12:32
merge trunk check-in: 9e97de34 user: jan.nijtmans tags: use-blob_strip_bom
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/blob.c.

1089
1090
1091
1092
1093
1094
1095
1096
1097

1098
1099
1100
1101
1102
1103
1104
1105
1106

1107
1108
1109
1110
1111
1112
1113

1114
1115
1116
1117
1118
1119
1120
1121



1122
1123
1124
1125
1126
1127
1128
....
1132
1133
1134
1135
1136
1137
1138



1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
  *pRight = swap;
}

/*
** Strip a possible byte-order mark (BOM) from the start of pBlob.
** The blob is modified in place.
**
** A leading UTF-8 BOM is always removed.
**
** On Windows only (_WIN32), content of even length that starts with
** a UTF-16 BOM in either byte order is converted to UTF-8 (without a
** BOM).  If there is no BOM at all and useMbcs is true, the content
** is converted to UTF-8 with fossil_mbcs_to_utf8().
**
** If useMbcs is false and there is no BOM, the input string
** is assumed to be UTF-8 already, so no conversion is done.
*/
void blob_strip_bom(Blob *pBlob, int useMbcs){
  /* The three bytes of the UTF-8 BOM */
  static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
#ifdef _WIN32
  /* The UTF-16 BOM as a 16-bit value, and the same value with its two
  ** bytes swapped.  Both are compared bytewise against the first two
  ** bytes of the blob. */
  static const unsigned short ubom = 0xfeff;
  static const unsigned short urbom = 0xfffe;
#endif /* _WIN32 */
  char *zUtf8;
  if( blob_size(pBlob)>2 && memcmp(blob_buffer(pBlob), bom, 3)==0 ) {

	struct Blob temp;
    /* Copy everything after the 3-byte UTF-8 BOM into a fresh blob,
    ** then swap it into place. */
    zUtf8 = blob_str(pBlob) + 3;
    blob_zero(&temp);
    blob_append(&temp, zUtf8, -1);
    /* NOTE(review): zUtf8 points 3 bytes into pBlob's own buffer here,
    ** not at memory returned by a conversion routine.  Passing it to
    ** fossil_mbcs_free() looks wrong - confirm what that routine does. */
    fossil_mbcs_free(zUtf8);
    blob_swap(pBlob, &temp);
    blob_reset(&temp);

#ifdef _WIN32
  }else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
      && memcmp(blob_buffer(pBlob), &ubom, 2)==0 ) {
    /* Make sure the blob contains two terminating 0-bytes */
    blob_append(pBlob, "", 1);
    /* Skip the 2-byte BOM, then convert the remainder.  zUtf8 is
    ** reused: first as the input pointer, then as the converted
    ** string, which is released with fossil_mbcs_free() below. */
    zUtf8 = blob_str(pBlob) + 2;
    zUtf8 = fossil_unicode_to_utf8(zUtf8);
    /* NOTE(review): if blob_zero() re-initializes the blob without
    ** releasing its old buffer, the original content leaks here -
    ** confirm against the definition of blob_zero(). */
    blob_zero(pBlob);



    blob_append(pBlob, zUtf8, -1);
    fossil_mbcs_free(zUtf8);
  }else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
      && memcmp(blob_buffer(pBlob), &urbom, 2)==0 ) {
    /* Byte-swapped UTF-16 BOM.  The loop below walks the buffer from
    ** the end; presumably it swaps each byte pair in place so that the
    ** conversion that follows sees the other byte order (part of the
    ** loop body is elided in this view - confirm).
    ** NOTE(review): the declaration of i follows a statement, which
    ** requires C99 or later. */
    zUtf8 = blob_buffer(pBlob);
    unsigned int i = blob_size(pBlob);
    while( i > 0 ){
................................................................................
        zUtf8[--i] = temp;
    }
    /* Make sure the blob contains two terminating 0-bytes */
    blob_append(pBlob, "", 1);
    /* Same conversion steps as in the previous branch */
    zUtf8 = blob_str(pBlob) + 2;
    zUtf8 = fossil_unicode_to_utf8(zUtf8);
    blob_zero(pBlob);



    blob_append(pBlob, zUtf8, -1);
    fossil_mbcs_free(zUtf8);
  }else if (useMbcs) {
    /* No BOM recognized: convert the content with
    ** fossil_mbcs_to_utf8() because the caller asked for it. */
    zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
    blob_zero(pBlob);
    blob_append(pBlob, zUtf8, -1);
    fossil_mbcs_free(zUtf8);
#endif /* _WIN32 */
  }
}







|

>









>
|
|
|
|
|
|
|
>








>
>
>







 







>
>
>


|







1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
....
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
  *pRight = swap;
}

/*
** Strip or normalize a possible byte-order mark (BOM) at the start of
** pBlob.  The blob is modified in place.
**
** The useMbcs argument selects the behavior:
**
**   0  A leading UTF-8 BOM is removed.  If there is no BOM, the input
**      is assumed to be UTF-8 already, so no conversion is done.
**   1  Like 0, but on Windows content without a BOM is converted to
**      UTF-8 with fossil_mbcs_to_utf8().
**   2  A leading UTF-8 BOM is kept.  On Windows, when UTF-16 content
**      is converted, the UTF-8 BOM is prepended to the result, so any
**      BOM is replaced by the UTF-8 BOM.  Content without a BOM is
**      left unchanged.
**
** On Windows only (_WIN32), content of even length that starts with a
** UTF-16 BOM in either byte order is converted to UTF-8; for useMbcs
** 0 or 1 the result carries no BOM.  On other platforms only the
** UTF-8 BOM case is compiled in.
*/
void blob_strip_bom(Blob *pBlob, int useMbcs){
  /* The three bytes of the UTF-8 BOM */
  static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
#ifdef _WIN32
  /* The UTF-16 BOM as a 16-bit value, and the same value with its two
  ** bytes swapped.  Both are compared bytewise against the first two
  ** bytes of the blob. */
  static const unsigned short ubom = 0xfeff;
  static const unsigned short urbom = 0xfffe;
#endif /* _WIN32 */
  char *zUtf8;
  if( blob_size(pBlob)>2 && memcmp(blob_buffer(pBlob), bom, 3)==0 ) {
    /* Content starts with the UTF-8 BOM.  It is dropped unless the
    ** caller passed useMbcs>=2, in which case the blob is left as is. */
    if( useMbcs<2 ){
      struct Blob temp;
      /* Copy everything after the 3-byte BOM into a fresh blob, then
      ** swap it into place. */
      zUtf8 = blob_str(pBlob) + 3;
      blob_zero(&temp);
      blob_append(&temp, zUtf8, -1);
      /* NOTE(review): zUtf8 points 3 bytes into pBlob's own buffer
      ** here, not at memory returned by a conversion routine.  Passing
      ** it to fossil_mbcs_free() looks wrong - confirm what that
      ** routine does. */
      fossil_mbcs_free(zUtf8);
      blob_swap(pBlob, &temp);
      blob_reset(&temp);
    }
#ifdef _WIN32
  }else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
      && memcmp(blob_buffer(pBlob), &ubom, 2)==0 ) {
    /* Make sure the blob contains two terminating 0-bytes */
    blob_append(pBlob, "", 1);
    /* Skip the 2-byte BOM, then convert the remainder.  zUtf8 is
    ** reused: first as the input pointer, then as the converted
    ** string, which is released with fossil_mbcs_free() below. */
    zUtf8 = blob_str(pBlob) + 2;
    zUtf8 = fossil_unicode_to_utf8(zUtf8);
    /* NOTE(review): if blob_zero() re-initializes the blob without
    ** releasing its old buffer, the original content leaks here -
    ** confirm against the definition of blob_zero(). */
    blob_zero(pBlob);
    /* Mode 2: start the converted text with the UTF-8 BOM */
    if( useMbcs>1 ){
      blob_append(pBlob, (char*)bom, 3);
    }
    blob_append(pBlob, zUtf8, -1);
    fossil_mbcs_free(zUtf8);
  }else if( blob_size(pBlob)>1 && (blob_size(pBlob)&1)==0
      && memcmp(blob_buffer(pBlob), &urbom, 2)==0 ) {
    /* Byte-swapped UTF-16 BOM.  The loop below walks the buffer from
    ** the end; presumably it swaps each byte pair in place so that the
    ** conversion that follows sees the other byte order (part of the
    ** loop body is elided in this view - confirm).
    ** NOTE(review): the declaration of i follows a statement, which
    ** requires C99 or later. */
    zUtf8 = blob_buffer(pBlob);
    unsigned int i = blob_size(pBlob);
    while( i > 0 ){
................................................................................
        zUtf8[--i] = temp;
    }
    /* Make sure the blob contains two terminating 0-bytes */
    blob_append(pBlob, "", 1);
    /* Same conversion steps as in the previous branch */
    zUtf8 = blob_str(pBlob) + 2;
    zUtf8 = fossil_unicode_to_utf8(zUtf8);
    blob_zero(pBlob);
    /* Mode 2: start the converted text with the UTF-8 BOM */
    if( useMbcs>1 ){
      blob_append(pBlob, (char*)bom, 3);
    }
    blob_append(pBlob, zUtf8, -1);
    fossil_mbcs_free(zUtf8);
  }else if (useMbcs==1) {
    /* No BOM recognized and the caller asked for MBCS handling:
    ** convert the content with fossil_mbcs_to_utf8(). */
    zUtf8 = fossil_mbcs_to_utf8(blob_str(pBlob));
    blob_zero(pBlob);
    blob_append(pBlob, zUtf8, -1);
    fossil_mbcs_free(zUtf8);
#endif /* _WIN32 */
  }
}

Changes to src/checkin.c.

934
935
936
937
938
939
940
941
942
943
944
945
946
947

948
949
950
951
952
953
954
#endif
    ){
      char *zOrig = file_newname(zFilename, "original", 1);
      FILE *f;
      blob_write_to_file(p, zOrig);
      fossil_free(zOrig);
      f = fossil_fopen(zFilename, "wb");
      if( eType==-3 ) {
        blob_remove_cr(p);
      }else{
        static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
        fwrite(bom, 1, 3, f);
        blob_strip_bom(p, 0);
      }

      fwrite(blob_buffer(p), 1, blob_size(p), f);
      fclose(f);
      return 1;
    }else if( cReply!='y' && cReply!='Y' ){
      fossil_fatal("Abandoning commit due to %s in %s",
                   zWarning, blob_str(&fname));
    }







|
<
<




>







934
935
936
937
938
939
940
941


942
943
944
945
946
947
948
949
950
951
952
953
#endif
    ){
      char *zOrig = file_newname(zFilename, "original", 1);
      FILE *f;
      blob_write_to_file(p, zOrig);
      fossil_free(zOrig);
      f = fossil_fopen(zFilename, "wb");
      if( eType!=-3 ) {


        static const unsigned char bom[] = { 0xEF, 0xBB, 0xBF };
        fwrite(bom, 1, 3, f);
        blob_strip_bom(p, 0);
      }
      blob_remove_cr(p);
      fwrite(blob_buffer(p), 1, blob_size(p), f);
      fclose(f);
      return 1;
    }else if( cReply!='y' && cReply!='Y' ){
      fossil_fatal("Abandoning commit due to %s in %s",
                   zWarning, blob_str(&fname));
    }

Changes to src/diffcmd.c.

86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106

107

108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
...
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
  int fIncludeBinary,       /* Include binary files for external diff */
  u64 diffFlags             /* Flags to control the diff */
){
  if( zDiffCmd==0 ){
    Blob out;                 /* Diff output text */
    Blob file2;               /* Content of zFile2 */
    const char *zName2;       /* Name of zFile2 for display */
    int eType2;

    /* Read content of zFile2 into memory */
    blob_zero(&file2);
    if( file_wd_size(zFile2)<0 ){
      zName2 = NULL_DEVICE;
    }else{
      if( file_wd_islink(zFile2) ){
        blob_read_link(&file2, zFile2);
      }else{
        blob_read_from_file(&file2, zFile2);
      }
      zName2 = zName;
    }

    eType2 = looks_like_text(&file2)&3;

    /* Compute and output the differences */
    if( diffFlags & DIFF_BRIEF ){
      if( blob_compare(pFile1, &file2) ){
        fossil_print("CHANGED  %s\n", zName);
      }
    }else if( eType1!=eType2 ){
      fossil_print(DIFF_CANNOT_COMPUTE_ENCODING);
    }else{
      if( eType1>1 ){
        blob_strip_bom(pFile1, 0);
        blob_strip_bom(&file2, 0);
      }
      blob_zero(&out);
      text_diff(pFile1, &file2, &out, diffFlags);
      if( blob_size(&out) ){
        diff_print_filenames(zName, zName2, diffFlags);
        fossil_print("%s\n", blob_str(&out));
      }
      blob_reset(&out);
................................................................................
  u64 diffFlags             /* Diff flags */
){
  if( diffFlags & DIFF_BRIEF ) return;
  if( zDiffCmd==0 ){
    Blob out;      /* Diff output text */

    blob_zero(&out);
    if( eType>1 ){
      blob_strip_bom(pFile1, 0);
      blob_strip_bom(pFile2, 0);
    }
    text_diff(pFile1, pFile2, &out, diffFlags);
    diff_print_filenames(zName, zName, diffFlags);
    fossil_print("%s\n", blob_str(&out));

    /* Release memory resources */
    blob_reset(&out);
  }else{







|













>
|
>








<
|
|
<







 







<
|
|
<







86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117

118
119

120
121
122
123
124
125
126
...
216
217
218
219
220
221
222

223
224

225
226
227
228
229
230
231
  int fIncludeBinary,       /* Include binary files for external diff */
  u64 diffFlags             /* Flags to control the diff */
){
  if( zDiffCmd==0 ){
    Blob out;                 /* Diff output text */
    Blob file2;               /* Content of zFile2 */
    const char *zName2;       /* Name of zFile2 for display */
    int eType2 = 0;

    /* Read content of zFile2 into memory */
    blob_zero(&file2);
    if( file_wd_size(zFile2)<0 ){
      zName2 = NULL_DEVICE;
    }else{
      if( file_wd_islink(zFile2) ){
        blob_read_link(&file2, zFile2);
      }else{
        blob_read_from_file(&file2, zFile2);
      }
      zName2 = zName;
    }
    if( !fIncludeBinary ){
      eType2 = looks_like_text(&file2)&3;
    }
    /* Compute and output the differences */
    if( diffFlags & DIFF_BRIEF ){
      if( blob_compare(pFile1, &file2) ){
        fossil_print("CHANGED  %s\n", zName);
      }
    }else if( eType1!=eType2 ){
      fossil_print(DIFF_CANNOT_COMPUTE_ENCODING);
    }else{

      blob_strip_bom(pFile1, 2);
      blob_strip_bom(&file2, 2);

      blob_zero(&out);
      text_diff(pFile1, &file2, &out, diffFlags);
      if( blob_size(&out) ){
        diff_print_filenames(zName, zName2, diffFlags);
        fossil_print("%s\n", blob_str(&out));
      }
      blob_reset(&out);
................................................................................
  u64 diffFlags             /* Diff flags */
){
  if( diffFlags & DIFF_BRIEF ) return;
  if( zDiffCmd==0 ){
    Blob out;      /* Diff output text */

    blob_zero(&out);

    blob_strip_bom(pFile1, 2);
    blob_strip_bom(pFile2, 2);

    text_diff(pFile1, pFile2, &out, diffFlags);
    diff_print_filenames(zName, zName, diffFlags);
    fossil_print("%s\n", blob_str(&out));

    /* Release memory resources */
    blob_reset(&out);
  }else{