Fossil

Check-in [2ef37b3b]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Improvements to tar generation. Uses the format documented in Posix.1-2008 to handle long file names and UTF-8.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | ge-tarfix
Files: files | file ages | folders
SHA1:2ef37b3b2a0c2dd48f4f902644fdb85175eae86a
User & Date: ge 2011-07-24 00:36:46
Context
2011-07-24
19:47
Use a Blob object rather than a custom printf function in order to construct the PAX header for tarballs. Closed-Leaf check-in: 02ce8b4a user: drh tags: ge-tarfix
00:36
Improvements to tar generation. Uses the format documented in Posix.1-2008 to handle long file names and UTF-8. check-in: 2ef37b3b user: ge tags: ge-tarfix
2011-07-23
22:13
Fix a harmless compiler warning. check-in: ba15af45 user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/tar.c.

25
26
27
28
29
30
31




32








33
34
35
36
37
38
39
40
41
42
43

44






45
46
47


48
49
50
51
52
53

54



















































































































































































































































55
56
57
58
59
60
61
62
63
64

65
66







67
68
69
70
71
72





73
74
75
76
77

78












79
80
81
82
83
84
85
86




87
88
89
90
91
92
93
94
95
96
97
98
99

100
101
102
103
104







105
106
107

108
109
110
111
112
113
114
...
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
...
140
141
142
143
144
145
146







147
148
149
150
151
152
153
/*
** State information for the tarball builder.
*/
static struct tarball_t {
  unsigned char *aHdr;      /* Space for building headers */
  char *zSpaces;            /* Spaces for padding */
  char *zPrevDir;           /* Name of directory for previous entry */




} tball;









/*
** Begin the process of generating a tarball.
**
** Initialize the GZIP compressor and the table of directory names.
*/
static void tar_begin(void){
  assert( tball.aHdr==0 );
  tball.aHdr = fossil_malloc(512+512+256);
  memset(tball.aHdr, 0, 512+512+256);
  tball.zSpaces = (char*)&tball.aHdr[512];

  tball.zPrevDir = (char*)&tball.zSpaces[512];






  memcpy(&tball.aHdr[108], "0000000", 8);  /* Owner ID */
  memcpy(&tball.aHdr[116], "0000000", 8);  /* Group ID */
  memcpy(&tball.aHdr[257], "ustar  ", 7);  /* Format */


  gzip_begin();
  db_multi_exec(
    "CREATE TEMP TABLE dir(name UNIQUE);"
  );
}


/*



















































































































































































































































** Build a header for a file or directory and write that header
** into the growing tarball.
*/
static void tar_add_header(
  const char *zName,     /* Name of the object */
  int nName,             /* Number of characters in zName */
  int iMode,             /* Mode.  0644 or 0755 */
  unsigned int mTime,    /* File modification time */
  int iSize,             /* Size of the object in bytes */
  int iType              /* Type of object.  0==file.  5==directory */

){
  unsigned int cksum = 0;







  int i;
  if( nName>100 ){
    memcpy(&tball.aHdr[345], zName, nName-100);
    memcpy(tball.aHdr, &zName[nName-100], 100);
    memset(&tball.aHdr[245+nName], 0, 267-nName);
  }else{





    memcpy(tball.aHdr, zName, nName);
    memset(&tball.aHdr[nName], 0, 100-nName);
    memset(&tball.aHdr[345], 0, 167);
  }
  sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode);

  sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize);












  sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime);
  memset(&tball.aHdr[148], ' ', 8);
  tball.aHdr[156] = iType + '0';
  for(i=0; i<512; i++) cksum += tball.aHdr[i];
  sqlite3_snprintf(7, (char*)&tball.aHdr[148], "%06o", cksum);
  tball.aHdr[154] = 0;
  gzip_step((char*)tball.aHdr, 512);
}





/*
** Recursively add an directory entry for the given file if those
** directories have not previously been seen.
*/
static void tar_add_directory_of(
  const char *zName,      /* Name of directory including final "/" */
  int nName,              /* Characters in zName */
  unsigned int mTime      /* Modification time */
){
  int i;
  for(i=nName-1; i>0 && zName[i]!='/'; i--){}
  if( i<=0 ) return;

  if( tball.zPrevDir[i]==0 && memcmp(tball.zPrevDir, zName, i)==0 ) return;
  db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName);
  if( sqlite3_changes(g.db)==0 ) return;
  tar_add_directory_of(zName, i-1, mTime);
  tar_add_header(zName, i, 0755, mTime, 0, 5);







  memcpy(tball.zPrevDir, zName, i);
  tball.zPrevDir[i] = 0;
}


/*
** Add a single file to the growing tarball.
*/
static void tar_add_file(
  const char *zName,               /* Name of the file.  nul-terminated */
  Blob *pContent,                  /* Content of the file */
................................................................................
  int isExe,                       /* True for executable files */
  unsigned int mTime               /* Last modification time of the file */
){
  int nName = strlen(zName);
  int n = blob_size(pContent);
  int lastPage;

  if( nName>=250 ){
    fossil_fatal("name too long for ustar format: \"%s\"", zName);
  }
  tar_add_directory_of(zName, nName, mTime);
  tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, 0);
  if( n ){
    gzip_step(blob_buffer(pContent), n);
    lastPage = n % 512;
    if( lastPage!=0 ){
      gzip_step(tball.zSpaces, 512 - lastPage);
    }
  }
................................................................................
static void tar_finish(Blob *pOut){
  db_multi_exec("DROP TABLE dir");
  gzip_step(tball.zSpaces, 512);
  gzip_step(tball.zSpaces, 512);
  gzip_finish(pOut);
  fossil_free(tball.aHdr);
  tball.aHdr = 0;







}


/*
** COMMAND: test-tarball
**
** Generate a GZIP-compresssed tarball in the file given by the first argument







>
>
>
>

>
>
>
>
>
>
>
>








|
|

>
|
>
>
>
>
>
>


|
>
>






>

>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>









<
>

<
>
>
>
>
>
>
>
|
<
<
<
<
<
>
>
>
>
>
|
<
<
|
<
>
|
>
>
>
>
>
>
>
>
>
>
>
>
|
<
<
<
<
<
<
|
>
>
>
>













>
|



|
>
>
>
>
>
>
>



>







 







|
<
<

|







 







>
>
>
>
>
>
>







25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328

329
330

331
332
333
334
335
336
337
338





339
340
341
342
343
344


345

346
347
348
349
350
351
352
353
354
355
356
357
358
359
360






361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
...
403
404
405
406
407
408
409
410


411
412
413
414
415
416
417
418
419
...
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
/*
** State information for the tarball builder.
*/
static struct tarball_t {
  unsigned char *aHdr;      /* Space for building headers */
  char *zSpaces;            /* Spaces for padding */
  char *zPrevDir;           /* Name of directory for previous entry */
  int nPrevDirAlloc;        /* size of zPrevDir */
  char *pScratch;           /* scratch buffer used to build PAX data */
  int nScratchUsed;         /* part of buffer containing data */
  int nScratchAlloc;        /* size of buffer */
} tball;


/*
** field lengths of 'ustar' name and prefix fields.
*/
#define USTAR_NAME_LEN    100
#define USTAR_PREFIX_LEN  155


/*
** Begin the process of generating a tarball.
**
** Initialize the GZIP compressor and the table of directory names.
*/
static void tar_begin(void){
  assert( tball.aHdr==0 );
  tball.aHdr = fossil_malloc(512+512);
  memset(tball.aHdr, 0, 512+512);
  tball.zSpaces = (char*)&tball.aHdr[512];
  /* zPrevDir init */
  tball.zPrevDir = NULL;
  tball.nPrevDirAlloc = 0;
  /* scratch buffer init */
  tball.pScratch = NULL;
  tball.nScratchUsed = 0;
  tball.nScratchAlloc = 0;

  memcpy(&tball.aHdr[108], "0000000", 8);  /* Owner ID */
  memcpy(&tball.aHdr[116], "0000000", 8);  /* Group ID */
  memcpy(&tball.aHdr[257], "ustar\00000", 8);  /* POSIX.1 format */
  memcpy(&tball.aHdr[265], "nobody", 7);   /* Owner name */
  memcpy(&tball.aHdr[297], "nobody", 7);   /* Group name */
  gzip_begin();
  db_multi_exec(
    "CREATE TEMP TABLE dir(name UNIQUE);"
  );
}


/*
** print to the scratch buffer
**
** used to build the Pax Interchange Format data, and create
** pseudo-file names for the header data.
**
** The buffer is grown automatically to accommodate the data.
*/
static int scratch_printf(
  const char *fmt,
  ...
){
  for(;;){
    int newSize, minSpace, n;
    /* calculate space in buffer */
    int space = tball.nScratchAlloc - tball.nScratchUsed;
    /* format the string */
    va_list vl;
    va_start(vl, fmt);
    n = vsnprintf(&tball.pScratch[tball.nScratchUsed], space, fmt, vl);
    assert(n >= 0);
    va_end(vl);
    /* if it fit we're done */
    if(n < space)
      return n;
    /* buffer too short: calculate reasonable new size */
    minSpace = tball.nScratchUsed+n+1;
    newSize = 2 * tball.nScratchAlloc;
    if(newSize < minSpace)
      newSize = minSpace;
    /* grow the buffer */
    tball.pScratch = fossil_realloc(tball.pScratch, newSize);
    tball.nScratchAlloc = newSize;
    /* loop to try again */
  }
}


/*
** verify that lla characters in 'zName' are in the
** ISO646 (=ASCII) character set.
*/
static int is_iso646_name(
  const char *zName,     /* file path */
  int nName              /* path length */
){
  int i;
  for(i = 0; i < nName; i++){
    unsigned char c = (unsigned char)zName[i];
    if(c > 0x7e)
      return 0;
  }
  return 1;
}


/*
**   copy string pSrc into pDst, truncating or padding with 0 if necessary
*/
static void padded_copy(
  char *pDest,
  int nDest,
  const char *pSrc,
  int nSrc
){
  if(nSrc >= nDest){
    memcpy(pDest, pSrc, nDest);
  }else{
    memcpy(pDest, pSrc, nSrc);
    memset(&pDest[nSrc], 0, nDest - nSrc);
  }
}



/******************************************************************************
**
** The 'tar' format has evolved over time. Initially the name was stored
** in a 100 byte null-terminated field 'name'. File path names were
** limited to 99 bytes.
**
** The Posix.1 'ustar' format added a 155 byte field 'prefix', allowing
** for up to 255 characters to be stored. The full file path is formed by
** concatenating the field 'prefix', a slash, and the field 'name'. This
** gives some measure of compatibility with programs that only understand
** the oldest format.
**
** The latest Posix extension is called the 'pax Interchange Format'.
** It removes all the limitations of the previous two formats by allowing
** the storage of arbitrary-length attributes in a separate object that looks
** like a file to programs that do not understand this extension. So the
** contents of the 'name' and 'prefix' fields should contain values that allow
** versions of tar that do not understand this extension to still do
** something useful.
**
******************************************************************************/

/*
** The position we use to split a file path into the 'name' and 'prefix'
** fields needs to meet the following criteria:
**
**   - not at the beginning or end of the string
**   - the position must contain a slash
**   - no more than 100 characters follow the slash
**   - no more than 155 characters precede it
**
** The routine 'find_split_pos' finds a split position. It will meet the
** criteria of listed above if such a position exists. If no such
** position exists it generates one that useful for generating the
** values used for backward compatibility.
*/
static int find_split_pos(
  const char *zName,     /* file path */
  int nName              /* path length */
){
  int i, split = 0;
  /* only search if the string needs splitting */
  if(nName > USTAR_NAME_LEN){
    for(i = 1; i+1 < nName; i++)
      if(zName[i] == '/'){
        split = i+1;
        /* if the split position is within USTAR_NAME_LEN bytes from
         * the end we can quit */
        if(nName - split <= USTAR_NAME_LEN)
          break;
      }
  }
  return split;
}


/*
** attempt to split the file name path to meet 'ustar' header
** criteria.
*/
static int tar_split_path(
  const char *zName,     /* path */
  int nName,             /* path length */
  char *pName,           /* name field */
  char *pPrefix          /* prefix field */
){
  int split = find_split_pos(zName, nName);
  /* check whether both pieces fit */
  if(nName - split > USTAR_NAME_LEN || split > USTAR_PREFIX_LEN+1)
    return 0; /* no */

  /* extract name */
  padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);

  /* extract prefix */
  padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split - 1 : 0));

  return 1; /* success */
}


/*
** When using an extension header we still need to put something
** reasonable in the name and prefix fields. This is probably as
** good as it gets.
*/
static void approximate_split_path(
  const char *zName,     /* path */
  int nName,             /* path length */
  char *pName,           /* name field */
  char *pPrefix,         /* prefix field */
  int bHeader            /* is this a 'x' type tar header? */
){
  int split;

  /* if this is a Pax Interchange header prepend "PaxHeader/"
   * so we can tell files apart from metadata */
  if(bHeader){
       int n;
       tball.nScratchUsed = 0;
       n = scratch_printf("PaxHeader/%*.*s", nName, nName, zName);
       zName = tball.pScratch;
       nName = n;
  }

  /* find the split position */
  split = find_split_pos(zName, nName);

  /* extract a name, truncate if needed */
  padded_copy(pName, USTAR_NAME_LEN, &zName[split], nName - split);

  /* extract a prefix field, truncate when needed */
  padded_copy(pPrefix, USTAR_PREFIX_LEN, zName, (split > 0 ? split-1 : 0));
}


/*
** add a Pax Interchange header to the scratch buffer
**
** format: <length> <key>=<value>\n
** the tricky part is that each header contains its own
** size in decimal, counting that length.
*/
static void add_pax_header(
  const char *zField,
  const char *zValue,
  int nValue
){
  /* calculate length without length field */
  int blen = strlen(zField) + nValue + 3;
  /* calculate the length of the length field */
  int next10 = 1;
  int n;
  for(n = blen; n > 0; ){
    blen++; next10 *= 10;
    n /= 10;
  }
  /* adding the length extended the length field? */
  if(blen > next10)
    blen++;
  /* build the string */
  n = scratch_printf("%d %s=%*.*s\n", blen, zField, nValue, nValue, zValue);
  /* this _must_ be right */
  if(n != blen)
    fossil_fatal("internal error: PAX tar header has bad length");
  /* add length to scratch buffer */
  tball.nScratchUsed += blen;
}


/*
** set the header type, calculate the checksum and output
** the header
*/
static void cksum_and_write_header(
  char cType
){
  unsigned int cksum = 0;
  int i;
  memset(&tball.aHdr[148], ' ', 8);
  tball.aHdr[156] = cType;
  for(i=0; i<512; i++) cksum += tball.aHdr[i];
  sqlite3_snprintf(8, (char*)&tball.aHdr[148], "%07o", cksum);
  tball.aHdr[155] = 0;
  gzip_step((char*)tball.aHdr, 512);
}


/*
** Build a header for a file or directory and write that header
** into the growing tarball.
*/
static void tar_add_header(
  const char *zName,     /* Name of the object */
  int nName,             /* Number of characters in zName */
  int iMode,             /* Mode.  0644 or 0755 */
  unsigned int mTime,    /* File modification time */
  int iSize,             /* Size of the object in bytes */

  char cType             /* Type of object.  '0'==file.  '5'==directory */
){

  /* set mode and modification time */
  sqlite3_snprintf(8, (char*)&tball.aHdr[100], "%07o", iMode);
  sqlite3_snprintf(12, (char*)&tball.aHdr[136], "%011o", mTime);

  /* see if we need to output a Pax Interchange Header */
  if( !is_iso646_name(zName, nName) ||
            !tar_split_path(zName, nName, tball.aHdr, &tball.aHdr[345]) ){
    int lastPage;





    /* add a file name for interoperability with older programs */
    approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 1);

    /* generate the Pax Interchange path header */
    tball.nScratchUsed = 0;
    add_pax_header("path", zName, nName);




    /* set the header length, and write the header */
    sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", tball.nScratchUsed);
    cksum_and_write_header('x');

    /* write the Pax Interchange data */
    gzip_step(tball.pScratch, tball.nScratchUsed);
    lastPage = tball.nScratchUsed % 512;
    if( lastPage!=0 )
      gzip_step(tball.zSpaces, 512 - lastPage);

    /* generate an approximate path for the regular header */
    approximate_split_path(zName, nName, tball.aHdr, &tball.aHdr[345], 0);
  }
  /* set the size */
  sqlite3_snprintf(12, (char*)&tball.aHdr[124], "%011o", iSize);







  /* write the regular header */
  cksum_and_write_header(cType);
}


/*
** Recursively add an directory entry for the given file if those
** directories have not previously been seen.
*/
static void tar_add_directory_of(
  const char *zName,      /* Name of directory including final "/" */
  int nName,              /* Characters in zName */
  unsigned int mTime      /* Modification time */
){
  int i;
  for(i=nName-1; i>0 && zName[i]!='/'; i--){}
  if( i<=0 ) return;
  if( i < tball.nPrevDirAlloc && tball.zPrevDir[i]==0 &&
        memcmp(tball.zPrevDir, zName, i)==0 ) return;
  db_multi_exec("INSERT OR IGNORE INTO dir VALUES('%#q')", i, zName);
  if( sqlite3_changes(g.db)==0 ) return;
  tar_add_directory_of(zName, i-1, mTime);
  tar_add_header(zName, i, 0755, mTime, 0, '5');
  if( i >= tball.nPrevDirAlloc ){
    int nsize = tball.nPrevDirAlloc * 2;
    if(i+1 > nsize)
      nsize = i+1;
    tball.zPrevDir = fossil_realloc(tball.zPrevDir, nsize);
    tball.nPrevDirAlloc = nsize;
  }
  memcpy(tball.zPrevDir, zName, i);
  tball.zPrevDir[i] = 0;
}


/*
** Add a single file to the growing tarball.
*/
static void tar_add_file(
  const char *zName,               /* Name of the file.  nul-terminated */
  Blob *pContent,                  /* Content of the file */
................................................................................
  int isExe,                       /* True for executable files */
  unsigned int mTime               /* Last modification time of the file */
){
  int nName = strlen(zName);
  int n = blob_size(pContent);
  int lastPage;

  /* length check moved to tar_split_path */


  tar_add_directory_of(zName, nName, mTime);
  tar_add_header(zName, nName, isExe ? 0755 : 0644, mTime, n, '0');
  if( n ){
    gzip_step(blob_buffer(pContent), n);
    lastPage = n % 512;
    if( lastPage!=0 ){
      gzip_step(tball.zSpaces, 512 - lastPage);
    }
  }
................................................................................
static void tar_finish(Blob *pOut){
  db_multi_exec("DROP TABLE dir");
  gzip_step(tball.zSpaces, 512);
  gzip_step(tball.zSpaces, 512);
  gzip_finish(pOut);
  fossil_free(tball.aHdr);
  tball.aHdr = 0;
  fossil_free(tball.zPrevDir);
  tball.zPrevDir = NULL;
  tball.nPrevDirAlloc = 0;
  fossil_free(tball.pScratch);
  tball.pScratch = NULL;
  tball.nScratchUsed = 0;
  tball.nScratchAlloc = 0;
}


/*
** COMMAND: test-tarball
**
** Generate a GZIP-compresssed tarball in the file given by the first argument