/* ** Copyright (c) 2007 D. Richard Hipp ** ** This program is free software; you can redistribute it and/or ** modify it under the terms of the Simplified BSD License (also ** known as the "2-Clause License" or "FreeBSD License".) ** This program is distributed in the hope that it will be useful, ** but without any warranty; without even the implied warranty of ** merchantability or fitness for a particular purpose. ** ** Author contact information: ** drh@hwaci.com ** http://www.hwaci.com/drh/ ** ******************************************************************************* ** ** Procedures for managing the VFILE table. */ #include "config.h" #include "vfile.h" #include #include #if defined(__DMC__) #include "dirent.h" #else #include #endif /* ** The input is guaranteed to be a 40-character well-formed UUID. ** Find its rid. */ int fast_uuid_to_rid(const char *zUuid){ static Stmt q; int rid; db_static_prepare(&q, "SELECT rid FROM blob WHERE uuid=:uuid"); db_bind_text(&q, ":uuid", zUuid); if( db_step(&q)==SQLITE_ROW ){ rid = db_column_int(&q, 0); }else{ rid = 0; } db_reset(&q); return rid; } /* ** Given a UUID, return the corresponding record ID. If the UUID ** does not exist, then return 0. ** ** For this routine, the UUID must be exact. For a match against ** user input with mixed case, use resolve_uuid(). ** ** If the UUID is not found and phantomize is 1 or 2, then attempt to ** create a phantom record. A private phantom is created for 2 and ** a public phantom is created for 1. */ int uuid_to_rid(const char *zUuid, int phantomize){ int rid, sz; char z[UUID_SIZE+1]; sz = strlen(zUuid); if( sz!=UUID_SIZE || !validate16(zUuid, sz) ){ return 0; } memcpy(z, zUuid, UUID_SIZE+1); canonical16(z, sz); rid = fast_uuid_to_rid(z); if( rid==0 && phantomize ){ rid = content_new(zUuid, phantomize-1); } return rid; } /* ** Load a vfile from a record ID. */ void load_vfile_from_rid(int vid){ int rid, size; Stmt ins, ridq; Manifest *p; ManifestFile *pFile; if( db_exists("SELECT 1 FROM vfile WHERE vid=%d", vid) ){ return; } db_begin_transaction(); p = manifest_get(vid, CFTYPE_MANIFEST); if( p==0 ) return; db_multi_exec("DELETE FROM vfile WHERE vid=%d", vid); db_prepare(&ins, "INSERT INTO vfile(vid,isexe,islink,rid,mrid,pathname) " " VALUES(:vid,:isexe,:islink,:id,:id,:name)"); db_prepare(&ridq, "SELECT rid,size FROM blob WHERE uuid=:uuid"); db_bind_int(&ins, ":vid", vid); manifest_file_rewind(p); while( (pFile = manifest_file_next(p,0))!=0 ){ if( pFile->zUuid==0 || uuid_is_shunned(pFile->zUuid) ) continue; db_bind_text(&ridq, ":uuid", pFile->zUuid); if( db_step(&ridq)==SQLITE_ROW ){ rid = db_column_int(&ridq, 0); size = db_column_int(&ridq, 0); }else{ rid = 0; size = 0; } db_reset(&ridq); if( rid==0 || size<0 ){ fossil_warning("content missing for %s", pFile->zName); continue; } db_bind_int(&ins, ":isexe", ( manifest_file_mperm(pFile)==PERM_EXE )); db_bind_int(&ins, ":id", rid); db_bind_text(&ins, ":name", pFile->zName); db_bind_int(&ins, ":islink", ( manifest_file_mperm(pFile)==PERM_LNK )); db_step(&ins); db_reset(&ins); } db_finalize(&ridq); db_finalize(&ins); manifest_destroy(p); db_end_transaction(0); } /* ** Look at every VFILE entry with the given vid and set update ** VFILE.CHNGED field on every file according to whether or not ** the file has changes. 0 means no change. 1 means edited. 2 means ** the file has changed due to a merge. 3 means the file was added ** by a merge. ** ** If VFILE.DELETED is true or if VFILE.RID is zero, then the file was ** either removed from managemented via "fossil rm" or added via ** "fossil add", respectively, and in both cases we always know that ** the file has changed without having the check the size, mtime, ** or on-disk content. ** ** If the size of the file has changed, then we always know that the file ** changed without having to look at the mtime or on-disk content. ** ** The mtime of the file is only a factor if the mtime-changes setting ** is false and the useSha1sum flag is false. If the mtime-changes ** setting is true (or undefined - it defaults to true) or if useSha1sum ** is true, then we do not trust the mtime and will examine the on-disk ** content to determine if a file really is the same. ** ** If the mtime is used, it is used only to determine if files are the same. ** If the mtime of a file has changed, we still examine the on-disk content ** to see whether or not the edit was a null-edit. */ void vfile_check_signature(int vid, int notFileIsFatal, int useSha1sum){ int nErr = 0; Stmt q; Blob fileCksum, origCksum; int useMtime = useSha1sum==0 && db_get_boolean("mtime-changes", 1); db_begin_transaction(); db_prepare(&q, "SELECT id, %Q || pathname," " vfile.mrid, deleted, chnged, uuid, size, mtime" " FROM vfile LEFT JOIN blob ON vfile.mrid=blob.rid" " WHERE vid=%d ", g.zLocalRoot, vid); while( db_step(&q)==SQLITE_ROW ){ int id, rid, isDeleted; const char *zName; int chnged = 0; int oldChnged; i64 oldMtime; i64 currentMtime; i64 origSize; i64 currentSize; id = db_column_int(&q, 0); zName = db_column_text(&q, 1); rid = db_column_int(&q, 2); isDeleted = db_column_int(&q, 3); oldChnged = chnged = db_column_int(&q, 4); oldMtime = db_column_int64(&q, 7); currentSize = file_wd_size(zName); origSize = db_column_int64(&q, 6); currentMtime = file_wd_mtime(0); if( chnged==0 && (isDeleted || rid==0) ){ /* "fossil rm" or "fossil add" always change the file */ chnged = 1; }else if( !file_wd_isfile_or_link(0) && currentSize>=0 ){ if( notFileIsFatal ){ fossil_warning("not an ordinary file: %s", zName); nErr++; } chnged = 1; } if( origSize!=currentSize ){ if( chnged==0 ){ /* A file size change is definitive - the file has changed. No ** need to check the mtime or sha1sum */ chnged = 1; } }else if( chnged==1 && rid!=0 && !isDeleted ){ /* File is believed to have changed but it is the same size. ** Double check that it really has changed by looking at content. */ assert( origSize==currentSize ); db_ephemeral_blob(&q, 5, &origCksum); if( sha1sum_file(zName, &fileCksum) ){ blob_zero(&fileCksum); } if( blob_compare(&fileCksum, &origCksum)==0 ) chnged = 0; blob_reset(&origCksum); blob_reset(&fileCksum); }else if( chnged==0 && (useMtime==0 || currentMtime!=oldMtime) ){ /* For files that were formerly believed to be unchanged, if their ** mtime changes, or unconditionally if --sha1sum is used, check ** to see if they have been edited by looking at their SHA1 sum */ assert( origSize==currentSize ); db_ephemeral_blob(&q, 5, &origCksum); if( sha1sum_file(zName, &fileCksum) ){ blob_zero(&fileCksum); } if( blob_compare(&fileCksum, &origCksum) ){ chnged = 1; } blob_reset(&origCksum); blob_reset(&fileCksum); } if( currentMtime!=oldMtime || chnged!=oldChnged ){ db_multi_exec("UPDATE vfile SET mtime=%lld, chnged=%d WHERE id=%d", currentMtime, chnged, id); } } db_finalize(&q); if( nErr ) fossil_fatal("abort due to prior errors"); db_end_transaction(0); } /* ** Write all files from vid to the disk. Or if vid==0 and id!=0 ** write just the specific file where VFILE.ID=id. */ void vfile_to_disk( int vid, /* vid to write to disk */ int id, /* Write this one file, if not zero */ int verbose, /* Output progress information */ int promptFlag /* Prompt user to confirm overwrites */ ){ Stmt q; Blob content; int nRepos = strlen(g.zLocalRoot); if( vid>0 && id==0 ){ db_prepare(&q, "SELECT id, %Q || pathname, mrid, isexe, islink" " FROM vfile" " WHERE vid=%d AND mrid>0", g.zLocalRoot, vid); }else{ assert( vid==0 && id>0 ); db_prepare(&q, "SELECT id, %Q || pathname, mrid, isexe, islink" " FROM vfile" " WHERE id=%d AND mrid>0", g.zLocalRoot, id); } while( db_step(&q)==SQLITE_ROW ){ int id, rid, isExe, isLink; const char *zName; id = db_column_int(&q, 0); zName = db_column_text(&q, 1); rid = db_column_int(&q, 2); isExe = db_column_int(&q, 3); isLink = db_column_int(&q, 4); content_get(rid, &content); if( file_is_the_same(&content, zName) ){ blob_reset(&content); if( file_wd_setexe(zName, isExe) ){ db_multi_exec("UPDATE vfile SET mtime=%lld WHERE id=%d", file_wd_mtime(zName), id); } continue; } if( promptFlag && file_wd_size(zName)>=0 ){ Blob ans; char *zMsg; char cReply; zMsg = mprintf("overwrite %s (a=always/y/N)? ", zName); prompt_user(zMsg, &ans); free(zMsg); cReply = blob_str(&ans)[0]; blob_reset(&ans); if( cReply=='a' || cReply=='A' ){ promptFlag = 0; cReply = 'y'; } if( cReply=='n' || cReply=='N' ){ blob_reset(&content); continue; } } if( verbose ) fossil_print("%s\n", &zName[nRepos]); if( file_wd_isdir(zName) == 1 ){ /*TODO(dchest): remove directories? */ fossil_fatal("%s is directory, cannot overwrite\n", zName); } if( file_wd_size(zName)>=0 && (isLink || file_wd_islink(zName)) ){ file_delete(zName); } if( isLink ){ symlink_create(blob_str(&content), zName); }else{ blob_write_to_file(&content, zName); } file_wd_setexe(zName, isExe); blob_reset(&content); db_multi_exec("UPDATE vfile SET mtime=%lld WHERE id=%d", file_wd_mtime(zName), id); } db_finalize(&q); } /* ** Delete from the disk every file in VFILE vid. */ void vfile_unlink(int vid){ Stmt q; db_prepare(&q, "SELECT %Q || pathname FROM vfile" " WHERE vid=%d AND mrid>0", g.zLocalRoot, vid); while( db_step(&q)==SQLITE_ROW ){ const char *zName; zName = db_column_text(&q, 0); file_delete(zName); } db_finalize(&q); db_multi_exec("UPDATE vfile SET mtime=NULL WHERE vid=%d AND mrid>0", vid); } /* ** Check to see if the directory named in zPath is the top of a checkout. ** In other words, check to see if directory pPath contains a file named ** "_FOSSIL_" or ".fos". Return true or false. */ int vfile_top_of_checkout(const char *zPath){ char *zFile; int fileFound = 0; zFile = mprintf("%s/_FOSSIL_", zPath); fileFound = file_size(zFile)>=1024; fossil_free(zFile); if( !fileFound ){ zFile = mprintf("%s/.fos", zPath); fileFound = file_size(zFile)>=1024; fossil_free(zFile); } return fileFound; } /* ** Load into table SFILE the name of every ordinary file in ** the directory pPath. Omit the first nPrefix characters of ** of pPath when inserting into the SFILE table. ** ** Subdirectories are scanned recursively. ** Omit files named in VFILE. ** ** Files whose names begin with "." are omitted unless allFlag is true. ** ** Any files or directories that match the glob pattern pIgnore are ** excluded from the scan. Name matching occurs after the first ** nPrefix characters are elided from the filename. */ void vfile_scan(Blob *pPath, int nPrefix, int allFlag, Glob *pIgnore){ DIR *d; int origSize; const char *zDir; struct dirent *pEntry; int skipAll = 0; static Stmt ins; static int depth = 0; char *zMbcs; origSize = blob_size(pPath); if( pIgnore ){ blob_appendf(pPath, "/"); if( glob_match(pIgnore, &blob_str(pPath)[nPrefix+1]) ) skipAll = 1; blob_resize(pPath, origSize); } if( skipAll ) return; if( depth==0 ){ db_prepare(&ins, "INSERT OR IGNORE INTO sfile(x) SELECT :file" " WHERE NOT EXISTS(SELECT 1 FROM vfile WHERE pathname=:file)" ); } depth++; zDir = blob_str(pPath); zMbcs = fossil_utf8_to_mbcs(zDir); d = opendir(zMbcs); if( d ){ while( (pEntry=readdir(d))!=0 ){ char *zPath; char *zUtf8; if( pEntry->d_name[0]=='.' ){ if( !allFlag ) continue; if( pEntry->d_name[1]==0 ) continue; if( pEntry->d_name[1]=='.' && pEntry->d_name[2]==0 ) continue; } zUtf8 = fossil_mbcs_to_utf8(pEntry->d_name); blob_appendf(pPath, "/%s", zUtf8); fossil_mbcs_free(zUtf8); zPath = blob_str(pPath); if( glob_match(pIgnore, &zPath[nPrefix+1]) ){ /* do nothing */ }else if( file_wd_isdir(zPath)==1 ){ if( !vfile_top_of_checkout(zPath) ){ vfile_scan(pPath, nPrefix, allFlag, pIgnore); } }else if( file_wd_isfile_or_link(zPath) ){ db_bind_text(&ins, ":file", &zPath[nPrefix+1]); db_step(&ins); db_reset(&ins); } blob_resize(pPath, origSize); } closedir(d); } fossil_mbcs_free(zMbcs); depth--; if( depth==0 ){ db_finalize(&ins); } } /* ** Compute an aggregate MD5 checksum over the disk image of every ** file in vid. The file names are part of the checksum. The resulting ** checksum is the same as is expected on the R-card of a manifest. ** ** This function operates differently if the Global.aCommitFile ** variable is not NULL. In that case, the disk image is used for ** each file in aCommitFile[] and the repository image ** is used for all others). ** ** Newly added files that are not contained in the repository are ** omitted from the checksum if they are not in Global.aCommitFile[]. ** ** Newly deleted files are included in the checksum if they are not ** part of Global.aCommitFile[] ** ** Renamed files use their new name if they are in Global.aCommitFile[] ** and their original name if they are not in Global.aCommitFile[] ** ** Return the resulting checksum in blob pOut. */ void vfile_aggregate_checksum_disk(int vid, Blob *pOut){ FILE *in; Stmt q; char zBuf[4096]; db_must_be_within_tree(); db_prepare(&q, "SELECT %Q || pathname, pathname, origname, file_is_selected(id), rid" " FROM vfile" " WHERE (NOT deleted OR NOT file_is_selected(id)) AND vid=%d" " ORDER BY pathname /*scan*/", g.zLocalRoot, vid ); md5sum_init(); while( db_step(&q)==SQLITE_ROW ){ const char *zFullpath = db_column_text(&q, 0); const char *zName = db_column_text(&q, 1); int isSelected = db_column_int(&q, 3); if( isSelected ){ md5sum_step_text(zName, -1); if( file_wd_islink(zFullpath) ){ /* Instead of file content, use link destination path */ Blob pathBuf; sqlite3_snprintf(sizeof(zBuf), zBuf, " %ld\n", blob_read_link(&pathBuf, zFullpath)); md5sum_step_text(zBuf, -1); md5sum_step_text(blob_str(&pathBuf), -1); blob_reset(&pathBuf); }else{ in = fossil_fopen(zFullpath,"rb"); if( in==0 ){ md5sum_step_text(" 0\n", -1); continue; } fseek(in, 0L, SEEK_END); sqlite3_snprintf(sizeof(zBuf), zBuf, " %ld\n", ftell(in)); fseek(in, 0L, SEEK_SET); md5sum_step_text(zBuf, -1); /*printf("%s %s %s",md5sum_current_state(),zName,zBuf); fflush(stdout);*/ for(;;){ int n; n = fread(zBuf, 1, sizeof(zBuf), in); if( n<=0 ) break; md5sum_step_text(zBuf, n); } fclose(in); } }else{ int rid = db_column_int(&q, 4); const char *zOrigName = db_column_text(&q, 2); char zBuf[100]; Blob file; if( zOrigName ) zName = zOrigName; if( rid>0 ){ md5sum_step_text(zName, -1); blob_zero(&file); content_get(rid, &file); sqlite3_snprintf(sizeof(zBuf), zBuf, " %d\n", blob_size(&file)); md5sum_step_text(zBuf, -1); md5sum_step_blob(&file); blob_reset(&file); } } } db_finalize(&q); md5sum_finish(pOut); } /* ** Do a file-by-file comparison of the content of the repository and ** the working check-out on disk. Report any errors. */ void vfile_compare_repository_to_disk(int vid){ int rc; Stmt q; Blob disk, repo; db_must_be_within_tree(); db_prepare(&q, "SELECT %Q || pathname, pathname, rid FROM vfile" " WHERE NOT deleted AND vid=%d AND file_is_selected(id)", g.zLocalRoot, vid ); md5sum_init(); while( db_step(&q)==SQLITE_ROW ){ const char *zFullpath = db_column_text(&q, 0); const char *zName = db_column_text(&q, 1); int rid = db_column_int(&q, 2); blob_zero(&disk); if( file_wd_islink(zFullpath) ){ rc = blob_read_link(&disk, zFullpath); }else{ rc = blob_read_from_file(&disk, zFullpath); } if( rc<0 ){ fossil_print("ERROR: cannot read file [%s]\n", zFullpath); blob_reset(&disk); continue; } blob_zero(&repo); content_get(rid, &repo); if( blob_size(&repo)!=blob_size(&disk) ){ fossil_print("ERROR: [%s] is %d bytes on disk but %d in the repository\n", zName, blob_size(&disk), blob_size(&repo)); blob_reset(&disk); blob_reset(&repo); continue; } if( blob_compare(&repo, &disk) ){ fossil_print( "ERROR: [%s] is different on disk compared to the repository\n", zName); } blob_reset(&disk); blob_reset(&repo); } db_finalize(&q); } /* ** Compute an aggregate MD5 checksum over the repository image of every ** file in vid. The file names are part of the checksum. The resulting ** checksum is suitable for the R-card of a manifest. ** ** Return the resulting checksum in blob pOut. */ void vfile_aggregate_checksum_repository(int vid, Blob *pOut){ Blob file; Stmt q; char zBuf[100]; db_must_be_within_tree(); db_prepare(&q, "SELECT pathname, origname, rid, file_is_selected(id)" " FROM vfile" " WHERE (NOT deleted OR NOT file_is_selected(id))" " AND rid>0 AND vid=%d" " ORDER BY pathname /*scan*/", vid); blob_zero(&file); md5sum_init(); while( db_step(&q)==SQLITE_ROW ){ const char *zName = db_column_text(&q, 0); const char *zOrigName = db_column_text(&q, 1); int rid = db_column_int(&q, 2); int isSelected = db_column_int(&q, 3); if( zOrigName && !isSelected ) zName = zOrigName; md5sum_step_text(zName, -1); content_get(rid, &file); sqlite3_snprintf(sizeof(zBuf), zBuf, " %d\n", blob_size(&file)); md5sum_step_text(zBuf, -1); /*printf("%s %s %s",md5sum_current_state(),zName,zBuf); fflush(stdout);*/ md5sum_step_blob(&file); blob_reset(&file); } db_finalize(&q); md5sum_finish(pOut); } /* ** Compute an aggregate MD5 checksum over the repository image of every ** file in manifest vid. The file names are part of the checksum. The ** resulting checksum is suitable for use as the R-card of a manifest. ** ** Return the resulting checksum in blob pOut. ** ** If pManOut is not NULL then fill it with the checksum found in the ** "R" card near the end of the manifest. ** ** In a well-formed manifest, the two checksums computed here, pOut and ** pManOut, should be identical. */ void vfile_aggregate_checksum_manifest(int vid, Blob *pOut, Blob *pManOut){ int fid; Blob file; Manifest *pManifest; ManifestFile *pFile; char zBuf[100]; blob_zero(pOut); if( pManOut ){ blob_zero(pManOut); } db_must_be_within_tree(); pManifest = manifest_get(vid, CFTYPE_MANIFEST); if( pManifest==0 ){ fossil_panic("manifest file (%d) is malformed", vid); } manifest_file_rewind(pManifest); while( (pFile = manifest_file_next(pManifest,0))!=0 ){ if( pFile->zUuid==0 ) continue; fid = uuid_to_rid(pFile->zUuid, 0); md5sum_step_text(pFile->zName, -1); content_get(fid, &file); sqlite3_snprintf(sizeof(zBuf), zBuf, " %d\n", blob_size(&file)); md5sum_step_text(zBuf, -1); md5sum_step_blob(&file); blob_reset(&file); } if( pManOut ){ if( pManifest->zRepoCksum ){ blob_append(pManOut, pManifest->zRepoCksum, -1); }else{ blob_zero(pManOut); } } manifest_destroy(pManifest); md5sum_finish(pOut); } /* ** COMMAND: test-agg-cksum */ void test_agg_cksum_cmd(void){ int vid; Blob hash, hash2; db_must_be_within_tree(); vid = db_lget_int("checkout", 0); vfile_aggregate_checksum_disk(vid, &hash); printf("disk: %s\n", blob_str(&hash)); blob_reset(&hash); vfile_aggregate_checksum_repository(vid, &hash); printf("archive: %s\n", blob_str(&hash)); blob_reset(&hash); vfile_aggregate_checksum_manifest(vid, &hash, &hash2); printf("manifest: %s\n", blob_str(&hash)); printf("recorded: %s\n", blob_str(&hash2)); }