/* ** Copyright (c) 2007 D. Richard Hipp ** ** This program is free software; you can redistribute it and/or ** modify it under the terms of the Simplified BSD License (also ** known as the "2-Clause License" or "FreeBSD License".) ** This program is distributed in the hope that it will be useful, ** but without any warranty; without even the implied warranty of ** merchantability or fitness for a particular purpose. ** ** Author contact information: ** drh@hwaci.com ** http://www.hwaci.com/drh/ ** ******************************************************************************* ** ** This file contains code used to compute a "diff" between two ** text files. */ #include "config.h" #include "diff.h" #include /* ** Maximum length of a line in a text file. (8192) */ #define LENGTH_MASK_SZ 13 #define LENGTH_MASK ((1<LENGTH_MASK ){ return 0; } j = 0; } } if( j>LENGTH_MASK ){ return 0; } a = fossil_malloc( nLine*sizeof(a[0]) ); memset(a, 0, nLine*sizeof(a[0]) ); if( n==0 ){ *pnLine = 0; return a; } /* Fill in the array */ for(i=0; i0 && fossil_isspace(z[k-1]) ){ k--; } for(h=0, x=0; xh==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0; } /* ** Append a single line of "diff" output to pOut. */ static void appendDiffLine(Blob *pOut, char *zPrefix, DLine *pLine){ blob_append(pOut, zPrefix, 1); blob_append(pOut, pLine->z, pLine->h & LENGTH_MASK); blob_append(pOut, "\n", 1); } /* ** Expand the size of aEdit[] array to hold nEdit elements. */ static void expandEdit(DContext *p, int nEdit){ p->aEdit = fossil_realloc(p->aEdit, nEdit*sizeof(int)); p->nEditAlloc = nEdit; } /* ** Append a new COPY/DELETE/INSERT triple. */ static void appendTriple(DContext *p, int nCopy, int nDel, int nIns){ /* printf("APPEND %d/%d/%d\n", nCopy, nDel, nIns); */ if( p->nEdit>=3 ){ if( p->aEdit[p->nEdit-1]==0 ){ if( p->aEdit[p->nEdit-2]==0 ){ p->aEdit[p->nEdit-3] += nCopy; p->aEdit[p->nEdit-2] += nDel; p->aEdit[p->nEdit-1] += nIns; return; } if( nCopy==0 ){ p->aEdit[p->nEdit-2] += nDel; p->aEdit[p->nEdit-1] += nIns; return; } } if( nCopy==0 && nDel==0 ){ p->aEdit[p->nEdit-1] += nIns; return; } } if( p->nEdit+3>p->nEditAlloc ){ expandEdit(p, p->nEdit*2 + 15); if( p->aEdit==0 ) return; } p->aEdit[p->nEdit++] = nCopy; p->aEdit[p->nEdit++] = nDel; p->aEdit[p->nEdit++] = nIns; } /* ** Given a diff context in which the aEdit[] array has been filled ** in, compute a context diff into pOut. */ static void contextDiff(DContext *p, Blob *pOut, int nContext){ DLine *A; /* Left side of the diff */ DLine *B; /* Right side of the diff */ int a = 0; /* Index of next line in A[] */ int b = 0; /* Index of next line in B[] */ int *R; /* Array of COPY/DELETE/INSERT triples */ int r; /* Index into R[] */ int nr; /* Number of COPY/DELETE/INSERT triples to process */ int mxr; /* Maximum value for r */ int na, nb; /* Number of lines shown from A and B */ int i, j; /* Loop counters */ int m; /* Number of lines to output */ int skip; /* Number of lines to skip */ A = p->aFrom; B = p->aTo; R = p->aEdit; mxr = p->nEdit; while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; } for(r=0; r0 && R[r+nr*3]nContext ){ na = nb = nContext; skip = R[r] - nContext; }else{ na = nb = R[r]; skip = 0; } for(i=0; inContext ){ na += nContext; nb += nContext; }else{ na += R[r+nr*3]; nb += R[r+nr*3]; } for(i=1; inContext ) m = nContext; for(j=0; jaFrom[] */ int iS2, int iE2, /* Range of lines in p->aTo[] */ int *piSX, int *piEX, /* Write p->aFrom[] common segment here */ int *piSY, int *piEY /* Write p->aTo[] common segment here */ ){ int mxLength = 0; /* Length of longest common subsequence */ int i, j; /* Loop counters */ int k; /* Length of a candidate subsequence */ int iSXb = iS1; /* Best match so far */ int iSYb = iS2; /* Best match so far */ for(i=iS1; iaFrom[i], &p->aTo[j]) ) continue; if( mxLength && !same_dline(&p->aFrom[i+mxLength], &p->aTo[j+mxLength]) ){ continue; } k = 1; while( i+kaFrom[i+k],&p->aTo[j+k]) ){ k++; } if( k>mxLength ){ iSXb = i; iSYb = j; mxLength = k; } } } *piSX = iSXb; *piEX = iSXb + mxLength; *piSY = iSYb; *piEY = iSYb + mxLength; } /* ** Compare two blocks of text on lines iS1 through iE1-1 of the aFrom[] ** file and lines iS2 through iE2-1 of the aTo[] file. Locate a sequence ** of lines in these two blocks that are exactly the same. Return ** the bounds of the matching sequence. ** ** If there are two or more possible answers of the same length, the ** returned sequence should be the one closest to the center of the ** input range. ** ** Ideally, the common sequence should be the longest possible common ** sequence. However, an exact computation of LCS is O(N*N) which is ** way too slow for larger files. So this routine uses an O(N) ** heuristic approximation based on hashing that usually works about ** as well. But if the O(N) algorithm doesn't get a good solution ** and N is not too large, we fall back to an exact solution by ** calling optimalLCS(). */ static void longestCommonSequence( DContext *p, /* Two files being compared */ int iS1, int iE1, /* Range of lines in p->aFrom[] */ int iS2, int iE2, /* Range of lines in p->aTo[] */ int *piSX, int *piEX, /* Write p->aFrom[] common segment here */ int *piSY, int *piEY /* Write p->aTo[] common segment here */ ){ double bestScore = -1e30; /* Best score seen so far */ int i, j; /* Loop counters */ int iSX, iSY, iEX, iEY; /* Current match */ double score; /* Current score */ int skew; /* How lopsided is the match */ int dist; /* Distance of match from center */ int mid; /* Center of the span */ int iSXb, iSYb, iEXb, iEYb; /* Best match so far */ int iSXp, iSYp, iEXp, iEYp; /* Previous match */ iSXb = iSXp = iS1; iEXb = iEXp = iS1; iSYb = iSYp = iS2; iEYb = iEYp = iS2; mid = (iE1 + iS1)/2; for(i=iS1; iaTo[p->aFrom[i].h % p->nTo].iHash; while( j>0 && (j-1=iE2 || !same_dline(&p->aFrom[i], &p->aTo[j-1])) ){ if( limit++ > 10 ){ j = 0; break; } j = p->aTo[j-1].iNext; } if( j==0 ) continue; assert( i>=iSXb && i>=iSXp ); if( i=iSYb && j=iSYp && jiS1 && iSY>iS2 && same_dline(&p->aFrom[iSX-1],&p->aTo[iSY-1]) ){ iSX--; iSY--; } iEX = i+1; iEY = j; while( iEXaFrom[iEX],&p->aTo[iEY]) ){ iEX++; iEY++; } skew = (iSX-iS1) - (iSY-iS2); if( skew<0 ) skew = -skew; dist = (iSX+iEX)/2 - mid; if( dist<0 ) dist = -dist; score = (iEX - iSX) - 0.05*skew - 0.05*dist; if( score>bestScore ){ bestScore = score; iSXb = iSX; iSYb = iSY; iEXb = iEX; iEYb = iEY; }else{ iSXp = iSX; iSYp = iSY; iEXp = iEX; iEYp = iEY; } } if( iSXb==iEXb && (iE1-iS1)*(iE2-iS2)<400 ){ /* If no common sequence is found using the hashing heuristic and ** the input is not too big, use the expensive exact solution */ optimalLCS(p, iS1, iE1, iS2, iE2, piSX, piEX, piSY, piEY); }else{ *piSX = iSXb; *piSY = iSYb; *piEX = iEXb; *piEY = iEYb; } /* printf("LCS(%d..%d/%d..%d) = %d..%d/%d..%d\n", iS1, iE1, iS2, iE2, *piSX, *piEX, *piSY, *piEY); */ } /* ** Do a single step in the difference. Compute a sequence of ** copy/delete/insert steps that will convert lines iS1 through iE1-1 of ** the input into lines iS2 through iE2-1 of the output and write ** that sequence into the difference context. ** ** The algorithm is to find a block of common text near the middle ** of the two segments being diffed. Then recursively compute ** differences on the blocks before and after that common segment. ** Special cases apply if either input segment is empty or if the ** two segments have no text in common. */ static void diff_step(DContext *p, int iS1, int iE1, int iS2, int iE2){ int iSX, iEX, iSY, iEY; if( iE1<=iS1 ){ /* The first segment is empty */ if( iE2>iS2 ){ appendTriple(p, 0, 0, iE2-iS2); } return; } if( iE2<=iS2 ){ /* The second segment is empty */ appendTriple(p, 0, iE1-iS1, 0); return; } /* Find the longest matching segment between the two sequences */ longestCommonSequence(p, iS1, iE1, iS2, iE2, &iSX, &iEX, &iSY, &iEY); if( iEX>iSX ){ /* A common segement has been found. ** Recursively diff either side of the matching segment */ diff_step(p, iS1, iSX, iS2, iSY); if( iEX>iSX ){ appendTriple(p, iEX - iSX, 0, 0); } diff_step(p, iEX, iE1, iEY, iE2); }else{ /* The two segments have nothing in common. Delete the first then ** insert the second. */ appendTriple(p, 0, iE1-iS1, iE2-iS2); } } /* ** Compute the differences between two files already loaded into ** the DContext structure. ** ** A divide and conquer technique is used. We look for a large ** block of common text that is in the middle of both files. Then ** compute the difference on those parts of the file before and ** after the common block. This technique is fast, but it does ** not necessarily generate the minimum difference set. On the ** other hand, we do not need a minimum difference set, only one ** that makes sense to human readers, which this algorithm does. ** ** Any common text at the beginning and end of the two files is ** removed before starting the divide-and-conquer algorithm. */ static void diff_all(DContext *p){ int mnE, iS, iE1, iE2; /* Carve off the common header and footer */ iE1 = p->nFrom; iE2 = p->nTo; while( iE1>0 && iE2>0 && same_dline(&p->aFrom[iE1-1], &p->aTo[iE2-1]) ){ iE1--; iE2--; } mnE = iE1aFrom[iS],&p->aTo[iS]); iS++){} /* do the difference */ if( iS>0 ){ appendTriple(p, iS, 0, 0); } diff_step(p, iS, iE1, iS, iE2); if( iE1nFrom ){ appendTriple(p, p->nFrom - iE1, 0, 0); } /* Terminate the COPY/DELETE/INSERT triples with three zeros */ expandEdit(p, p->nEdit+3); if( p->aEdit ){ p->aEdit[p->nEdit++] = 0; p->aEdit[p->nEdit++] = 0; p->aEdit[p->nEdit++] = 0; } } /* ** Generate a report of the differences between files pA and pB. ** If pOut is not NULL then a unified diff is appended there. It ** is assumed that pOut has already been initialized. If pOut is ** NULL, then a pointer to an array of integers is returned. ** The integers come in triples. For each triple, ** the elements are the number of lines copied, the number of ** lines deleted, and the number of lines inserted. The vector ** is terminated by a triple of all zeros. ** ** This diff utility does not work on binary files. If a binary ** file is encountered, 0 is returned and pOut is written with ** text "cannot compute difference between binary files". */ int *text_diff( Blob *pA_Blob, /* FROM file */ Blob *pB_Blob, /* TO file */ Blob *pOut, /* Write unified diff here if not NULL */ int nContext, /* Amount of context to unified diff */ int ignoreEolWs /* Ignore whitespace at the end of lines */ ){ DContext c; /* Prepare the input files */ memset(&c, 0, sizeof(c)); c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob), &c.nFrom, ignoreEolWs); c.aTo = break_into_lines(blob_str(pB_Blob), blob_size(pB_Blob), &c.nTo, ignoreEolWs); if( c.aFrom==0 || c.aTo==0 ){ free(c.aFrom); free(c.aTo); if( pOut ){ blob_appendf(pOut, "cannot compute difference between binary files\n"); } return 0; } /* Compute the difference */ diff_all(&c); if( pOut ){ /* Compute a context diff if requested */ contextDiff(&c, pOut, nContext); free(c.aFrom); free(c.aTo); free(c.aEdit); return 0; }else{ /* If a context diff is not requested, then return the ** array of COPY/DELETE/INSERT triples. */ free(c.aFrom); free(c.aTo); return c.aEdit; } } /* ** COMMAND: test-rawdiff */ void test_rawdiff_cmd(void){ Blob a, b; int r; int i; int *R; if( g.argc<4 ) usage("FILE1 FILE2 ..."); blob_read_from_file(&a, g.argv[2]); for(i=3; i3 ) printf("-------------------------------\n"); blob_read_from_file(&b, g.argv[i]); R = text_diff(&a, &b, 0, 0, 0); for(r=0; R[r] || R[r+1] || R[r+2]; r += 3){ printf(" copy %4d delete %4d insert %4d\n", R[r], R[r+1], R[r+2]); } /* free(R); */ blob_reset(&b); } } /* ** COMMAND: test-udiff */ void test_udiff_cmd(void){ Blob a, b, out; if( g.argc!=4 ) usage("FILE1 FILE2"); blob_read_from_file(&a, g.argv[2]); blob_read_from_file(&b, g.argv[3]); blob_zero(&out); text_diff(&a, &b, &out, 3, 0); blob_write_to_file(&out, "-"); } /************************************************************************** ** The basic difference engine is above. What follows is the annotation ** engine. Both are in the same file since they share many components. */ /* ** The status of an annotation operation is recorded by an instance ** of the following structure. */ typedef struct Annotator Annotator; struct Annotator { DContext c; /* The diff-engine context */ struct { /* Lines of the original files... */ const char *z; /* The text of the line */ int n; /* Number of bytes (omitting trailing space and \n) */ const char *zSrc; /* Tag showing origin of this line */ } *aOrig; int nOrig; /* Number of elements in aOrig[] */ int nNoSrc; /* Number of entries where aOrig[].zSrc==NULL */ }; /* ** Initialize the annotation process by specifying the file that is ** to be annotated. The annotator takes control of the input Blob and ** will release it when it is finished with it. */ static int annotation_start(Annotator *p, Blob *pInput){ int i; memset(p, 0, sizeof(*p)); p->c.aTo = break_into_lines(blob_str(pInput), blob_size(pInput),&p->c.nTo,1); if( p->c.aTo==0 ){ return 1; } p->aOrig = fossil_malloc( sizeof(p->aOrig[0])*p->c.nTo ); for(i=0; ic.nTo; i++){ p->aOrig[i].z = p->c.aTo[i].z; p->aOrig[i].n = p->c.aTo[i].h & LENGTH_MASK; p->aOrig[i].zSrc = 0; } p->nOrig = p->c.nTo; return 0; } /* ** The input pParent is the next most recent ancestor of the file ** being annotated. Do another step of the annotation. Return true ** if additional annotation is required. zPName is the tag to insert ** on each line of the file being annotated that was contributed by ** pParent. Memory to hold zPName is leaked. */ static int annotation_step(Annotator *p, Blob *pParent, char *zPName){ int i, j; int lnTo; /* Prepare the parent file to be diffed */ p->c.aFrom = break_into_lines(blob_str(pParent), blob_size(pParent), &p->c.nFrom, 1); if( p->c.aFrom==0 ){ return 1; } /* Compute the differences going from pParent to the file being ** annotated. */ diff_all(&p->c); /* Where new lines are inserted on this difference, record the ** zPName as the source of the new line. */ for(i=lnTo=0; ic.nEdit; i+=3){ for(j=0; jc.aEdit[i]; j++, lnTo++){ p->aOrig[lnTo].zSrc = zPName; } lnTo += p->c.aEdit[i+2]; } /* Clear out the diff results */ free(p->c.aEdit); p->c.aEdit = 0; p->c.nEdit = 0; p->c.nEditAlloc = 0; /* Clear out the from file */ free(p->c.aFrom); blob_zero(pParent); /* Return no errors */ return 0; } /* ** COMMAND: test-annotate-step */ void test_annotate_step_cmd(void){ Blob orig, b; Annotator x; int i; if( g.argc<4 ) usage("RID1 RID2 ..."); db_must_be_within_tree(); blob_zero(&b); content_get(name_to_rid(g.argv[2]), &orig); if( annotation_start(&x, &orig) ){ fossil_fatal("binary file"); } for(i=3; i%.10s %s %9.9s", g.zTop, zUuid, zUuid, zDate, zUser ); }else{ zLabel = mprintf("%.10s %s %9.9s", zUuid, zDate, zUser); } content_get(pid, &step); annotation_step(p, &step, zLabel); blob_reset(&step); } db_finalize(&q); } /* ** WEBPAGE: annotate ** ** Query parameters: ** ** checkin=ID The manifest ID at which to start the annotation ** filename=FILENAME The filename. */ void annotation_page(void){ int mid; int fnid; int i; Annotator ann; login_check_credentials(); if( !g.okRead ){ login_needed(); return; } mid = name_to_rid(PD("checkin","0")); fnid = db_int(0, "SELECT fnid FROM filename WHERE name=%Q", P("filename")); if( mid==0 || fnid==0 ){ fossil_redirect_home(); } if( !db_exists("SELECT 1 FROM mlink WHERE mid=%d AND fnid=%d",mid,fnid) ){ fossil_redirect_home(); } style_header("File Annotation"); annotate_file(&ann, fnid, mid, g.okHistory); @
  for(i=0; i
  style_footer();
}

/*
** COMMAND: annotate
**
** %fossil annotate FILENAME
**
** Output the text of a file with markings to show when each line of
** the file was last modified.
*/
void annotate_cmd(void){
  int fnid;         /* Filename ID */
  int fid;          /* File instance ID */
  int mid;          /* Manifest where file was checked in */
  Blob treename;    /* FILENAME translated to canonical form */
  char *zFilename;  /* Cannonical filename */
  Annotator ann;    /* The annotation of the file */
  int i;            /* Loop counter */

  db_must_be_within_tree();
  if (g.argc<3) {
    usage("FILENAME");
  }
  file_tree_name(g.argv[2], &treename, 1);
  zFilename = blob_str(&treename);
  fnid = db_int(0, "SELECT fnid FROM filename WHERE name=%Q", zFilename);
  if( fnid==0 ){
    fossil_fatal("no such file: %s", zFilename);
  }
  fid = db_int(0, "SELECT rid FROM vfile WHERE pathname=%Q", zFilename);
  if( fid==0 ){
    fossil_fatal("not part of current checkout: %s", zFilename);
  }
  mid = db_int(0, "SELECT mid FROM mlink WHERE fid=%d AND fnid=%d", fid, fnid);
  if( mid==0 ){
    fossil_panic("unable to find manifest");
  }
  annotate_file(&ann, fnid, mid, 0);
  for(i=0; i