Fossil

Artifact Content
Login

Artifact d9bd197c10a97fb9e7f3f8d88be457738b3cc7b1:


     1  /*
     2  ** Copyright (c) 2007 D. Richard Hipp
     3  **
     4  ** This program is free software; you can redistribute it and/or
     5  ** modify it under the terms of the Simplified BSD License (also
     6  ** known as the "2-Clause License" or "FreeBSD License".)
     7  
     8  ** This program is distributed in the hope that it will be useful,
     9  ** but without any warranty; without even the implied warranty of
    10  ** merchantability or fitness for a particular purpose.
    11  **
    12  ** Author contact information:
    13  **   drh@hwaci.com
    14  **   http://www.hwaci.com/drh/
    15  **
    16  *******************************************************************************
    17  **
    18  ** This file contains code used to compute a "diff" between two
    19  ** text files.
    20  */
    21  #include "config.h"
    22  #include "diff.h"
    23  #include <assert.h>
    24  
    25  
    26  /*
    27  ** Maximum length of a line in a text file.  (8192)
    28  */
    29  #define LENGTH_MASK_SZ  13
    30  #define LENGTH_MASK     ((1<<LENGTH_MASK_SZ)-1)
    31  
    32  /*
    33  ** Information about each line of a file being diffed.
    34  **
    35  ** The lower LENGTH_MASK_SZ bits of the hash (DLine.h) are the length
    36  ** of the line.  If any line is longer than LENGTH_MASK characters,
    37  ** the file is considered binary.
    38  */
    39  typedef struct DLine DLine;
    40  struct DLine {
    41    const char *z;        /* The text of the line */
    42    unsigned int h;       /* Hash of the line */
    43    unsigned int iNext;   /* 1+(Index of next line with same the same hash) */
    44  
    45    /* an array of DLine elements services two purposes.  The fields
    46    ** above are one per line of input text.  But each entry is also
    47    ** a bucket in a hash table, as follows: */
    48    unsigned int iHash;   /* 1+(first entry in the hash chain) */
    49  };
    50  
    51  /*
    52  ** A context for running a diff.
    53  */
    54  typedef struct DContext DContext;
    55  struct DContext {
    56    int *aEdit;        /* Array of copy/delete/insert triples */
    57    int nEdit;         /* Number of integers (3x num of triples) in aEdit[] */
    58    int nEditAlloc;    /* Space allocated for aEdit[] */
    59    DLine *aFrom;      /* File on left side of the diff */
    60    int nFrom;         /* Number of lines in aFrom[] */
    61    DLine *aTo;        /* File on right side of the diff */
    62    int nTo;           /* Number of lines in aTo[] */
    63  };
    64  
    65  /*
    66  ** Return an array of DLine objects containing a pointer to the
    67  ** start of each line and a hash of that line.  The lower 
    68  ** bits of the hash store the length of each line.
    69  **
    70  ** Trailing whitespace is removed from each line.  2010-08-20:  Not any
    71  ** more.  If trailing whitespace is ignored, the "patch" command gets
    72  ** confused by the diff output.  Ticket [a9f7b23c2e376af5b0e5b]
    73  **
    74  ** Return 0 if the file is binary or contains a line that is
    75  ** too long.
    76  */
    77  static DLine *break_into_lines(const char *z, int n, int *pnLine, int ignoreWS){
    78    int nLine, i, j, k, x;
    79    unsigned int h, h2;
    80    DLine *a;
    81  
    82    /* Count the number of lines.  Allocate space to hold
    83    ** the returned array.
    84    */
    85    for(i=j=0, nLine=1; i<n; i++, j++){
    86      int c = z[i];
    87      if( c==0 ){
    88        return 0;
    89      }
    90      if( c=='\n' && z[i+1]!=0 ){
    91        nLine++;
    92        if( j>LENGTH_MASK ){
    93          return 0;
    94        }
    95        j = 0;
    96      }
    97    }
    98    if( j>LENGTH_MASK ){
    99      return 0;
   100    }
   101    a = fossil_malloc( nLine*sizeof(a[0]) );
   102    memset(a, 0, nLine*sizeof(a[0]) );
   103    if( n==0 ){
   104      *pnLine = 0;
   105      return a;
   106    }
   107  
   108    /* Fill in the array */
   109    for(i=0; i<nLine; i++){
   110      a[i].z = z;
   111      for(j=0; z[j] && z[j]!='\n'; j++){}
   112      k = j;
   113      while( ignoreWS && k>0 && fossil_isspace(z[k-1]) ){ k--; }
   114      for(h=0, x=0; x<k; x++){
   115        h = h ^ (h<<2) ^ z[x];
   116      }
   117      a[i].h = h = (h<<LENGTH_MASK_SZ) | k;;
   118      h2 = h % nLine;
   119      a[i].iNext = a[h2].iHash;
   120      a[h2].iHash = i+1;
   121      z += j+1;
   122    }
   123  
   124    /* Return results */
   125    *pnLine = nLine;
   126    return a;
   127  }
   128  
   129  /*
   130  ** Return true if two DLine elements are identical.
   131  */
   132  static int same_dline(DLine *pA, DLine *pB){
   133    return pA->h==pB->h && memcmp(pA->z,pB->z,pA->h & LENGTH_MASK)==0;
   134  }
   135  
   136  /*
   137  ** Append a single line of "diff" output to pOut.
   138  */
   139  static void appendDiffLine(Blob *pOut, char *zPrefix, DLine *pLine){
   140    blob_append(pOut, zPrefix, 1);
   141    blob_append(pOut, pLine->z, pLine->h & LENGTH_MASK);
   142    blob_append(pOut, "\n", 1);
   143  }
   144  
   145  /*
   146  ** Expand the size of aEdit[] array to hold nEdit elements.
   147  */
   148  static void expandEdit(DContext *p, int nEdit){
   149    p->aEdit = fossil_realloc(p->aEdit, nEdit*sizeof(int));
   150    p->nEditAlloc = nEdit;
   151  }
   152  
   153  /*
   154  ** Append a new COPY/DELETE/INSERT triple.
   155  */
   156  static void appendTriple(DContext *p, int nCopy, int nDel, int nIns){
   157    /* printf("APPEND %d/%d/%d\n", nCopy, nDel, nIns); */
   158    if( p->nEdit>=3 ){
   159      if( p->aEdit[p->nEdit-1]==0 ){
   160        if( p->aEdit[p->nEdit-2]==0 ){
   161          p->aEdit[p->nEdit-3] += nCopy;
   162          p->aEdit[p->nEdit-2] += nDel;
   163          p->aEdit[p->nEdit-1] += nIns;
   164          return;
   165        }
   166        if( nCopy==0 ){
   167          p->aEdit[p->nEdit-2] += nDel;
   168          p->aEdit[p->nEdit-1] += nIns;
   169          return;
   170        }
   171      }
   172      if( nCopy==0 && nDel==0 ){
   173        p->aEdit[p->nEdit-1] += nIns;
   174        return;
   175      }
   176    }  
   177    if( p->nEdit+3>p->nEditAlloc ){
   178      expandEdit(p, p->nEdit*2 + 15);
   179      if( p->aEdit==0 ) return;
   180    }
   181    p->aEdit[p->nEdit++] = nCopy;
   182    p->aEdit[p->nEdit++] = nDel;
   183    p->aEdit[p->nEdit++] = nIns;
   184  }
   185  
   186  
   187  /*
   188  ** Given a diff context in which the aEdit[] array has been filled
   189  ** in, compute a context diff into pOut.
   190  */
   191  static void contextDiff(DContext *p, Blob *pOut, int nContext){
   192    DLine *A;     /* Left side of the diff */
   193    DLine *B;     /* Right side of the diff */  
   194    int a = 0;    /* Index of next line in A[] */
   195    int b = 0;    /* Index of next line in B[] */
   196    int *R;       /* Array of COPY/DELETE/INSERT triples */
   197    int r;        /* Index into R[] */
   198    int nr;       /* Number of COPY/DELETE/INSERT triples to process */
   199    int mxr;      /* Maximum value for r */
   200    int na, nb;   /* Number of lines shown from A and B */
   201    int i, j;     /* Loop counters */
   202    int m;        /* Number of lines to output */
   203    int skip;     /* Number of lines to skip */
   204  
   205    A = p->aFrom;
   206    B = p->aTo;
   207    R = p->aEdit;
   208    mxr = p->nEdit;
   209    while( mxr>2 && R[mxr-1]==0 && R[mxr-2]==0 ){ mxr -= 3; }
   210    for(r=0; r<mxr; r += 3*nr){
   211      /* Figure out how many triples to show in a single block */
   212      for(nr=1; R[r+nr*3]>0 && R[r+nr*3]<nContext*2; nr++){}
   213      /* printf("r=%d nr=%d\n", r, nr); */
   214  
   215      /* For the current block comprising nr triples, figure out
   216      ** how many lines of A and B are to be displayed
   217      */
   218      if( R[r]>nContext ){
   219        na = nb = nContext;
   220        skip = R[r] - nContext;
   221      }else{
   222        na = nb = R[r];
   223        skip = 0;
   224      }
   225      for(i=0; i<nr; i++){
   226        na += R[r+i*3+1];
   227        nb += R[r+i*3+2];
   228      }
   229      if( R[r+nr*3]>nContext ){
   230        na += nContext;
   231        nb += nContext;
   232      }else{
   233        na += R[r+nr*3];
   234        nb += R[r+nr*3];
   235      }
   236      for(i=1; i<nr; i++){
   237        na += R[r+i*3];
   238        nb += R[r+i*3];
   239      }
   240      /*
   241       * If the patch changes an empty file or results in an empty file,
   242       * the block header must use 0,0 as position indicator and not 1,0.
   243       * Otherwise, patch would be confused and may reject the diff.
   244       */
   245      blob_appendf(pOut,"@@ -%d,%d +%d,%d @@\n",
   246        na ? a+skip+1 : 0, na,
   247        nb ? b+skip+1 : 0, nb);
   248  
   249      /* Show the initial common area */
   250      a += skip;
   251      b += skip;
   252      m = R[r] - skip;
   253      for(j=0; j<m; j++){
   254        appendDiffLine(pOut, " ", &A[a+j]);
   255      }
   256      a += m;
   257      b += m;
   258  
   259      /* Show the differences */
   260      for(i=0; i<nr; i++){
   261        m = R[r+i*3+1];
   262        for(j=0; j<m; j++){
   263          appendDiffLine(pOut, "-", &A[a+j]);
   264        }
   265        a += m;
   266        m = R[r+i*3+2];
   267        for(j=0; j<m; j++){
   268          appendDiffLine(pOut, "+", &B[b+j]);
   269        }
   270        b += m;
   271        if( i<nr-1 ){
   272          m = R[r+i*3+3];
   273          for(j=0; j<m; j++){
   274            appendDiffLine(pOut, " ", &B[b+j]);
   275          }
   276          b += m;
   277          a += m;
   278        }
   279      }
   280  
   281      /* Show the final common area */
   282      assert( nr==i );
   283      m = R[r+nr*3];
   284      if( m>nContext ) m = nContext;
   285      for(j=0; j<m; j++){
   286        appendDiffLine(pOut, " ", &B[b+j]);
   287      }
   288    }
   289  }
   290  
   291  /*
   292  ** Compute the optimal longest common subsequence (LCS) using an
   293  ** exhaustive search.  This version of the LCS is only used for
   294  ** shorter input strings since runtime is O(N*N) where N is the
   295  ** input string length.
   296  */
   297  static void optimalLCS(
   298    DContext *p,               /* Two files being compared */
   299    int iS1, int iE1,          /* Range of lines in p->aFrom[] */
   300    int iS2, int iE2,          /* Range of lines in p->aTo[] */
   301    int *piSX, int *piEX,      /* Write p->aFrom[] common segment here */
   302    int *piSY, int *piEY       /* Write p->aTo[] common segment here */
   303  ){
   304    int mxLength = 0;          /* Length of longest common subsequence */
   305    int i, j;                  /* Loop counters */
   306    int k;                     /* Length of a candidate subsequence */
   307    int iSXb = iS1;            /* Best match so far */
   308    int iSYb = iS2;            /* Best match so far */
   309  
   310    for(i=iS1; i<iE1-mxLength; i++){
   311      for(j=iS2; j<iE2-mxLength; j++){
   312        if( !same_dline(&p->aFrom[i], &p->aTo[j]) ) continue;
   313        if( mxLength && !same_dline(&p->aFrom[i+mxLength], &p->aTo[j+mxLength]) ){
   314          continue;
   315        }
   316        k = 1;
   317        while( i+k<iE1 && j+k<iE2 && same_dline(&p->aFrom[i+k],&p->aTo[j+k]) ){
   318          k++;
   319        }
   320        if( k>mxLength ){
   321          iSXb = i;
   322          iSYb = j;
   323          mxLength = k;
   324        }
   325      }
   326    }
   327    *piSX = iSXb;
   328    *piEX = iSXb + mxLength;
   329    *piSY = iSYb;
   330    *piEY = iSYb + mxLength;
   331  }
   332  
   333  /*
   334  ** Compare two blocks of text on lines iS1 through iE1-1 of the aFrom[]
   335  ** file and lines iS2 through iE2-1 of the aTo[] file.  Locate a sequence
   336  ** of lines in these two blocks that are exactly the same.  Return
   337  ** the bounds of the matching sequence.
   338  **
   339  ** If there are two or more possible answers of the same length, the
   340  ** returned sequence should be the one closest to the center of the
   341  ** input range.
   342  **
   343  ** Ideally, the common sequence should be the longest possible common
   344  ** sequence.  However, an exact computation of LCS is O(N*N) which is
   345  ** way too slow for larger files.  So this routine uses an O(N)
   346  ** heuristic approximation based on hashing that usually works about 
   347  ** as well.  But if the O(N) algorithm doesn't get a good solution
   348  ** and N is not too large, we fall back to an exact solution by
   349  ** calling optimalLCS().
   350  */
   351  static void longestCommonSequence(
   352    DContext *p,               /* Two files being compared */
   353    int iS1, int iE1,          /* Range of lines in p->aFrom[] */
   354    int iS2, int iE2,          /* Range of lines in p->aTo[] */
   355    int *piSX, int *piEX,      /* Write p->aFrom[] common segment here */
   356    int *piSY, int *piEY       /* Write p->aTo[] common segment here */
   357  ){
   358    double bestScore = -1e30;  /* Best score seen so far */
   359    int i, j;                  /* Loop counters */
   360    int iSX, iSY, iEX, iEY;    /* Current match */
   361    double score;              /* Current score */
   362    int skew;                  /* How lopsided is the match */
   363    int dist;                  /* Distance of match from center */
   364    int mid;                   /* Center of the span */
   365    int iSXb, iSYb, iEXb, iEYb;   /* Best match so far */
   366    int iSXp, iSYp, iEXp, iEYp;   /* Previous match */
   367  
   368  
   369    iSXb = iSXp = iS1;
   370    iEXb = iEXp = iS1;
   371    iSYb = iSYp = iS2;
   372    iEYb = iEYp = iS2;
   373    mid = (iE1 + iS1)/2;
   374    for(i=iS1; i<iE1; i++){
   375      int limit = 0;
   376      j = p->aTo[p->aFrom[i].h % p->nTo].iHash;
   377      while( j>0 
   378        && (j-1<iS2 || j>=iE2 || !same_dline(&p->aFrom[i], &p->aTo[j-1]))
   379      ){
   380        if( limit++ > 10 ){
   381          j = 0;
   382          break;
   383        }
   384        j = p->aTo[j-1].iNext;
   385      }
   386      if( j==0 ) continue;
   387      assert( i>=iSXb && i>=iSXp );
   388      if( i<iEXb && j>=iSYb && j<iEYb ) continue;
   389      if( i<iEXp && j>=iSYp && j<iEYp ) continue;
   390      iSX = i;
   391      iSY = j-1;
   392      while( iSX>iS1 && iSY>iS2 && same_dline(&p->aFrom[iSX-1],&p->aTo[iSY-1]) ){
   393        iSX--;
   394        iSY--;
   395      }
   396      iEX = i+1;
   397      iEY = j;
   398      while( iEX<iE1 && iEY<iE2 && same_dline(&p->aFrom[iEX],&p->aTo[iEY]) ){
   399        iEX++;
   400        iEY++;
   401      }
   402      skew = (iSX-iS1) - (iSY-iS2);
   403      if( skew<0 ) skew = -skew;
   404      dist = (iSX+iEX)/2 - mid;
   405      if( dist<0 ) dist = -dist;
   406      score = (iEX - iSX) - 0.05*skew - 0.05*dist;
   407      if( score>bestScore ){
   408        bestScore = score;
   409        iSXb = iSX;
   410        iSYb = iSY;
   411        iEXb = iEX;
   412        iEYb = iEY;
   413      }else{
   414        iSXp = iSX;
   415        iSYp = iSY;
   416        iEXp = iEX;
   417        iEYp = iEY;
   418      }
   419    }
   420    if( iSXb==iEXb && (iE1-iS1)*(iE2-iS2)<400 ){
   421      /* If no common sequence is found using the hashing heuristic and
   422      ** the input is not too big, use the expensive exact solution */
   423      optimalLCS(p, iS1, iE1, iS2, iE2, piSX, piEX, piSY, piEY);
   424    }else{
   425      *piSX = iSXb;
   426      *piSY = iSYb;
   427      *piEX = iEXb;
   428      *piEY = iEYb;
   429    }
   430    /* printf("LCS(%d..%d/%d..%d) = %d..%d/%d..%d\n", 
   431       iS1, iE1, iS2, iE2, *piSX, *piEX, *piSY, *piEY);  */
   432  }
   433  
   434  /*
   435  ** Do a single step in the difference.  Compute a sequence of
   436  ** copy/delete/insert steps that will convert lines iS1 through iE1-1 of
   437  ** the input into lines iS2 through iE2-1 of the output and write
   438  ** that sequence into the difference context.
   439  **
   440  ** The algorithm is to find a block of common text near the middle
   441  ** of the two segments being diffed.  Then recursively compute
   442  ** differences on the blocks before and after that common segment.
   443  ** Special cases apply if either input segment is empty or if the
   444  ** two segments have no text in common.
   445  */
   446  static void diff_step(DContext *p, int iS1, int iE1, int iS2, int iE2){
   447    int iSX, iEX, iSY, iEY;
   448  
   449    if( iE1<=iS1 ){
   450      /* The first segment is empty */
   451      if( iE2>iS2 ){
   452        appendTriple(p, 0, 0, iE2-iS2);
   453      }
   454      return;
   455    }
   456    if( iE2<=iS2 ){
   457      /* The second segment is empty */
   458      appendTriple(p, 0, iE1-iS1, 0);
   459      return;
   460    }
   461  
   462    /* Find the longest matching segment between the two sequences */
   463    longestCommonSequence(p, iS1, iE1, iS2, iE2, &iSX, &iEX, &iSY, &iEY);
   464  
   465    if( iEX>iSX ){
   466      /* A common segement has been found.
   467      ** Recursively diff either side of the matching segment */
   468      diff_step(p, iS1, iSX, iS2, iSY);
   469      if( iEX>iSX ){
   470        appendTriple(p, iEX - iSX, 0, 0);
   471      }
   472      diff_step(p, iEX, iE1, iEY, iE2);
   473    }else{
   474      /* The two segments have nothing in common.  Delete the first then
   475      ** insert the second. */
   476      appendTriple(p, 0, iE1-iS1, iE2-iS2);
   477    }
   478  }
   479  
   480  /*
   481  ** Compute the differences between two files already loaded into
   482  ** the DContext structure.
   483  **
   484  ** A divide and conquer technique is used.  We look for a large
   485  ** block of common text that is in the middle of both files.  Then
   486  ** compute the difference on those parts of the file before and
   487  ** after the common block.  This technique is fast, but it does
   488  ** not necessarily generate the minimum difference set.  On the
   489  ** other hand, we do not need a minimum difference set, only one
   490  ** that makes sense to human readers, which this algorithm does.
   491  **
   492  ** Any common text at the beginning and end of the two files is
   493  ** removed before starting the divide-and-conquer algorithm.
   494  */
   495  static void diff_all(DContext *p){
   496    int mnE, iS, iE1, iE2;
   497  
   498    /* Carve off the common header and footer */
   499    iE1 = p->nFrom;
   500    iE2 = p->nTo;
   501    while( iE1>0 && iE2>0 && same_dline(&p->aFrom[iE1-1], &p->aTo[iE2-1]) ){
   502      iE1--;
   503      iE2--;
   504    }
   505    mnE = iE1<iE2 ? iE1 : iE2;
   506    for(iS=0; iS<mnE && same_dline(&p->aFrom[iS],&p->aTo[iS]); iS++){}
   507  
   508    /* do the difference */
   509    if( iS>0 ){
   510      appendTriple(p, iS, 0, 0);
   511    }
   512    diff_step(p, iS, iE1, iS, iE2);
   513    if( iE1<p->nFrom ){
   514      appendTriple(p, p->nFrom - iE1, 0, 0);
   515    }
   516  
   517    /* Terminate the COPY/DELETE/INSERT triples with three zeros */
   518    expandEdit(p, p->nEdit+3);
   519    if( p->aEdit ){
   520      p->aEdit[p->nEdit++] = 0;
   521      p->aEdit[p->nEdit++] = 0;
   522      p->aEdit[p->nEdit++] = 0;
   523    }
   524  }
   525  
   526  /*
   527  ** Generate a report of the differences between files pA and pB.
   528  ** If pOut is not NULL then a unified diff is appended there.  It
   529  ** is assumed that pOut has already been initialized.  If pOut is
   530  ** NULL, then a pointer to an array of integers is returned.  
   531  ** The integers come in triples.  For each triple,
   532  ** the elements are the number of lines copied, the number of
   533  ** lines deleted, and the number of lines inserted.  The vector
   534  ** is terminated by a triple of all zeros.
   535  **
   536  ** This diff utility does not work on binary files.  If a binary
   537  ** file is encountered, 0 is returned and pOut is written with
   538  ** text "cannot compute difference between binary files".
   539  */
   540  int *text_diff(
   541    Blob *pA_Blob,   /* FROM file */
   542    Blob *pB_Blob,   /* TO file */
   543    Blob *pOut,      /* Write unified diff here if not NULL */
   544    int nContext,    /* Amount of context to unified diff */
   545    int ignoreEolWs  /* Ignore whitespace at the end of lines */
   546  ){
   547    DContext c;
   548   
   549    /* Prepare the input files */
   550    memset(&c, 0, sizeof(c));
   551    c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob),
   552                               &c.nFrom, ignoreEolWs);
   553    c.aTo = break_into_lines(blob_str(pB_Blob), blob_size(pB_Blob),
   554                             &c.nTo, ignoreEolWs);
   555    if( c.aFrom==0 || c.aTo==0 ){
   556      free(c.aFrom);
   557      free(c.aTo);
   558      if( pOut ){
   559        blob_appendf(pOut, "cannot compute difference between binary files\n");
   560      }
   561      return 0;
   562    }
   563  
   564    /* Compute the difference */
   565    diff_all(&c);
   566  
   567    if( pOut ){
   568      /* Compute a context diff if requested */
   569      contextDiff(&c, pOut, nContext);
   570      free(c.aFrom);
   571      free(c.aTo);
   572      free(c.aEdit);
   573      return 0;
   574    }else{
   575      /* If a context diff is not requested, then return the
   576      ** array of COPY/DELETE/INSERT triples.
   577      */
   578      free(c.aFrom);
   579      free(c.aTo);
   580      return c.aEdit;
   581    }
   582  }
   583  
   584  /*
   585  ** COMMAND: test-rawdiff
   586  */
   587  void test_rawdiff_cmd(void){
   588    Blob a, b;
   589    int r;
   590    int i;
   591    int *R;
   592    if( g.argc<4 ) usage("FILE1 FILE2 ...");
   593    blob_read_from_file(&a, g.argv[2]);
   594    for(i=3; i<g.argc; i++){
   595      if( i>3 ) printf("-------------------------------\n");
   596      blob_read_from_file(&b, g.argv[i]);
   597      R = text_diff(&a, &b, 0, 0, 0);
   598      for(r=0; R[r] || R[r+1] || R[r+2]; r += 3){
   599        printf(" copy %4d  delete %4d  insert %4d\n", R[r], R[r+1], R[r+2]);
   600      }
   601      /* free(R); */
   602      blob_reset(&b);
   603    }
   604  }
   605  
   606  /*
   607  ** COMMAND: test-udiff
   608  */
   609  void test_udiff_cmd(void){
   610    Blob a, b, out;
   611    if( g.argc!=4 ) usage("FILE1 FILE2");
   612    blob_read_from_file(&a, g.argv[2]);
   613    blob_read_from_file(&b, g.argv[3]);
   614    blob_zero(&out);
   615    text_diff(&a, &b, &out, 3, 0);
   616    blob_write_to_file(&out, "-");
   617  }
   618  
   619  /**************************************************************************
   620  ** The basic difference engine is above.  What follows is the annotation
   621  ** engine.  Both are in the same file since they share many components.
   622  */
   623  
   624  /*
   625  ** The status of an annotation operation is recorded by an instance
   626  ** of the following structure.
   627  */
   628  typedef struct Annotator Annotator;
   629  struct Annotator {
   630    DContext c;       /* The diff-engine context */
   631    struct {          /* Lines of the original files... */
   632      const char *z;       /* The text of the line */
   633      int n;               /* Number of bytes (omitting trailing space and \n) */
   634      const char *zSrc;    /* Tag showing origin of this line */
   635    } *aOrig;
   636    int nOrig;        /* Number of elements in aOrig[] */
   637    int nNoSrc;       /* Number of entries where aOrig[].zSrc==NULL */
   638  };
   639  
   640  /*
   641  ** Initialize the annotation process by specifying the file that is
   642  ** to be annotated.  The annotator takes control of the input Blob and
   643  ** will release it when it is finished with it.
   644  */
   645  static int annotation_start(Annotator *p, Blob *pInput){
   646    int i;
   647  
   648    memset(p, 0, sizeof(*p));
   649    p->c.aTo = break_into_lines(blob_str(pInput), blob_size(pInput),&p->c.nTo,1);
   650    if( p->c.aTo==0 ){
   651      return 1;
   652    }
   653    p->aOrig = fossil_malloc( sizeof(p->aOrig[0])*p->c.nTo );
   654    for(i=0; i<p->c.nTo; i++){
   655      p->aOrig[i].z = p->c.aTo[i].z;
   656      p->aOrig[i].n = p->c.aTo[i].h & LENGTH_MASK;
   657      p->aOrig[i].zSrc = 0;
   658    }
   659    p->nOrig = p->c.nTo;
   660    return 0;
   661  }
   662  
   663  /*
   664  ** The input pParent is the next most recent ancestor of the file
   665  ** being annotated.  Do another step of the annotation.  Return true
   666  ** if additional annotation is required.  zPName is the tag to insert
   667  ** on each line of the file being annotated that was contributed by
   668  ** pParent.  Memory to hold zPName is leaked.
   669  */
   670  static int annotation_step(Annotator *p, Blob *pParent, char *zPName){
   671    int i, j;
   672    int lnTo;
   673  
   674    /* Prepare the parent file to be diffed */
   675    p->c.aFrom = break_into_lines(blob_str(pParent), blob_size(pParent),
   676                                  &p->c.nFrom, 1);
   677    if( p->c.aFrom==0 ){
   678      return 1;
   679    }
   680  
   681    /* Compute the differences going from pParent to the file being
   682    ** annotated. */
   683    diff_all(&p->c);
   684  
   685    /* Where new lines are inserted on this difference, record the
   686    ** zPName as the source of the new line.
   687    */
688 for(i=lnTo=0; i<p->c.nEdit; i+=3){
689 for(j=0; j<p->c.aEdit[i]; j++, lnTo++){ 690 p->aOrig[lnTo].zSrc = zPName; 691 } 692 lnTo += p->c.aEdit[i+2]; 693 } 694 695 /* Clear out the diff results */ 696 free(p->c.aEdit); 697 p->c.aEdit = 0; 698 p->c.nEdit = 0; 699 p->c.nEditAlloc = 0; 700 701 /* Clear out the from file */ 702 free(p->c.aFrom); 703 blob_zero(pParent); 704 705 /* Return no errors */ 706 return 0; 707 } 708 709 710 /* 711 ** COMMAND: test-annotate-step 712 */ 713 void test_annotate_step_cmd(void){ 714 Blob orig, b; 715 Annotator x; 716 int i; 717 718 if( g.argc<4 ) usage("RID1 RID2 ..."); 719 db_must_be_within_tree(); 720 blob_zero(&b); 721 content_get(name_to_rid(g.argv[2]), &orig); 722 if( annotation_start(&x, &orig) ){ 723 fossil_fatal("binary file"); 724 } 725 for(i=3; i<g.argc; i++){ 726 blob_zero(&b); 727 content_get(name_to_rid(g.argv[i]), &b); 728 if( annotation_step(&x, &b, g.argv[i-1]) ){ 729 fossil_fatal("binary file"); 730 } 731 } 732 for(i=0; i<x.nOrig; i++){ 733 const char *zSrc = x.aOrig[i].zSrc; 734 if( zSrc==0 ) zSrc = g.argv[g.argc-1]; 735 printf("%10s: %.*s\n", zSrc, x.aOrig[i].n, x.aOrig[i].z); 736 } 737 } 738 739 /* 740 ** Compute a complete annotation on a file. The file is identified 741 ** by its filename number (filename.fnid) and the baseline in which 742 ** it was checked in (mlink.mid). 743 */ 744 static void annotate_file(Annotator *p, int fnid, int mid, int webLabel){ 745 Blob toAnnotate; /* Text of the final version of the file */ 746 Blob step; /* Text of previous revision */ 747 int rid; /* Artifact ID of the file being annotated */ 748 char *zLabel; /* Label to apply to a line */ 749 Stmt q; /* Query returning all ancestor versions */ 750 751 /* Initialize the annotation */ 752 rid = db_int(0, "SELECT fid FROM mlink WHERE mid=%d AND fnid=%d",mid,fnid); 753 if( rid==0 ){ 754 fossil_panic("file #%d is unchanged in manifest #%d", fnid, mid); 755 } 756 if( !content_get(rid, &toAnnotate) ){ 757 fossil_panic("unable to retrieve content of artifact #%d", rid); 758 } 759 db_multi_exec("CREATE TEMP TABLE ok(rid INTEGER PRIMARY KEY)"); 760 compute_ancestors(mid, 1000000000); 761 annotation_start(p, &toAnnotate); 762 763 db_prepare(&q, 764 "SELECT mlink.fid, blob.uuid, date(event.mtime), " 765 " coalesce(event.euser,event.user) " 766 " FROM mlink, blob, event" 767 " WHERE mlink.fnid=%d" 768 " AND mlink.mid IN ok" 769 " AND blob.rid=mlink.mid" 770 " AND event.objid=mlink.mid" 771 " ORDER BY event.mtime DESC", 772 fnid 773 ); 774 while( db_step(&q)==SQLITE_ROW ){ 775 int pid = db_column_int(&q, 0); 776 const char *zUuid = db_column_text(&q, 1); 777 const char *zDate = db_column_text(&q, 2); 778 const char *zUser = db_column_text(&q, 3); 779 if( webLabel ){ 780 zLabel = mprintf( 781 "<a href='%s/info/%s' target='infowindow'>%.10s</a> %s %9.9s", 782 g.zTop, zUuid, zUuid, zDate, zUser 783 ); 784 }else{ 785 zLabel = mprintf("%.10s %s %9.9s", zUuid, zDate, zUser); 786 } 787 content_get(pid, &step); 788 annotation_step(p, &step, zLabel); 789 blob_reset(&step); 790 } 791 db_finalize(&q); 792 } 793 794 /* 795 ** WEBPAGE: annotate 796 ** 797 ** Query parameters: 798 ** 799 ** checkin=ID The manifest ID at which to start the annotation 800 ** filename=FILENAME The filename. 801 */ 802 void annotation_page(void){ 803 int mid; 804 int fnid; 805 int i; 806 Annotator ann; 807 808 login_check_credentials(); 809 if( !g.okRead ){ login_needed(); return; } 810 mid = name_to_rid(PD("checkin","0")); 811 fnid = db_int(0, "SELECT fnid FROM filename WHERE name=%Q", P("filename")); 812 if( mid==0 || fnid==0 ){ fossil_redirect_home(); } 813 if( !db_exists("SELECT 1 FROM mlink WHERE mid=%d AND fnid=%d",mid,fnid) ){ 814 fossil_redirect_home(); 815 } 816 style_header("File Annotation"); 817 annotate_file(&ann, fnid, mid, g.okHistory); 818 @ <pre> 819 for(i=0; i<ann.nOrig; i++){ 820 ((char*)ann.aOrig[i].z)[ann.aOrig[i].n] = 0; 821 @ %s(ann.aOrig[i].zSrc): %h(ann.aOrig[i].z) 822 } 823 @ </pre> 824 style_footer(); 825 } 826 827 /* 828 ** COMMAND: annotate 829 ** 830 ** %fossil annotate FILENAME 831 ** 832 ** Output the text of a file with markings to show when each line of 833 ** the file was last modified. 834 */ 835 void annotate_cmd(void){ 836 int fnid; /* Filename ID */ 837 int fid; /* File instance ID */ 838 int mid; /* Manifest where file was checked in */ 839 Blob treename; /* FILENAME translated to canonical form */ 840 char *zFilename; /* Cannonical filename */ 841 Annotator ann; /* The annotation of the file */ 842 int i; /* Loop counter */ 843 844 db_must_be_within_tree(); 845 if (g.argc<3) { 846 usage("FILENAME"); 847 } 848 file_tree_name(g.argv[2], &treename, 1); 849 zFilename = blob_str(&treename); 850 fnid = db_int(0, "SELECT fnid FROM filename WHERE name=%Q", zFilename); 851 if( fnid==0 ){ 852 fossil_fatal("no such file: %s", zFilename); 853 } 854 fid = db_int(0, "SELECT rid FROM vfile WHERE pathname=%Q", zFilename); 855 if( fid==0 ){ 856 fossil_fatal("not part of current checkout: %s", zFilename); 857 } 858 mid = db_int(0, "SELECT mid FROM mlink WHERE fid=%d AND fnid=%d", fid, fnid); 859 if( mid==0 ){ 860 fossil_panic("unable to find manifest"); 861 } 862 annotate_file(&ann, fnid, mid, 0); 863 for(i=0; i<ann.nOrig; i++){ 864 printf("%s: %.*s\n", ann.aOrig[i].zSrc, ann.aOrig[i].n, ann.aOrig[i].z); 865 } 866 }