Fossil

Artifact Content
Login

Artifact e5d6a82db484f0e004a51278744ed0092f503923:


     1  /*
     2  ** Copyright (c) 2009 D. Richard Hipp
     3  **
     4  ** This program is free software; you can redistribute it and/or
     5  ** modify it under the terms of the Simplified BSD License (also
     6  ** known as the "2-Clause License" or "FreeBSD License".)
     7  
     8  ** This program is distributed in the hope that it will be useful,
     9  ** but without any warranty; without even the implied warranty of
    10  ** merchantability or fitness for a particular purpose.
    11  **
    12  ** Author contact information:
    13  **   drh@hwaci.com
    14  **   http://www.hwaci.com/drh/
    15  **
    16  *******************************************************************************
    17  **
    18  ** This file contains code to implement search functions
    19  ** against timeline comments, check-in content, wiki pages, and/or tickets.
    20  **
    21  ** The search can be either a per-query "grep"-like search that scans
    22  ** the entire corpus.  Or it can use the FTS4 or FTS5 search engine of
    23  ** SQLite.  The choice is an administrator configuration option.
    24  **
    25  ** The first option is referred to as "full-scan search".  The second
    26  ** option is called "indexed search".
    27  **
    28  ** The code in this file is ordered approximately as follows:
    29  **
    30  **    (1) The full-scan search engine
    31  **    (2) The indexed search engine
    32  **    (3) Higher level interfaces that use either (1) or (2) according
    33  **        to the current search configuration settings
    34  */
    35  #include "config.h"
    36  #include "search.h"
    37  #include <assert.h>
    38  
#if INTERFACE

/* Maximum number of search terms for full-scan search.  search_init()
** stops collecting terms once this many have been found, so any
** further words in the pattern are ignored. */
#define SEARCH_MAX_TERM   8

/*
** A compiled search pattern used for full-scan search.
**
** The a[].z pointers refer to positions inside zPattern; they are not
** separately allocated and are only valid for as long as zPattern is.
** iScore and snip are overwritten by every call to search_match().
*/
struct Search {
  int nTerm;            /* Number of search terms */
  struct srchTerm {     /* For each search term */
    char *z;               /* Text */
    int n;                 /* length */
  } a[SEARCH_MAX_TERM];
  /* Snippet controls */
  char *zPattern;       /* The search pattern */
  char *zMarkBegin;     /* Start of a match */
  char *zMarkEnd;       /* End of a match */
  char *zMarkGap;       /* A gap between two matches */
  unsigned fSrchFlg;    /* Flags */
  int iScore;           /* Score of the last match attempt */
  Blob snip;            /* Snippet for the most recent match */
};

/* Values for Search.fSrchFlg and for the fSrchFlg argument of
** search_init() */
#define SRCHFLG_HTML    0x01   /* Escape snippet text for HTML */
#define SRCHFLG_STATIC  0x04   /* The static gSearch object */

#endif
    67  
/*
** There is a single global Search object.  search_init() reuses it
** (instead of allocating a new object) when called with SRCHFLG_STATIC,
** and the search_match(), search_score() and search_snippet() SQL
** functions all operate on it.
*/
static Search gSearch;
    72  
    73  
/*
** These characters constitute a word boundary.
**
** The table is indexed by byte value (0..255).  An entry of 1 marks a
** boundary byte, 0 marks a byte that is part of a word.  Digits, ASCII
** letters, '_' and every byte >= 0x80 are word bytes; all other ASCII
** bytes (controls, space, punctuation) are boundaries.
*/
static const char isBoundary[] = {
  1, 1, 1, 1, 1, 1, 1, 1,     1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,     1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,     1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 1, 1, 1, 1, 1, 1,
  1, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 1, 1, 1, 1, 0,
  1, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
};
/* True if byte x is part of a word.  The &0xff mask keeps the index in
** range 0..255 even when x is a negative (signed) char. */
#define ISALNUM(x)  (!isBoundary[(x)&0xff])
    96  
    97  
    98  /*
    99  ** Destroy a full-scan search context.
   100  */
   101  void search_end(Search *p){
   102    if( p ){
   103      fossil_free(p->zPattern);
   104      fossil_free(p->zMarkBegin);
   105      fossil_free(p->zMarkEnd);
   106      fossil_free(p->zMarkGap);
   107      if( p->iScore ) blob_reset(&p->snip);
   108      memset(p, 0, sizeof(*p));
   109      if( p!=&gSearch ) fossil_free(p);
   110    }
   111  }
   112  
   113  /*
   114  ** Compile a full-scan search pattern
   115  */
   116  static Search *search_init(
   117    const char *zPattern,       /* The search pattern */
   118    const char *zMarkBegin,     /* Start of a match */
   119    const char *zMarkEnd,       /* End of a match */
   120    const char *zMarkGap,       /* A gap between two matches */
   121    unsigned fSrchFlg           /* Flags */
   122  ){
   123    Search *p;
   124    char *z;
   125    int i;
   126  
   127    if( fSrchFlg & SRCHFLG_STATIC ){
   128      p = &gSearch;
   129      search_end(p);
   130    }else{
   131      p = fossil_malloc(sizeof(*p));
   132      memset(p, 0, sizeof(*p));
   133    }
   134    p->zPattern = z = mprintf("%s", zPattern);
   135    p->zMarkBegin = mprintf("%s", zMarkBegin);
   136    p->zMarkEnd = mprintf("%s", zMarkEnd);
   137    p->zMarkGap = mprintf("%s", zMarkGap);
   138    p->fSrchFlg = fSrchFlg;
   139    blob_init(&p->snip, 0, 0);
   140    while( *z && p->nTerm<SEARCH_MAX_TERM ){
   141      while( *z && !ISALNUM(*z) ){ z++; }
   142      if( *z==0 ) break;
   143      p->a[p->nTerm].z = z;
   144      for(i=1; ISALNUM(z[i]); i++){}
   145      p->a[p->nTerm].n = i;
   146      z += i;
   147      p->nTerm++;
   148    }
   149    return p;
   150  }
   151  
   152  
   153  /*
   154  ** Append n bytes of text to snippet zTxt.  Encode the text appropriately.
   155  */
   156  static void snippet_text_append(
   157    Search *p,             /* The search context */
   158    Blob *pSnip,           /* Append to this snippet */
   159    const char *zTxt,      /* Text to append */
   160    int n                  /* How many bytes to append */
   161  ){
   162    if( n>0 ){
   163      if( p->fSrchFlg & SRCHFLG_HTML ){
   164        blob_appendf(pSnip, "%#h", n, zTxt);
   165      }else{
   166        blob_append(pSnip, zTxt, n);
   167      }
   168    }
   169  }
   170  
   171  /* This the core search engine for full-scan search.
   172  **
   173  ** Compare a search pattern against one or more input strings which
   174  ** collectively comprise a document.  Return a match score.  Any
   175  ** postive value means there was a match.  Zero means that one or
   176  ** more terms are missing.
   177  **
   178  ** The score and a snippet are record for future use.
   179  **
   180  ** Scoring:
   181  **   *  All terms must match at least once or the score is zero
   182  **   *  One point for each matching term
   183  **   *  Extra points if consecutive words of the pattern are consecutive
   184  **      in the document
   185  */
   186  static int search_match(
   187    Search *p,              /* Search pattern and flags */
   188    int nDoc,               /* Number of strings in this document */
   189    const char **azDoc      /* Text of each string */
   190  ){
   191    int score;                         /* Final score */
   192    int i;                             /* Offset into current document */
   193    int ii;                            /* Loop counter */
   194    int j;                             /* Loop over search terms */
   195    int k;                             /* Loop over prior terms */
   196    int iWord = 0;                     /* Current word number */
   197    int iDoc;                          /* Current document number */
   198    int wantGap = 0;                   /* True if a zMarkGap is wanted */
   199    const char *zDoc;                  /* Current document text */
   200    const int CTX = 50;                /* Amount of snippet context */
   201    int anMatch[SEARCH_MAX_TERM];      /* Number of terms in best match */
   202    int aiBestDoc[SEARCH_MAX_TERM];    /* Document containing best match */
   203    int aiBestOfst[SEARCH_MAX_TERM];   /* Byte offset to start of best match */
   204    int aiLastDoc[SEARCH_MAX_TERM];    /* Document containing most recent match */
   205    int aiLastOfst[SEARCH_MAX_TERM];   /* Byte offset to the most recent match */
   206    int aiWordIdx[SEARCH_MAX_TERM];    /* Word index of most recent match */
   207  
   208    memset(anMatch, 0, sizeof(anMatch));
   209    memset(aiWordIdx, 0xff, sizeof(aiWordIdx));
   210    for(iDoc=0; iDoc<nDoc; iDoc++){
   211      zDoc = azDoc[iDoc];
   212      if( zDoc==0 ) continue;
   213      iWord++;
   214      for(i=0; zDoc[i]; i++){
   215        if( !ISALNUM(zDoc[i]) ) continue;
   216        iWord++;
   217        for(j=0; j<p->nTerm; j++){
   218          int n = p->a[j].n;
   219          if( sqlite3_strnicmp(p->a[j].z, &zDoc[i], n)==0
   220           && (!ISALNUM(zDoc[i+n]) || p->a[j].z[n]=='*')
   221          ){
   222            aiWordIdx[j] = iWord;
   223            aiLastDoc[j] = iDoc;
   224            aiLastOfst[j] = i;
   225            for(k=1; j-k>=0 && anMatch[j-k] && aiWordIdx[j-k]==iWord-k; k++){}
   226            for(ii=0; ii<k; ii++){
   227              if( anMatch[j-ii]<k ){
   228                anMatch[j-ii] = k*(nDoc-iDoc);
   229                aiBestDoc[j-ii] = aiLastDoc[j-ii];
   230                aiBestOfst[j-ii] = aiLastOfst[j-ii];
   231              }
   232            }
   233            break;
   234          }
   235        }
   236        while( ISALNUM(zDoc[i]) ){ i++; }
   237        if( zDoc[i]==0 ) break;
   238      }
   239    }
   240  
   241    /* Finished search all documents.
   242    ** Every term must be seen or else the score is zero
   243    */
   244    score = 1;
   245    for(j=0; j<p->nTerm; j++) score *= anMatch[j];
   246    blob_reset(&p->snip);
   247    p->iScore = score;
   248    if( score==0 ) return score;
   249  
   250  
   251    /* Prepare a snippet that describes the matching text.
   252    */
   253    while(1){
   254      int iOfst;
   255      int iTail;
   256      int iBest;
   257      for(ii=0; ii<p->nTerm && anMatch[ii]==0; ii++){}
   258      if( ii>=p->nTerm ) break;  /* This is where the loop exits */
   259      iBest = ii;
   260      iDoc = aiBestDoc[ii];
   261      iOfst = aiBestOfst[ii];
   262      for(; ii<p->nTerm; ii++){
   263        if( anMatch[ii]==0 ) continue;
   264        if( aiBestDoc[ii]>iDoc ) continue;
   265        if( aiBestOfst[ii]>iOfst ) continue;
   266        iDoc = aiBestDoc[ii];
   267        iOfst = aiBestOfst[ii];
   268        iBest = ii;
   269      }
   270      iTail = iOfst + p->a[iBest].n;
   271      anMatch[iBest] = 0;
   272      for(ii=0; ii<p->nTerm; ii++){
   273        if( anMatch[ii]==0 ) continue;
   274        if( aiBestDoc[ii]!=iDoc ) continue;
   275        if( aiBestOfst[ii]<=iTail+CTX*2 ){
   276          if( iTail<aiBestOfst[ii]+p->a[ii].n ){
   277            iTail = aiBestOfst[ii]+p->a[ii].n;
   278          }
   279          anMatch[ii] = 0;
   280          ii = -1;
   281          continue;
   282        }
   283      }
   284      zDoc = azDoc[iDoc];
   285      iOfst -= CTX;
   286      if( iOfst<0 ) iOfst = 0;
   287      while( iOfst>0 && ISALNUM(zDoc[iOfst-1]) ) iOfst--;
   288      while( zDoc[iOfst] && !ISALNUM(zDoc[iOfst]) ) iOfst++;
   289      for(ii=0; ii<CTX && zDoc[iTail]; ii++, iTail++){}
   290      while( ISALNUM(zDoc[iTail]) ) iTail++;
   291      if( iOfst>0 || wantGap ) blob_append(&p->snip, p->zMarkGap, -1);
   292      wantGap = zDoc[iTail]!=0;
   293      zDoc += iOfst;
   294      iTail -= iOfst;
   295  
   296      /* Add a snippet segment using characters iOfst..iOfst+iTail from zDoc */
   297      for(i=0; i<iTail; i++){
   298        if( !ISALNUM(zDoc[i]) ) continue;
   299        for(j=0; j<p->nTerm; j++){
   300          int n = p->a[j].n;
   301          if( sqlite3_strnicmp(p->a[j].z, &zDoc[i], n)==0
   302           && (!ISALNUM(zDoc[i+n]) || p->a[j].z[n]=='*')
   303          ){
   304            snippet_text_append(p, &p->snip, zDoc, i);
   305            zDoc += i;
   306            iTail -= i;
   307            blob_append(&p->snip, p->zMarkBegin, -1);
   308            if( p->a[j].z[n]=='*' ){
   309              while( ISALNUM(zDoc[n]) ) n++;
   310            }
   311            snippet_text_append(p, &p->snip, zDoc, n);
   312            zDoc += n;
   313            iTail -= n;
   314            blob_append(&p->snip, p->zMarkEnd, -1);
   315            i = -1;
   316            break;
   317          } /* end-if */
   318        } /* end for(j) */
   319        if( j<p->nTerm ){
   320          while( ISALNUM(zDoc[i]) && i<iTail ){ i++; }
   321        }
   322      } /* end for(i) */
   323      snippet_text_append(p, &p->snip, zDoc, iTail);
   324    }
   325    if( wantGap ) blob_append(&p->snip, p->zMarkGap, -1);
   326    return score;
   327  }
   328  
   329  /*
   330  ** COMMAND: test-match
   331  **
   332  ** Usage: %fossil test-match SEARCHSTRING FILE1 FILE2 ...
   333  **
   334  ** Run the full-scan search algorithm using SEARCHSTRING against
   335  ** the text of the files listed.  Output matches and snippets.
   336  */
   337  void test_match_cmd(void){
   338    Search *p;
   339    int i;
   340    Blob x;
   341    int score;
   342    char *zDoc;
   343    int flg = 0;
   344    char *zBegin = (char*)find_option("begin",0,1);
   345    char *zEnd = (char*)find_option("end",0,1);
   346    char *zGap = (char*)find_option("gap",0,1);
   347    if( find_option("html",0,0)!=0 ) flg |= SRCHFLG_HTML;
   348    if( find_option("static",0,0)!=0 ) flg |= SRCHFLG_STATIC;
   349    verify_all_options();
   350    if( g.argc<4 ) usage("SEARCHSTRING FILE1...");
   351    if( zBegin==0 ) zBegin = "[[";
   352    if( zEnd==0 ) zEnd = "]]";
   353    if( zGap==0 ) zGap = " ... ";
   354    p = search_init(g.argv[2], zBegin, zEnd, zGap, flg);
   355    for(i=3; i<g.argc; i++){
   356      blob_read_from_file(&x, g.argv[i]);
   357      zDoc = blob_str(&x);
   358      score = search_match(p, 1, (const char**)&zDoc);
   359      fossil_print("%s: %d\n", g.argv[i], p->iScore);
   360      blob_reset(&x);
   361      if( score ){
   362        fossil_print("%.78c\n%s\n%.78c\n\n", '=', blob_str(&p->snip), '=');
   363      }
   364    }
   365    search_end(p);
   366  }
   367  
   368  /*
   369  ** An SQL function to initialize the full-scan search pattern:
   370  **
   371  **     search_init(PATTERN,BEGIN,END,GAP,FLAGS)
   372  **
   373  ** All arguments are optional.  PATTERN is the search pattern.  If it
   374  ** is omitted, then the global search pattern is reset.  BEGIN and END
   375  ** and GAP are the strings used to construct snippets.  FLAGS is an
   376  ** integer bit pattern containing the various SRCH_CKIN, SRCH_DOC,
   377  ** SRCH_TKT, or SRCH_ALL bits to determine what is to be searched.
   378  */
   379  static void search_init_sqlfunc(
   380    sqlite3_context *context,
   381    int argc,
   382    sqlite3_value **argv
   383  ){
   384    const char *zPattern = 0;
   385    const char *zBegin = "<mark>";
   386    const char *zEnd = "</mark>";
   387    const char *zGap = " ... ";
   388    unsigned int flg = SRCHFLG_HTML;
   389    switch( argc ){
   390      default:
   391        flg = (unsigned int)sqlite3_value_int(argv[4]);
   392      case 4:
   393        zGap = (const char*)sqlite3_value_text(argv[3]);
   394      case 3:
   395        zEnd = (const char*)sqlite3_value_text(argv[2]);
   396      case 2:
   397        zBegin = (const char*)sqlite3_value_text(argv[1]);
   398      case 1:
   399        zPattern = (const char*)sqlite3_value_text(argv[0]);
   400    }
   401    if( zPattern && zPattern[0] ){
   402      search_init(zPattern, zBegin, zEnd, zGap, flg | SRCHFLG_STATIC);
   403    }else{
   404      search_end(&gSearch);
   405    }
   406  }
   407  
   408  /*     search_match(TEXT, TEXT, ....)
   409  **
   410  ** Using the full-scan search engine created by the most recent call
   411  ** to search_init(), match the input the TEXT arguments.
   412  ** Remember the results global full-scan search object. 
   413  ** Return non-zero on a match and zero on a miss.
   414  */
   415  static void search_match_sqlfunc(
   416    sqlite3_context *context,
   417    int argc,
   418    sqlite3_value **argv
   419  ){
   420    const char *azDoc[5];
   421    int nDoc;
   422    int rc;
   423    for(nDoc=0; nDoc<ArraySize(azDoc) && nDoc<argc; nDoc++){
   424      azDoc[nDoc] = (const char*)sqlite3_value_text(argv[nDoc]);
   425      if( azDoc[nDoc]==0 ) azDoc[nDoc] = "";
   426    }
   427    rc = search_match(&gSearch, nDoc, azDoc);
   428    sqlite3_result_int(context, rc);
   429  }
   430  
   431  
   432  /*      search_score()
   433  **
   434  ** Return the match score for the last successful search_match call.
   435  */
   436  static void search_score_sqlfunc(
   437    sqlite3_context *context,
   438    int argc,
   439    sqlite3_value **argv
   440  ){
   441    sqlite3_result_int(context, gSearch.iScore);
   442  }
   443  
   444  /*      search_snippet()
   445  **
   446  ** Return a snippet for the last successful search_match() call.
   447  */
   448  static void search_snippet_sqlfunc(
   449    sqlite3_context *context,
   450    int argc,
   451    sqlite3_value **argv
   452  ){
   453    if( blob_size(&gSearch.snip)>0 ){
   454      sqlite3_result_text(context, blob_str(&gSearch.snip), -1, fossil_free);
   455      blob_init(&gSearch.snip, 0, 0);
   456    }
   457  }
   458  
   459  /*       stext(TYPE, RID, ARG)
   460  **
   461  ** This is an SQLite function that computes the searchable text.
   462  ** It is a wrapper around the search_stext() routine.  See the
   463  ** search_stext() routine for further detail.
   464  */
   465  static void search_stext_sqlfunc(
   466    sqlite3_context *context,
   467    int argc,
   468    sqlite3_value **argv
   469  ){
   470    const char *zType = (const char*)sqlite3_value_text(argv[0]);
   471    int rid = sqlite3_value_int(argv[1]);
   472    const char *zName = (const char*)sqlite3_value_text(argv[2]);
   473    sqlite3_result_text(context, search_stext_cached(zType[0],rid,zName,0), -1,
   474                        SQLITE_TRANSIENT);
   475  }
   476  
   477  /*       title(TYPE, RID, ARG)
   478  **
   479  ** Return the title of the document to be search.
   480  */
   481  static void search_title_sqlfunc(
   482    sqlite3_context *context,
   483    int argc,
   484    sqlite3_value **argv
   485  ){
   486    const char *zType = (const char*)sqlite3_value_text(argv[0]);
   487    int rid = sqlite3_value_int(argv[1]);
   488    const char *zName = (const char*)sqlite3_value_text(argv[2]);
   489    int nHdr = 0;
   490    char *z = search_stext_cached(zType[0], rid, zName, &nHdr);
   491    if( nHdr || zType[0]!='d' ){
   492      sqlite3_result_text(context, z, nHdr, SQLITE_TRANSIENT);
   493    }else{
   494      sqlite3_result_value(context, argv[2]);
   495    }
   496  }
   497  
   498  /*       body(TYPE, RID, ARG)
   499  **
   500  ** Return the body of the document to be search.
   501  */
   502  static void search_body_sqlfunc(
   503    sqlite3_context *context,
   504    int argc,
   505    sqlite3_value **argv
   506  ){
   507    const char *zType = (const char*)sqlite3_value_text(argv[0]);
   508    int rid = sqlite3_value_int(argv[1]);
   509    const char *zName = (const char*)sqlite3_value_text(argv[2]);
   510    int nHdr = 0;
   511    char *z = search_stext_cached(zType[0], rid, zName, &nHdr);
   512    sqlite3_result_text(context, z+nHdr+1, -1, SQLITE_TRANSIENT);
   513  }
   514  
   515  /*      urlencode(X)
   516  **
   517  ** Encode a string for use as a query parameter in a URL.  This is
   518  ** the equivalent of printf("%T",X).
   519  */
   520  static void search_urlencode_sqlfunc(
   521    sqlite3_context *context,
   522    int argc,
   523    sqlite3_value **argv
   524  ){
   525    char *z = mprintf("%T",sqlite3_value_text(argv[0]));
   526    sqlite3_result_text(context, z, -1, fossil_free);
   527  }
   528  
   529  /*
   530  ** Register the various SQL functions (defined above) needed to implement
   531  ** full-scan search.
   532  */
   533  void search_sql_setup(sqlite3 *db){
   534    static int once = 0;
   535    if( once++ ) return;
   536    sqlite3_create_function(db, "search_match", -1, SQLITE_UTF8, 0,
   537       search_match_sqlfunc, 0, 0);
   538    sqlite3_create_function(db, "search_score", 0, SQLITE_UTF8, 0,
   539       search_score_sqlfunc, 0, 0);
   540    sqlite3_create_function(db, "search_snippet", 0, SQLITE_UTF8, 0,
   541       search_snippet_sqlfunc, 0, 0);
   542    sqlite3_create_function(db, "search_init", -1, SQLITE_UTF8, 0,
   543       search_init_sqlfunc, 0, 0);
   544    sqlite3_create_function(db, "stext", 3, SQLITE_UTF8, 0,
   545       search_stext_sqlfunc, 0, 0);
   546    sqlite3_create_function(db, "title", 3, SQLITE_UTF8, 0,
   547       search_title_sqlfunc, 0, 0);
   548    sqlite3_create_function(db, "body", 3, SQLITE_UTF8, 0,
   549       search_body_sqlfunc, 0, 0);
   550    sqlite3_create_function(db, "urlencode", 1, SQLITE_UTF8, 0,
   551       search_urlencode_sqlfunc, 0, 0);
   552  }
   553  
   554  /*
   555  ** Testing the search function.
   556  **
   557  ** COMMAND: search*
   558  **
   559  ** Usage: %fossil search [-all|-a] [-limit|-n #] [-width|-W #] pattern...
   560  **
   561  ** Search for timeline entries matching all words provided on the
   562  ** command line. Whole-word matches scope more highly than partial
   563  ** matches.
   564  **
   565  ** Outputs, by default, some top-N fraction of the results. The -all
   566  ** option can be used to output all matches, regardless of their search
   567  ** score.  The -limit option can be used to limit the number of entries
   568  ** returned.  The -width option can be used to set the output width used
   569  ** when printing matches.
   570  **
   571  ** Options:
   572  **
   573  **     -a|--all          Output all matches, not just best matches.
   574  **     -n|--limit N      Limit output to N matches.
   575  **     -W|--width WIDTH  Set display width to WIDTH columns, 0 for
   576  **                       unlimited. Defaults the terminal's width.
   577  */
   578  void search_cmd(void){
   579    Blob pattern;
   580    int i;
   581    Blob sql = empty_blob;
   582    Stmt q;
   583    int iBest;
   584    char fAll = NULL != find_option("all", "a", 0); /* If set, do not lop
   585                                                       off the end of the
   586                                                       results. */
   587    const char *zLimit = find_option("limit","n",1);
   588    const char *zWidth = find_option("width","W",1);
   589    int nLimit = zLimit ? atoi(zLimit) : -1000;   /* Max number of matching
   590                                                     lines/entries to list */
   591    int width;
   592    if( zWidth ){
   593      width = atoi(zWidth);
   594      if( (width!=0) && (width<=20) ){
   595        fossil_fatal("-W|--width value must be >20 or 0");
   596      }
   597    }else{
   598      width = -1;
   599    }
   600  
   601    db_must_be_within_tree();
   602    if( g.argc<2 ) return;
   603    blob_init(&pattern, g.argv[2], -1);
   604    for(i=3; i<g.argc; i++){
   605      blob_appendf(&pattern, " %s", g.argv[i]);
   606    }
   607    (void)search_init(blob_str(&pattern),"*","*","...",SRCHFLG_STATIC);
   608    blob_reset(&pattern);
   609    search_sql_setup(g.db);
   610  
   611    db_multi_exec(
   612       "CREATE TEMP TABLE srch(rid,uuid,date,comment,x);"
   613       "CREATE INDEX srch_idx1 ON srch(x);"
   614       "INSERT INTO srch(rid,uuid,date,comment,x)"
   615       "   SELECT blob.rid, uuid, datetime(event.mtime,toLocal()),"
   616       "          coalesce(ecomment,comment),"
   617       "          search_score()"
   618       "     FROM event, blob"
   619       "    WHERE blob.rid=event.objid"
   620       "      AND search_match(coalesce(ecomment,comment));"
   621    );
   622    iBest = db_int(0, "SELECT max(x) FROM srch");
   623    blob_append(&sql,
   624                "SELECT rid, uuid, date, comment, 0, 0 FROM srch "
   625                "WHERE 1 ", -1);
   626    if(!fAll){
   627      blob_append_sql(&sql,"AND x>%d ", iBest/3);
   628    }
   629    blob_append(&sql, "ORDER BY x DESC, date DESC ", -1);
   630    db_prepare(&q, "%s", blob_sql_text(&sql));
   631    blob_reset(&sql);
   632    print_timeline(&q, nLimit, width, 0);
   633    db_finalize(&q);
   634  }
   635  
#if INTERFACE
/* What to search for.  These are bit flags that may be OR-ed together;
** SRCH_ALL is the union of the four individual bits.  search_restrict()
** removes the bits that the current user or configuration disallows. */
#define SRCH_CKIN   0x0001    /* Search over check-in comments */
#define SRCH_DOC    0x0002    /* Search over embedded documents */
#define SRCH_TKT    0x0004    /* Search over tickets */
#define SRCH_WIKI   0x0008    /* Search over wiki */
#define SRCH_ALL    0x000f    /* Search over everything */
#endif
   644  
   645  /*
   646  ** Remove bits from srchFlags which are disallowed by either the
   647  ** current server configuration or by user permissions.
   648  */
   649  unsigned int search_restrict(unsigned int srchFlags){
   650    static unsigned int knownGood = 0;
   651    static unsigned int knownBad = 0;
   652    static const struct { unsigned m; const char *zKey; } aSetng[] = {
   653       { SRCH_CKIN,   "search-ci"   },
   654       { SRCH_DOC,    "search-doc"  },
   655       { SRCH_TKT,    "search-tkt"  },
   656       { SRCH_WIKI,   "search-wiki" },
   657    };
   658    int i;
   659    if( g.perm.Read==0 )   srchFlags &= ~(SRCH_CKIN|SRCH_DOC);
   660    if( g.perm.RdTkt==0 )  srchFlags &= ~(SRCH_TKT);
   661    if( g.perm.RdWiki==0 ) srchFlags &= ~(SRCH_WIKI);
   662    for(i=0; i<ArraySize(aSetng); i++){
   663      unsigned int m = aSetng[i].m;
   664      if( (srchFlags & m)==0 ) continue;
   665      if( ((knownGood|knownBad) & m)!=0 ) continue;
   666      if( db_get_boolean(aSetng[i].zKey,0) ){
   667        knownGood |= m;
   668      }else{
   669        knownBad |= m;
   670      }
   671    }
   672    return srchFlags & ~knownBad;
   673  }
   674  
   675  /*
   676  ** When this routine is called, there already exists a table
   677  **
   678  **       x(label,url,score,id,snip).
   679  **
   680  ** label:  The "name" of the document containing the match
   681  ** url:    A URL for the document
   682  ** score:  How well the document matched
   683  ** id:     The document id.  Format: xNNNNN, x: type, N: number
   684  ** snip:   A snippet for the match
   685  **
   686  ** And the srchFlags parameter has been validated.  This routine
   687  ** fills the X table with search results using a full-scan search.
   688  **
   689  ** The companion indexed search routine is search_indexed().
   690  */
   691  static void search_fullscan(
   692    const char *zPattern,       /* The query pattern */
   693    unsigned int srchFlags      /* What to search over */
   694  ){
   695    search_init(zPattern, "<mark>", "</mark>", " ... ",
   696            SRCHFLG_STATIC|SRCHFLG_HTML);
   697    if( (srchFlags & SRCH_DOC)!=0 ){
   698      char *zDocGlob = db_get("doc-glob","");
   699      char *zDocBr = db_get("doc-branch","trunk");
   700      if( zDocGlob && zDocGlob[0] && zDocBr && zDocBr[0] ){
   701        db_multi_exec(
   702          "CREATE VIRTUAL TABLE IF NOT EXISTS temp.foci USING files_of_checkin;"
   703        );
   704        db_multi_exec(
   705          "INSERT INTO x(label,url,score,id,date,snip)"
   706          "  SELECT printf('Document: %%s',title('d',blob.rid,foci.filename)),"
   707          "         printf('/doc/%T/%%s',foci.filename),"
   708          "         search_score(),"
   709          "         'd'||blob.rid,"
   710          "         (SELECT datetime(event.mtime) FROM event"
   711          "            WHERE objid=symbolic_name_to_rid('trunk')),"
   712          "         search_snippet()"
   713          "    FROM foci CROSS JOIN blob"
   714          "   WHERE checkinID=symbolic_name_to_rid('trunk')"
   715          "     AND blob.uuid=foci.uuid"
   716          "     AND search_match(title('d',blob.rid,foci.filename),"
   717          "                      body('d',blob.rid,foci.filename))"
   718          "     AND %z",
   719          zDocBr, glob_expr("foci.filename", zDocGlob)
   720        );
   721      }
   722    }
   723    if( (srchFlags & SRCH_WIKI)!=0 ){
   724      db_multi_exec(
   725        "WITH wiki(name,rid,mtime) AS ("
   726        "  SELECT substr(tagname,6), tagxref.rid, max(tagxref.mtime)"
   727        "    FROM tag, tagxref"
   728        "   WHERE tag.tagname GLOB 'wiki-*'"
   729        "     AND tagxref.tagid=tag.tagid"
   730        "   GROUP BY 1"
   731        ")"
   732        "INSERT INTO x(label,url,score,id,date,snip)"
   733        "  SELECT printf('Wiki: %%s',name),"
   734        "         printf('/wiki?name=%%s',urlencode(name)),"
   735        "         search_score(),"
   736        "         'w'||rid,"
   737        "         datetime(mtime),"
   738        "         search_snippet()"
   739        "    FROM wiki"
   740        "   WHERE search_match(title('w',rid,name),body('w',rid,name));"
   741      );
   742    }
   743    if( (srchFlags & SRCH_CKIN)!=0 ){
   744      db_multi_exec(
   745        "WITH ckin(uuid,rid,mtime) AS ("
   746        "  SELECT blob.uuid, event.objid, event.mtime"
   747        "    FROM event, blob"
   748        "   WHERE event.type='ci'"
   749        "     AND blob.rid=event.objid"
   750        ")"
   751        "INSERT INTO x(label,url,score,id,date,snip)"
   752        "  SELECT printf('Check-in [%%.10s] on %%s',uuid,datetime(mtime)),"
   753        "         printf('/timeline?c=%%s&n=8&y=ci',uuid),"
   754        "         search_score(),"
   755        "         'c'||rid,"
   756        "         datetime(mtime),"
   757        "         search_snippet()"
   758        "    FROM ckin"
   759        "   WHERE search_match('',body('c',rid,NULL));"
   760      );
   761    }
   762    if( (srchFlags & SRCH_TKT)!=0 ){
   763      db_multi_exec(
   764        "INSERT INTO x(label,url,score,id,date,snip)"
   765        "  SELECT printf('Ticket: %%s (%%s)',title('t',tkt_id,NULL),"
   766                        "datetime(tkt_mtime)),"
   767        "         printf('/tktview/%%.20s',tkt_uuid),"
   768        "         search_score(),"
   769        "         't'||tkt_id,"
   770        "         datetime(tkt_mtime),"
   771        "         search_snippet()"
   772        "    FROM ticket"
   773        "   WHERE search_match(title('t',tkt_id,NULL),body('t',tkt_id,NULL));"
   774      );
   775    }
   776  }
   777  
   778  /*
   779  ** Number of significant bits in a u32
   780  */
   781  static int nbits(u32 x){
   782    int n = 0;
   783    while( x ){ n++; x >>= 1; }
   784    return n;
   785  }
   786  
   787  /*
   788  ** Implemenation of the rank() function used with rank(matchinfo(*,'pcsx')).
   789  */
   790  static void search_rank_sqlfunc(
   791    sqlite3_context *context,
   792    int argc,
   793    sqlite3_value **argv
   794  ){
   795    const unsigned *aVal = (unsigned int*)sqlite3_value_blob(argv[0]);
   796    int nVal = sqlite3_value_bytes(argv[0])/4;
   797    int nCol;           /* Number of columns in the index */
   798    int nTerm;          /* Number of search terms in the query */
   799    int i, j;           /* Loop counter */
   800    double r = 0.0;     /* Score */
   801    const unsigned *aX, *aS;
   802  
   803    if( nVal<2 ) return;
   804    nTerm = aVal[0];
   805    nCol = aVal[1];
   806    if( nVal<2+3*nCol*nTerm+nCol ) return;
   807    aS = aVal+2;
   808    aX = aS+nCol;
   809    for(j=0; j<nCol; j++){
   810      double x;
   811      if( aS[j]>0 ){
   812        x = 0.0;
   813        for(i=0; i<nTerm; i++){
   814          int hits_this_row;
   815          int hits_all_rows;
   816          int rows_with_hit;
   817          double avg_hits_per_row;
   818  
   819          hits_this_row = aX[j + i*nCol*3];
   820          if( hits_this_row==0 )continue;
   821          hits_all_rows = aX[j + i*nCol*3 + 1];
   822          rows_with_hit = aX[j + i*nCol*3 + 2];
   823          if( rows_with_hit==0 ) continue;
   824          avg_hits_per_row = hits_all_rows/(double)rows_with_hit;
   825          x += hits_this_row/(avg_hits_per_row*nbits(rows_with_hit));
   826        }
   827        x *= (1<<((30*(aS[j]-1))/nTerm));
   828      }else{
   829        x = 0.0;
   830      }
   831      r = r*10.0 + x;
   832    }
   833  #define SEARCH_DEBUG_RANK 0
   834  #if SEARCH_DEBUG_RANK
   835    {
   836      Blob x;
   837      blob_init(&x,0,0);
   838      blob_appendf(&x,"%08x", (int)r);
   839      for(i=0; i<nVal; i++){
   840        blob_appendf(&x," %d", aVal[i]);
   841      }
   842      blob_appendf(&x," r=%g", r);
   843      sqlite3_result_text(context, blob_str(&x), -1, fossil_free);
   844    }
   845  #else
   846    sqlite3_result_double(context, r);
   847  #endif
   848  }
   849  
   850  /*
   851  ** When this routine is called, there already exists a table
   852  **
   853  **       x(label,url,score,id,snip).
   854  **
   855  ** label:  The "name" of the document containing the match
   856  ** url:    A URL for the document
   857  ** score:  How well the document matched
   858  ** id:     The document id.  Format: xNNNNN, x: type, N: number
   859  ** snip:   A snippet for the match
   860  **
   861  ** And the srchFlags parameter has been validated.  This routine
   862  ** fills the X table with search results using FTS indexed search.
   863  **
   864  ** The companion full-scan search routine is search_fullscan().
   865  */
   866  static void search_indexed(
   867    const char *zPattern,       /* The query pattern */
   868    unsigned int srchFlags      /* What to search over */
   869  ){
   870    Blob sql;
   871    if( srchFlags==0 ) return;
   872    sqlite3_create_function(g.db, "rank", 1, SQLITE_UTF8, 0,
   873       search_rank_sqlfunc, 0, 0);
   874    blob_init(&sql, 0, 0);
   875    blob_appendf(&sql,
   876      "INSERT INTO x(label,url,score,id,date,snip) "
   877      " SELECT ftsdocs.label,"
   878      "        ftsdocs.url,"
   879      "        rank(matchinfo(ftsidx,'pcsx')),"
   880      "        ftsdocs.type || ftsdocs.rid,"
   881      "        datetime(ftsdocs.mtime),"
   882      "        snippet(ftsidx,'<mark>','</mark>',' ... ',-1,35)"
   883      "   FROM ftsidx CROSS JOIN ftsdocs"
   884      "  WHERE ftsidx MATCH %Q"
   885      "    AND ftsdocs.rowid=ftsidx.docid",
   886      zPattern
   887    );
   888    if( srchFlags!=SRCH_ALL ){
   889      const char *zSep = " AND (";
   890      static const struct { unsigned m; char c; } aMask[] = {
   891         { SRCH_CKIN,  'c' },
   892         { SRCH_DOC,   'd' },
   893         { SRCH_TKT,   't' },
   894         { SRCH_WIKI,  'w' },
   895      };
   896      int i;
   897      for(i=0; i<ArraySize(aMask); i++){
   898        if( srchFlags & aMask[i].m ){
   899          blob_appendf(&sql, "%sftsdocs.type='%c'", zSep, aMask[i].c);
   900          zSep = " OR ";
   901        }
   902      }
   903      blob_append(&sql,")",1);
   904    }
   905    db_multi_exec("%s",blob_str(&sql)/*safe-for-%s*/);
   906  #if SEARCH_DEBUG_RANK
   907    db_multi_exec("UPDATE x SET label=printf('%%s (score=%%s)',label,score)");
   908  #endif
   909  }
   910  
   911  /*
   912  ** If z[] is of the form "<mark>TEXT</mark>" where TEXT contains
   913  ** no white-space or punctuation, then return the length of the mark.
   914  */
   915  static int isSnippetMark(const char *z){
   916    int n;
   917    if( strncmp(z,"<mark>",6)!=0 ) return 0;
   918    n = 6;
   919    while( fossil_isalnum(z[n]) ) n++;
   920    if( strncmp(&z[n],"</mark>",7)!=0 ) return 0;
   921    return n+7;
   922  }
   923  
   924  /*
   925  ** Return a copy of zSnip (in memory obtained from fossil_malloc()) that
   926  ** has all "<" characters, other than those on <mark> and </mark>,
   927  ** converted into "&lt;".  This is similar to htmlize() except that
   928  ** <mark> and </mark> are preserved.
   929  */
   930  static char *cleanSnippet(const char *zSnip){
   931    int i;
   932    int n = 0;
   933    char *z;
   934    for(i=0; zSnip[i]; i++) if( zSnip[i]=='<' ) n++;
   935    z = fossil_malloc( i+n*4+1 );
   936    i = 0;
   937    while( zSnip[0] ){
   938      if( zSnip[0]=='<' ){
   939        n = isSnippetMark(zSnip);
   940        if( n ){
   941          memcpy(&z[i], zSnip, n);
   942          zSnip += n;
   943          i += n;
   944          continue;
   945        }else{
   946          memcpy(&z[i], "&lt;", 4);
   947          i += 4;
   948          zSnip++;
   949        }
   950      }else{
   951        z[i++] = zSnip[0];
   952        zSnip++;
   953      }
   954    }
   955    z[i] = 0;
   956    return z;
   957  }
   958  
   959  
   960  /*
   961  ** This routine generates web-page output for a search operation.
   962  ** Other web-pages can invoke this routine to add search results
   963  ** in the middle of the page.
   964  **
   965  ** This routine works for both full-scan and indexed search.  The
   966  ** appropriate low-level search routine is called according to the
   967  ** current configuration.
   968  **
   969  ** Return the number of rows.
   970  */
   971  int search_run_and_output(
   972    const char *zPattern,       /* The query pattern */
   973    unsigned int srchFlags,     /* What to search over */
   974    int fDebug                  /* Extra debugging output */
   975  ){
   976    Stmt q;
   977    int nRow = 0;
   978  
   979    srchFlags = search_restrict(srchFlags);
   980    if( srchFlags==0 ) return 0;
   981    search_sql_setup(g.db);
   982    add_content_sql_commands(g.db);
   983    db_multi_exec(
   984      "CREATE TEMP TABLE x(label,url,score,id,date,snip);"
   985    );
   986    if( !search_index_exists() ){
   987      search_fullscan(zPattern, srchFlags);  /* Full-scan search */
   988    }else{
   989      search_update_index(srchFlags);        /* Update the index, if necessary */
   990      search_indexed(zPattern, srchFlags);   /* Indexed search */
   991    }
   992    db_prepare(&q, "SELECT url, snip, label, score, id"
   993                   "  FROM x"
   994                   " ORDER BY score DESC, date DESC;");
   995    while( db_step(&q)==SQLITE_ROW ){
   996      const char *zUrl = db_column_text(&q, 0);
   997      const char *zSnippet = db_column_text(&q, 1);
   998      const char *zLabel = db_column_text(&q, 2);
   999      if( nRow==0 ){
  1000        @ <ol>
  1001      }
  1002      nRow++;
  1003      @ <li><p><a href='%R%s(zUrl)'>%h(zLabel)</a>
  1004      if( fDebug ){
  1005        @ (%e(db_column_double(&q,3)), %s(db_column_text(&q,4))
  1006      }
  1007      @ <br /><span class='snippet'>%z(cleanSnippet(zSnippet))</span></li>
  1008    }
  1009    db_finalize(&q);
  1010    if( nRow ){
  1011      @ </ol>
  1012    }
  1013    return nRow;
  1014  }
  1015  
  1016  /*
  1017  ** Generate some HTML for doing search.  At a minimum include the
  1018  ** Search-Text entry form.  If the "s" query parameter is present, also
  1019  ** show search results.
  1020  **
  1021  ** The srchFlags parameter restricts the set of documents to be searched.
  1022  ** srchFlags should normally be either a single search category or all
  1023  ** categories.  Any srchFlags with two or more bits set
  1024  ** is treated like SRCH_ALL for display purposes.
  1025  **
  1026  ** This routine automatically restricts srchFlag according to user
  1027  ** permissions and the server configuration.  The entry box is shown
  1028  ** disabled if srchFlags is 0 after these restrictions are applied.
  1029  **
  1030  ** If useYparam is true, then this routine also looks at the y= query
  1031  ** parameter for further search restrictions.
  1032  */
  1033  void search_screen(unsigned srchFlags, int useYparam){
  1034    const char *zType = 0;
  1035    const char *zClass = 0;
  1036    const char *zDisable1;
  1037    const char *zDisable2;
  1038    const char *zPattern;
  1039    int fDebug = PB("debug");
  1040    srchFlags = search_restrict(srchFlags);
  1041    switch( srchFlags ){
  1042      case SRCH_CKIN:  zType = " Check-ins";  zClass = "Ckin";  break;
  1043      case SRCH_DOC:   zType = " Docs";       zClass = "Doc";   break;
  1044      case SRCH_TKT:   zType = " Tickets";    zClass = "Tkt";   break;
  1045      case SRCH_WIKI:  zType = " Wiki";       zClass = "Wiki";  break;
  1046    }
  1047    if( srchFlags==0 ){
  1048      zDisable1 = " disabled";
  1049      zDisable2 = " disabled";
  1050      zPattern = "";
  1051    }else{
  1052      zDisable1 = " autofocus";
  1053      zDisable2 = "";
  1054      zPattern = PD("s","");
  1055    }
  1056    @ <form method='GET' action='%R/%T(g.zPath)'>
  1057    if( zClass ){
  1058      @ <div class='searchForm searchForm%s(zClass)'>
  1059    }else{
  1060      @ <div class='searchForm'>
  1061    }
  1062    @ <input type="text" name="s" size="40" value="%h(zPattern)"%s(zDisable1)>
  1063    if( useYparam && (srchFlags & (srchFlags-1))!=0 && useYparam ){
  1064      static const struct { char *z; char *zNm; unsigned m; } aY[] = {
  1065         { "all",  "All",        SRCH_ALL  },
  1066         { "c",    "Check-ins",  SRCH_CKIN },
  1067         { "d",    "Docs",       SRCH_DOC  },
  1068         { "t",    "Tickets",    SRCH_TKT  },
  1069         { "w",    "Wiki",       SRCH_WIKI },
  1070      };
  1071      const char *zY = PD("y","all");
  1072      unsigned newFlags = srchFlags;
  1073      int i;
  1074      @ <select size='1' name='y'>
  1075      for(i=0; i<ArraySize(aY); i++){
  1076        if( (aY[i].m & srchFlags)==0 ) continue;
  1077        cgi_printf("<option value='%s'", aY[i].z);
  1078        if( fossil_strcmp(zY,aY[i].z)==0 ){
  1079          newFlags &= aY[i].m;
  1080          cgi_printf(" selected");
  1081        }
  1082        cgi_printf(">%s</option>\n", aY[i].zNm);
  1083      }
  1084      @ </select>
  1085      srchFlags = newFlags;
  1086    }
  1087    if( fDebug ){
  1088      @ <input type="hidden" name="debug" value="1">
  1089    }
  1090    @ <input type="submit" value="Search%s(zType)"%s(zDisable2)>
  1091    if( srchFlags==0 ){
  1092      @ <p class="generalError">Search is disabled</p>
  1093    }
  1094    @ </div></form>
  1095    while( fossil_isspace(zPattern[0]) ) zPattern++;
  1096    if( zPattern[0] ){
  1097      if( zClass ){
  1098        @ <div class='searchResult searchResult%s(zClass)'>
  1099      }else{
  1100        @ <div class='searchResult'>
  1101      }
  1102      if( search_run_and_output(zPattern, srchFlags, fDebug)==0 ){
  1103        @ <p class='searchEmpty'>No matches for: <span>%h(zPattern)</span></p>
  1104      }
  1105      @ </div>
  1106    }
  1107  }
  1108  
  1109  /*
  1110  ** WEBPAGE: search
  1111  **
  1112  ** Search for check-in comments, documents, tickets, or wiki that
  1113  ** match a user-supplied pattern.
  1114  **
  1115  **    s=PATTERN       Specify the full-text pattern to search for
  1116  **    y=TYPE          What to search.
  1117  **                      c -> check-ins
  1118  **                      d -> documentation
  1119  **                      t -> tickets
  1120  **                      w -> wiki
  1121  **                    all -> everything
  1122  */
  1123  void search_page(void){
  1124    login_check_credentials();
  1125    style_header("Search");
  1126    search_screen(SRCH_ALL, 1);
  1127    style_footer();
  1128  }
  1129  
  1130  
  1131  /*
  1132  ** This is a helper function for search_stext().  Writing into pOut
  1133  ** the search text obtained from pIn according to zMimetype.
  1134  **
  1135  ** The title of the document is the first line of text.  All subsequent
  1136  ** lines are the body.  If the document has no title, the first line
  1137  ** is blank.
  1138  */
  1139  static void get_stext_by_mimetype(
  1140    Blob *pIn,
  1141    const char *zMimetype,
  1142    Blob *pOut
  1143  ){
  1144    Blob html, title;
  1145    blob_init(&html, 0, 0);
  1146    blob_init(&title, 0, 0);
  1147    if( zMimetype==0 ) zMimetype = "text/plain";
  1148    if( fossil_strcmp(zMimetype,"text/x-fossil-wiki")==0 ){
  1149      Blob tail;
  1150      blob_init(&tail, 0, 0);
  1151      if( wiki_find_title(pIn, &title, &tail) ){
  1152        blob_appendf(pOut, "%s\n", blob_str(&title));
  1153        wiki_convert(&tail, &html, 0);
  1154        blob_reset(&tail);
  1155      }else{
  1156        blob_append(pOut, "\n", 1);
  1157        wiki_convert(pIn, &html, 0);
  1158      }
  1159      html_to_plaintext(blob_str(&html), pOut);
  1160    }else if( fossil_strcmp(zMimetype,"text/x-markdown")==0 ){
  1161      markdown_to_html(pIn, &title, &html);
  1162      if( blob_size(&title) ){
  1163        blob_appendf(pOut, "%s\n", blob_str(&title));
  1164      }else{
  1165        blob_append(pOut, "\n", 1);
  1166      }
  1167      html_to_plaintext(blob_str(&html), pOut);
  1168    }else if( fossil_strcmp(zMimetype,"text/html")==0 ){
  1169      if( doc_is_embedded_html(pIn, &title) ){
  1170        blob_appendf(pOut, "%s\n", blob_str(&title));
  1171      }
  1172      html_to_plaintext(blob_str(pIn), pOut);
  1173    }else{
  1174      blob_append(pOut, "\n", 1);
  1175      blob_append(pOut, blob_buffer(pIn), blob_size(pIn));
  1176    }
  1177    blob_reset(&html);
  1178    blob_reset(&title);
  1179  }
  1180  
  1181  /*
  1182  ** Query pQuery is pointing at a single row of output.  Append a text
  1183  ** representation of every text-compatible column to pAccum.
  1184  */
  1185  static void append_all_ticket_fields(Blob *pAccum, Stmt *pQuery, int iTitle){
  1186    int n = db_column_count(pQuery);
  1187    int i;
  1188    const char *zMime = 0;
  1189    if( iTitle>=0 && iTitle<n ){
  1190      if( db_column_type(pQuery,iTitle)==SQLITE_TEXT ){
  1191        blob_append(pAccum, db_column_text(pQuery,iTitle), -1);
  1192      }
  1193      blob_append(pAccum, "\n", 1);
  1194    }
  1195    for(i=0; i<n; i++){
  1196      const char *zColName = db_column_name(pQuery,i);
  1197      int eType = db_column_type(pQuery,i);
  1198      if( i==iTitle ) continue;
  1199      if( fossil_strnicmp(zColName,"tkt_",4)==0 ) continue;
  1200      if( fossil_strnicmp(zColName,"private_",8)==0 ) continue;
  1201      if( eType==SQLITE_BLOB || eType==SQLITE_NULL ) continue;
  1202      if( fossil_stricmp(zColName,"mimetype")==0 ){
  1203        zMime = db_column_text(pQuery,i);
  1204        if( fossil_strcmp(zMime,"text/plain")==0 ) zMime = 0;
  1205      }else if( zMime==0 || eType!=SQLITE_TEXT ){
  1206        blob_appendf(pAccum, "%s: %s |\n", zColName, db_column_text(pQuery,i));
  1207      }else{
  1208        Blob txt;
  1209        blob_init(&txt, db_column_text(pQuery,i), -1);
  1210        blob_appendf(pAccum, "%s: ", zColName);
  1211        get_stext_by_mimetype(&txt, zMime, pAccum);
  1212        blob_append(pAccum, " |", 2);
  1213        blob_reset(&txt);
  1214      }
  1215    }
  1216  }
  1217  
  1218  
  1219  /*
  1220  ** Return "search text" - a reduced version of a document appropriate for
  1221  ** full text search and/or for constructing a search result snippet.
  1222  **
  1223  **    cType:            d      Embedded documentation
  1224  **                      w      Wiki page
  1225  **                      c      Check-in comment
  1226  **                      t      Ticket text
  1227  **
  1228  **    rid               The RID of an artifact that defines the object
  1229  **                      being searched.
  1230  **
  1231  **    zName             Name of the object being searched.  This is used
  1232  **                      only to help figure out the mimetype (text/plain,
  1233  **                      test/html, test/x-fossil-wiki, or text/x-markdown)
  1234  **                      so that the code can know how to simplify the text.
  1235  */
  1236  void search_stext(
  1237    char cType,            /* Type of document */
  1238    int rid,               /* BLOB.RID or TAG.TAGID value for document */
  1239    const char *zName,     /* Auxiliary information */
  1240    Blob *pOut             /* OUT: Initialize to the search text */
  1241  ){
  1242    blob_init(pOut, 0, 0);
  1243    switch( cType ){
  1244      case 'd': {   /* Documents */
  1245        Blob doc;
  1246        content_get(rid, &doc);
  1247        blob_to_utf8_no_bom(&doc, 0);
  1248        get_stext_by_mimetype(&doc, mimetype_from_name(zName), pOut);
  1249        blob_reset(&doc);
  1250        break;
  1251      }
  1252      case 'w': {   /* Wiki */
  1253        Manifest *pWiki = manifest_get(rid, CFTYPE_WIKI,0);
  1254        Blob wiki;
  1255        if( pWiki==0 ) break;
  1256        blob_init(&wiki, pWiki->zWiki, -1);
  1257        get_stext_by_mimetype(&wiki, wiki_filter_mimetypes(pWiki->zMimetype),
  1258                              pOut);
  1259        blob_reset(&wiki);
  1260        manifest_destroy(pWiki);
  1261        break;
  1262      }
  1263      case 'c': {   /* Check-in Comments */
  1264        static Stmt q;
  1265        static int isPlainText = -1;
  1266        db_static_prepare(&q,
  1267           "SELECT coalesce(ecomment,comment)"
  1268           "  ||' (user: '||coalesce(euser,user,'?')"
  1269           "  ||', tags: '||"
  1270           "  (SELECT group_concat(substr(tag.tagname,5),',')"
  1271           "     FROM tag, tagxref"
  1272           "    WHERE tagname GLOB 'sym-*' AND tag.tagid=tagxref.tagid"
  1273           "      AND tagxref.rid=event.objid AND tagxref.tagtype>0)"
  1274           "  ||')'"
  1275           "  FROM event WHERE objid=:x AND type='ci'");
  1276        if( isPlainText<0 ){
  1277          isPlainText = db_get_boolean("timeline-plaintext",0);
  1278        }
  1279        db_bind_int(&q, ":x", rid);
  1280        if( db_step(&q)==SQLITE_ROW ){
  1281          blob_append(pOut, "\n", 1);
  1282          if( isPlainText ){
  1283            db_column_blob(&q, 0, pOut);
  1284          }else{
  1285            Blob x;
  1286            blob_init(&x,0,0);
  1287            db_column_blob(&q, 0, &x);
  1288            get_stext_by_mimetype(&x, "text/x-fossil-wiki", pOut);
  1289            blob_reset(&x);
  1290          }
  1291        }
  1292        db_reset(&q);
  1293        break;
  1294      }
  1295      case 't': {   /* Tickets */
  1296        static Stmt q1;
  1297        static int iTitle = -1;
  1298        db_static_prepare(&q1, "SELECT * FROM ticket WHERE tkt_id=:rid");
  1299        db_bind_int(&q1, ":rid", rid);
  1300        if( db_step(&q1)==SQLITE_ROW ){
  1301          if( iTitle<0 ){
  1302            int n = db_column_count(&q1);
  1303            for(iTitle=0; iTitle<n; iTitle++){
  1304              if( fossil_stricmp(db_column_name(&q1,iTitle),"title")==0 ) break;
  1305            }
  1306          }
  1307          append_all_ticket_fields(pOut, &q1, iTitle);
  1308        }
  1309        db_reset(&q1);
  1310        if( db_table_exists("repository","ticketchng") ){
  1311          static Stmt q2;
  1312          db_static_prepare(&q2, "SELECT * FROM ticketchng WHERE tkt_id=:rid"
  1313                                 "  ORDER BY tkt_mtime");
  1314          db_bind_int(&q2, ":rid", rid);
  1315          while( db_step(&q2)==SQLITE_ROW ){
  1316            append_all_ticket_fields(pOut, &q2, -1);
  1317          }
  1318          db_reset(&q2);
  1319        }
  1320        break;
  1321      }
  1322    }
  1323  }
  1324  
  1325  /*
  1326  ** This routine is a wrapper around search_stext().
  1327  **
  1328  ** This routine looks up the search text, stores it in an internal
  1329  ** buffer, and returns a pointer to the text.  Subsequent requests
  1330  ** for the same document return the same pointer.  The returned pointer
  1331  ** is valid until the next invocation of this routine.  Call this routine
  1332  ** with an eType of 0 to clear the cache.
  1333  */
  1334  char *search_stext_cached(
  1335    char cType,            /* Type of document */
  1336    int rid,               /* BLOB.RID or TAG.TAGID value for document */
  1337    const char *zName,     /* Auxiliary information, for mimetype */
  1338    int *pnTitle           /* OUT: length of title in bytes excluding \n */
  1339  ){
  1340    static struct {
  1341      Blob stext;          /* Cached search text */
  1342      char cType;          /* The type */
  1343      int rid;             /* The RID */
  1344      int nTitle;          /* Number of bytes in title */
  1345    } cache;
  1346    int i;
  1347    char *z;
  1348    if( cType!=cache.cType || rid!=cache.rid ){
  1349      if( cache.rid>0 ){
  1350        blob_reset(&cache.stext);
  1351      }else{
  1352        blob_init(&cache.stext,0,0);
  1353      }
  1354      cache.cType = cType;
  1355      cache.rid = rid;
  1356      if( cType==0 ) return 0;
  1357      search_stext(cType, rid, zName, &cache.stext);
  1358      z  = blob_str(&cache.stext);
  1359      for(i=0; z[i] && z[i]!='\n'; i++){}
  1360      cache.nTitle = i;
  1361    }
  1362    if( pnTitle ) *pnTitle = cache.nTitle;
  1363    return blob_str(&cache.stext);
  1364  }
  1365  
  1366  /*
  1367  ** COMMAND: test-search-stext
  1368  **
  1369  ** Usage: fossil test-search-stext TYPE RID NAME
  1370  **
  1371  ** Compute the search text for document TYPE-RID whose name is NAME.
  1372  ** The TYPE is one of "c", "d", "t", or "w".  The RID is the document
  1373  ** ID.  The NAME is used to figure out a mimetype to use for formatting
  1374  ** the raw document text.
  1375  */
  1376  void test_search_stext(void){
  1377    Blob out;
  1378    db_find_and_open_repository(0,0);
  1379    if( g.argc!=5 ) usage("TYPE RID NAME");
  1380    search_stext(g.argv[2][0], atoi(g.argv[3]), g.argv[4], &out);
  1381    fossil_print("%s\n",blob_str(&out));
  1382    blob_reset(&out);
  1383  }
  1384  
  1385  /*
  1386  ** COMMAND: test-convert-stext
  1387  **
  1388  ** Usage: fossil test-convert-stext FILE MIMETYPE
  1389  **
  1390  ** Read the content of FILE and convert it to stext according to MIMETYPE.
  1391  ** Send the result to standard output.
  1392  */
  1393  void test_convert_stext(void){
  1394    Blob in, out;
  1395    db_find_and_open_repository(0,0);
  1396    if( g.argc!=4 ) usage("FILENAME MIMETYPE");
  1397    blob_read_from_file(&in, g.argv[2]);
  1398    blob_init(&out, 0, 0);
  1399    get_stext_by_mimetype(&in, g.argv[3], &out);
  1400    fossil_print("%s\n",blob_str(&out));
  1401    blob_reset(&in);
  1402    blob_reset(&out);
  1403  }
  1404  
  1405  /* The schema for the full-text index
  1406  */
  1407  static const char zFtsSchema[] =
  1408  @ -- One entry for each possible search result
  1409  @ CREATE TABLE IF NOT EXISTS repository.ftsdocs(
  1410  @   rowid INTEGER PRIMARY KEY, -- Maps to the ftsidx.docid
  1411  @   type CHAR(1),              -- Type of document
  1412  @   rid INTEGER,               -- BLOB.RID or TAG.TAGID for the document
  1413  @   name TEXT,                 -- Additional document description
  1414  @   idxed BOOLEAN,             -- True if currently in the index
  1415  @   label TEXT,                -- Label to print on search results
  1416  @   url TEXT,                  -- URL to access this document
  1417  @   mtime DATE,                -- Date when document created
  1418  @   bx TEXT,                   -- Temporary "body" content cache
  1419  @   UNIQUE(type,rid)
  1420  @ );
  1421  @ CREATE INDEX repository.ftsdocIdxed ON ftsdocs(type,rid,name) WHERE idxed==0;
  1422  @ CREATE INDEX repository.ftsdocName ON ftsdocs(name) WHERE type='w';
  1423  @ CREATE VIEW IF NOT EXISTS repository.ftscontent AS
  1424  @   SELECT rowid, type, rid, name, idxed, label, url, mtime,
  1425  @          title(type,rid,name) AS 'title', body(type,rid,name) AS 'body'
  1426  @     FROM ftsdocs;
  1427  @ CREATE VIRTUAL TABLE IF NOT EXISTS repository.ftsidx
  1428  @   USING fts4(content="ftscontent", title, body%s);
  1429  ;
  1430  static const char zFtsDrop[] =
  1431  @ DROP TABLE IF EXISTS repository.ftsidx;
  1432  @ DROP VIEW IF EXISTS repository.ftscontent;
  1433  @ DROP TABLE IF EXISTS repository.ftsdocs;
  1434  ;
  1435  
  1436  /*
  1437  ** Create or drop the tables associated with a full-text index.
  1438  */
  1439  static int searchIdxExists = -1;
  1440  void search_create_index(void){
  1441    int useStemmer = db_get_boolean("search-stemmer",0);
  1442    const char *zExtra = useStemmer ? ",tokenize=porter" : "";
  1443    search_sql_setup(g.db);
  1444    db_multi_exec(zFtsSchema/*works-like:"%s"*/, zExtra/*safe-for-%s*/);
  1445    searchIdxExists = 1;
  1446  }
  1447  void search_drop_index(void){
  1448    db_multi_exec(zFtsDrop/*works-like:""*/);
  1449    searchIdxExists = 0;
  1450  }
  1451  
  1452  /*
  1453  ** Return true if the full-text search index exists
  1454  */
  1455  int search_index_exists(void){
  1456    if( searchIdxExists<0 ){
  1457      searchIdxExists = db_table_exists("repository","ftsdocs");
  1458    }
  1459    return searchIdxExists;
  1460  }
  1461  
  1462  /*
  1463  ** Fill the FTSDOCS table with unindexed entries for everything
  1464  ** in the repository.  This uses INSERT OR IGNORE so entries already
  1465  ** in FTSDOCS are unchanged.
  1466  */
  1467  void search_fill_index(void){
  1468    if( !search_index_exists() ) return;
  1469    search_sql_setup(g.db);
  1470    db_multi_exec(
  1471      "INSERT OR IGNORE INTO ftsdocs(type,rid,idxed)"
  1472      "  SELECT 'c', objid, 0 FROM event WHERE type='ci';"
  1473    );
  1474    db_multi_exec(
  1475      "WITH latest_wiki(rid,name,mtime) AS ("
  1476      "  SELECT tagxref.rid, substr(tag.tagname,6), max(tagxref.mtime)"
  1477      "    FROM tag, tagxref"
  1478      "   WHERE tag.tagname GLOB 'wiki-*'"
  1479      "     AND tagxref.tagid=tag.tagid"
  1480      "     AND tagxref.value>0"
  1481      "   GROUP BY 2"
  1482      ") INSERT OR IGNORE INTO ftsdocs(type,rid,name,idxed)"
  1483      "     SELECT 'w', rid, name, 0 FROM latest_wiki;"
  1484    );
  1485    db_multi_exec(
  1486      "INSERT OR IGNORE INTO ftsdocs(type,rid,idxed)"
  1487      "  SELECT 't', tkt_id, 0 FROM ticket;"
  1488    );
  1489  }
  1490  
  1491  /*
  1492  ** The document described by cType,rid,zName is about to be added or
  1493  ** updated.  If the document has already been indexed, then unindex it
  1494  ** now while we still have access to the old content.  Add the document
  1495  ** to the queue of documents that need to be indexed or reindexed.
  1496  */
  1497  void search_doc_touch(char cType, int rid, const char *zName){
  1498    if( search_index_exists() ){
  1499      char zType[2];
  1500      zType[0] = cType;
  1501      zType[1] = 0;
  1502      search_sql_setup(g.db);
  1503      db_multi_exec(
  1504         "DELETE FROM ftsidx WHERE docid IN"
  1505         "    (SELECT rowid FROM ftsdocs WHERE type=%Q AND rid=%d AND idxed)",
  1506         zType, rid
  1507      );
  1508      db_multi_exec(
  1509         "REPLACE INTO ftsdocs(type,rid,name,idxed)"
  1510         " VALUES(%Q,%d,%Q,0)",
  1511         zType, rid, zName
  1512      );
  1513      if( cType=='w' ){
  1514        db_multi_exec(
  1515          "DELETE FROM ftsidx WHERE docid IN"
  1516          "    (SELECT rowid FROM ftsdocs WHERE type='w' AND name=%Q AND idxed)",
  1517          zName
  1518        );
  1519        db_multi_exec(
  1520          "DELETE FROM ftsdocs WHERE type='w' AND name=%Q AND rid!=%d",
  1521          zName, rid
  1522        );
  1523      }
  1524    }
  1525  }
  1526  
  1527  /*
  1528  ** If the doc-glob and doc-br settings are valid for document search
  1529  ** and if the latest check-in on doc-br is in the unindexed set of
  1530  ** check-ins, then update all 'd' entries in FTSDOCS that have
  1531  ** changed.
  1532  */
  1533  static void search_update_doc_index(void){
  1534    const char *zDocBr = db_get("doc-branch","trunk");
  1535    int ckid = zDocBr ? symbolic_name_to_rid(zDocBr,"ci") : 0;
  1536    double rTime;
  1537    if( ckid==0 ) return;
  1538    if( !db_exists("SELECT 1 FROM ftsdocs WHERE type='c' AND rid=%d"
  1539                   "   AND NOT idxed", ckid) ) return;
  1540  
  1541    /* If we get this far, it means that changes to 'd' entries are
  1542    ** required. */
  1543    rTime = db_double(0.0, "SELECT mtime FROM event WHERE objid=%d", ckid);
  1544    db_multi_exec(
  1545      "CREATE TEMP TABLE current_docs(rid INTEGER PRIMARY KEY, name);"
  1546      "CREATE VIRTUAL TABLE IF NOT EXISTS temp.foci USING files_of_checkin;"
  1547      "INSERT OR IGNORE INTO current_docs(rid, name)"
  1548      "  SELECT blob.rid, foci.filename FROM foci, blob"
  1549      "   WHERE foci.checkinID=%d AND blob.uuid=foci.uuid"
  1550      "     AND %z",
  1551      ckid, glob_expr("foci.filename", db_get("doc-glob",""))
  1552    );
  1553    db_multi_exec(
  1554      "DELETE FROM ftsidx WHERE docid IN"
  1555      "  (SELECT rowid FROM ftsdocs WHERE type='d'"
  1556      "      AND rid NOT IN (SELECT rid FROM current_docs))"
  1557    );
  1558    db_multi_exec(
  1559      "DELETE FROM ftsdocs WHERE type='d'"
  1560      "      AND rid NOT IN (SELECT rid FROM current_docs)"
  1561    );
  1562    db_multi_exec(
  1563      "INSERT OR IGNORE INTO ftsdocs(type,rid,name,idxed,label,bx,url,mtime)"
  1564      "  SELECT 'd', rid, name, 0,"
  1565      "         title('d',rid,name),"
  1566      "         body('d',rid,name),"
  1567      "         printf('/doc/%T/%%s',urlencode(name)),"
  1568      "         %.17g"
  1569      " FROM current_docs",
  1570      zDocBr, rTime
  1571    );
  1572    db_multi_exec(
  1573      "INSERT INTO ftsidx(docid,title,body)"
  1574      "  SELECT rowid, label, bx FROM ftsdocs WHERE type='d' AND NOT idxed"
  1575    );
  1576    db_multi_exec(
  1577      "UPDATE ftsdocs SET"
  1578      "  idxed=1,"
  1579      "  bx=NULL,"
  1580      "  label='Document: '||label"
  1581      " WHERE type='d' AND NOT idxed"
  1582    );
  1583  }
  1584  
  1585  /*
  1586  ** Deal with all of the unindexed 'c' terms in FTSDOCS
  1587  */
  1588  static void search_update_checkin_index(void){
  1589    db_multi_exec(
  1590      "INSERT INTO ftsidx(docid,title,body)"
  1591      " SELECT rowid, '', body('c',rid,NULL) FROM ftsdocs"
  1592      "  WHERE type='c' AND NOT idxed;"
  1593    );
1594 db_multi_exec( 1595 "UPDATE ftsdocs SET idxed=1, name=NULL," 1596 " (label,url,mtime) = " 1597 " (SELECT printf('Check-in [%%.16s] on %%s',blob.uuid," 1598 " datetime(event.mtime))," 1599 " printf('/timeline?y=ci&c=%%.20s',blob.uuid)," 1600 " event.mtime" 1601 " FROM event, blob" 1602 " WHERE event.objid=ftsdocs.rid" 1603 " AND blob.rid=ftsdocs.rid)" 1604 "WHERE ftsdocs.type='c' AND NOT ftsdocs.idxed" 1605 );
1606 } 1607 1608 /* 1609 ** Deal with all of the unindexed 't' terms in FTSDOCS 1610 */ 1611 static void search_update_ticket_index(void){ 1612 db_multi_exec( 1613 "INSERT INTO ftsidx(docid,title,body)" 1614 " SELECT rowid, title('t',rid,NULL), body('t',rid,NULL) FROM ftsdocs" 1615 " WHERE type='t' AND NOT idxed;" 1616 ); 1617 if( db_changes()==0 ) return; 1618 db_multi_exec( 1619 "UPDATE ftsdocs SET idxed=1, name=NULL," 1620 " (label,url,mtime) =" 1621 " (SELECT printf('Ticket: %%s (%%s)',title('t',tkt_id,null)," 1622 " datetime(tkt_mtime))," 1623 " printf('/tktview/%%.20s',tkt_uuid)," 1624 " tkt_mtime" 1625 " FROM ticket" 1626 " WHERE tkt_id=ftsdocs.rid)" 1627 "WHERE ftsdocs.type='t' AND NOT ftsdocs.idxed" 1628 ); 1629 } 1630 1631 /* 1632 ** Deal with all of the unindexed 'w' terms in FTSDOCS 1633 */ 1634 static void search_update_wiki_index(void){ 1635 db_multi_exec( 1636 "INSERT INTO ftsidx(docid,title,body)" 1637 " SELECT rowid, title('w',rid,NULL),body('w',rid,NULL) FROM ftsdocs" 1638 " WHERE type='w' AND NOT idxed;" 1639 ); 1640 if( db_changes()==0 ) return; 1641 db_multi_exec( 1642 "UPDATE ftsdocs SET idxed=1," 1643 " (name,label,url,mtime) = " 1644 " (SELECT ftsdocs.name," 1645 " 'Wiki: '||ftsdocs.name," 1646 " '/wiki?name='||urlencode(ftsdocs.name)," 1647 " tagxref.mtime" 1648 " FROM tagxref WHERE tagxref.rid=ftsdocs.rid)" 1649 " WHERE ftsdocs.type='w' AND NOT ftsdocs.idxed" 1650 ); 1651 } 1652 1653 /* 1654 ** Deal with all of the unindexed entries in the FTSDOCS table - that 1655 ** is to say, all the entries with FTSDOCS.IDXED=0. Add them to the 1656 ** index. 
1657 */ 1658 void search_update_index(unsigned int srchFlags){ 1659 if( !search_index_exists() ) return; 1660 if( !db_exists("SELECT 1 FROM ftsdocs WHERE NOT idxed") ) return; 1661 search_sql_setup(g.db); 1662 if( srchFlags & (SRCH_CKIN|SRCH_DOC) ){ 1663 search_update_doc_index(); 1664 search_update_checkin_index(); 1665 } 1666 if( srchFlags & SRCH_TKT ){ 1667 search_update_ticket_index(); 1668 } 1669 if( srchFlags & SRCH_WIKI ){ 1670 search_update_wiki_index(); 1671 } 1672 } 1673 1674 /* 1675 ** Construct, prepopulate, and then update the full-text index. 1676 */ 1677 void search_rebuild_index(void){ 1678 fossil_print("rebuilding the search index..."); 1679 fflush(stdout); 1680 search_create_index(); 1681 search_fill_index(); 1682 search_update_index(search_restrict(SRCH_ALL)); 1683 fossil_print(" done\n"); 1684 } 1685 1686 /* 1687 ** COMMAND: fts-config* 1688 ** 1689 ** Usage: fossil fts-config ?SUBCOMMAND? ?ARGUMENT? 1690 ** 1691 ** The "fossil fts-config" command configures the full-text search capabilities 1692 ** of the repository. Subcommands: 1693 ** 1694 ** reindex Rebuild the search index. This is a no-op if 1695 ** index search is disabled 1696 ** 1697 ** index (on|off) Turn the search index on or off 1698 ** 1699 ** enable cdtw Enable various kinds of search. c=Check-ins, 1700 ** d=Documents, t=Tickets, w=Wiki. 1701 ** 1702 ** disable cdtw Disable various kinds of search 1703 ** 1704 ** stemmer (on|off) Turn the Porter stemmer on or off for indexed 1705 ** search. (Unindexed search is never stemmed.) 1706 ** 1707 ** The current search settings are displayed after any changes are applied. 1708 ** Run this command with no arguments to simply see the settings. 
1709 */ 1710 void fts_config_cmd(void){ 1711 static const struct { int iCmd; const char *z; } aCmd[] = { 1712 { 1, "reindex" }, 1713 { 2, "index" }, 1714 { 3, "disable" }, 1715 { 4, "enable" }, 1716 { 5, "stemmer" }, 1717 }; 1718 static const struct { char *zSetting; char *zName; char *zSw; } aSetng[] = { 1719 { "search-ckin", "check-in search:", "c" }, 1720 { "search-doc", "document search:", "d" }, 1721 { "search-tkt", "ticket search:", "t" }, 1722 { "search-wiki", "wiki search:", "w" }, 1723 }; 1724 char *zSubCmd = 0; 1725 int i, j, n; 1726 int iCmd = 0; 1727 int iAction = 0; 1728 db_find_and_open_repository(0, 0); 1729 if( g.argc>2 ){ 1730 zSubCmd = g.argv[2]; 1731 n = (int)strlen(zSubCmd); 1732 for(i=0; i<ArraySize(aCmd); i++){ 1733 if( fossil_strncmp(aCmd[i].z, zSubCmd, n)==0 ) break; 1734 } 1735 if( i>=ArraySize(aCmd) ){ 1736 Blob all; 1737 blob_init(&all,0,0); 1738 for(i=0; i<ArraySize(aCmd); i++) blob_appendf(&all, " %s", aCmd[i].z); 1739 fossil_fatal("unknown \"%s\" - should be on of:%s", 1740 zSubCmd, blob_str(&all)); 1741 return; 1742 } 1743 iCmd = aCmd[i].iCmd; 1744 } 1745 g.perm.Read = 1; 1746 g.perm.RdTkt = 1; 1747 g.perm.RdWiki = 1; 1748 if( iCmd==1 ){ 1749 if( search_index_exists() ) iAction = 2; 1750 } 1751 if( iCmd==2 ){ 1752 if( g.argc<3 ) usage("index (on|off)"); 1753 iAction = 1 + is_truth(g.argv[3]); 1754 } 1755 db_begin_transaction(); 1756 1757 /* Adjust search settings */ 1758 if( iCmd==3 || iCmd==4 ){ 1759 const char *zCtrl; 1760 if( g.argc<4 ) usage(mprintf("%s STRING",zSubCmd)); 1761 zCtrl = g.argv[3]; 1762 for(j=0; j<ArraySize(aSetng); j++){ 1763 if( strchr(zCtrl, aSetng[j].zSw[0])!=0 ){ 1764 db_set_int(aSetng[j].zSetting, iCmd-3, 0); 1765 } 1766 } 1767 } 1768 if( iCmd==5 ){ 1769 if( g.argc<4 ) usage("porter ON/OFF"); 1770 db_set_int("search-stemmer", is_truth(g.argv[3]), 0); 1771 } 1772 1773 1774 /* destroy or rebuild the index, if requested */ 1775 if( iAction>=1 ){ 1776 search_drop_index(); 1777 } 1778 if( iAction>=2 ){ 1779 
search_rebuild_index(); 1780 } 1781 1782 /* Always show the status before ending */ 1783 for(i=0; i<ArraySize(aSetng); i++){ 1784 fossil_print("%-16s %s\n", aSetng[i].zName, 1785 db_get_boolean(aSetng[i].zSetting,0) ? "on" : "off"); 1786 } 1787 fossil_print("%-16s %s\n", "Porter stemmer:", 1788 db_get_boolean("search-stemmer",0) ? "on" : "off"); 1789 if( search_index_exists() ){ 1790 fossil_print("%-16s enabled\n", "full-text index:"); 1791 fossil_print("%-16s %d\n", "documents:", 1792 db_int(0, "SELECT count(*) FROM ftsdocs")); 1793 }else{ 1794 fossil_print("%-16s disabled\n", "full-text index:"); 1795 } 1796 db_end_transaction(0); 1797 } 1798 1799 /* 1800 ** WEBPAGE: test-ftsdocs 1801 ** 1802 ** Show a table of all documents currently in the search index. 1803 */ 1804 void search_data_page(void){ 1805 Stmt q; 1806 const char *zId = P("id"); 1807 const char *zType = P("y"); 1808 const char *zIdxed = P("ixed"); 1809 int id; 1810 int cnt = 0; 1811 login_check_credentials(); 1812 if( !g.perm.Admin ){ login_needed(0); return; } 1813 if( !search_index_exists() ){ 1814 @ <p>Indexed search is disabled 1815 style_footer(); 1816 return; 1817 } 1818 if( zId!=0 && (id = atoi(zId))>0 ){ 1819 /* Show information about a single ftsdocs entry */ 1820 style_header("Information about ftsdoc entry %d", id); 1821 db_prepare(&q, 1822 "SELECT type||rid, name, idxed, label, url, datetime(mtime)" 1823 " FROM ftsdocs WHERE rowid=%d", id 1824 ); 1825 if( db_step(&q)==SQLITE_ROW ){ 1826 const char *zUrl = db_column_text(&q,4); 1827 @ <table border=0> 1828 @ <tr><td align='right'>rowid:<td>&nbsp;&nbsp;<td>%d(id) 1829 @ <tr><td align='right'>id:<td><td>%s(db_column_text(&q,0)) 1830 @ <tr><td align='right'>name:<td><td>%h(db_column_text(&q,1)) 1831 @ <tr><td align='right'>idxed:<td><td>%d(db_column_int(&q,2)) 1832 @ <tr><td align='right'>label:<td><td>%h(db_column_text(&q,3)) 1833 @ <tr><td align='right'>url:<td><td> 1834 @ <a href='%R%s(zUrl)'>%h(zUrl)</a> 1835 @ <tr><td 
align='right'>mtime:<td><td>%s(db_column_text(&q,5)) 1836 @ </table> 1837 } 1838 db_finalize(&q); 1839 style_footer(); 1840 return; 1841 } 1842 if( zType!=0 && zType[0]!=0 && zType[1]==0 && 1843 zIdxed!=0 && (zIdxed[0]=='1' || zIdxed[0]=='0') && zIdxed[1]==0 1844 ){ 1845 int ixed = zIdxed[0]=='1'; 1846 style_header("List of '%c' documents that are%s indexed", 1847 zType[0], ixed ? "" : " not"); 1848 db_prepare(&q, 1849 "SELECT rowid, type||rid ||' '|| coalesce(label,'')" 1850 " FROM ftsdocs WHERE type='%c' AND %s idxed", 1851 zType[0], ixed ? "" : "NOT" 1852 ); 1853 @ <ul> 1854 while( db_step(&q)==SQLITE_ROW ){ 1855 @ <li> <a href='test-ftsdocs?id=%d(db_column_int(&q,0))'> 1856 @ %h(db_column_text(&q,1))</a> 1857 } 1858 @ </ul> 1859 db_finalize(&q); 1860 style_footer(); 1861 return; 1862 } 1863 style_header("Summary of ftsdocs"); 1864 db_prepare(&q, 1865 "SELECT type, idxed, count(*) FROM ftsdocs" 1866 " GROUP BY 1, 2 ORDER BY 3 DESC" 1867 ); 1868 @ <table border=1 cellpadding=3 cellspacing=0> 1869 @ <thead> 1870 @ <tr><th>Type<th>Indexed?<th>Count<th>Link 1871 @ </thead> 1872 @ <tbody> 1873 while( db_step(&q)==SQLITE_ROW ){ 1874 const char *zType = db_column_text(&q,0); 1875 int idxed = db_column_int(&q,1); 1876 int n = db_column_int(&q,2); 1877 @ <tr><td>%h(zType)<td>%d(idxed) 1878 @ <td>%d(n) 1879 @ <td><a href='test-ftsdocs?y=%s(zType)&ixed=%d(idxed)'>listing</a> 1880 @ </tr> 1881 cnt += n; 1882 } 1883 @ </tbody><tfooter> 1884 @ <tr><th>Total<th><th>%d(cnt)<th> 1885 @ </tfooter> 1886 @ </table> 1887 style_footer(); 1888 }