Fossil

Check-in [68194175]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Full-text search for check-in diffs. This works, but it creates a huge index (2x the size of the BLOB table) in spite of being a contentless index. The index is slow to build because of all the diffs that must be computed. Because the index is contentless, the snippet generator runs very slowly on queries - a typical query with a couple hundred hits takes several minutes.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | diff-search
Files: files | file ages | folders
SHA1:68194175fb4ccf1b036c8ddc2da912f5dd7e46b5
User & Date: drh 2016-06-25 03:56:04
Context
2016-06-25
03:56
Full-text search for check-in diffs. This works, but it creates a huge index (2x the size of the BLOB table) in spite of being a contentless index. The index is slow to build because of all the diffs that must be computed. Because the index is contentless, the snippet generator runs very slowly on queries - a typical query with a couple hundred hits takes several minutes. Leaf check-in: 68194175 user: drh tags: diff-search
2016-06-24
03:36
If the FOSSIL_SECURITY_LEVEL environment variable is 2 or more, then present a simple substitution matrix when entering passwords, as a defense against key loggers. For FOSSIL_SECURITY_LEVEL of 1 or more, do not remember the remote-url password. check-in: e1034c4c user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/diff.c.

38
39
40
41
42
43
44

45
46
47
48
49
50
51
....
1802
1803
1804
1805
1806
1807
1808


1809
1810
1811
1812
1813
1814
1815
....
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
#define DIFF_HTML         ((u64)0x20000000) /* Render for HTML */
#define DIFF_LINENO       ((u64)0x40000000) /* Show line numbers */
#define DIFF_NOOPT        (((u64)0x01)<<32) /* Suppress optimizations (debug) */
#define DIFF_INVERT       (((u64)0x02)<<32) /* Invert the diff (debug) */
#define DIFF_CONTEXT_EX   (((u64)0x04)<<32) /* Use context even if zero */
#define DIFF_NOTTOOBIG    (((u64)0x08)<<32) /* Only display if not too big */
#define DIFF_STRIP_EOLCR  (((u64)0x10)<<32) /* Strip trailing CR */


/*
** These error messages are shared in multiple locations.  They are defined
** here for consistency.
*/
#define DIFF_CANNOT_COMPUTE_BINARY \
    "cannot compute difference between binary files\n"
................................................................................
  return w;
}

/*
** Append the error message to pOut.
*/
void diff_errmsg(Blob *pOut, const char *msg, int diffFlags){


  if( diffFlags & DIFF_HTML ){
    blob_appendf(pOut, "<p class=\"generalError\">%s</p>", msg);
  }else{
    blob_append(pOut, msg, -1);
  }
}

................................................................................
  c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob),
                             &c.nFrom, diffFlags);
  c.aTo = break_into_lines(blob_str(pB_Blob), blob_size(pB_Blob),
                           &c.nTo, diffFlags);
  if( c.aFrom==0 || c.aTo==0 ){
    fossil_free(c.aFrom);
    fossil_free(c.aTo);
    if( pOut ){
      diff_errmsg(pOut, DIFF_CANNOT_COMPUTE_BINARY, diffFlags);
    }
    return 0;
  }

  /* Compute the difference */
  diff_all(&c);
  if( ignoreWs && c.nEdit==6 && c.aEdit[1]==0 && c.aEdit[2]==0 ){
    fossil_free(c.aFrom);
    fossil_free(c.aTo);
    fossil_free(c.aEdit);
    if( pOut ) diff_errmsg(pOut, DIFF_WHITESPACE_ONLY, diffFlags);
    return 0;
  }
  if( (diffFlags & DIFF_NOTTOOBIG)!=0 ){
    int i, m, n;
    int *a = c.aEdit;
    int mx = c.nEdit;
    for(i=m=n=0; i<mx; i+=3){ m += a[i]; n += a[i+1]+a[i+2]; }
    if( n>10000 ){
      fossil_free(c.aFrom);
      fossil_free(c.aTo);
      fossil_free(c.aEdit);
      if( pOut ) diff_errmsg(pOut, DIFF_TOO_MANY_CHANGES, diffFlags);
      return 0;
    }
  }
  if( (diffFlags & DIFF_NOOPT)==0 ){
    diff_optimize(&c);
  }








>







 







>
>







 







<
|
<









|











|







38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
....
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
....
1859
1860
1861
1862
1863
1864
1865

1866

1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
#define DIFF_HTML         ((u64)0x20000000) /* Render for HTML */
#define DIFF_LINENO       ((u64)0x40000000) /* Show line numbers */
#define DIFF_NOOPT        (((u64)0x01)<<32) /* Suppress optimizations (debug) */
#define DIFF_INVERT       (((u64)0x02)<<32) /* Invert the diff (debug) */
#define DIFF_CONTEXT_EX   (((u64)0x04)<<32) /* Use context even if zero */
#define DIFF_NOTTOOBIG    (((u64)0x08)<<32) /* Only display if not too big */
#define DIFF_STRIP_EOLCR  (((u64)0x10)<<32) /* Strip trailing CR */
#define DIFF_NO_ERRMSG    (((u64)0x20)<<32) /* Do not generate error messages */

/*
** These error messages are shared in multiple locations.  They are defined
** here for consistency.
*/
#define DIFF_CANNOT_COMPUTE_BINARY \
    "cannot compute difference between binary files\n"
................................................................................
  return w;
}

/*
** Append the error message to pOut.
*/
void diff_errmsg(Blob *pOut, const char *msg, int diffFlags){
  if( pOut==0 ) return;
  if( diffFlags & DIFF_NO_ERRMSG ) return;
  if( diffFlags & DIFF_HTML ){
    blob_appendf(pOut, "<p class=\"generalError\">%s</p>", msg);
  }else{
    blob_append(pOut, msg, -1);
  }
}

................................................................................
  c.aFrom = break_into_lines(blob_str(pA_Blob), blob_size(pA_Blob),
                             &c.nFrom, diffFlags);
  c.aTo = break_into_lines(blob_str(pB_Blob), blob_size(pB_Blob),
                           &c.nTo, diffFlags);
  if( c.aFrom==0 || c.aTo==0 ){
    fossil_free(c.aFrom);
    fossil_free(c.aTo);

    diff_errmsg(pOut, DIFF_CANNOT_COMPUTE_BINARY, diffFlags);

    return 0;
  }

  /* Compute the difference */
  diff_all(&c);
  if( ignoreWs && c.nEdit==6 && c.aEdit[1]==0 && c.aEdit[2]==0 ){
    fossil_free(c.aFrom);
    fossil_free(c.aTo);
    fossil_free(c.aEdit);
    diff_errmsg(pOut, DIFF_WHITESPACE_ONLY, diffFlags);
    return 0;
  }
  if( (diffFlags & DIFF_NOTTOOBIG)!=0 ){
    int i, m, n;
    int *a = c.aEdit;
    int mx = c.nEdit;
    for(i=m=n=0; i<mx; i+=3){ m += a[i]; n += a[i+1]+a[i+2]; }
    if( n>10000 ){
      fossil_free(c.aFrom);
      fossil_free(c.aTo);
      fossil_free(c.aEdit);
      diff_errmsg(pOut, DIFF_TOO_MANY_CHANGES, diffFlags);
      return 0;
    }
  }
  if( (diffFlags & DIFF_NOOPT)==0 ){
    diff_optimize(&c);
  }

Changes to src/diffcmd.c.

925
926
927
928
929
930
931













































































  login_check_credentials();
  if( !g.perm.Read ){ login_needed(g.anon.Read); return; }
  if( zFrom==0 || zTo==0 ) fossil_redirect_home();

  cgi_set_content_type("text/plain");
  diff_two_versions(zFrom, zTo, 0, 0, 0, DIFF_VERBOSE, 0);
}




















































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
  login_check_credentials();
  if( !g.perm.Read ){ login_needed(g.anon.Read); return; }
  if( zFrom==0 || zTo==0 ) fossil_redirect_home();

  cgi_set_content_type("text/plain");
  diff_two_versions(zFrom, zTo, 0, 0, 0, DIFF_VERBOSE, 0);
}

/*
** Compute a string (into pOut) that is a diff of check-in "rid" and its
** primary parent.  The diff has no context lines and is designed to support
** full-text search.
*/
void diff_for_search(int rid, Blob *pOut){
  Manifest *pFrom, *pTo;
  ManifestFile *pFromFile, *pToFile;
  const u64 diffFlags = DIFF_NO_ERRMSG | DIFF_IGNORE_ALLWS | DIFF_CONTEXT_EX;

  pTo = manifest_get(rid, CFTYPE_MANIFEST, 0);
  if( pTo==0 ) return;
  if( pTo->nParent==0 ){
    manifest_destroy(pTo);
    return;
  }
  pFrom = manifest_get_by_name(pTo->azParent[0], 0);
  manifest_file_rewind(pFrom);
  manifest_file_rewind(pTo);
  pFromFile = manifest_file_next(pFrom,0);
  pToFile = manifest_file_next(pTo,0);

  while( pFromFile || pToFile ){
    int cmp;
    if( pFromFile==0 ){
      cmp = +1;
    }else if( pToFile==0 ){
      cmp = -1;
    }else{
      cmp = fossil_strcmp(pFromFile->zName, pToFile->zName);
    }
    if( cmp<0 ){
      blob_appendf(pOut, "DELETED %s\n", pFromFile->zName);
      pFromFile = manifest_file_next(pFrom,0);
    }else if( cmp>0 ){
      blob_appendf(pOut, "ADDED   %s\n", pToFile->zName);
      pToFile = manifest_file_next(pTo,0);
    }else if( fossil_strcmp(pFromFile->zUuid, pToFile->zUuid)==0 ){
      /* No changes */
      pFromFile = manifest_file_next(pFrom,0);
      pToFile = manifest_file_next(pTo,0);
    }else{
      Blob fileA, fileB;
      blob_appendf(pOut, "CHANGED %s\n", pToFile->zName);
      rid = uuid_to_rid(pFromFile->zUuid, 0);
      content_get(rid, &fileA);
      rid = uuid_to_rid(pToFile->zUuid, 0);
      content_get(rid, &fileB);
      text_diff(&fileA, &fileB, pOut, 0, diffFlags);
      blob_reset(&fileA);
      blob_reset(&fileB);
      pFromFile = manifest_file_next(pFrom,0);
      pToFile = manifest_file_next(pTo,0);
    }
  }
  manifest_destroy(pFrom);
  manifest_destroy(pTo);
}

/*
** Implement the diff(RID) SQL function that returns a text diff for
** check-in RID.
*/
void diff_sqlfunc(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  int rid;
  Blob out;
  rid = sqlite3_value_int(argv[0]);
  blob_init(&out, 0, 0);
  diff_for_search(rid, &out);
  sqlite3_result_text(context, blob_str(&out), blob_size(&out), SQLITE_TRANSIENT);
  blob_reset(&out);
}

Changes to src/search.c.

508
509
510
511
512
513
514


515
516
517
518
519
520
521
...
598
599
600
601
602
603
604

605
606
607
608
609
610
611
612
...
613
614
615
616
617
618
619

620
621
622
623
624
625
626
627
628
629
...
839
840
841
842
843
844
845

846
847
848
849
850
851
852
...
986
987
988
989
990
991
992

993
994
995
996
997
998
999
....
1010
1011
1012
1013
1014
1015
1016

1017
1018
1019
1020
1021
1022
1023
....
1061
1062
1063
1064
1065
1066
1067

1068
1069
1070
1071
1072
1073
1074
....
1167
1168
1169
1170
1171
1172
1173

1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186

1187
1188
1189
1190
1191
1192
1193
....
1199
1200
1201
1202
1203
1204
1205




1206
1207
1208
1209
1210
1211
1212
....
1409
1410
1411
1412
1413
1414
1415




1416
1417
1418
1419
1420
1421
1422
....
1543
1544
1545
1546
1547
1548
1549






















1550
1551
1552
1553
1554
1555
1556
....
1602
1603
1604
1605
1606
1607
1608



1609
1610
1611
1612
1613
1614
1615
....
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
....
1655
1656
1657
1658
1659
1660
1661
1662

1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
....
1681
1682
1683
1684
1685
1686
1687


1688
1689
1690
1691
1692
1693
1694
....
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
     search_stext_sqlfunc, 0, 0);
  sqlite3_create_function(db, "title", 3, SQLITE_UTF8, 0,
     search_title_sqlfunc, 0, 0);
  sqlite3_create_function(db, "body", 3, SQLITE_UTF8, 0,
     search_body_sqlfunc, 0, 0);
  sqlite3_create_function(db, "urlencode", 1, SQLITE_UTF8, 0,
     search_urlencode_sqlfunc, 0, 0);


}

/*
** Testing the search function.
**
** COMMAND: search*
**
................................................................................

#if INTERFACE
/* What to search for */
#define SRCH_CKIN   0x0001    /* Search over check-in comments */
#define SRCH_DOC    0x0002    /* Search over embedded documents */
#define SRCH_TKT    0x0004    /* Search over tickets */
#define SRCH_WIKI   0x0008    /* Search over wiki */

#define SRCH_ALL    0x000f    /* Search over everything */
#endif

/*
** Remove bits from srchFlags which are disallowed by either the
** current server configuration or by user permissions.
*/
unsigned int search_restrict(unsigned int srchFlags){
................................................................................
  static unsigned int knownGood = 0;
  static unsigned int knownBad = 0;
  static const struct { unsigned m; const char *zKey; } aSetng[] = {
     { SRCH_CKIN,   "search-ci"   },
     { SRCH_DOC,    "search-doc"  },
     { SRCH_TKT,    "search-tkt"  },
     { SRCH_WIKI,   "search-wiki" },

  };
  int i;
  if( g.perm.Read==0 )   srchFlags &= ~(SRCH_CKIN|SRCH_DOC);
  if( g.perm.RdTkt==0 )  srchFlags &= ~(SRCH_TKT);
  if( g.perm.RdWiki==0 ) srchFlags &= ~(SRCH_WIKI);
  for(i=0; i<ArraySize(aSetng); i++){
    unsigned int m = aSetng[i].m;
    if( (srchFlags & m)==0 ) continue;
    if( ((knownGood|knownBad) & m)!=0 ) continue;
    if( db_get_boolean(aSetng[i].zKey,0) ){
................................................................................
  if( srchFlags!=SRCH_ALL ){
    const char *zSep = " AND (";
    static const struct { unsigned m; char c; } aMask[] = {
       { SRCH_CKIN,  'c' },
       { SRCH_DOC,   'd' },
       { SRCH_TKT,   't' },
       { SRCH_WIKI,  'w' },

    };
    int i;
    for(i=0; i<ArraySize(aMask); i++){
      if( srchFlags & aMask[i].m ){
        blob_appendf(&sql, "%sftsdocs.type='%c'", zSep, aMask[i].c);
        zSep = " OR ";
      }
................................................................................
  int fDebug = PB("debug");
  srchFlags = search_restrict(srchFlags);
  switch( srchFlags ){
    case SRCH_CKIN:  zType = " Check-ins";  zClass = "Ckin";  break;
    case SRCH_DOC:   zType = " Docs";       zClass = "Doc";   break;
    case SRCH_TKT:   zType = " Tickets";    zClass = "Tkt";   break;
    case SRCH_WIKI:  zType = " Wiki";       zClass = "Wiki";  break;

  }
  if( srchFlags==0 ){
    zDisable1 = " disabled";
    zDisable2 = " disabled";
    zPattern = "";
  }else{
    zDisable1 = " autofocus";
................................................................................
  if( useYparam && (srchFlags & (srchFlags-1))!=0 && useYparam ){
    static const struct { char *z; char *zNm; unsigned m; } aY[] = {
       { "all",  "All",        SRCH_ALL  },
       { "c",    "Check-ins",  SRCH_CKIN },
       { "d",    "Docs",       SRCH_DOC  },
       { "t",    "Tickets",    SRCH_TKT  },
       { "w",    "Wiki",       SRCH_WIKI },

    };
    const char *zY = PD("y","all");
    unsigned newFlags = srchFlags;
    int i;
    @ <select size='1' name='y'>
    for(i=0; i<ArraySize(aY); i++){
      if( (aY[i].m & srchFlags)==0 ) continue;
................................................................................
**
**    s=PATTERN       Specify the full-text pattern to search for
**    y=TYPE          What to search.
**                      c -> check-ins
**                      d -> documentation
**                      t -> tickets
**                      w -> wiki

**                    all -> everything
*/
void search_page(void){
  login_check_credentials();
  style_header("Search");
  search_screen(SRCH_ALL, 1);
  style_footer();
................................................................................
** Return "search text" - a reduced version of a document appropriate for
** full text search and/or for constructing a search result snippet.
**
**    cType:            d      Embedded documentation
**                      w      Wiki page
**                      c      Check-in comment
**                      t      Ticket text

**
**    rid               The RID of an artifact that defines the object
**                      being searched.
**
**    zName             Name of the object being searched.
*/
void search_stext(
  char cType,            /* Type of document */
  int rid,               /* BLOB.RID or TAG.TAGID value for document */
  const char *zName,     /* Auxiliary information */
  Blob *pOut             /* OUT: Initialize to the search text */
){
  blob_init(pOut, 0, 0);

  switch( cType ){
    case 'd': {   /* Documents */
      Blob doc;
      content_get(rid, &doc);
      blob_to_utf8_no_bom(&doc, 0);
      get_stext_by_mimetype(&doc, mimetype_from_name(zName), pOut);
      blob_reset(&doc);
................................................................................
      if( pWiki==0 ) break;
      blob_init(&wiki, pWiki->zWiki, -1);
      get_stext_by_mimetype(&wiki, wiki_filter_mimetypes(pWiki->zMimetype),
                            pOut);
      blob_reset(&wiki);
      manifest_destroy(pWiki);
      break;




    }
    case 'c': {   /* Check-in Comments */
      static Stmt q;
      static int isPlainText = -1;
      db_static_prepare(&q,
         "SELECT coalesce(ecomment,comment)"
         "  ||' (user: '||coalesce(euser,user,'?')"
................................................................................
void search_fill_index(void){
  if( !search_index_exists() ) return;
  search_sql_setup(g.db);
  db_multi_exec(
    "INSERT OR IGNORE INTO ftsdocs(type,rid,idxed)"
    "  SELECT 'c', objid, 0 FROM event WHERE type='ci';"
  );




  db_multi_exec(
    "WITH latest_wiki(rid,name,mtime) AS ("
    "  SELECT tagxref.rid, substr(tag.tagname,6), max(tagxref.mtime)"
    "    FROM tag, tagxref"
    "   WHERE tag.tagname GLOB 'wiki-*'"
    "     AND tagxref.tagid=tag.tagid"
    "     AND tagxref.value>0"
................................................................................
    "    event.mtime"
    "  FROM ftsdocs, event, blob"
    "  WHERE ftsdocs.type='c' AND NOT ftsdocs.idxed"
    "    AND event.objid=ftsdocs.rid"
    "    AND blob.rid=ftsdocs.rid"
  );
}























/*
** Deal with all of the unindexed 't' terms in FTSDOCS
*/
static void search_update_ticket_index(void){
  db_multi_exec(
    "INSERT INTO ftsidx(docid,title,body)"
................................................................................
  if( !search_index_exists() ) return;
  if( !db_exists("SELECT 1 FROM ftsdocs WHERE NOT idxed") ) return;
  search_sql_setup(g.db);
  if( srchFlags & (SRCH_CKIN|SRCH_DOC) ){
    search_update_doc_index();
    search_update_checkin_index();
  }



  if( srchFlags & SRCH_TKT ){
    search_update_ticket_index();
  }
  if( srchFlags & SRCH_WIKI ){
    search_update_wiki_index();
  }
}
................................................................................
** of the repository.  Subcommands:
**
**     reindex            Rebuild the search index.  This is a no-op if
**                        index search is disabled
**
**     index (on|off)     Turn the search index on or off
**
**     enable cdtw        Enable various kinds of search. c=Check-ins,
**                        d=Documents, t=Tickets, w=Wiki.
**
**     disable cdtw       Disable various kinds of search
**
**     stemmer (on|off)   Turn the Porter stemmer on or off for indexed
**                        search.  (Unindexed search is never stemmed.)
**
** The current search settings are displayed after any changes are applied.
** Run this command with no arguments to simply see the settings.
*/
................................................................................
     { 1,  "reindex"  },
     { 2,  "index"    },
     { 3,  "disable"  },
     { 4,  "enable"   },
     { 5,  "stemmer"  },
  };
  static const struct { char *zSetting; char *zName; char *zSw; } aSetng[] = {
     { "search-ckin",   "check-in search:",  "c" },

     { "search-doc",    "document search:",  "d" },
     { "search-tkt",    "ticket search:",    "t" },
     { "search-wiki",   "wiki search:",      "w" },
  };
  char *zSubCmd = 0;
  int i, j, n;
  int iCmd = 0;
  int iAction = 0;
  db_find_and_open_repository(0, 0);
  if( g.argc>2 ){
................................................................................
      for(i=0; i<ArraySize(aCmd); i++) blob_appendf(&all, " %s", aCmd[i].z);
      fossil_fatal("unknown \"%s\" - should be on of:%s",
                   zSubCmd, blob_str(&all));
      return;
    }
    iCmd = aCmd[i].iCmd;
  }


  if( iCmd==1 ){
    if( search_index_exists() ) iAction = 2;
  }
  if( iCmd==2 ){
    if( g.argc<3 ) usage("index (on|off)");
    iAction = 1 + is_truth(g.argv[3]);
  }
................................................................................
  }
  if( iAction>=2 ){
    search_rebuild_index();
  }

  /* Always show the status before ending */
  for(i=0; i<ArraySize(aSetng); i++){
    fossil_print("%-16s %s\n", aSetng[i].zName,
       db_get_boolean(aSetng[i].zSetting,0) ? "on" : "off");
  }
  fossil_print("%-16s %s\n", "Porter stemmer:",
       db_get_boolean("search-stemmer",0) ? "on" : "off");
  if( search_index_exists() ){
    fossil_print("%-16s enabled\n", "full-text index:");
    fossil_print("%-16s %d\n", "documents:",
       db_int(0, "SELECT count(*) FROM ftsdocs"));
  }else{
    fossil_print("%-16s disabled\n", "full-text index:");
  }
  db_end_transaction(0);
}







>
>







 







>
|







 







>


|







 







>







 







>







 







>







 







>







 







>













>







 







>
>
>
>







 







>
>
>
>







 







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







 







>
>
>







 







|
|

|







 







|
>
|
|
|







 







>
>







 







|


|


|
|


|



508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
...
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
...
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
...
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
...
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
....
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
....
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
....
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
....
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
....
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
....
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
....
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
....
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
....
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
....
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
....
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
     search_stext_sqlfunc, 0, 0);
  sqlite3_create_function(db, "title", 3, SQLITE_UTF8, 0,
     search_title_sqlfunc, 0, 0);
  sqlite3_create_function(db, "body", 3, SQLITE_UTF8, 0,
     search_body_sqlfunc, 0, 0);
  sqlite3_create_function(db, "urlencode", 1, SQLITE_UTF8, 0,
     search_urlencode_sqlfunc, 0, 0);
  sqlite3_create_function(db, "diff", 1, SQLITE_UTF8, 0,
     diff_sqlfunc, 0, 0);
}

/*
** Testing the search function.
**
** COMMAND: search*
**
................................................................................

#if INTERFACE
/* What to search for */
#define SRCH_CKIN   0x0001    /* Search over check-in comments */
#define SRCH_DOC    0x0002    /* Search over embedded documents */
#define SRCH_TKT    0x0004    /* Search over tickets */
#define SRCH_WIKI   0x0008    /* Search over wiki */
#define SRCH_DIFF   0x0010    /* Search check-in diffs */
#define SRCH_ALL    0x001f    /* Search over everything */
#endif

/*
** Remove bits from srchFlags which are disallowed by either the
** current server configuration or by user permissions.
*/
unsigned int search_restrict(unsigned int srchFlags){
................................................................................
  static unsigned int knownGood = 0;
  static unsigned int knownBad = 0;
  static const struct { unsigned m; const char *zKey; } aSetng[] = {
     { SRCH_CKIN,   "search-ci"   },
     { SRCH_DOC,    "search-doc"  },
     { SRCH_TKT,    "search-tkt"  },
     { SRCH_WIKI,   "search-wiki" },
     { SRCH_DIFF,   "search-diff" },
  };
  int i;
  if( g.perm.Read==0 )   srchFlags &= ~(SRCH_CKIN|SRCH_DOC|SRCH_DIFF);
  if( g.perm.RdTkt==0 )  srchFlags &= ~(SRCH_TKT);
  if( g.perm.RdWiki==0 ) srchFlags &= ~(SRCH_WIKI);
  for(i=0; i<ArraySize(aSetng); i++){
    unsigned int m = aSetng[i].m;
    if( (srchFlags & m)==0 ) continue;
    if( ((knownGood|knownBad) & m)!=0 ) continue;
    if( db_get_boolean(aSetng[i].zKey,0) ){
................................................................................
  if( srchFlags!=SRCH_ALL ){
    const char *zSep = " AND (";
    static const struct { unsigned m; char c; } aMask[] = {
       { SRCH_CKIN,  'c' },
       { SRCH_DOC,   'd' },
       { SRCH_TKT,   't' },
       { SRCH_WIKI,  'w' },
       { SRCH_DIFF,  'x' },
    };
    int i;
    for(i=0; i<ArraySize(aMask); i++){
      if( srchFlags & aMask[i].m ){
        blob_appendf(&sql, "%sftsdocs.type='%c'", zSep, aMask[i].c);
        zSep = " OR ";
      }
................................................................................
  int fDebug = PB("debug");
  srchFlags = search_restrict(srchFlags);
  switch( srchFlags ){
    case SRCH_CKIN:  zType = " Check-ins";  zClass = "Ckin";  break;
    case SRCH_DOC:   zType = " Docs";       zClass = "Doc";   break;
    case SRCH_TKT:   zType = " Tickets";    zClass = "Tkt";   break;
    case SRCH_WIKI:  zType = " Wiki";       zClass = "Wiki";  break;
    case SRCH_DIFF:  zType = " Diffs";      zClass = "Diff";  break;
  }
  if( srchFlags==0 ){
    zDisable1 = " disabled";
    zDisable2 = " disabled";
    zPattern = "";
  }else{
    zDisable1 = " autofocus";
................................................................................
  if( useYparam && (srchFlags & (srchFlags-1))!=0 && useYparam ){
    static const struct { char *z; char *zNm; unsigned m; } aY[] = {
       { "all",  "All",        SRCH_ALL  },
       { "c",    "Check-ins",  SRCH_CKIN },
       { "d",    "Docs",       SRCH_DOC  },
       { "t",    "Tickets",    SRCH_TKT  },
       { "w",    "Wiki",       SRCH_WIKI },
       { "z",    "Diff",       SRCH_DIFF },
    };
    const char *zY = PD("y","all");
    unsigned newFlags = srchFlags;
    int i;
    @ <select size='1' name='y'>
    for(i=0; i<ArraySize(aY); i++){
      if( (aY[i].m & srchFlags)==0 ) continue;
................................................................................
**
**    s=PATTERN       Specify the full-text pattern to search for
**    y=TYPE          What to search.
**                      c -> check-ins
**                      d -> documentation
**                      t -> tickets
**                      w -> wiki
**                      x -> diff
**                    all -> everything
*/
void search_page(void){
  login_check_credentials();
  style_header("Search");
  search_screen(SRCH_ALL, 1);
  style_footer();
................................................................................
** Return "search text" - a reduced version of a document appropriate for
** full text search and/or for constructing a search result snippet.
**
**    cType:            d      Embedded documentation
**                      w      Wiki page
**                      c      Check-in comment
**                      t      Ticket text
**                      x      Check-in diffs
**
**    rid               The RID of an artifact that defines the object
**                      being searched.
**
**    zName             Name of the object being searched.
*/
void search_stext(
  char cType,            /* Type of document */
  int rid,               /* BLOB.RID or TAG.TAGID value for document */
  const char *zName,     /* Auxiliary information */
  Blob *pOut             /* OUT: Initialize to the search text */
){
  blob_init(pOut, 0, 0);
  /* printf("stext(%c%d)\n", cType, rid); fflush(stdout); */
  switch( cType ){
    case 'd': {   /* Documents */
      Blob doc;
      content_get(rid, &doc);
      blob_to_utf8_no_bom(&doc, 0);
      get_stext_by_mimetype(&doc, mimetype_from_name(zName), pOut);
      blob_reset(&doc);
................................................................................
      if( pWiki==0 ) break;
      blob_init(&wiki, pWiki->zWiki, -1);
      get_stext_by_mimetype(&wiki, wiki_filter_mimetypes(pWiki->zMimetype),
                            pOut);
      blob_reset(&wiki);
      manifest_destroy(pWiki);
      break;
    }
    case 'x': {   /* Check-in diff */
      diff_for_search(rid, pOut);
      break;
    }
    case 'c': {   /* Check-in Comments */
      static Stmt q;
      static int isPlainText = -1;
      db_static_prepare(&q,
         "SELECT coalesce(ecomment,comment)"
         "  ||' (user: '||coalesce(euser,user,'?')"
................................................................................
void search_fill_index(void){
  if( !search_index_exists() ) return;
  search_sql_setup(g.db);
  db_multi_exec(
    "INSERT OR IGNORE INTO ftsdocs(type,rid,idxed)"
    "  SELECT 'c', objid, 0 FROM event WHERE type='ci';"
  );
  db_multi_exec(
    "INSERT OR IGNORE INTO ftsdocs(type,rid,idxed)"
    "  SELECT 'x', objid, 0 FROM event WHERE type='ci';"
  );
  db_multi_exec(
    "WITH latest_wiki(rid,name,mtime) AS ("
    "  SELECT tagxref.rid, substr(tag.tagname,6), max(tagxref.mtime)"
    "    FROM tag, tagxref"
    "   WHERE tag.tagname GLOB 'wiki-*'"
    "     AND tagxref.tagid=tag.tagid"
    "     AND tagxref.value>0"
................................................................................
    "    event.mtime"
    "  FROM ftsdocs, event, blob"
    "  WHERE ftsdocs.type='c' AND NOT ftsdocs.idxed"
    "    AND event.objid=ftsdocs.rid"
    "    AND blob.rid=ftsdocs.rid"
  );
}

/*
** Deal with all of the unindexed 'x' terms in FTSDOCS
*/
static void search_update_diff_index(void){
  db_multi_exec(
    "INSERT INTO ftsidx(docid,title,body)"
    " SELECT rowid, '', body('x',rid,NULL) FROM ftsdocs"
    "  WHERE type='x' AND NOT idxed;"
  );
  db_multi_exec(
    "REPLACE INTO ftsdocs(rowid,idxed,type,rid,name,label,url,mtime)"
    "  SELECT ftsdocs.rowid, 1, 'x', ftsdocs.rid, NULL,"
    "    printf('Check-in [%%.16s] on %%s',blob.uuid,datetime(event.mtime)),"
    "    printf('/info/%%.20s',blob.uuid),"
    "    event.mtime"
    "  FROM ftsdocs, event, blob"
    "  WHERE ftsdocs.type='x' AND NOT ftsdocs.idxed"
    "    AND event.objid=ftsdocs.rid"
    "    AND blob.rid=ftsdocs.rid"
  );
}

/*
** Deal with all of the unindexed 't' terms in FTSDOCS
*/
static void search_update_ticket_index(void){
  db_multi_exec(
    "INSERT INTO ftsidx(docid,title,body)"
................................................................................
  if( !search_index_exists() ) return;
  if( !db_exists("SELECT 1 FROM ftsdocs WHERE NOT idxed") ) return;
  search_sql_setup(g.db);
  if( srchFlags & (SRCH_CKIN|SRCH_DOC) ){
    search_update_doc_index();
    search_update_checkin_index();
  }
  if( srchFlags & SRCH_DIFF ){
    search_update_diff_index();
  }
  if( srchFlags & SRCH_TKT ){
    search_update_ticket_index();
  }
  if( srchFlags & SRCH_WIKI ){
    search_update_wiki_index();
  }
}
................................................................................
** of the repository.  Subcommands:
**
**     reindex            Rebuild the search index.  This is a no-op if
**                        index search is disabled
**
**     index (on|off)     Turn the search index on or off
**
**     enable cdtwx       Enable various kinds of search. c=Check-ins,
**                        d=Documents, t=Tickets, w=Wiki, x=Diffs.
**
**     disable cdtwx      Disable various kinds of search
**
**     stemmer (on|off)   Turn the Porter stemmer on or off for indexed
**                        search.  (Unindexed search is never stemmed.)
**
** The current search settings are displayed after any changes are applied.
** Run this command with no arguments to simply see the settings.
*/
................................................................................
     { 1,  "reindex"  },
     { 2,  "index"    },
     { 3,  "disable"  },
     { 4,  "enable"   },
     { 5,  "stemmer"  },
  };
  static const struct { char *zSetting; char *zName; char *zSw; } aSetng[] = {
     { "search-ckin",   "check-in comment search:",  "c" },
     { "search-diff",   "check-in diff search:",     "x" },
     { "search-doc",    "document search:",          "d" },
     { "search-tkt",    "ticket search:",            "t" },
     { "search-wiki",   "wiki search:",              "w" },
  };
  char *zSubCmd = 0;
  int i, j, n;
  int iCmd = 0;
  int iAction = 0;
  db_find_and_open_repository(0, 0);
  if( g.argc>2 ){
................................................................................
      for(i=0; i<ArraySize(aCmd); i++) blob_appendf(&all, " %s", aCmd[i].z);
      fossil_fatal("unknown \"%s\" - should be on of:%s",
                   zSubCmd, blob_str(&all));
      return;
    }
    iCmd = aCmd[i].iCmd;
  }
  login_set_capabilities("s",0);
  g.zTop = "";
  if( iCmd==1 ){
    if( search_index_exists() ) iAction = 2;
  }
  if( iCmd==2 ){
    if( g.argc<3 ) usage("index (on|off)");
    iAction = 1 + is_truth(g.argv[3]);
  }
................................................................................
  }
  if( iAction>=2 ){
    search_rebuild_index();
  }

  /* Always show the status before ending */
  for(i=0; i<ArraySize(aSetng); i++){
    fossil_print("%-25s %s\n", aSetng[i].zName,
       db_get_boolean(aSetng[i].zSetting,0) ? "on" : "off");
  }
  fossil_print("%-25s %s\n", "Porter stemmer:",
       db_get_boolean("search-stemmer",0) ? "on" : "off");
  if( search_index_exists() ){
    fossil_print("%-25s enabled\n", "full-text index:");
    fossil_print("%-25s %d\n", "documents:",
       db_int(0, "SELECT count(*) FROM ftsdocs"));
  }else{
    fossil_print("%-25s disabled\n", "full-text index:");
  }
  db_end_transaction(0);
}

Changes to src/setup.c.

2153
2154
2155
2156
2157
2158
2159


2160
2161
2162
2163
2164
2165
2166
  @ <hr />
  entry_attribute("Document Branch", 20, "doc-branch", "db", "trunk", 0);
  @ <p>When searching documents, use the versions of the files found at the
  @ type of the "Document Branch" branch.  Recommended value: "trunk".
  @ Document search is disabled if blank.
  @ <hr />
  onoff_attribute("Search Check-in Comments", "search-ci", "sc", 0, 0);


  @ <br />
  onoff_attribute("Search Documents", "search-doc", "sd", 0, 0);
  @ <br />
  onoff_attribute("Search Tickets", "search-tkt", "st", 0, 0);
  @ <br />
  onoff_attribute("Search Wiki","search-wiki", "sw", 0, 0);
  @ <hr />







>
>







2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
  @ <hr />
  entry_attribute("Document Branch", 20, "doc-branch", "db", "trunk", 0);
  @ <p>When searching documents, use the versions of the files found at the
  @ type of the "Document Branch" branch.  Recommended value: "trunk".
  @ Document search is disabled if blank.
  @ <hr />
  onoff_attribute("Search Check-in Comments", "search-ci", "sc", 0, 0);
  @ <br />
  onoff_attribute("Search Check-in Diffs","search-diff", "sx", 0, 0);
  @ <br />
  onoff_attribute("Search Documents", "search-doc", "sd", 0, 0);
  @ <br />
  onoff_attribute("Search Tickets", "search-tkt", "st", 0, 0);
  @ <br />
  onoff_attribute("Search Wiki","search-wiki", "sw", 0, 0);
  @ <hr />