Fossil

Check-in [cefadbd5]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Merge the /artifact_size_stats page into the /artifact_stats page. Make that page accessible to anybody with check-in privilege.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA3-256:cefadbd5ce4dba55d73f5819cd224476d685d992dadab910559b283455c03720
User & Date: drh 2017-12-11 14:52:47
Context
2017-12-11
15:21
Add links to /bigbloblist from the /artifact_stats page. check-in: 9fc51940 user: drh tags: trunk
14:52
Merge the /artifact_size_stats page into the /artifact_stats page. Make that page accessible to anybody with check-in privilege. check-in: cefadbd5 user: drh tags: trunk
2017-12-10
02:30
Add the experimental /artifact_size_stats webpage. There are no links to this page, yet. check-in: 4439f15d user: drh tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to src/name.c.

1021
1022
1023
1024
1025
1026
1027


1028
1029
1030
1031
1032
1033
1034
....
1095
1096
1097
1098
1099
1100
1101


1102
1103
1104
1105
1106
1107
1108

  login_check_credentials();
  if( !g.perm.Read ){ login_needed(g.anon.Read); return; }
  style_header("List Of Artifacts");
  style_submenu_element("250 Largest", "bigbloblist");
  if( g.perm.Admin ){
    style_submenu_element("Artifact Log", "rcvfromlist");


    style_submenu_element("Artifact Stats", "artifact_stats");
  }
  if( !unpubOnly && mx>n && P("s")==0 ){
    int i;
    @ <p>Select a range of artifacts to view:</p>
    @ <ul>
    for(i=1; i<=mx; i+=n){
................................................................................
  Stmt q;
  int n = atoi(PD("n","250"));

  login_check_credentials();
  if( !g.perm.Read ){ login_needed(g.anon.Read); return; }
  if( g.perm.Admin ){
    style_submenu_element("Artifact Log", "rcvfromlist");


    style_submenu_element("Artifact Stats", "artifact_stats");
  }
  style_submenu_element("All Artifacts", "bloblist");
  style_header("%d Largest Artifacts", n);
  db_multi_exec(
    "CREATE TEMP TABLE toshow(rid INTEGER PRIMARY KEY);"
    "INSERT INTO toshow(rid)"







>
>







 







>
>







1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
....
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112

  login_check_credentials();
  if( !g.perm.Read ){ login_needed(g.anon.Read); return; }
  style_header("List Of Artifacts");
  style_submenu_element("250 Largest", "bigbloblist");
  if( g.perm.Admin ){
    style_submenu_element("Artifact Log", "rcvfromlist");
  }
  if( g.perm.Write ){
    style_submenu_element("Artifact Stats", "artifact_stats");
  }
  if( !unpubOnly && mx>n && P("s")==0 ){
    int i;
    @ <p>Select a range of artifacts to view:</p>
    @ <ul>
    for(i=1; i<=mx; i+=n){
................................................................................
  Stmt q;
  int n = atoi(PD("n","250"));

  login_check_credentials();
  if( !g.perm.Read ){ login_needed(g.anon.Read); return; }
  if( g.perm.Admin ){
    style_submenu_element("Artifact Log", "rcvfromlist");
  }
  if( g.perm.Write ){
    style_submenu_element("Artifact Stats", "artifact_stats");
  }
  style_submenu_element("All Artifacts", "bloblist");
  style_header("%d Largest Artifacts", n);
  db_multi_exec(
    "CREATE TEMP TABLE toshow(rid INTEGER PRIMARY KEY);"
    "INSERT INTO toshow(rid)"

Changes to src/stat.c.

93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
...
573
574
575
576
577
578
579


580
581
582
583
584
585
586
...
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650














651
652

653






654



655


656
657

658
659
660
661
662
663
664
665
666





























667
668
669
670
671
672
673
674
675
676
677

678
679
680
681
682
683

684
685
686
687
688

689
690
691

692
693
694
695
696
697
698
699
700
701
702
703
704
...
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723

724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746


747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764

765
766
767
768
769
770
771
...
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
  @ %s(zBuf)
  @ </td></tr>
  if( !brief ){
    @ <tr><th>Number&nbsp;Of&nbsp;Artifacts:</th><td>
    n = db_int(0, "SELECT count(*) FROM blob");
    m = db_int(0, "SELECT count(*) FROM delta");
    @ %d(n) (%d(n-m) fulltext and %d(m) deltas)
    if( g.perm.Admin ){
      @ <a href='%R/artifact_stats'>Details</a>
    }
    @ </td></tr>
    if( n>0 ){
      int a, b;
      Stmt q;
      @ <tr><th>Uncompressed&nbsp;Artifact&nbsp;Size:</th><td>
................................................................................
    @ </svg></center>
  }
  style_footer();
}

/*
** Gather statistics on artifact types, counts, and sizes.


*/
static void gather_artifact_stats(int bWithTypes){
  static const char zSql[] = 
    @ CREATE TEMP TABLE artstat(
    @   id INTEGER PRIMARY KEY,   -- Corresponds to BLOB.RID
    @   atype TEXT,               -- 'data', 'manifest', 'tag', 'wiki', etc.
    @   isDelta BOOLEAN,          -- true if stored as a delta
................................................................................
  db_multi_exec("%s", zSql/*safe-for-%s*/);
  if( bWithTypes ){
    db_multi_exec("%s", zSql2/*safe-for-%s*/);
  }
}

/*
** WEBPAGE: artifact_size_stats
**
** Show information about the sizes of artifacts.
*/
void artifact_size_stats_page(void){
  Stmt q;
  int nTotal = 0;
  int nDelta = 0;
  int nFull = 0;














  int med;
  double r;

  login_check_credentials();






  if( !g.perm.Read ){ login_needed(g.anon.Read); return; }



  style_header("Artifact Sizes");


  gather_artifact_stats(0);
  @ <table class="label-value">

  db_prepare(&q,
    "SELECT count(*), sum(isDelta), avg(szCmpr), avg(szExp), max(szCmpr),"
    "       max(szExp)"
    "  FROM artstat"
  );
  if( db_step(&q)==SQLITE_ROW ){
    nTotal = db_column_int(&q,0);
    nDelta = db_column_int(&q,1);
    nFull = nTotal - nDelta;





























    @ <tr><th>Number of artifacts:</th><td>%d(nTotal)</td></tr>
    if( nTotal>0 ){
      @ <tr><th>Number of deltas:</th>\
      @ <td>%d(nDelta) (%d(nDelta*100/nTotal)%%)</td></tr>

      @ <tr><th>Number of full-text:</th><td>%d(nFull) \
      @ (%d(nFull*100/nTotal)%%)</td></tr>
    }
    @ <tr><th>Largest compressed artifact size:</th>\
    @ <td>%d(db_column_int(&q,4))</td></tr>


    @ <tr><th>Average compressed artifact size:</th> \
    @ <td>%.2f(db_column_double(&q,2))</td></tr>

    db_multi_exec("CREATE INDEX artstatx1 ON artstat(szCmpr, isDelta);");
    med = db_int(0, "SELECT szCmpr FROM artstat ORDER BY szCmpr"
                    " LIMIT 1 OFFSET %d", nTotal/2);

    @ <tr><th>Median compressed artifact size:</th><td>%d(med)</td></tr>

    @ <tr><th>Largest uncompressed artifact size:</td>\
    @ <td>%d(db_column_int(&q,5))</td></tr>


    @ <tr><th>Average uncompressed artifact size:</th> \
    @ <td>%.2f(db_column_double(&q,3))</td></tr>


    med = db_int(0, "SELECT szExp FROM artstat ORDER BY szExp"
                    " LIMIT 1 OFFSET %d", nTotal/2);
    @ <tr><th>Median uncompressed artifact size:</th><td>%d(med)</td></tr>

  }
  db_finalize(&q);
  db_prepare(&q,
    "SELECT avg(szCmpr), max(szCmpr) FROM artstat WHERE isDelta"
  );
  if( db_step(&q)==SQLITE_ROW ){
    @ <tr><th>Largest delta:</td>\
    @ <td>%d(db_column_int(&q,1))</td></tr>

................................................................................
    @ <td>%.2f(db_column_double(&q,0))</td></tr>

    med = db_int(0, "SELECT szCmpr FROM artstat WHERE isDelta ORDER BY szCmpr"
                    " LIMIT 1 OFFSET %d", nDelta/2);
    @ <tr><th>Median delta:</th><td>%d(med)</td></tr>
  }
  db_finalize(&q);

  r = db_double(0.0, "SELECT avg(szCmpr) FROM artstat WHERE NOT isDelta;");
  @ <tr><th>Average full-text artifact:</th><td>%.2f(r)</td></tr>

  med = db_int(0, "SELECT szCmpr FROM artstat WHERE NOT isDelta ORDER BY szCmpr"
                  " LIMIT 1 OFFSET %d", nFull/2);
  @ <tr><th>Median full-text artifact:</th><td>%d(med)</td></tr>
  @ </table>
  if( nTotal>0 ){
    sqlite3_int64 szTotal;
    sqlite3_int64 szPart;

    @ <h2>Artifact size distribution facts:</h2>
    @ <ol>
    szTotal = db_int64(0, "SELECT sum(szCmpr) FROM artstat");
    szPart = db_int64(0,
       "SELECT sum(x) FROM (SELECT szCmpr AS x FROM artstat ORDER BY 1 DESC"
                          " LIMIT %d)", (nTotal+99)/100);
    @ <li><p>The largest 1%% of artifacts (the largest %d((nTotal+99)/100) \
    @ artifacts) use %lld(szPart*100/szTotal)%% of the total artifact space.
    szPart = db_int64(0,
       "SELECT sum(x) FROM (SELECT szCmpr AS x FROM artstat ORDER BY 1 DESC"
                          " LIMIT %d)", (nTotal+9)/10);
    @ <li><p>The largest 10%% of artifacts (the largest %d((nTotal+9)/10) \
    @ artifacts) use %lld(szPart*100/szTotal)%% of the total artifact space.
    szPart = db_int64(0,
       "SELECT sum(x) FROM (SELECT szCmpr AS x FROM artstat ORDER BY 1 DESC"
                          " LIMIT %d)", nTotal/4);
    @ <li><p>The largest 25%% of artifacts (the largest %d(nTotal/4) \
    @ artifacts) use %lld(szPart*100/szTotal)%% of the total artifact space.
    szPart = db_int64(0,
       "SELECT sum(x) FROM (SELECT szCmpr AS x FROM artstat ORDER BY 1 DESC"
                          " LIMIT %d)", nTotal/2);
    @ <li><p>The largest 50%% of artifacts (the largest %d(nTotal/2) \
    @ artifacts) use %lld(szPart*100/szTotal)%% of the total artifact space.


    @ </ol>
  }
  style_footer();
}

/*
** WEBPAGE: artifact_stats
**
** Show information about the sizes of artifacts in this repository
*/
void artifact_stats_page(void){
  Stmt q;
  login_check_credentials();
  if( !g.perm.Admin ){ login_needed(g.anon.Admin); return; }
  style_header("Artifact Statistics");
  style_submenu_element("Repository Stats", "stat");
  style_submenu_element("Artifact List", "bloblist");
  gather_artifact_stats(1);

  db_prepare(&q,
    "SELECT atype, count(*), sum(isDelta), sum(szCmpr), sum(szExp)"
    "  FROM artstat GROUP BY 1"
    " UNION ALL "
    "SELECT 'ALL', count(*), sum(isDelta), sum(szCmpr), sum(szExp)"
    "  FROM artstat"
    " ORDER BY 4;"
................................................................................
    @ <td data-sortkey='%016x(szExp)' align='right'>%s(z)</td>
    sqlite3_free(z);
  }
  @ </tbody></table>
  db_finalize(&q);

  if( db_exists("SELECT 1 FROM artstat WHERE atype='unknown'") ){
    @ <h2>Unknown Artifacts</h2>
    db_prepare(&q,
      "SELECT artstat.id, blob.uuid, user.login,"
      "       datetime(rcvfrom.mtime), rcvfrom.rcvid"
      "  FROM artstat JOIN blob ON artstat.id=blob.rid"
      "       LEFT JOIN rcvfrom USING(rcvid)"
      "       LEFT JOIN user USING(uid)"
      " WHERE atype='unknown'"







|







 







>
>







 







|

|

|

|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>


>

>
>
>
>
>
>
|
>
>
>
|
>
>
|
<
>

|
|


|
|
|
|
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
|
<
|
|
<
|
|
<
|
<
<
>
|
<
<
<
<
<
>
|
<
|
<
<
>
|
<
<
>
|
|
|
<
<
<







 







<


<




<
<
<
>
|
|
<
<
<
<
|
|
<
<
<
|
|
<
<
<
|
|
<
<
<
|
|
>
>
|
|
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
<
>







 







|







93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
...
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
...
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684

685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724

725
726

727
728

729


730
731





732
733

734


735
736


737
738
739
740



741
742
743
744
745
746
747
...
749
750
751
752
753
754
755

756
757

758
759
760
761



762
763
764




765
766



767
768



769
770



771
772
773
774
775
776
















777
778
779
780
781
782
783
784
...
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
  @ %s(zBuf)
  @ </td></tr>
  if( !brief ){
    @ <tr><th>Number&nbsp;Of&nbsp;Artifacts:</th><td>
    n = db_int(0, "SELECT count(*) FROM blob");
    m = db_int(0, "SELECT count(*) FROM delta");
    @ %d(n) (%d(n-m) fulltext and %d(m) deltas)
    if( g.perm.Write ){
      @ <a href='%R/artifact_stats'>Details</a>
    }
    @ </td></tr>
    if( n>0 ){
      int a, b;
      Stmt q;
      @ <tr><th>Uncompressed&nbsp;Artifact&nbsp;Size:</th><td>
................................................................................
    @ </svg></center>
  }
  style_footer();
}

/*
** Gather statistics on artifact types, counts, and sizes.
**
** Only populate the artstat.atype field if the bWithTypes parameter is true.
*/
static void gather_artifact_stats(int bWithTypes){
  static const char zSql[] = 
    @ CREATE TEMP TABLE artstat(
    @   id INTEGER PRIMARY KEY,   -- Corresponds to BLOB.RID
    @   atype TEXT,               -- 'data', 'manifest', 'tag', 'wiki', etc.
    @   isDelta BOOLEAN,          -- true if stored as a delta
................................................................................
  db_multi_exec("%s", zSql/*safe-for-%s*/);
  if( bWithTypes ){
    db_multi_exec("%s", zSql2/*safe-for-%s*/);
  }
}

/*
** WEBPAGE: artifact_stats
**
** Show information about the sizes of artifacts in this repository
*/
void artifact_stats_page(void){
  Stmt q;
  int nTotal = 0;            /* Total number of artifacts */
  int nDelta = 0;            /* Total number of deltas */
  int nFull = 0;             /* Total number of full-texts */
  double avgCmpr = 0.0;      /* Average size of an artifact after compression */
  double avgExp = 0.0;       /* Average size of an uncompressed artifact */
  int mxCmpr = 0;            /* Maximum compressed artifact size */
  int mxExp = 0;             /* Maximum uncompressed artifact size */
  sqlite3_int64 sumCmpr = 0; /* Total size of all compressed artifacts */
  sqlite3_int64 sumExp = 0;  /* Total size of all expanded artifacts */
  sqlite3_int64 sz1pct = 0;  /* Space used by largest 1% */
  sqlite3_int64 sz10pct = 0; /* Space used by largest 10% */
  sqlite3_int64 sz25pct = 0; /* Space used by largest 25% */
  sqlite3_int64 sz50pct = 0; /* Space used by largest 50% */
  int n50pct = 0;            /* Artifacts using the first 50% of space */
  int n;                     /* Loop counter */
  int medCmpr = 0;           /* Median compressed artifact size */
  int medExp = 0;            /* Median expanded artifact size */
  int med;
  double r;

  login_check_credentials();

  /* These stats are expensive to compute.  To disable them for
  ** user without check-in privileges, to prevent excessive usage by
  ** robots and random passers-by on the internet
  */
  if( !g.perm.Write ){
    login_needed(g.anon.Admin);
    return;
  }

  style_header("Artifact Statistics");
  style_submenu_element("Repository Stats", "stat");
  style_submenu_element("Artifact List", "bloblist");
  gather_artifact_stats(1);


  db_prepare(&q,
    "SELECT count(*), sum(isDelta), max(szCmpr),"
    "       max(szExp), sum(szCmpr), sum(szExp)"
    "  FROM artstat"
  );
  db_step(&q);
  nTotal = db_column_int(&q,0);
  nDelta = db_column_int(&q,1);
  nFull = nTotal - nDelta;
  mxCmpr = db_column_int(&q, 2);
  mxExp = db_column_int(&q, 3);
  sumCmpr = db_column_int64(&q, 4);
  sumExp = db_column_int64(&q, 5);
  db_finalize(&q);
  if( nTotal==0 ){
    @ No artifacts in this repository!
    style_footer();
    return;
  }
  avgCmpr = (double)sumCmpr/nTotal;
  avgExp = (double)sumExp/nTotal;

  db_prepare(&q, "SELECT szCmpr FROM artstat ORDER BY 1 DESC");
  r = 0;
  n = 0;
  while( db_step(&q)==SQLITE_ROW ){
    r += db_column_int(&q, 0);
    if( n50pct==0 && r>=sumCmpr/2 ) n50pct = n;
    if( n==(nTotal+99)/100 ) sz1pct = r;
    if( n==(nTotal+9)/10 ) sz10pct = r;
    if( n==(nTotal+4)/5 ) sz25pct = r;
    if( n==(nTotal+1)/2 ){ sz50pct = r; medCmpr = db_column_int(&q,0); }
    n++;
  }
  db_finalize(&q);

  @ <h1>Overall Artifact Size Statistics:</h1>
  @ <table class="label-value">
  @ <tr><th>Number of artifacts:</th><td>%d(nTotal)</td></tr>

  @ <tr><th>Number of deltas:</th>\
  @ <td>%d(nDelta) (%d(nDelta*100/nTotal)%%)</td></tr>

  @ <tr><th>Number of full-text:</th><td>%d(nFull) \
  @ (%d(nFull*100/nTotal)%%)</td></tr>

  @ <tr><th>Largest compressed artifact size:</th>\


  @ <td>%d(mxCmpr)</td></tr>
  @ <tr><th>Average compressed artifact size:</th> \





  @ <td>%.2f(avgCmpr)</td></tr>
  @ <tr><th>Median compressed artifact size:</th><td>%d(medCmpr)</td></tr>

  @ <tr><th>Largest uncompressed artifact size:</td>\


  @ <td>%d(mxExp)</td></tr>
  @ <tr><th>Average uncompressed artifact size:</th> \


  @ <td>%.2f(avgExp)</td></tr>
  medExp = db_int(0, "SELECT szExp FROM artstat ORDER BY szExp"
                     " LIMIT 1 OFFSET %d", nTotal/2);
  @ <tr><th>Median uncompressed artifact size:</th><td>%d(medExp)</td></tr>



  db_prepare(&q,
    "SELECT avg(szCmpr), max(szCmpr) FROM artstat WHERE isDelta"
  );
  if( db_step(&q)==SQLITE_ROW ){
    @ <tr><th>Largest delta:</td>\
    @ <td>%d(db_column_int(&q,1))</td></tr>

................................................................................
    @ <td>%.2f(db_column_double(&q,0))</td></tr>

    med = db_int(0, "SELECT szCmpr FROM artstat WHERE isDelta ORDER BY szCmpr"
                    " LIMIT 1 OFFSET %d", nDelta/2);
    @ <tr><th>Median delta:</th><td>%d(med)</td></tr>
  }
  db_finalize(&q);

  r = db_double(0.0, "SELECT avg(szCmpr) FROM artstat WHERE NOT isDelta;");
  @ <tr><th>Average full-text artifact:</th><td>%.2f(r)</td></tr>

  med = db_int(0, "SELECT szCmpr FROM artstat WHERE NOT isDelta ORDER BY szCmpr"
                  " LIMIT 1 OFFSET %d", nFull/2);
  @ <tr><th>Median full-text artifact:</th><td>%d(med)</td></tr>
  @ </table>




  @ <h1>Artifact size distribution facts:</h1>
  @ <ol>




  @ <li><p>The largest 1%% of artifacts (the largest %d((nTotal+99)/100) \
  @ artifacts) use %lld(sz1pct*100/sumCmpr)%% of the total artifact space.



  @ <li><p>The largest 10%% of artifacts (the largest %d((nTotal+9)/10) \
  @ artifacts) use %lld(sz10pct*100/sumCmpr)%% of the total artifact space.



  @ <li><p>The largest 25%% of artifacts (the largest %d(nTotal/4) \
  @ artifacts) use %lld(sz25pct*100/sumCmpr)%% of the total artifact space.



  @ <li><p>The largest 50%% of artifacts (the largest %d(nTotal/2) \
  @ artifacts) use %lld(sz50pct*100/sumCmpr)%% of the total artifact space.
  @ <li><p>Half of the total artifact space is used by the %d(n50pct) \
  @ (%.1f(n50pct*100.0/nTotal)%%) largest artifacts.
  @ </ol>

















  @ <h1>Artifact Sizes By Type:</h1>
  db_prepare(&q,
    "SELECT atype, count(*), sum(isDelta), sum(szCmpr), sum(szExp)"
    "  FROM artstat GROUP BY 1"
    " UNION ALL "
    "SELECT 'ALL', count(*), sum(isDelta), sum(szCmpr), sum(szExp)"
    "  FROM artstat"
    " ORDER BY 4;"
................................................................................
    @ <td data-sortkey='%016x(szExp)' align='right'>%s(z)</td>
    sqlite3_free(z);
  }
  @ </tbody></table>
  db_finalize(&q);

  if( db_exists("SELECT 1 FROM artstat WHERE atype='unknown'") ){
    @ <h1>Unknown Artifacts:</h1>
    db_prepare(&q,
      "SELECT artstat.id, blob.uuid, user.login,"
      "       datetime(rcvfrom.mtime), rcvfrom.rcvid"
      "  FROM artstat JOIN blob ON artstat.id=blob.rid"
      "       LEFT JOIN rcvfrom USING(rcvid)"
      "       LEFT JOIN user USING(uid)"
      " WHERE atype='unknown'"