Fossil

Check-in [c2ad73ed]
Login

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:Added high-level logging for memory tracing to the code breaking the preliminary changesets. First runs indicate that the DEPC array becomes so very large, caused by a high amount of indirect dependencies (several hundred).
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:c2ad73ed9243ac9497046fa8eae19f904b56bc77
User & Date: aku 2008-02-21 05:13:14
Context
2008-02-23
06:40
Merged bugfix [b3d61d7829] into the main branch for optimization of memory usage. check-in: efec424a user: aku tags: trunk
2008-02-21
05:13
Added high-level logging for memory tracing to the code breaking the preliminary changesets. First runs indicate that the DEPC array becomes so very large, caused by a high amount of indirect dependencies (several hundred). check-in: c2ad73ed user: aku tags: trunk
2008-02-17
02:06
Reworked the basic structure of pass InitCSets to keep memory consumption down. Now incremental creates, breaks, saves, and releases changesets, instead of piling them on before saving all at the end. Memory tracking confirms that this changes the accumulating mountain into a near-constant usage, with the expected spikes from the breaking. check-in: f46458d5 user: aku tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to tools/cvs2fossil/lib/c2f_prev.tcl.

201
202
203
204
205
206
207


208




209
210
211
212
213
214
215
...
797
798
799
800
801
802
803



804
805
806
807
808
809
810
811
812
813
814


815
816
817
818
819
820
821
822
823
824
825
826
827


828
829
830
831
832
833
834
...
846
847
848
849
850
851
852



853


854
855
856
857
858
859
860
...
867
868
869
870
871
872
873


874
875
876
877
878
879
880
....
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
....
1142
1143
1144
1145
1146
1147
1148


1149
1150
1151
1152
1153
1154
1155
1156
1157

1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169


1170
1171



1172
1173
1174
1175
1176
1177
1178
1179
	# after, after upding the crossing information.

	# Data structures:
	# Map:  POS   revision id      -> position in list.
	#       CROSS position in list -> number of dependencies crossing it
	#       DEPC  dependency       -> positions it crosses
	# List: RANGE Of the positions itself.


	# A dependency is a single-element map parent -> child





	InitializeBreakState $myitems

	set fragments {}
	set new       [list $range]
	array set breaks {}

................................................................................
    proc InitializeBreakState {revisions} {
	upvar 1 pos pos cross cross range range depc depc delta delta \
	    dependencies dependencies

	# First we create a map of positions to make it easier to
	# determine whether a dependency crosses a particular index.




	array set pos   {}
	array set cross {}
	array set depc  {}
	set range       {}
	set n 0
	foreach rev $revisions {
	    lappend range $n
	    set pos($rev) $n
	    set cross($n) 0
	    incr n
	}



	# Secondly we count the crossings per position, by iterating
	# over the recorded internal dependencies.

	# Note: If the timestamps are badly out of order it is
	#       possible to have a backward successor dependency,
	#       i.e. with start > end. We may have to swap the indices
	#       to ensure that the following loop runs correctly.
	#
	# Note 2: start == end is not possible. It indicates a
	#         self-dependency due to the uniqueness of positions,
	#         and that is something we have ruled out already, see
	#         'rev internalsuccessors'.



	foreach {rid children} [array get dependencies] {
	    foreach child $children {
		set dkey    [list $rid $child]
		set start   $pos($rid)
		set end     $pos($child)
		set crosses {}
................................................................................
			incr start
		    }
		}
		set depc($dkey) $crosses
	    }
	}




	InitializeDeltas $revisions


	return
    }

    proc InitializeDeltas {revisions} {
	upvar 1 delta delta

	# Pull the timestamps for all revisions in the changesets and
................................................................................
	foreach {rid time} [state run [subst -nocommands -nobackslashes {
	    SELECT R.rid, R.date
	    FROM revision R
	    WHERE R.rid IN $theset
	}]] {
	    set stamp($rid) $time
	}



	set n 0
	foreach rid [lrange $revisions 0 end-1] rnext [lrange $revisions 1 end] {
	    set delta($n) [expr {$stamp($rnext) - $stamp($rid)}]
	    incr n
	}
	return
................................................................................
    }

    # var(dv) = dict (revision -> list (revision))
    typemethod internalsuccessors {dv revisions} {
	upvar 1 $dv dependencies
	set theset ('[join $revisions {','}]')

	log write 14 cset internalsuccessors

	# See 'successors' below for the main explanation of
	# the various cases. This piece is special in that it
	# restricts the successors we look for to the same set of
	# revisions we start from. Sensible as we are looking for
	# changeset internal dependencies.

................................................................................
	# will greatly reduces the risk of getting far separated
	# revisions of the same file into one changeset.

	# We allow revisions to be far apart in time in the same
	# changeset, but in turn need the pseudo-dependencies to
	# handle this.



	log write 14 cset pseudo-internalsuccessors

	array set fids {}
	foreach {rid fid} [state run [subst -nocommands -nobackslashes {
	    SELECT R.rid, R.fid
            FROM   revision R
            WHERE  R.rid IN $theset
	}]] { lappend fids($fid) $rid }


	foreach {fid rids} [array get fids] {
	    if {[llength $rids] < 2} continue
	    foreach a $rids {
		foreach b $rids {
		    if {$a == $b} continue
		    if {[info exists dep($a,$b)]} continue
		    if {[info exists dep($b,$a)]} continue
		    lappend dependencies($a) $b
		    set dep($a,$b) .
		    set dep($b,$a) .
		}
	    }


	}




	log write 14 cset complete
	return
    }

    # result = 4-list (itemtype itemid nextitemtype nextitemid ...)
    typemethod loops {revisions} {
	# Note: Tags and branches cannot cause the loop. Their id's,
	# being of a fundamentally different type than the revisions







>
>

>
>
>
>







 







>
>
>











>
>













>
>







 







>
>
>

>
>







 







>
>







 







|







 







>
>
|








>












>
>


>
>
>
|







201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
...
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
...
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
...
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
....
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
....
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
	# after, after upding the crossing information.

	# Data structures:
	# Map:  POS   revision id      -> position in list.
	#       CROSS position in list -> number of dependencies crossing it
	#       DEPC  dependency       -> positions it crosses
	# List: RANGE Of the positions itself.
	# Map:  DELTA position in list -> time delta between its revision
	#                                 and the next, if any.
	# A dependency is a single-element map parent -> child

	# InitializeBreakState initializes their contents after
	# upvar'ing them from this scope. It uses the information in
	# DEPENDENCIES to do so.

	InitializeBreakState $myitems

	set fragments {}
	set new       [list $range]
	array set breaks {}

................................................................................
    proc InitializeBreakState {revisions} {
	upvar 1 pos pos cross cross range range depc depc delta delta \
	    dependencies dependencies

	# First we create a map of positions to make it easier to
	# determine whether a dependency crosses a particular index.

	log write 14 csets {IBS: #rev [llength $revisions]}
	log write 14 csets {IBS: pos map, cross counter}

	array set pos   {}
	array set cross {}
	array set depc  {}
	set range       {}
	set n 0
	foreach rev $revisions {
	    lappend range $n
	    set pos($rev) $n
	    set cross($n) 0
	    incr n
	}

	log write 14 csets {IBS: pos/[array size pos], cross/[array size cross]}

	# Secondly we count the crossings per position, by iterating
	# over the recorded internal dependencies.

	# Note: If the timestamps are badly out of order it is
	#       possible to have a backward successor dependency,
	#       i.e. with start > end. We may have to swap the indices
	#       to ensure that the following loop runs correctly.
	#
	# Note 2: start == end is not possible. It indicates a
	#         self-dependency due to the uniqueness of positions,
	#         and that is something we have ruled out already, see
	#         'rev internalsuccessors'.

	log write 14 csets {IBS: cross counter filling, pos/cross map}

	foreach {rid children} [array get dependencies] {
	    foreach child $children {
		set dkey    [list $rid $child]
		set start   $pos($rid)
		set end     $pos($child)
		set crosses {}
................................................................................
			incr start
		    }
		}
		set depc($dkey) $crosses
	    }
	}

	log write 14 csets {IBS: pos/[array size pos], cross/[array size cross], depc/[array size depc] (for [llength $revisions])}
	log write 14 csets {IBS: timestamps, deltas}

	InitializeDeltas $revisions

	log write 14 csets {IBS: delta [array size delta]}
	return
    }

    proc InitializeDeltas {revisions} {
	upvar 1 delta delta

	# Pull the timestamps for all revisions in the changesets and
................................................................................
	foreach {rid time} [state run [subst -nocommands -nobackslashes {
	    SELECT R.rid, R.date
	    FROM revision R
	    WHERE R.rid IN $theset
	}]] {
	    set stamp($rid) $time
	}

	log write 14 csets {IBS: stamp [array size stamp]}

	set n 0
	foreach rid [lrange $revisions 0 end-1] rnext [lrange $revisions 1 end] {
	    set delta($n) [expr {$stamp($rnext) - $stamp($rid)}]
	    incr n
	}
	return
................................................................................
    }

    # var(dv) = dict (revision -> list (revision))
    typemethod internalsuccessors {dv revisions} {
	upvar 1 $dv dependencies
	set theset ('[join $revisions {','}]')

	log write 14 csets internalsuccessors

	# See 'successors' below for the main explanation of
	# the various cases. This piece is special in that it
	# restricts the successors we look for to the same set of
	# revisions we start from. Sensible as we are looking for
	# changeset internal dependencies.

................................................................................
	# will greatly reduces the risk of getting far separated
	# revisions of the same file into one changeset.

	# We allow revisions to be far apart in time in the same
	# changeset, but in turn need the pseudo-dependencies to
	# handle this.

	log write 14 csets {internal  [array size dep]}
	log write 14 csets {collected [array size dependencies]}
	log write 14 csets pseudo-internalsuccessors

	array set fids {}
	foreach {rid fid} [state run [subst -nocommands -nobackslashes {
	    SELECT R.rid, R.fid
            FROM   revision R
            WHERE  R.rid IN $theset
	}]] { lappend fids($fid) $rid }

	set groups {}
	foreach {fid rids} [array get fids] {
	    if {[llength $rids] < 2} continue
	    foreach a $rids {
		foreach b $rids {
		    if {$a == $b} continue
		    if {[info exists dep($a,$b)]} continue
		    if {[info exists dep($b,$a)]} continue
		    lappend dependencies($a) $b
		    set dep($a,$b) .
		    set dep($b,$a) .
		}
	    }
	    set n [llength $rids]
	    lappend groups [list $n [expr {($n*$n-$n)/2}]]
	}

	log write 14 csets {pseudo    [array size fids] ([lsort -index 0 -decreasing -integer $groups])}
	log write 14 csets {internal  [array size dep]}
	log write 14 csets {collected [array size dependencies]}
	log write 14 csets complete
	return
    }

    # result = 4-list (itemtype itemid nextitemtype nextitemid ...)
    typemethod loops {revisions} {
	# Note: Tags and branches cannot cause the loop. Their id's,
	# being of a fundamentally different type than the revisions