Monotone fast-import source

Mike Rogers MRogers at tntech.edu
Fri Dec 19 05:42:21 GMT 2008


At the Computer Science department at Tennessee Technological University we have decided to move from Monotone to Bazaar as our supported revision control system.  We needed to import our Monotone repositories, so  I wrote my own Fast-Import source.

My question is: Is it worthwhile to submit a merge request to the Bazaar fast-import plugin?  You see, it is written in Tcl, so it may not be a desirable addition given that the plugin and the other fast-import sources are written in Python.  Also, it may be that no one would be interested given Monotone's lagging popularity.

I have included the source if anyone wants to take a look at it.   I would clean it up and add some more error checking if it is worthwhile to submit.

Thanks,

Mike Rogers
Associate Professor
Computer Science Department
Tennessee Technological University


#!/usr/bin/tclsh

######
# Note that you must have all Monotone branches merged in order for this fast-import to work.
# If a Monotone branch has parent revisions in another branch, then this script will fail.
proc processArgs {argv} {
    set idx 0    
    set args(mtnBranch) [lindex $argv $idx]
    set args(othBranch) "branch"
    incr idx
    set args(db) [lindex $argv $idx]
    if {$args(mtnBranch) == "" || $args(othBranch) == "" } { 
	puts stderr "usage: $argv0 <monotone branch> <other branch>"
	exit
    }
    return [array get args]
}

proc processRevision {rinfo parVar modVar cpVar renVar delVar} {
    # if a revision has more than one parent, it is a merge
    set all [split [string map [list "\n\n" "\uFFFF"] $rinfo] "\uFFFF"]
    upvar $parVar parents
    upvar $modVar modified
    upvar $delVar deleted
    upvar $renVar renamed
    upvar $cpVar copy
    set modified [list]
    set parents [list]
    set deleted [list]
    set renamed [list]
    set copy [list]
    set modVar
    foreach i $all {
	set key [lindex $i 0]
	set second [lindex $i 1]
	switch -exact -- $key {
	    old_revision  {
		set value [string range $second 1 end-1]
		if {$value !=""} {
		    lappend parents [string range $second 1 end-1]
		}
	    }
	    add_file -
	    patch {
		lappend modified $second
	    }
	    delete {
		lappend deleted $second 
	    }
	    rename {
		set to [lindex $i 3]
		# try as a true rename - does not work for fast-import
		#lappend renamed $second $to
		
		# try as a modify followed by a delete
		lappend modified $to
		lappend deleted $second

		# try as a copy followed by a delete - copy does not work for fast-import
		#lappend copy $second $to
		#lappend deleted $second
	    }
	}
    }
   
    return
}
    
proc getCerts {rev db certArVar} {
    upvar $certArVar certs
    array unset certs
    set all [exec mtn automate certs $rev -d $db]
    set last 0
    while {[regexp -indices -start $last -- $::certRegexp $all match name value]} {
	#puts "Found!!!"
	set last [lindex $value 1]
	set name [string range  $all [lindex $name 0] [lindex $name 1]]
	set value [string range  $all [lindex $value 0] $last]
	set certs($name) $value
	incr last
    } 
}
proc assert {ex} { if {!$ex} { error "assertion failed" } }

# the next function was taken from the Tcler's Wiki
proc RFC2822TimezoneOffset {} {
    set gmt [clock seconds]
    set local1 [clock format [clock seconds] -format "%Y%m%d %H:%M:%S"]
    set local [clock scan $local1 -gmt 1]
    set offset [expr $local - $gmt]
    if {$offset < 0} {
	return "-[clock format [expr abs($offset)] -format "%H%M" -gmt 1]"
    } else {
	return "+[clock format $offset -format "%H%M" -gmt 1]"
    }
}

proc getSortedRevisions {db} {
    set txtTree [exec mtn automate graph -d $db]
    set nextMark 1
    
    set offutc [RFC2822TimezoneOffset]
    
    set ltxtTree [lrange $txtTree 0 end]
    if {[catch {set fd [open "mtnargs[pid]" w]} res]} {
	puts stderr "Cannot open temp file mtnargs[pid]"
	exit 1
    }
    puts -nonewline $fd $ltxtTree
    close $fd
    
    # Let Monotone topologically (by ancestor) sort the branches
    set txtTree [exec mtn automate toposort --xargs mtnargs[pid] -d $db]
    catch {file delete -force mtnargs[pid]}
    return $txtTree
    
}


proc setBranchForFork {parents rev brBase branchesVar brCountVar} {
    # The next piece of code turns Monotone forks into Bazaar/git branches
    upvar $branchesVar branches
    upvar $brCountVar brCount

    set plen [llength $parents]
    if {$plen == 0} {
	# If this revision has no parents, then start with the root branch name
	set branches($rev) $brBase
    } elseif {$plen == 1} {
	# If this revision has exactly 1 branch
	set branches($rev) $branches($parents)
    } else {
	if {$rev == [lindex $parents 0]} {
	    # I am the left-most child, so I am on the same
	    #  branch as my parent.
	    set branches($rev) $branches([lindex $parents 0])
	} else {
	    # Not the left-most child, so make be part of a new
	    #  branch.
	    set branches($rev) $branches([lindex $parents 0]).$brCount
	    incr brCount
	}
    }
    
}

proc putCommit {rev branchesVar marksVar nextMarkVar} {
    upvar $nextMarkVar nextMark
    upvar $branchesVar branches
    upvar $marksVar marks

    set out ""
    if {[info exists marks($rev)]} {
	# the revision graph will have a revision in it more than once,
	#  so we skip this revision is we have already seen it.
	return 1
    } else {
	# Create a new Mark for this revision.
	set myMark $nextMark
	incr nextMark
	set marks($rev) $myMark
    }
    
    puts -nonewline "progress Processing revision $rev...\n"
    append out "commit " $branches($rev) "\n"
    append out "mark :" $myMark "\n"
    puts -nonewline $out
    return 0
}
 

proc putChangeLogMeta {certsVar} {
    upvar $certsVar certs
    set offutc [RFC2822TimezoneOffset]
    set out ""
    set clist [split $certs(author) "@"]
    set name [lindex $clist 0]
    set tm [clock scan $certs(date)]
    append out "author "  $name " <" $certs(author) "> " $tm " "  $offutc "\n"
    append out "committer "  $name " <" $certs(author) "> " $tm " "  $offutc "\n"
    append out "data " [string length $certs(changelog)] "\n"
    append out $certs(changelog)
    puts -nonewline $out
}

proc putFrom {parents marksVar} {
    upvar $marksVar marks
    if {[llength $parents] == 1} {
	puts -nonewline "from :$marks([lindex $parents 0])\n"
    }
}

proc putMerge {rev parents branchesVar marksVar } {
    upvar $branchesVar branches
    upvar $marksVar marks
    set out ""
    if {[llength $parents] > 1} {
	# add merge to out
	foreach j $parents {
	    if {$branches($j) == $branches($rev)} { continue }
	    append out "merge " ":$marks($j)" "\n" 
	}
	puts -nonewline $out
    } 
}

proc putModified {rev db modified} {
    foreach m $modified {
	set out ""
	#get file contents
	append out "M 644 inline " $m "\n"
	set fname mtnout[pid]
	exec mtn automate get_file_of $m --revision=$rev -d $db > $fname
	set size [file size $fname]
	append out "data " $size "\n"
	puts -nonewline $out
	set out ""
	
	set fd [open $fname r]
	fconfigure $fd -translation binary
	set bytes [read $fd 4096]
	#assert [expr {$size == [string length $bytes]}]
	puts -nonewline $bytes
	while {![eof $fd]} {
	    set bytes [read $fd 4096]
	    puts -nonewline $bytes
	}
	catch {close $fd}
	catch {file delete -force $fname}	
    }
}

proc putRenamed {renames} {
    set out ""
    foreach {r1 r2} $renames {
	#add rename to out
	append out "R " $r1 " " $r2 "\n" 
    }
    puts -nonewline $out
}

proc putCopied {copies} {
    set out ""
    foreach {c1 c2} $copies {
	append out "C " $c1 " " $c2 "\n"
    }
    puts -nonewline $out
}

proc putDeleted {deletes } {
    foreach d $deletes {
	#add delete to out
	append out "D " $d "\n"
    }
    puts -nonewline $out
}

proc convert {argList} {
    array set args $argList  
    
    set brCount 0
    set nextMark 1

    # get the list of revisions for this montone repo and iterate over them...
    foreach i [getSortedRevisions $args(db)] {
	
	# get the changeset info for this revision
	set revInfo [exec mtn automate get_revision $i -d $args(db)]

	# puts the changeset info into lists and arrays that we can use
	processRevision $revInfo parents modified copies renames deletes
	
	# Set the branch name.  It's here where we turn a Montone fork into a branch
	setBranchForFork $parents $i $args(othBranch) branches brCount
	
	# Get the Monotone certs for this revision.  The certs contain (among other things)
	#  the author and changelog
	getCerts $i $args(db) certs 

	assert [info exists certs(branch)]
	
	# We skip any revision that does not belong to this branch
	if {$certs(branch) != $args(mtnBranch)} {
	    # Only create output for the specific branch
	    continue
	}
	
	
	# output the "commit" message
	if {[putCommit $i branches marks nextMark]} {
	    continue
	}

	# output the "author", "committer" and changelog
	putChangeLogMeta certs
	
	# There must be certs for the author, date, and changelog
	assert [info exists certs(author)]
	assert [info exists certs(date)]
	assert [info exists certs(changelog)]
	
	
	# output the "from"
	putFrom $parents marks

	# ouput "merge", if this revision is a merge
	putMerge $i $parents branches marks

	# output all the modified files 
	putModified $i $args(db) $modified

	# output all the renamed files
	putRenamed $renames

	# output all the copied files
	putCopied $copies
	
	puts -nonewline "progress Revision $i has mark :$marks($i) (commit)\n"
	
    }
}
proc main {argv} {

    set args [processArgs $argv]

    fconfigure stdout -translation binary
    set ::certRegexp {name\s+"((?:\\"|[^"])*)"\s*value\s+"((?:\\"|[^"])*)"}
				       
    convert $args
}

main $argv



More information about the bazaar mailing list