#!/usr/bin/env python
# @(#) $Id: svn2cvs 55 2026-01-14 20:20:10Z leres $ (XSE)
"""svn2cvs - Create a cvs repository from a subversion repository"""

import argparse
import atexit
import calendar
import errno
import os
import re
import shutil
import stat
import subprocess
import sys
import tempfile
import time
import xml.dom.minidom

# Parsed command line options (argparse namespace); assigned in main()
OPTS = None
# Program name used to prefix messages; assigned in main()
PROG = '?'

# Marker that copysvn2cvs() puts at the start of every cvs commit message
# and that fixupcvs() later looks for (and strips) in the ,v files
MAGIC = '+++cvs2svn+++'

def cleanup():
    """Remove the temporary directory recorded in OPTS.tmp (exit handler)

    Nothing happens when OPTS.tmp is unset. When the debug level is above
    2 the directory is deliberately left behind for inspection. In both
    cases OPTS.tmp is reset to None afterwards.
    """
    tmpdir = OPTS.tmp
    if not tmpdir:
        return

    if OPTS.debug > 2:
        # Keep the directory around for post-mortem debugging
        print(f'NOT deleting {tmpdir}')
        OPTS.tmp = None
        return

    if OPTS.verbose:
        print(f'+ shutil.rmtree({tmpdir})')
    shutil.rmtree(tmpdir)
    OPTS.tmp = None

def adddirs(newdirs, dn):
    """copysvn2cvs() helper, returns a list of directory paths

    Walks upward from dn and appends every ancestor directory that is
    not already in newdirs. The list is modified in place and is also
    returned. A path without a directory component adds nothing.
    """
    parent = os.path.dirname(dn)
    while parent:
        if parent not in newdirs:
            newdirs.append(parent)
        up = os.path.dirname(parent)
        if up == parent:
            # dirname() of a root ('/') is the root itself; stop here
            # instead of looping forever on an absolute path
            break
        parent = up
    return newdirs

def copysvn2cvs(revs, svnwork, cvsrepo, cvswork):
    """Copy svn revisions to the cvs repo, return status and list of files

    For each entry of revs (in the order given) the revision is exported
    with "svn export" on top of the cvs working directory cvswork, any
    files and directories not seen before are "cvs add"ed and the result
    is committed. Each commit message starts with a MAGIC line carrying
    the revision's date and author.

    Returns (0, sorted list of added files) on success, or
    (status, None) as soon as one of the commands fails.
    """
    def echo(text):
        """Write command output to stdout with exactly one trailing newline"""
        if text:
            sys.stdout.write(f'{text.rstrip()}\n')

    seenfiles = set()                   # files already cvs add'ed
    ignorefiles = getfiles(cvswork)     # cvs files to ignore

    # Loop through svn revs
    for rev in revs:
        msg = f"{MAGIC}|{rev['date']}|{rev['author']}\n"
        if 'msg' in rev:
            msg += rev['msg']

        # svn export current revision
        ret, lines = run(['svn', 'export', '--force',
            f"-r{int(rev['revision'])}", svnwork, '.'], cvswork)
        echo(lines)
        if ret != 0:
            return ret, None

        # Get current list of cvs working files; calculate new dirs and files
        newdirs = []
        newfiles = []
        for fn in getfiles(cvswork, ignore=ignorefiles):
            if fn in seenfiles:
                continue
            # Skip cvs administrative files at any depth (including the
            # top level CVS directory) and anything itself named CVS
            if 'CVS' in fn.split('/'):
                continue
            seenfiles.add(fn)
            newfiles.append(fn)
            # Add new directories (no-op for files without a directory)
            adddirs(newdirs, fn)

        # Need to sort newdirs at least so the cvs add's play out correctly
        # (parent directories sort before their children)
        newdirs.sort()
        newfiles.sort()

        # cvs add any new directories and files
        if newfiles or newdirs:
            ret, lines = run(['cvs', '-d', cvsrepo, 'add'] + newdirs + newfiles,
                cvswork)
            echo(lines)
            if ret != 0:
                return ret, None

        # cvs commit changes
        ret, lines = run(['cvs', '-d', cvsrepo, 'commit', '-m', msg], cvswork)
        echo(lines)
        if ret != 0:
            return ret, None

    return 0, sorted(seenfiles)

def createcvsrepo(cvsrepo, empty):
    """Create a cvs repo directory; deal with possible existing directory

    cvsrepo is the path of the repository to create and empty is a
    directory from which the initial "cvs import" is run.

    If cvsrepo already exists it is removed when OPTS.force is set;
    otherwise an error is reported and 1 is returned. Returns 0 on
    success or the failing command's status. Errors other than "does not
    exist" from inspecting cvsrepo are raised to the caller.
    """
    try:
        st = os.lstat(cvsrepo)
    except FileNotFoundError:
        st = None

    if st is not None:
        if not OPTS.force:
            print(f'{PROG}: {cvsrepo} exists (use -f to remove and recreate)',
                file=sys.stderr)
            return 1
        if stat.S_ISDIR(st.st_mode):
            if OPTS.verbose:
                print(f'+ shutil.rmtree({cvsrepo})')
            shutil.rmtree(cvsrepo)
        else:
            # rmtree() refuses regular files and symlinks
            if OPTS.verbose:
                print(f'+ os.remove({cvsrepo})')
            os.remove(cvsrepo)

    # Initialize the repo
    ret, lines = run(['cvs', '-d', cvsrepo, 'init'])
    if ret != 0:
        # Ensure one trailing newline
        sys.stdout.write(f'{lines.rstrip()}\n')
        return ret

    # Do the initial import (of no files)
    ret, lines = run(['cvs', '-d', cvsrepo, 'import', '-d', '-m', 'initial',
        '.', 'root', 'initial'], empty)
    if ret != 0:
        # Ensure one trailing newline
        sys.stdout.write(f'{lines.rstrip()}\n')
        return ret

    return 0

def createcvswork(cvsrepo, cwd, dn):
    """Check module dn out of cvsrepo into cwd, return the cvs exit status

    Any output from the checkout is echoed to stdout.
    """
    checkout = ['cvs', '-d', cvsrepo, 'checkout', dn]
    status, output = run(checkout, cwd)
    if output:
        # Normalize to exactly one trailing newline
        sys.stdout.write(f'{output.rstrip()}\n')
    return status

def date2ts(sdate):
    """Convert svn date string to unix ts

    sdate is a UTC timestamp such as '2009-12-15T19:21:57.783232Z'; the
    fractional seconds and the trailing 'Z' are optional and ignored.
    Returns the integer number of seconds since the epoch. Raises
    ValueError when the string does not have the expected format.
    """
    # Drop fractional seconds and (when there is no fraction) the 'Z'
    base = sdate.partition('.')[0].rstrip('Z')
    thedate = time.strptime(base, '%Y-%m-%dT%H:%M:%S')

    # timegm() is the UTC counterpart of mktime(); using it avoids
    # temporarily rewriting the process-wide TZ environment variable
    return calendar.timegm(thedate)

def fixupcvs(fn):
    """Rewrite a ,v file to reflect the correct dates and authors

    Pass one copies fn to a temporary file (fn + '-') while collecting,
    per revision, the timestamp and author from the MAGIC line at the
    start of each log message; that line is removed from the copy.
    Pass two writes the copy back to fn, replacing the date and author
    of every revision for which information was collected.

    The file is made owner writable for the rewrite and is left not
    owner writable afterwards. Returns 0 on success or the non-zero
    status from writable().
    """
    ret = writable(fn)
    if ret != 0:
        return ret

    re_rev1 = re.compile(r'\d[\d.]*\d')
    magic = '@' + MAGIC + '|'

    tmp = fn + '-'

    # The file content is not guaranteed to be valid UTF-8 and may contain
    # carriage returns: surrogateescape round-trips undecodable bytes and
    # newline='\n' disables newline translation, so everything we do not
    # deliberately rewrite is copied through unchanged
    textargs = {'encoding': 'utf-8', 'errors': 'surrogateescape',
        'newline': '\n'}

    # Pass one: extract and remove info
    revs = {}
    with open(fn, **textargs) as fin, open(tmp, 'w', **textargs) as fout:
        # Copy first part (everything up to and including the first
        # '@@' line)
        for line in fin:
            fout.write(line)
            if line == '@@\n':
                break

        # Collect revision info
        n = None
        sawlog = False
        for line in fin:
            if line[0].isdigit() and re_rev1.match(line):
                # Save revision
                n = line.rstrip()
            elif line == 'log\n':
                sawlog = True
            elif sawlog:
                # First line of the log message
                sawlog = False
                if line.startswith(magic):
                    tup = line.rstrip().split('|')
                    if len(tup) == 3 and tup[1].isdigit():
                        # Save ts and author
                        revs[n] = (int(tup[1]), tup[2])
                        # Output just an '@' with no newline so the
                        # rest of the message follows it directly
                        line = '@'
            fout.write(line)

    re_rev2 = re.compile(r'(date\s+)\d{4}\.\d{2}\.\d{2}\.\d{2}\.\d{2}\.\d{2}'
        r'(;\s+author\s)[^;]+(;.*)')

    # Pass two: update rev date and authors
    n = None
    with open(tmp, **textargs) as fin, open(fn, 'w', **textargs) as fout:
        for line in fin:
            if line[0].isdigit() and re_rev1.match(line):
                # Save revision
                n = line.rstrip()
            elif line.startswith('date') and n in revs:
                m = re_rev2.match(line)
                if m:
                    ts, author = revs[n]
                    stamp = time.strftime('%Y.%m.%d.%H.%M.%S',
                        time.gmtime(ts))
                    # The pattern does not consume the newline; add it back
                    line = (f'{m.group(1)}{stamp}{m.group(2)}{author}'
                        f'{m.group(3)}\n')
            fout.write(line)
            if line == '@@\n':
                break

        # Copy rest of file
        fout.writelines(fin)

    # Clean up
    os.remove(tmp)

    return writable(fn, False)

def fixupcvsfiles(files, cvsrepo):
    """Run fixupcvs() on the ,v file of every entry in files

    Each name in files is taken relative to cvsrepo. Stops at the first
    failure and returns its status; returns 0 when all files succeed.
    """
    for relname in files:
        status = fixupcvs(os.path.join(cvsrepo, f'{relname},v'))
        if status != 0:
            return status
    return 0

def getfiles(cvswork, ignore=None, dn=None):
    """Return sorted list of regular files found below cvswork

    Names are relative to cvswork, or prefixed with dn when given (dn is
    how the recursion carries the relative path of a subdirectory).
    Names present in ignore are left out. Entries that are neither
    regular files nor directories (e.g. symlinks) are skipped.
    """
    skip = ignore if ignore else []
    found = []
    for entry in os.listdir(cvswork):
        relname = os.path.join(dn, entry) if dn else entry
        fullname = os.path.join(cvswork, entry)
        mode = os.lstat(fullname).st_mode

        if stat.S_ISDIR(mode):
            # Descend, carrying the relative path along
            found.extend(getfiles(fullname, ignore=skip, dn=relname))
        elif stat.S_ISREG(mode) and relname not in skip:
            found.append(relname)

    return sorted(found)

def getsvnfiles(svnwork):
    """Return list of files in svn repo

    Runs "svn ls -R" on the trunk directory below svnwork. Returns
    (status, sorted list of names) on success or (status, None), after
    echoing any output, when the command fails.
    """
    ret, lines = run(['svn', 'ls', '-R', os.path.join(svnwork, 'trunk')])
    if ret != 0:
        if lines:
            sys.stdout.write(lines)
        return ret, None
    # splitlines() plus the filter avoids the empty entry that splitting
    # on '\n' would leave behind for the trailing newline
    files = sorted(fn for fn in lines.splitlines() if fn)
    return ret, files

def getsvnrevs(svnwork):
    """Return status and the svn log entries of svnwork, oldest first

    Runs "svn log --xml" and turns every <logentry> into a dict holding
    its attributes ('revision' as an int) and the text of its child
    elements ('date' converted with date2ts()). 'author' and 'msg'
    default to ''. Returns (status, None), after echoing any output,
    when the command fails.
    """
    status, output = run(['svn', 'log', '--xml', svnwork])
    if status != 0:
        if output:
            sys.stdout.write(output)
        return status, None

    doc = xml.dom.minidom.parseString(output)
    entries = []
    for entry in doc.getElementsByTagName('logentry'):
        info = {}

        # Attributes (only expecting 'revision')
        for name, value in entry.attributes.items():
            info[name] = int(value) if name == 'revision' else value

        # Child nodes (expecting 'author', 'date', 'msg'); the value is
        # taken from the nodes nested inside each child
        for child in entry.childNodes:
            key = child.nodeName
            for inner in child.childNodes:
                value = inner.nodeValue
                info[key] = date2ts(value) if key == 'date' else value

        # Make sure we have these
        info.setdefault('author', '')
        info.setdefault('msg', '')
        entries.append(info)

    # Want low revisions first
    return status, list(reversed(entries))

def mymakedirs(dn):
    """Create directory dn including missing parents, always returns 0

    The operation is echoed first when OPTS.verbose is set. Errors from
    os.makedirs() (e.g. dn already exists) are raised to the caller.
    """
    if OPTS.verbose:
        sys.stdout.write(f'+ os.makedirs({dn})\n')
    os.makedirs(dn)
    return 0

def run(cmd, cwd=None):
    """Run a command, return the status and its output as text

    cmd is the argument list and cwd an optional directory to run it in.
    The command is echoed first when OPTS.verbose is set. Anything the
    command writes to stderr is echoed to our stderr.

    Returns (exit status, stdout as str). When the command (or cwd)
    does not exist, returns 1 and an error message in place of the
    output. Other OSErrors are raised to the caller.
    """
    if OPTS.verbose:
        scmd = ' '.join(cmd)
        if cwd:
            scmd = f'(cd {cwd} && {scmd})'
        print('+', scmd)

    try:
        # Decode the output here so callers always get str (they format,
        # split and write it as text); undecodable bytes are replaced
        # rather than aborting the conversion
        proc = subprocess.run(cmd, stdout=subprocess.PIPE,
            stderr=subprocess.PIPE, cwd=cwd, encoding='utf-8',
            errors='replace', check=False)
    except FileNotFoundError as e:
        return 1, f'{PROG}: {cmd[0]}: {e.strerror}\n'

    # Just echo stderr
    if proc.stderr:
        print(proc.stderr.rstrip(), file=sys.stderr)
    return proc.returncode, proc.stdout

def writable(fn, rw=True):
    """Set (rw true) or clear (rw false) the owner write bit of fn

    All other mode bits are kept. Always returns 0; errors from the
    underlying os calls are raised to the caller.
    """
    current = os.lstat(fn).st_mode
    wanted = current | stat.S_IWUSR if rw else current & ~stat.S_IWUSR
    os.chmod(fn, wanted)
    return 0

def main(argv=None):
    """Parse options, run program

    argv is the full argument vector including the program name; it
    defaults to sys.argv. Sets the module globals OPTS and PROG.
    Returns the process exit status: 0 on success, otherwise the status
    of the step that failed (1 when interrupted from the keyboard).
    """
    global OPTS
    global PROG

    if not argv:
        argv = sys.argv

    description = """\
Create a new cvs repository from an existing subversion working copy.
SVNWORK is the path to svn working copy. The CVSREPO must not exist
unless the -f flag is used in which case it is clobbered before the
conversion starts. By default the contents of CVSROOT is removed
so that use by cvs2svn won't include it. The -k flag prevents this.
    """

    PROG = os.path.basename(argv[0])
    usage = '%(prog)s [-dfkv] [-T TMPDIR] [--debugger] SVNWORK CVSREPO'
    #version = '$Revision: 55 $'.strip('$').rstrip()

    parser = argparse.ArgumentParser(prog=PROG, usage=usage,
        description=description)
    #parser.add_argument('--version', action='version', version=version)
    parser.add_argument('-d', dest='debug', action='count', default=0,
        help='increase debugging output')
    parser.add_argument('-v', dest='verbose', action='count', default=0,
        help='increase verbosity')
    parser.add_argument('-f', dest='force', action='store_true',
        help='clobber generated cvs repository')
    parser.add_argument('-k', dest='keep', action='store_true',
        help='keep contents of CVSROOT')
    # gettempdir() always yields a real path; tempfile.tempdir is None
    # until something has called gettempdir()
    parser.add_argument('-T', dest='tmpdir',
        default=tempfile.gettempdir(),
        help='base temporary directory (default: "%(default)s")')

    # Hidden argument used to store our temporary subdirectory
    parser.add_argument('-', dest='tmp', help=argparse.SUPPRESS)

    parser.add_argument('--debugger', action='store_true',
        help='interactive debugging (pdb)')

    parser.add_argument('svnwork', metavar='SVNWORK',
        help='url to subversion repository (file:// only)')
    parser.add_argument('cvsrepo', metavar='CVSREPO',
        help='new cvs repository')

    # Parse the vector we were handed, not unconditionally sys.argv
    OPTS = parser.parse_args(argv[1:])

    # argparse debugging
    if OPTS.debug > 1:
        for key in dir(OPTS):
            if not key.startswith('_'):
                print(f'# {key}={getattr(OPTS, key)}', file=sys.stderr)

    # Interactive debugging
    if OPTS.debugger:
        # pylint: disable=import-outside-toplevel
        import pdb
        # pylint: enable=import-outside-toplevel
        # pylint: disable=forgotten-debug-statement
        pdb.set_trace()
        # pylint: enable=forgotten-debug-statement

    # Register an exit handler; it removes OPTS.tmp once that is set
    OPTS.tmp = None
    atexit.register(cleanup)

    # Create the temporary directory. mkdtemp() picks the name and
    # creates the directory in one step, unlike mktemp() + makedirs()
    if OPTS.tmpdir:
        os.makedirs(OPTS.tmpdir, exist_ok=True)
    OPTS.tmp = tempfile.mkdtemp(prefix=f'{PROG}-', dir=OPTS.tmpdir)
    if OPTS.verbose:
        print(f'+ tempfile.mkdtemp() -> {OPTS.tmp}')

    # Create cvs repo
    ret = createcvsrepo(OPTS.cvsrepo, OPTS.tmp)
    if ret != 0:
        return ret

    try:
        # Create cvs working directory
        ret = createcvswork(OPTS.cvsrepo, OPTS.tmp, '.')
        if ret != 0:
            return ret

        # Get list of svn revisions, dates and comments
        ret, revs = getsvnrevs(OPTS.svnwork)
        if ret != 0:
            return ret

        # Copy svn revisions to cvs repo
        ret, files = copysvn2cvs(revs, OPTS.svnwork, OPTS.cvsrepo, OPTS.tmp)
        if ret != 0:
            return ret

        # Fix up dates and authors in ,v files
        ret = fixupcvsfiles(files, OPTS.cvsrepo)
        if ret != 0:
            return ret

        # Empty CVSROOT directory or else cvs2svn will include it
        if not OPTS.keep:
            dn = os.path.join(OPTS.cvsrepo, 'CVSROOT')
            if OPTS.verbose:
                print(f'+ shutil.rmtree({dn})')
            shutil.rmtree(dn)
            if OPTS.verbose:
                print(f'+ os.mkdir({dn})')
            os.mkdir(dn)
    except KeyboardInterrupt:
        return 1

    print(f'{PROG}: successfully created {OPTS.cvsrepo}')
    return 0

# When run as a script, exit with the status returned by main()
if __name__ == "__main__":
    sys.exit(main())
