#!/usr/bin/python
#
# web2png -- convert a web hierarchy from using GIFs to using PNGs
#
# This script is a front end for gif2png that assists you in converting an
# entire website.  Requires gif2png 1.1.0 or later.
#
# by Eric S. Raymond <esr@thyrsus.com>
# Version 0.99 -- 6 October 1999

# Future library code -- I'm going to submit this for Python 1.6

import os, sys
from stat import *

FTW_FILE    = 0     # Item is a normal file
FTW_DIR     = 1     # Item is a directory
FTW_CHRDEV  = 2     # Item is a character special device file
FTW_BLKDEV  = 3     # Item is a block special device file
FTW_FIFO    = 4     # Item is a FIFO
FTW_SYMLINK = 5     # Item is a symbolic link
FTW_SOCKET  = 6     # Item is a socket
FTW_UNKNOWN = 7     # Item is of a type none of the tests above recognise
FTW_NOSTAT  = -1    # stat failed on item
FTW_DNR     = -2    # item was an unreadable directory

class FTWException(Exception):
    # Raised by ftw_inner() to abandon a walk.  The negative value returned
    # by the callback travels in the `status` attribute so that ftw() can
    # hand it back to its caller.
    def __init__(self, status):
        Exception.__init__(self, status)
        self.status = status

def ftw_inner(func, path=".", extra=()):
    # Visit path and, if it is a directory, everything beneath it.
    #
    # func is called as func(path, stat_result, ftype, *extra), where ftype
    # is one of the non-negative FTW_* codes.  Its return value steers the
    # walk:
    #     negative   -- abandon the whole walk (raises FTWException)
    #     other true -- do not descend into this directory
    #     0 or None  -- carry on
    #
    # Returns FTW_NOSTAT if path cannot be stat'ed, FTW_DNR if it is a
    # directory that cannot be listed (func is not called in either case),
    # and None otherwise.  Results of the recursive calls are not propagated.
    #
    # NOTE: os.stat() follows symbolic links, so a link is classified by
    # whatever it points to.
    try:
        st = os.stat(path)
    except OSError:
        return FTW_NOSTAT
    mode = st[ST_MODE]

    is_dir = S_ISDIR(mode)
    if is_dir:
        # List the directory before calling func, so that an unreadable
        # directory is reported as FTW_DNR without func ever seeing it.
        try:
            entries = os.listdir(path)
        except OSError:
            return FTW_DNR

    if S_ISREG(mode):    ftype = FTW_FILE
    elif is_dir:         ftype = FTW_DIR
    elif S_ISBLK(mode):  ftype = FTW_BLKDEV
    elif S_ISCHR(mode):  ftype = FTW_CHRDEV
    elif S_ISFIFO(mode): ftype = FTW_FIFO
    elif S_ISLNK(mode):  ftype = FTW_SYMLINK
    elif S_ISSOCK(mode): ftype = FTW_SOCKET
    else:                ftype = FTW_UNKNOWN

    status = func(path, st, ftype, *extra)
    # A callback that simply falls off its end returns None.  Python 2
    # orders None below every number and Python 3 refuses the comparison,
    # so None has to be screened out before testing for "negative".
    if status is not None and status < 0:
        raise FTWException(status)

    if is_dir and not status:
        for name in entries:
            ftw_inner(func, os.path.join(path, name), extra)

def ftw(func, path=".", *extra):
    # Python tree-walker -- like ftw(3), but more capable
    #
    # Calls func(path, stat_result, ftype, *extra) for path and everything
    # below it (see ftw_inner for how func's return value steers the walk).
    # Returns the negative value with which func aborted the walk, or 0 if
    # the walk ran to completion.
    try:
        ftw_inner(func, path, extra)
    except FTWException:
        # sys.exc_info() rather than "except ..., val" / "except ... as val":
        # it is the one spelling every Python version accepts.
        return sys.exc_info()[1].status
    return 0

def find(dir=".", regexp=None):
    # Return a list of (path, ftype) tuples for the entries at and beneath
    # dir.  If regexp (a compiled pattern) is given, only entries whose path
    # it matches anywhere (regexp.search) are kept; otherwise all are.
    def findhook(path, st, ftype, r, fl):
        # r and fl arrive through ftw's extra arguments rather than being
        # picked up from the enclosing scope.
        if not r or r.search(path):
            fl.append((path, ftype))
        # Explicit "keep walking".  Falling off the end would return None,
        # which the walker's "terminate < 0" test treats as negative under
        # Python 2 ordering (and cannot compare at all under Python 3),
        # stopping the walk at the very first entry.
        return 0
    flist = []
    ftw(findhook, dir, regexp, flist)
    return flist

def findfiles(dir=".", regexp=None):
    # Return a list of the normalized paths of the regular files beneath
    # dir, restricted to those matching regexp when one is given.
    #
    # The search starts at dir itself; it used to start at "." no matter
    # what was passed in.  A list comprehension is used so that callers,
    # which take len() of the result, always get a real list.
    return [os.path.normpath(path)
            for (path, ftype) in find(dir, regexp)
            if ftype == FTW_FILE]

#def ftwtest(path, status, ftype):
#    print "%s: path type is %d" % (path, ftype)
#    return 0

# End future library code

import re, getopt, commands, string, shutil

# Report-only flag: when true, web2png() describes what it would do and
# returns before converting anything.  The -n switch sets it to 1.
nochange = None
# File-name patterns handed to findfiles(); each is anchored at the end of
# the name and is case-insensitive.
gifre = re.compile(r"\.gif$", re.IGNORECASE)
pngre = re.compile(r"\.png$", re.IGNORECASE)
# Pages to scan: names ending in .html, .shtml or .php
htmlre = re.compile(r"\.s?html$|\.php$", re.IGNORECASE)
# An IMG tag whose only attribute is SRC (optionally double-quoted) and whose
# value ends in .gif; group 1 is that value.  A tag carrying any further
# attribute does not match.
imgre = re.compile(r'<IMG SRC="?([^">]*\.gif)"?>', re.IGNORECASE)
# Same shape as imgre, but for a BASE HREF tag.
# NOTE(review): this only matches a BASE whose HREF itself ends in .gif,
# which looks like a leftover from copying imgre -- confirm the intent.
basere = re.compile(r'<BASE HREF="?([^">]*\.gif)"?>', re.IGNORECASE)
# Backup copies of pages, which web2png() writes as <page>.bak
bakre = re.compile(r"\.bak$", re.IGNORECASE)

def version_controlled(page):
    # Is given page under version control?
    #
    # True if an RCS master file for page exists, either beside it
    # (<dir>/<name>,v) or in an RCS subdirectory of the page's own
    # directory (<dir>/RCS/<name>,v).  The RCS directory has to be looked
    # for next to the page, not under the current directory, or pages in
    # subdirectories are never recognised.
    (folder, name) = os.path.split(page)
    master = name + ",v"
    return os.path.exists(page + ",v") \
           or os.path.exists(os.path.join(folder, "RCS", master))

def _slurp(path):
    # Return the whole contents of the file at path.
    fp = open(path, "r")
    try:
        return fp.read()
    finally:
        fp.close()

def _png_name(gif):
    # Path at which the PNG counterpart of gif is looked for: a trailing
    # (lower-case) ".gif" is replaced by ".png".
    return re.sub(r"\.gif$", r".png", gif)

def _probe_gifs(gifs):
    # Run "gif2png -w" once over gifs and return (eligible, complaints):
    # its standard output split on whitespace, and its standard error text.
    # The names are passed as an argument vector, so no shell ever gets to
    # interpret them.
    import subprocess
    if not gifs:
        # Nothing to ask about; do not launch gif2png with no file names.
        return ([], "")
    try:
        probe = subprocess.Popen(["gif2png", "-w"] + list(gifs),
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 universal_newlines=True)
    except OSError:
        sys.stderr.write("web2png: cannot run gif2png: %s\n"
                         % (sys.exc_info()[1],))
        sys.exit(1)
    (listing, complaints) = probe.communicate()
    # NOTE(review): splitting on whitespace mangles names that contain
    # blanks; switch to splitlines() once it is confirmed that gif2png -w
    # prints one name per line.
    return (listing.split(), complaints.rstrip("\n"))

def web2png(directory):
    # Convert a web hierarchy rooted on the given directory
    #
    # Reports which GIFs can and cannot be converted and which pages refer
    # to them.  Unless the module-level nochange flag is set it then runs
    # gif2png over the convertible GIFs and rewrites the matching .gif
    # references in the pages to .png, after checking each page out of RCS
    # or saving a copy of it as <page>.bak.
    import subprocess

    gifs = findfiles(directory, gifre)
    htmls = findfiles(directory, htmlre)

    # Every GIF name goes to gif2png on a single command line, and there is
    # a limit on how many arguments a command can take, so refuse batches
    # larger than this.
    if len(gifs) > 5120:
        sys.stderr.write(
            "web2png: Too many GIFs.  Try converting some subtrees first.\n")
        sys.exit(1)

    print("This web subtree has %d GIFs and %d pages."
          % (len(gifs), len(htmls)))

    (giflist, rejects) = _probe_gifs(gifs)

    # Display information on files we won't convert
    rejects = rejects.replace("gif2png: ", "\t")
    if rejects:
        print("The following GIFs will not be converted:")
        print(rejects)

    # Figure out which files are eligible for conversion
    eligible = set(giflist)
    convert_gifs = []
    for gif in giflist:
        if os.path.exists(_png_name(gif)):
            print("\t%s already has a PNG equivalent" % (gif,))
        else:
            convert_gifs.append(gif)

    # Display information on files we will convert
    if convert_gifs:
        print("The following GIFs will be converted:\n\t"
              + "\n\t".join(convert_gifs))
    else:
        print("All eligible GIFs seem to have been converted already.")

    # Create a dictionary mapping pages to sets of references to be mapped
    print("Checking for HTML and PHP pages that need conversion...")
    html_conversions = {}
    for page in htmls:
        contents = _slurp(page)
        base = basere.search(contents)
        if base:
            basedir = base.group(1)
        else:
            # A relative reference is relative to the page that contains
            # it, not to the directory the script was started in.
            basedir = os.path.dirname(page)
        convert_refs = []
        for ref in imgre.findall(contents):
            target = os.path.normpath(os.path.join(basedir, ref))
            if target in eligible:
                # Keep the reference without its 4-character ".gif" suffix
                convert_refs.append((ref[:-4], target))
        if convert_refs:
            print("\tIn %s, I see: %s"
                  % (page,
                     " ".join([stem + ".gif" for (stem, _) in convert_refs])))
            html_conversions[page] = convert_refs
    print("%d HTML or PHP page(s) need conversion." % (len(html_conversions),))

    # Unless user is willing to make changes, we're done now
    if nochange:
        return

    # Convert gifs verbosely
    if convert_gifs:
        print("GIF conversions begin:")
        # Push our own output out first so that it stays ahead of what
        # gif2png writes to the same stream.
        sys.stdout.flush()
        subprocess.call(["gif2png", "-v", "-O"] + convert_gifs)

    # Now check to see which conversions did not take
    failures = [gif for gif in convert_gifs
                if not os.path.exists(_png_name(gif))]
    if failures:
        print("Some conversions failed: " + " ".join(failures))
    failed = set(failures)

    # Now hack the references in the web pages
    for page in htmls:
        if page not in html_conversions:
            continue
        print("Converting %s..." % (page,))
        if version_controlled(page):
            try:
                status = subprocess.call(["co", "-l", page])
            except OSError:
                status = -1
            if status != 0:
                # Without a lock (and with no .bak copy) the page must not
                # be overwritten.
                print("\tcould not check out %s; page left unchanged"
                      % (page,))
                continue
        else:
            shutil.copyfile(page, page + ".bak")
        contents = _slurp(page)
        for (stem, target) in html_conversions[page]:
            if target not in failed:
                # Literal replacement: the reference is text copied out of
                # the page, not a regular expression.
                contents = contents.replace(stem + ".gif", stem + ".png")
        fp = open(page, "w")
        try:
            fp.write(contents)
        finally:
            fp.close()
    print("Web page conversions complete.")

def cleanup(directory):
    # Clean up superfluous .gif and .bak files left over after a conversion
    #
    # Removes every .bak file under directory, and every .gif that sits
    # next to a .png of the same name.
    for backup in findfiles(directory, bakre):
        os.unlink(backup)
    for png in findfiles(directory, pngre):
        # Derive the GIF name from the PNG just found (the old code sliced
        # the unrelated name `file` and failed here).
        gif = png[:-4] + ".gif"
        if os.path.exists(gif):
            os.unlink(gif)

def unconvert(directory):
    # Reverse a conversion
    #
    # Deletes each .png under directory whose .gif is still present, then
    # restores each page from its .bak copy or, failing that, releases its
    # RCS lock if the page is under version control.
    import subprocess

    for png in findfiles(directory, pngre):
        # Derive the GIF name from the PNG just found (the old code sliced
        # the unrelated name `file` and failed here).
        gif = png[:-4] + ".gif"
        if os.path.exists(gif):
            os.unlink(png)
    for page in findfiles(directory, htmlre):
        backup = page + ".bak"
        if os.path.exists(backup):
            os.rename(backup, page)
        elif version_controlled(page):
            # Argument vector, not a shell string: page names are never
            # interpreted by a shell.
            subprocess.call(["rcs", "-u", page])

if __name__ == '__main__':
    # Command line: web2png [-d] [-n] [-r] [directory ...]
    #   -d  clean up: run cleanup() on each directory
    #   -n  report only: web2png() stops before changing anything
    #   -r  reverse: run unconvert() on each directory
    # With no directory arguments the current directory is processed.
    # -d takes precedence over -r.

    # nochange is the module-level flag that web2png() consults.
    delete = nochange = reverse = 0

    try:
        (options, arguments) = getopt.getopt(sys.argv[1:], "dnr")
    except getopt.error:
        sys.stderr.write("web2png: %s\n" % (sys.exc_info()[1],))
        sys.stderr.write("usage: web2png [-dnr] [directory ...]\n")
        sys.exit(2)

    if not arguments:
        arguments = ['.']

    for (switch, val) in options:
        if switch == '-d':
            delete = 1
        elif switch == '-n':
            nochange = 1
        elif switch == '-r':
            reverse = 1

    if delete:
        action = cleanup
    elif reverse:
        action = unconvert
    else:
        action = web2png

    # A plain loop: the calls are made for their side effects only.
    for directory in arguments:
        action(directory)

# The following sets edit modes for GNU EMACS
# Local Variables:
# mode:python
# End:
