#!/usr/bin/python

import re
import urllib2

# Modeller release whose online manual is scanned; the same string is
# emitted on the "# Version:" line of the generated inventory.
modeller_version = '9v6'
# Base URL of that release's HTML manual; the nodeN.html pages are fetched
# from directly beneath it.
urltop = 'http://salilab.org/modeller/%s/manual' % (modeller_version,)

def get_node_range():
    """Return ``range()`` bounds covering the manual's numbered pages.

    Fetches ``node1.html`` under ``urltop`` and collects the number of every
    ``nodeN.html`` page that it links to.

    Returns:
        A ``(start, stop)`` tuple suitable for ``range(*bounds)``: ``start``
        is always 2 and ``stop`` is one past the highest linked node number.

    Raises:
        ValueError: if the fetched page contains no ``nodeN.html`` links.

    Errors raised by ``urllib2.urlopen`` propagate unchanged.
    """
    linkre = re.compile(r'HREF="node(\d+)\.html')
    index_url = '%s/node1.html' % urltop
    response = urllib2.urlopen(index_url)
    try:
        page = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()
    nodes = linkre.findall(page)
    if not nodes:
        # max() of an empty sequence would raise an opaque ValueError;
        # keep the exception type but say what actually went wrong.
        raise ValueError('no node links found in %s' % index_url)
    return (2, max(int(x) for x in nodes) + 1)

def get_link_targets():
    """Scan every numbered manual page for class and method anchors.

    Each page in the range reported by ``get_node_range()`` is downloaded
    and searched for ``CMD:`` anchors that are followed by a ``<TT>name(``
    heading.

    Returns:
        A ``(meths, classes)`` tuple of dicts:

        * ``meths`` maps ``(class_name, method_name)`` to the ``nodeN.html``
          page on which the anchor was found. ``class_name`` comes from the
          part of the anchor before the dot, ``method_name`` from the
          ``<TT>`` heading.
        * ``classes`` maps the ``<TT>`` heading name of a dot-less ``CMD:``
          anchor to the ``nodeN.html`` page on which it was found.

        If a name occurs on several pages, the last page scanned wins.

    Errors raised by ``urllib2.urlopen`` or ``get_node_range()`` propagate
    unchanged.
    """
    meths = {}
    classes = {}
    methre = re.compile(r'<A NAME="CMD:(\w+)\.\w+"><\/A><A NAME="\d+"><\/A><TT>(\w+)\(')
    classre = re.compile(r'<A NAME="CMD:[^.]+"><\/A><A NAME="\d+"><\/A><TT>(\w+)\(')
    for node in range(*get_node_range()):
        base = 'node%d.html' % node
        response = urllib2.urlopen('%s/%s' % (urltop, base))
        try:
            page = response.read()
        finally:
            # Close each response promptly; many pages are fetched in a row.
            response.close()
        for cls in classre.findall(page):
            classes[cls] = base
        for (cls, meth) in methre.findall(page):
            # The manual's anchors spell this class without the underscore;
            # presumably 'sequence_db' is the real class name (TODO confirm).
            if cls == 'sequencedb':
                cls = 'sequence_db'
            meths[(cls, meth)] = base
    return meths, classes

def main():
    """Print a Sphinx version-1 inventory of Modeller classes and methods.

    Writes three header lines (inventory version, project name and
    ``modeller_version``) to standard output, followed by one
    ``<name> class <page>`` line per class and one
    ``<class>.<method> method <page>`` line per method, as collected by
    ``get_link_targets()``.
    """
    meths, classes = get_link_targets()
    # Single-argument print(...) emits the same text whether it is parsed
    # as a Python 2 print statement or as a function call.
    print("# Sphinx inventory version 1")
    print("# Project: modeller")
    print("# Version: %s" % modeller_version)
    # Sort the entries so repeated runs produce identical, diff-friendly
    # output instead of following arbitrary dict iteration order.
    for name, page in sorted(classes.items()):
        print("%s class %s" % (name, page))
    for (cls, meth), page in sorted(meths.items()):
        print("%s.%s method %s" % (cls, meth, page))

# Generate the inventory only when run as a script, not when imported.
if __name__ == '__main__':
    main()
