#!/usr/bin/env python2.6
# -*- coding: utf-8 -*-
"""
bumps-json.py

Convert one or more XML results files in the format taken by the
bumps XSLT stylesheet (root element bmp:results) to JSON.

If the input contains more than one bmp:results (in one or more files)
then the result is an array of objects.  Otherwise it is a single
object representing the competition/class.

The JSON is written to standard output wrapped in a
setCompData( ... ) call.

Author: Ed Davies, 2010-05.
"""

import sys
import xml.dom as dom
import xml.dom.minidom as minidom
import json

bumpsNS = "tag:edavies.nildram.co.uk,2006-05-23:bumps"


def attributes(el, attrList):
    """
    Return a dictionary holding those attributes (in no namespace) named
    in attrList which are actually present on the element el.  Values
    are the attribute strings as returned by the DOM.
    """
    d = dict()
    for a in attrList:
        if el.hasAttributeNS(None, a):
            d[a] = el.getAttributeNS(None, a)
    return d


def _placeByDayNo(days, item):
    """
    Store item in the list days at index item['day-no'] - 1, padding
    the list with None for any earlier days not yet seen.

    Raises ValueError if the day number is less than 1; such a value
    would otherwise be used as a negative list index and either fail
    with an IndexError or silently overwrite another day's entry.
    """
    dayNo = item['day-no']
    if dayNo < 1:
        raise ValueError("day-no must be 1 or greater, got %d" % dayNo)
    while len(days) < dayNo:
        days.append(None)
    days[dayNo - 1] = item


def doDay(el):
    """
    Process a bmp:day element.

    Returns a dictionary of the day's attributes with 'day-no'
    converted to an int.  Raises KeyError if the day-no attribute is
    missing and ValueError if it is not an integer.
    """
    r = attributes(el, ['day-no', 'date', 'subtitle'])
    r['day-no'] = int(r['day-no'])
    return r


def doGliderDay(el):
    """
    Process a bmp:glider-day element.

    Returns a dictionary of the element's attributes with 'day-no' and
    'score' converted to int.  If an hc attribute is present, whatever
    its value, 'hc' is set to True.  Raises KeyError if day-no or score
    is missing and ValueError if either is not an integer.
    """
    r = attributes(el, ['day-no', 'ident', 'score', 'hc'])
    r['day-no'] = int(r['day-no'])
    r['score'] = int(r['score'])
    if 'hc' in r:
        r['hc'] = True
    return r


def doGlider(el, parent):
    """
    Process a bmp:glider element.

    el is the bmp:glider element and parent is the element which is
    searched for the bmp:glider-day elements belonging to this glider
    (those with the same ident).

    Returns a dictionary of the glider's attributes plus 'days', a list
    indexed by day number - 1 of this glider's day results (without the
    redundant 'ident' key) with None for days which have no result.
    Raises ValueError if a matching glider-day has a day-no below 1.
    """
    r = attributes(el, ['ident', 'pilot', 'hc', 'href'])
    if 'hc' in r:
        r['hc'] = True
    days = []
    for dayEl in parent.getElementsByTagNameNS(bumpsNS, 'glider-day'):
        d = doGliderDay(dayEl)
        if d['ident'] == r['ident']:
            # The ident is implied by the enclosing glider object.
            d.pop('ident')
            _placeByDayNo(days, d)
    r['days'] = days
    return r


def doResults(el):
    """
    Process a bmp:results element.

    Returns a dictionary of the element's comp, subtitle and href
    attributes (where present) plus:

      'days'    - list of day dictionaries indexed by day number - 1,
                  with None for any missing day;
      'gliders' - dictionary mapping glider ident to glider dictionary.

    Raises ValueError if any day has a day-no below 1.
    """
    r = attributes(el, ['comp', 'subtitle', 'href'])

    days = []
    for dayEl in el.getElementsByTagNameNS(bumpsNS, 'day'):
        _placeByDayNo(days, doDay(dayEl))
    r['days'] = days

    gliders = {}
    for gliderEl in el.getElementsByTagNameNS(bumpsNS, 'glider'):
        g = doGlider(gliderEl, el)
        gliders[g['ident']] = g
    r['gliders'] = gliders

    return r


def doDocument(f):
    """
    Process a single input document (a file name or file-like object,
    as accepted by minidom.parse), generating zero or more dictionaries
    which will become JSON objects representing a competition or class.
    """
    doc = minidom.parse(f)
    for r in doc.getElementsByTagNameNS(bumpsNS, 'results'):
        yield doResults(r)


def doDocumentList(list):
    """
    Process one or more input documents.  These can be file-like
    objects or file names.  Yields one dictionary (to become a JSON
    object) for each bmp:results element found.
    """
    # The parameter name shadows the builtin but is kept as it is part
    # of the function's interface.
    for f in list:
        for r in doDocument(f):
            yield r


def main():
    """
    Convert the files named on the command line, or standard input if
    none are given, and print the result to standard output.

    Raises ValueError if no bmp:results element is found.
    """
    files = sys.argv[1:]
    if len(files) < 1:
        files = [sys.stdin]

    results = [r for r in doDocumentList(files)]
    if len(results) < 1:
        raise ValueError("No results found")
    if len(results) == 1:
        # A lone competition/class is emitted as a bare object.
        results = results[0]

    # Single-argument print(...) gives the same output whether it is
    # parsed as a Python 2 print statement or a Python 3 function call.
    print('setCompData(')
    print(json.dumps(results, sort_keys=True, indent=4))
    print(')')


if __name__ == '__main__':
    main()