Extract the text from an XML file and write it into a *.pot file
#!/usr/bin/env python
"""
Module: xmlgettext.py
Description: Generates a *.pot file from the text content of an XML file.
Text that should not be extracted can be marked with a <!--!-->
comment.
Version: V1.0b
Copyright: 2003 by Fritz Cizmarov fritz@sol.at
License: GPL
"""
from xml.parsers import expat
from htmlentitydefs import entitydefs
try:  # Python 3 keeps the HTML entity table in html.entities
    from html.entities import name2codepoint as _name2codepoint
except ImportError:  # Python 2 fallback
    from htmlentitydefs import name2codepoint as _name2codepoint

try:
    _unichr = unichr  # Python 2: chr() only yields byte strings there
except NameError:
    _unichr = chr  # Python 3: chr() already returns text


class XMLTextParser(object):
    """Collect the text content of an XML document, element by element.

    After construction ``strings`` holds one entry per element whose
    accumulated text is non-empty (in the order the elements are closed).
    ``<br/>`` does not start a new entry; it inserts a newline into the
    text of the enclosing element.  An element is skipped when it carries
    ``no_translate="yes"`` / ``"true"`` or contains a ``<!--!-->`` comment
    (text seen before that comment is still kept).

    Attributes:
        strings: extracted texts, ready to be written as msgids.
        current: stack of text buffers, one per open element.
        no_translate: stack of skip flags, parallel to ``current``.
        parser: the underlying expat parser.
    """

    def __init__(self, file):
        """Parse *file* immediately and fill ``self.strings``.

        Args:
            file: a path, or an object with a ``read()`` method that
                returns bytes (it is handed to expat's ``ParseFile`` and
                is not closed here).

        Raises:
            IOError/OSError: if a path cannot be opened.
            xml.parsers.expat.ExpatError: if the document is not
                well-formed.
        """
        self.strings = []
        self.current = []
        self.no_translate = []
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.Start_Elem_Handler
        self.parser.EndElementHandler = self.End_Elem_Handler
        self.parser.CharacterDataHandler = self.Char_Data_Handler
        self.parser.DefaultHandler = self.Default_Handler
        self.parser.CommentHandler = self.Comment_Handler
        if hasattr(file, "read"):
            self.parser.ParseFile(file)
        else:
            # Binary mode lets expat honour the document's own encoding
            # declaration; ``with`` closes the file even if parsing fails.
            with open(file, "rb") as stream:
                self.parser.ParseFile(stream)

    def Start_Elem_Handler(self, name, attrs):
        """Open a new text buffer and skip flag for every element but <br>."""
        if name != "br":  # nothing to do for <br/>
            nt = attrs.get("no_translate", "no") in ("yes", "true")
            self.no_translate.append(nt)
            self.current.append("")

    def End_Elem_Handler(self, name):
        """Close an element: store its stripped text if it is non-empty."""
        if name == "br":  # <br/> becomes a newline in the enclosing text
            self.current[-1] += "\n"
        else:
            res = self.current.pop().strip()
            if res != "":
                self.strings.append(res)
            self.no_translate.pop()

    def Char_Data_Handler(self, data):
        """Append character data to the innermost open element's buffer."""
        if not self.no_translate[-1]:
            # Tabs at the chunk edges are dropped and raw newlines are
            # flattened to spaces; real line breaks come only from <br/>.
            self.current[-1] += data.strip("\t").replace("\n", " ")

    def Default_Handler(self, data):
        """Resolve ``&name;`` references that reach the default handler.

        Known HTML entity names are replaced by their character; unknown
        names are kept verbatim instead of raising ``KeyError``.  Anything
        that is not an entity reference, lies outside an open element, or
        sits in an element marked as not translatable is ignored.
        """
        if not (data.startswith("&") and data.endswith(";")):
            return
        if not self.current or self.no_translate[-1]:
            return
        codepoint = _name2codepoint.get(data[1:-1])
        if codepoint is None:
            self.current[-1] += data
        else:
            self.current[-1] += _unichr(codepoint)

    def Comment_Handler(self, data):
        """Treat the comment ``<!--!-->`` as "do not extract this element"."""
        # The stack is empty for comments placed outside the root element.
        if data == "!" and self.no_translate:
            self.no_translate[-1] = True
import locale
import sys
import time


def po_quote(text):
    """Return *text* as a double-quoted PO string literal.

    Backslashes, double quotes, tabs and newlines are escaped; the
    backslash is handled first so later escapes are not doubled.
    """
    escaped = text.replace("\\", "\\\\")
    escaped = escaped.replace('"', '\\"')
    escaped = escaped.replace("\t", "\\t")
    escaped = escaped.replace("\n", "\\n")
    return '"' + escaped + '"'


def build_pot(strings, creation_date, charset):
    """Return the complete *.pot text for *strings*.

    Args:
        strings: iterable of msgid texts; empty and repeated texts are
            skipped so every msgid appears exactly once.
        creation_date: value for the POT-Creation-Date header field.
        charset: charset name announced in the Content-Type header field.
    """
    lines = [
        "# SOME DESCRIPTIVE TITLE.",
        "# Copyright (C) YEAR ORGANIZATION",
        "# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.",
        "#",
        'msgid ""',
        'msgstr ""',
        '"Project-Id-Version: PACKAGE VERSION\\n"',
        '"POT-Creation-Date: ' + creation_date + '\\n"',
        '"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"',
        '"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"',
        '"Language-Team: LANGUAGE <LL@li.org>\\n"',
        '"MIME-Version: 1.0\\n"',
        '"Content-Type: text/plain; charset=' + charset + '\\n"',
        '"Content-Transfer-Encoding: 8bit\\n"',
        '"Generated-By: xmlgettext.py 1.4\\n"',
        "",  # blank line separates the header entry from the messages
    ]
    seen = set()
    for text in strings:
        if not text or text in seen:
            continue
        seen.add(text)
        lines.append("msgid " + po_quote(text))
        lines.append('msgstr ""')
        lines.append("")
    return "\n".join(lines)


def main(argv):
    """Run the command line tool; *argv* has the layout of ``sys.argv``.

    ``argv[1]`` is the XML input file.  With exactly two arguments the
    catalogue is written to ``argv[2]``, otherwise to standard output.
    Returns the process exit status.
    """
    if len(argv) == 1 or argv[1] in ("-h", "--help"):
        sys.stdout.write("Usage: xmlgettext.py infile [outfile]\n")
        return 0

    # Parse before touching the output so a broken input file does not
    # leave an empty or truncated catalogue behind.
    strings = XMLTextParser(argv[1]).strings
    creation_date = time.strftime("%Y-%m-%d %H:%M%z")

    # Announce the charset that is really used for encoding; fall back to
    # UTF-8 when the locale's charset is unknown or cannot represent the
    # extracted text.
    charset = locale.getpreferredencoding() or "UTF-8"
    try:
        payload = build_pot(strings, creation_date, charset).encode(charset)
    except (LookupError, UnicodeError):
        charset = "UTF-8"
        payload = build_pot(strings, creation_date, charset).encode(charset)

    if len(argv) == 3:
        with open(argv[2], "wb") as out:
            out.write(payload)
    else:
        # Python 3 exposes the byte stream as sys.stdout.buffer; Python 2's
        # sys.stdout accepts encoded bytes directly.
        getattr(sys.stdout, "buffer", sys.stdout).write(payload)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
Tags: xml