Extract the texts from an XML file and write them into a *.pot file
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python
"""
Module: xmlgettext.py
Description: Generates a *.pot file from the text content of an XML file.
Texts that should not be extracted can be marked with a <!--!-->
comment.
Version: V1.0b
Copyright: 2003 by Fritz Cizmarov fritz@sol.at
License: GPL
"""
from xml.parsers import expat
from htmlentitydefs import entitydefs
try:  # Python 2 names
    from htmlentitydefs import name2codepoint as _name2codepoint
    _string_types = basestring
    _unichr = unichr
except (ImportError, NameError):  # Python 3 equivalents
    from html.entities import name2codepoint as _name2codepoint
    _string_types = str
    _unichr = chr


class XMLTextParser:
    """Collect the text content of an XML document, element by element.

    The document is parsed during construction.  Afterwards ``strings``
    holds one entry per element whose own text is non-empty after
    stripping surrounding whitespace, in the order the elements were
    closed.

    Rules applied while collecting:

    * ``<br/>`` does not open a text buffer of its own; it adds a newline
      to the text of the enclosing element.
    * An element carrying ``no_translate="yes"`` (or ``"true"``) has its
      own text skipped.
    * A ``<!--!-->`` comment switches off collection for the rest of the
      element it appears in.
    * Named HTML entity references that reach the default handler
      (e.g. ``&auml;``) are replaced by the corresponding character.

    Parameters:
        file: either a path to the XML file, or an object with a
            ``read(nbytes)`` method returning bytes.  A path is opened
            and closed by the constructor; a file object is left open.
    """

    def __init__(self, file):
        self.strings = []       # finished texts, in document order
        self.current = []       # stack: text buffer of every open element
        self.no_translate = []  # stack: "skip text" flag of every open element
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.Start_Elem_Handler
        self.parser.EndElementHandler = self.End_Elem_Handler
        self.parser.CharacterDataHandler = self.Char_Data_Handler
        self.parser.DefaultHandler = self.Default_Handler
        self.parser.CommentHandler = self.Comment_Handler
        if isinstance(file, _string_types):
            # Binary mode: expat detects the encoding itself and expects
            # raw bytes from read().
            stream = open(file, "rb")
            try:
                self.parser.ParseFile(stream)
            finally:
                # Close our own handle even when the document is malformed.
                stream.close()
        else:
            self.parser.ParseFile(file)

    def Start_Elem_Handler(self, name, attrs):
        """Open a new text buffer for every element except ``<br>``."""
        if name == "br":
            return  # nothing to do for <br/>; see End_Elem_Handler
        skip = attrs.get("no_translate", "no") in ("yes", "true")
        self.no_translate.append(skip)
        self.current.append("")

    def End_Elem_Handler(self, name):
        """Close an element and store its text if there is any."""
        if name == "br":
            # <br/> becomes a newline in the enclosing element's text.
            # Guard against <br> being the document element itself.
            if self.current:
                self.current[-1] += "\n"
            return
        text = self.current.pop().strip()
        if text:
            self.strings.append(text)
        self.no_translate.pop()

    def Char_Data_Handler(self, data):
        """Append character data to the innermost open element's text."""
        if not self.current or self.no_translate[-1]:
            return
        # Source-level line breaks and tab indentation are layout only.
        self.current[-1] += data.strip("\t").replace("\n", " ")

    def Default_Handler(self, data):
        """Translate entity references that expat passes through unexpanded."""
        if not (data.startswith("&") and data.endswith(";")):
            return  # XML declaration, DOCTYPE pieces, etc.
        if not self.current or self.no_translate[-1]:
            return
        codepoint = _name2codepoint.get(data[1:-1])
        if codepoint is None:
            # Unknown entity: keep the reference visible instead of
            # aborting the whole extraction.
            self.current[-1] += data
        else:
            self.current[-1] += _unichr(codepoint)

    def Comment_Handler(self, data):
        """Treat ``<!--!-->`` as "do not extract the rest of this element"."""
        # The marker is meaningless outside of any element, so ignore it
        # there instead of failing on the empty stack.
        if data == "!" and self.no_translate:
            self.no_translate[-1] = True
import locale
import sys
import time


def po_quote(text):
    """Return *text* as a double-quoted PO string literal.

    PO files only know double-quoted strings, so backslashes, double
    quotes and control characters are escaped C-style.
    """
    escaped = (text.replace("\\", "\\\\")   # must come first
                   .replace('"', '\\"')
                   .replace("\n", "\\n")
                   .replace("\r", "\\r")
                   .replace("\t", "\\t"))
    return '"' + escaped + '"'


def render_pot(strings, charset="UTF-8", creation_date=None):
    """Build the text of a *.pot template for *strings*.

    Parameters:
        strings: iterable of message texts; repeated texts are emitted
            only once (first occurrence wins) because a catalog must not
            contain duplicate msgids.
        charset: value written into the ``Content-Type`` header; the
            caller is expected to encode the result with the same charset.
        creation_date: text for ``POT-Creation-Date``; defaults to the
            current local time as ``YYYY-MM-DD HH:MM+ZZZZ``.

    Returns:
        The complete template as one string, ending in a blank line.
    """
    if creation_date is None:
        creation_date = time.strftime("%Y-%m-%d %H:%M%z")
    lines = [
        "# SOME DESCRIPTIVE TITLE.",
        "# Copyright (C) YEAR ORGANIZATION",
        "# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.",
        "#",
        'msgid ""',
        'msgstr ""',
        '"Project-Id-Version: PACKAGE VERSION\\n"',
        '"POT-Creation-Date: ' + creation_date + '\\n"',
        '"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"',
        '"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"',
        '"Language-Team: LANGUAGE <LL@li.org>\\n"',
        '"MIME-Version: 1.0\\n"',
        '"Content-Type: text/plain; charset=' + charset + '\\n"',
        '"Content-Transfer-Encoding: 8bit\\n"',
        '"Generated-By: xmlgettext.py 1.4\\n"',
        "",
    ]
    seen = set()
    for text in strings:
        if text in seen:
            continue
        seen.add(text)
        lines.append("msgid " + po_quote(text))
        lines.append('msgstr ""')
        lines.append("")
    return "\n".join(lines) + "\n"


def main(argv=None):
    """Command line entry point: ``xmlgettext.py infile [outfile]``.

    Without *outfile* (or with more than two arguments) the template is
    written to standard output.  Returns the process exit status.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) == 1 or argv[1] in ("-h", "--help"):
        print("Usage: xmlgettext.py infile [outfile]")
        return 0

    # Parse before touching the output so that a broken input file does
    # not leave a truncated outfile behind.
    parsed = XMLTextParser(argv[1])

    # The locale may not name an encoding at all; fall back to UTF-8.
    charset = locale.getpreferredencoding() or "UTF-8"
    payload = render_pot(parsed.strings, charset).encode(charset)

    if len(argv) == 3:
        out = open(argv[2], "wb")
        try:
            out.write(payload)
        finally:
            out.close()
    else:
        # Python 3 exposes the byte stream as sys.stdout.buffer; on
        # Python 2 sys.stdout takes the encoded bytes directly.
        getattr(sys.stdout, "buffer", sys.stdout).write(payload)
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
Sign in to comment