Welcome, guest | Sign In | My Account | Store | Cart

Extract the texts from an XML file and write them into a *.pot file

Python, 101 lines
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
#!/usr/bin/env python
"""
    Module:         xmlgettext.py
    Description:    Generates a *.pot file from the text content of an XML
                    file. Text that should not be extracted can be marked
                    with a <!--!--> comment.
    Version:        V1.0b
    Copyright:      2003 by Fritz Cizmarov fritz@sol.at
    License:        GPL
"""

from xml.parsers import expat
from htmlentitydefs import entitydefs

class XMLTextParser:
    """Collect the translatable text content of an XML document.

    The document is parsed once, while the object is constructed.
    Afterwards ``strings`` holds the stripped, non-empty text of each
    element, in the order in which the elements were closed.

    Text is skipped for an element that carries the attribute
    ``no_translate="yes"`` (or ``"true"``); a ``<!--!-->`` comment switches
    extraction off for the rest of the innermost open element.  A ``<br/>``
    element does not start a text of its own, it adds a newline to the text
    of the element that contains it.
    """

    def __init__(self, file):
        """Parse *file* and fill ``self.strings``.

        file -- either a file name, or an object with a ``read`` method
                (anything ``expat``'s ``ParseFile`` accepts).
        """
        self.strings = []       # finished texts, in document order
        self.current = []       # stack: text gathered so far per open element
        self.no_translate = []  # stack: "skip text" flag per open element

        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.Start_Elem_Handler
        self.parser.EndElementHandler = self.End_Elem_Handler
        self.parser.CharacterDataHandler = self.Char_Data_Handler
        # Setting DefaultHandler keeps expat from expanding entity
        # references itself; they are passed to Default_Handler instead.
        self.parser.DefaultHandler = self.Default_Handler
        self.parser.CommentHandler = self.Comment_Handler

        if hasattr(file, "read"):
            # Caller-owned stream: parse it, but leave closing to the caller.
            self.parser.ParseFile(file)
        else:
            # expat does its own decoding, so hand it the raw bytes; the
            # ``with`` block closes the file even when parsing fails.
            with open(file, "rb") as stream:
                self.parser.ParseFile(stream)

    @staticmethod
    def _entity_text(reference):
        """Return the character named by an HTML entity reference.

        reference -- the complete reference, e.g. ``"&uuml;"``.  A name
                     that is not a known HTML entity is returned unchanged.
        """
        try:
            from html.entities import name2codepoint  # Python 3
        except ImportError:
            from htmlentitydefs import name2codepoint  # Python 2
        try:
            to_char = unichr  # Python 2
        except NameError:
            to_char = chr  # Python 3
        codepoint = name2codepoint.get(reference[1:-1])
        if codepoint is None:
            return reference
        return to_char(codepoint)

    def Start_Elem_Handler(self, name, attrs):
        """Open a new text/flag stack level for every element except <br>."""
        if name != "br":  # nothing to do for <br/>
            nt = attrs.get("no_translate", "no") in ["yes", "true"]
            self.no_translate.append(nt)
            self.current.append("")

    def End_Elem_Handler(self, name):
        """Store the finished text of an element; <br/> adds a newline."""
        if name == "br":  # <br/> appends a newline to the enclosing text
            if self.current:  # guard: a <br> root has no enclosing element
                self.current[-1] += "\n"
            return
        res = self.current.pop().strip()
        if res:
            self.strings.append(res)
        self.no_translate.pop()

    def Char_Data_Handler(self, data):
        """Append character data to the innermost element's text."""
        if self.no_translate and not self.no_translate[-1]:
            # Drop indentation tabs and fold source line breaks into spaces.
            self.current[-1] += data.strip("\t").replace("\n", " ")

    def Default_Handler(self, data):
        """Resolve unexpanded entity references such as ``&uuml;``."""
        if not (data.startswith("&") and data.endswith(";")):
            return
        # Entities obey the same suppression rule as ordinary character
        # data, and are ignored when no element is open.
        if self.no_translate and not self.no_translate[-1]:
            self.current[-1] += self._entity_text(data)

    def Comment_Handler(self, data):
        """Treat ``<!--!-->`` as a 'do not translate' marker."""
        # Guard: a comment outside the root element has nothing to mark.
        if data == "!" and self.no_translate:
            self.no_translate[-1] = True
    
import sys, time, locale

# Command-line driver: parse the XML file named on the command line and write
# a gettext template (*.pot) to the optional output file, or to stdout.


def _po_quote(text):
    """Return *text* as a double-quoted, single-line PO string literal."""
    # The backslash must be escaped first so that the escapes added below
    # are not escaped a second time.
    escaped = text.replace("\\", "\\\\").replace('"', '\\"')
    escaped = escaped.replace("\t", "\\t").replace("\r", "\\r")
    escaped = escaped.replace("\n", "\\n")
    return '"' + escaped + '"'


if len(sys.argv) == 1 or sys.argv[1] in ["-h", "--help"]:
    # A parenthesised single argument prints the same on Python 2 and 3.
    print("Usage: xmlgettext.py infile [outfile]")
    sys.exit()

# Parse before touching the output file, so that a broken input does not
# leave a truncated *.pot behind.
p = XMLTextParser(sys.argv[1])

# gettext expects "YYYY-MM-DD HH:MM+ZONE", independent of the user's locale.
datetime = time.strftime("%Y-%m-%d %H:%M%z")

# One entry per distinct text: msgfmt rejects duplicate msgids.
seen = set()
entries = []
for string in p.strings:
    if string in seen:
        continue
    seen.add(string)
    entries.append('msgid ' + _po_quote(string) + '\nmsgstr ""\n\n')
body = u"".join(entries)

# Declare the charset the file is really written in; fall back to UTF-8 when
# the locale gives none or cannot represent the extracted text.
codeset = locale.getpreferredencoding() or "UTF-8"
try:
    body.encode(codeset)
except (UnicodeError, LookupError):
    codeset = "UTF-8"

header = u"".join([
    "# SOME DESCRIPTIVE TITLE.\n",
    "# Copyright (C) YEAR ORGANIZATION\n",
    "# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.\n",
    "#\n",
    'msgid ""\n',
    'msgstr ""\n',
    '"Project-Id-Version: PACKAGE VERSION\\n"\n',
    '"POT-Creation-Date: ' + datetime + '\\n"\n',
    '"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"\n',
    '"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"\n',
    '"Language-Team: LANGUAGE <LL@li.org>\\n"\n',
    '"MIME-Version: 1.0\\n"\n',
    '"Content-Type: text/plain; charset=' + codeset + '\\n"\n',
    '"Content-Transfer-Encoding: 8bit\\n"\n',
    '"Generated-By: xmlgettext.py 1.4\\n"\n',
    "\n",
])

data = (header + body).encode(codeset)

if len(sys.argv) == 3:
    with open(sys.argv[2], "wb") as out:
        out.write(data)
else:
    # Python 3 exposes the byte stream as sys.stdout.buffer; on Python 2
    # sys.stdout itself accepts byte strings.
    getattr(sys.stdout, "buffer", sys.stdout).write(data)