Package translate :: Package convert :: Module dtd2po
[hide private]
[frames | no frames]

Source Code for Module translate.convert.dtd2po

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2002-2006 Zuza Software Foundation 
  5  # 
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """script to convert a mozilla .dtd UTF-8 localization format to a 
 23  gettext .po localization file using the po and dtd modules, and the 
 24  dtd2po convertor class which is in this module 
 25  You can convert back to .dtd using po2dtd.py""" 
 26   
 27  from translate.storage import po 
 28  from translate.storage import dtd 
 29  from translate.misc import quote 
 30  from translate.convert import accesskey as accesskeyfn 
 31   
 32   
def is_css_entity(entity):
    """Tell whether ``entity`` looks like it holds CSS rather than text.

    The decision is based only on the part of the name after the last
    ``.``: a fixed set of size-related suffixes, or any suffix beginning
    with ``style``, counts as CSS.  Names without a ``.`` never match.
    """
    _base, dot, tail = entity.rpartition('.')
    if not dot:
        # no '.' in the name, so there is no suffix to inspect
        return False
    return tail.startswith("style") or tail in ("height", "width", "unixWidth", "macWidth", "size")
41 42
class dtd2po:
    """Convert DTD units and stores into PO units and stores.

    The converter keeps two pieces of state between calls:

    - ``currentgroup``: the text of the localization group that is open at
      the moment (set and cleared by ``convertcomments``), or ``None``.
    - ``mixedentities``: created by ``findmixedentities``; maps the names of
      entities that take part in a label/accesskey pair to a dict of
      ``{mixbucket: bool}`` flags recording whether the pair was combined.
    """

    def __init__(self, blankmsgstr=False, duplicatestyle="msgctxt"):
        """Store the conversion options.

        ``blankmsgstr`` makes ``mergestore`` leave every target empty;
        ``duplicatestyle`` is handed to ``removeduplicates`` on the
        resulting PO store.
        """
        self.currentgroup = None
        self.blankmsgstr = blankmsgstr
        self.duplicatestyle = duplicatestyle

    def convertcomments(self, thedtd, thepo):
        """Copy the location and the comments of ``thedtd`` onto ``thepo``."""
        entity = quote.rstripeol(thedtd.entity)
        if entity:
            thepo.addlocation(thedtd.entity)
        for commenttype, comment in thedtd.comments:
            # group markers open or close a group; the open group is kept on
            # the converter so that it also applies to later units
            if commenttype == "locgroupstart":
                self.currentgroup = comment.replace('BEGIN', 'GROUP')
            elif commenttype == "locgroupend":
                self.currentgroup = None
            # every comment (group markers included) becomes a developer
            # note; automatic comments are already plain text
            if commenttype == "automaticcomment":
                thepo.addnote(comment, origin="developer")
            else:
                thepo.addnote(quote.stripcomment(comment), origin="developer")
        # a still-open group is recorded as a translator note
        if self.currentgroup is not None:
            thepo.addnote(quote.stripcomment(self.currentgroup), origin="translator")
        if is_css_entity(entity):
            # NOTE(review): the listing this code was recovered from
            # collapses runs of spaces - confirm the exact spacing of this
            # message against the canonical source.
            thepo.addnote("Do not translate this. Only change the numeric values if you need this dialogue box to appear bigger", origin="developer")

    def convertstrings(self, thedtd, thepo):
        """Set ``thepo.source`` from the definition of ``thedtd``.

        Carriage returns are dropped and blank lines at either end are
        removed.  A multi-line definition is re-joined with newlines after
        trimming the inner edges of each line and giving every line but the
        last one a single trailing space.  The target is always emptied.
        """
        unquoted = dtd.unquotefromdtd(thedtd.definition).replace("\r", "")
        lines = unquoted.split('\n')
        while lines and not lines[0].strip():
            del lines[0]
        while lines and not lines[-1].strip():
            del lines[-1]
        if len(lines) > 1:
            first = [lines[0].rstrip() + ' ']
            middle = [line.strip() + ' ' for line in lines[1:-1]]
            last = [lines[-1].lstrip()]
            thepo.source = "\n".join(first + middle + last)
        elif lines:
            thepo.source = lines[0]
        else:
            thepo.source = ""
        thepo.target = ""

    def convertunit(self, thedtd):
        """Convert a dtd unit to a po unit.

        Returns ``None`` when ``thedtd`` is ``None``, when it is a
        ``SYSTEM`` entity, or when the result is blank and has no location.

        Note that ``thedtd`` is modified in place: a localization note that
        names this entity is turned into an ``automaticcomment``, and a
        ``DONT_TRANSLATE`` note blanks the entity and its definition.
        """
        if thedtd is None:
            return None
        if getattr(thedtd, "entityparameter", None) == "SYSTEM":
            return None
        thepo = po.pounit(encoding="UTF-8")
        for commentnum, (commenttype, locnote) in enumerate(thedtd.comments):
            if commenttype != 'locnote':
                continue
            # split "LOCALIZATION NOTE (entity): note -->" into its parts
            typeend = quote.findend(locnote, 'LOCALIZATION NOTE')
            idstart = locnote.find('(', typeend)
            if idstart == -1:
                continue
            idend = locnote.find(')', idstart + 1)
            entity = locnote[idstart + 1:idend].strip()
            actualnotestart = locnote.find(':', idend + 1)
            actualnoteend = locnote.find('-->', idend)
            actualnote = locnote[actualnotestart + 1:actualnoteend].strip()
            if thedtd.entity != entity:
                # the note is about some other entity
                continue
            if actualnote.startswith("DONT_TRANSLATE"):
                # take out the entity, the definition and the note itself;
                # leaving the loop right away keeps the deletion safe
                thedtd.entity = ""
                thedtd.definition = ""
                del thedtd.comments[commentnum]
                break
            # hand the bare note text over to convertcomments
            thedtd.comments[commentnum] = ("automaticcomment", actualnote)
        self.convertcomments(thedtd, thepo)
        self.convertstrings(thedtd, thepo)
        if thepo.isblank() and not thepo.getlocations():
            return None
        return thepo

    def convertmixedunit(self, labeldtd, accesskeydtd):
        """Combine a label unit and an accesskey unit into one po unit.

        If only one of the two converts to a po unit, that unit (possibly
        ``None``) is returned unchanged.  Returns ``None`` when
        ``accesskeyfn.combine`` cannot merge the two strings.
        """
        labelpo = self.convertunit(labeldtd)
        accesskeypo = self.convertunit(accesskeydtd)
        if labelpo is None:
            return accesskeypo
        if accesskeypo is None:
            return labelpo
        thepo = po.pounit(encoding="UTF-8")
        parts = (labelpo, accesskeypo)
        for part in parts:
            thepo.addlocations(part.getlocations())
        for part in parts:
            thepo.msgidcomment = thepo._extract_msgidcomments() + part._extract_msgidcomments()
        for origin in ("developer", "translator"):
            for part in parts:
                thepo.addnote(part.getnotes(origin), origin)
        # redo the strings from the original dtd definitions
        label = dtd.unquotefromdtd(labeldtd.definition).decode('UTF-8')
        accesskey = dtd.unquotefromdtd(accesskeydtd.definition).decode('UTF-8')
        label = accesskeyfn.combine(label, accesskey)
        if label is None:
            return None
        thepo.source = label
        thepo.target = ""
        return thepo

    def findmixedentities(self, thedtdfile):
        """(Re)build ``self.mixedentities`` from the index of ``thedtdfile``.

        Any previous content of ``self.mixedentities`` is discarded.  Every
        entity ending in one of ``dtd.labelsuffixes`` whose base name also
        exists with one of ``dtd.accesskeysuffixes`` is registered, together
        with that accesskey entity, each with an empty flag dict.
        """
        self.mixedentities = {}
        for entity in thedtdfile.index:
            for labelsuffix in dtd.labelsuffixes:
                if not entity.endswith(labelsuffix):
                    continue
                entitybase = entity[:entity.rfind(labelsuffix)]
                for akeytype in dtd.accesskeysuffixes:
                    if (entitybase + akeytype) in thedtdfile.index:
                        # add both halves of the pair
                        self.mixedentities[entity] = {}
                        self.mixedentities[entitybase + akeytype] = {}

    def convertdtdunit(self, thedtdfile, thedtd, mixbucket="dtd"):
        """Convert a dtd unit of ``thedtdfile``, combining mixed entities.

        ``mixbucket`` names the flag under which the outcome of combining a
        label/accesskey pair is remembered in ``self.mixedentities``.
        Returns ``None`` for the second half of a pair that has already
        been combined, otherwise the resulting po unit (or ``None`` if the
        conversion yields nothing).
        """
        if thedtd.entity not in self.mixedentities:
            return self.convertunit(thedtd)
        alreadymixed = self.mixedentities[thedtd.entity].get(mixbucket, None)
        if alreadymixed:
            # the other half already produced the combined unit
            return None
        if alreadymixed is not None:
            # an earlier attempt to combine failed: convert on its own
            return self.convertunit(thedtd)

        # depending on which half we came across, find its counterpart
        labeldtd, accesskeydtd = None, None
        labelentity, accesskeyentity = None, None
        entityname = thedtd.entity
        # thedtd is a label: look for the matching accesskey
        for labelsuffix in dtd.labelsuffixes:
            if not entityname.endswith(labelsuffix):
                continue
            entitybase = entityname[:entityname.rfind(labelsuffix)]
            for akeytype in dtd.accesskeysuffixes:
                if (entitybase + akeytype) in thedtdfile.index:
                    labelentity, labeldtd = entityname, thedtd
                    accesskeyentity = entitybase + akeytype
                    accesskeydtd = thedtdfile.index[accesskeyentity]
                    break
        # thedtd is an accesskey: look for the matching label.  This used to
        # sit in the ``else`` of the loop above, which never breaks, so it
        # has always run unconditionally.
        for akeytype in dtd.accesskeysuffixes:
            if not entityname.endswith(akeytype):
                continue
            accesskeyentity, accesskeydtd = entityname, thedtd
            for labelsuffix in dtd.labelsuffixes:
                labelentity = accesskeyentity[:accesskeyentity.rfind(akeytype)] + labelsuffix
                if labelentity in thedtdfile.index:
                    labeldtd = thedtdfile.index[labelentity]
                    break
            else:
                # no label exists for this accesskey
                labelentity = None
                accesskeyentity = None

        thepo = self.convertmixedunit(labeldtd, accesskeydtd)
        mixed = thepo is not None
        # remember the outcome for both halves of the pair
        if accesskeyentity is not None:
            self.mixedentities[accesskeyentity][mixbucket] = mixed
        if labelentity is not None:
            self.mixedentities[labelentity][mixbucket] = mixed
        if mixed:
            return thepo
        # the mix failed: add this unit separately
        return self.convertunit(thedtd)

    def convertstore(self, thedtdfile):
        """Convert the dtd store ``thedtdfile`` into a new po store."""
        thetargetfile = po.pofile()
        targetheader = thetargetfile.init_headers(charset="UTF-8", encoding="8bit", x_accelerator_marker="&")
        targetheader.addnote("extracted from %s" % thedtdfile.filename, "developer")

        thedtdfile.makeindex()
        self.findmixedentities(thedtdfile)
        for thedtd in thedtdfile.units:
            if thedtd.isnull():
                continue
            thepo = self.convertdtdunit(thedtdfile, thedtd)
            if thepo is not None:
                thetargetfile.addunit(thepo)
        thetargetfile.removeduplicates(self.duplicatestyle)
        return thetargetfile

    def mergestore(self, origdtdfile, translateddtdfile):
        """Build a po store with sources from ``origdtdfile`` and targets
        from the units of the same name in ``translateddtdfile``.

        Targets are left empty when ``self.blankmsgstr`` is set or when the
        translated file has no usable unit for the entity.
        """
        thetargetfile = po.pofile()
        targetheader = thetargetfile.init_headers(charset="UTF-8", encoding="8bit")
        targetheader.addnote("extracted from %s, %s" % (origdtdfile.filename, translateddtdfile.filename), "developer")

        origdtdfile.makeindex()
        # NOTE(review): findmixedentities() rebinds self.mixedentities on
        # every call, so only the result of the second call below survives.
        self.findmixedentities(origdtdfile)
        translateddtdfile.makeindex()
        self.findmixedentities(translateddtdfile)
        for origdtd in origdtdfile.units:
            if origdtd.isnull():
                continue
            origpo = self.convertdtdunit(origdtdfile, origdtd, mixbucket="orig")
            # decide under which bucket the translated unit is converted
            if origdtd.entity in self.mixedentities:
                mixedentitydict = self.mixedentities[origdtd.entity]
                if "orig" not in mixedentitydict:
                    # mixed in the translation but not in the original:
                    # treat as unmixed from here on
                    mixbucket = "orig"
                    del self.mixedentities[origdtd.entity]
                elif mixedentitydict["orig"]:
                    # the original pair was combined successfully
                    mixbucket = "translate"
                else:
                    # combining the original pair failed
                    mixbucket = "orig"
            else:
                mixbucket = "translate"
            if origpo is None:
                # e.g. the second half of a pair that was already combined
                continue
            if origdtd.entity in translateddtdfile.index:
                translateddtd = translateddtdfile.index[origdtd.entity]
                translatedpo = self.convertdtdunit(translateddtdfile, translateddtd, mixbucket=mixbucket)
            else:
                translatedpo = None
            if translatedpo is not None and not self.blankmsgstr:
                origpo.target = translatedpo.source
            thetargetfile.addunit(origpo)
        thetargetfile.removeduplicates(self.duplicatestyle)
        return thetargetfile
292 293
def convertdtd(inputfile, outputfile, templatefile, pot=False, duplicatestyle="msgctxt"):
    """Read ``inputfile`` (and ``templatefile``, if given) as dtd, convert
    with ``dtd2po`` and write the po result to ``outputfile``.

    Without a template the input is converted on its own; with one, the
    template supplies the originals and the input the translations.
    Returns 0 without writing anything when the result is empty, else 1.
    """
    inputstore = dtd.dtdfile(inputfile)
    convertor = dtd2po(blankmsgstr=pot, duplicatestyle=duplicatestyle)
    if templatefile is not None:
        outputstore = convertor.mergestore(dtd.dtdfile(templatefile), inputstore)
    else:
        outputstore = convertor.convertstore(inputstore)
    if outputstore.isempty():
        return 0
    outputfile.write(str(outputstore))
    return 1
307 308
def main(argv=None):
    """Command-line entry point: set up the option parser for dtd -> po
    conversion (with or without a dtd template) and run it on ``argv``."""
    from translate.convert import convert

    target = ("po", convertdtd)
    formats = {
        "dtd": target,
        ("dtd", "dtd"): target,
    }
    parser = convert.ConvertOptionParser(
        formats,
        usetemplates=True,
        usepots=True,
        description=__doc__,
    )
    parser.add_duplicates_option()
    # forward the "pot" option to convertdtd
    parser.passthrough.append("pot")
    parser.run(argv)
# Run the command-line converter only when this file is executed as a script.
if __name__ == '__main__':
    main()