Package translate :: Package filters :: Module checks
[hide private]
[frames | no frames]

Source Code for Module translate.filters.checks

   1  #!/usr/bin/env python 
   2  # -*- coding: utf-8 -*- 
   3  # 
   4  # Copyright 2004-2011 Zuza Software Foundation 
   5  # 
   6  # This file is part of translate. 
   7  # 
   8  # translate is free software; you can redistribute it and/or modify 
   9  # it under the terms of the GNU General Public License as published by 
  10  # the Free Software Foundation; either version 2 of the License, or 
  11  # (at your option) any later version. 
  12  # 
  13  # translate is distributed in the hope that it will be useful, 
  14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  16  # GNU General Public License for more details. 
  17  # 
  18  # You should have received a copy of the GNU General Public License 
  19  # along with translate; if not, write to the Free Software 
  20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
  21   
  22  """This is a set of validation checks that can be performed on translation 
  23  units. 
  24   
  25  Derivatives of UnitChecker (like StandardUnitChecker) check translation units, 
  26  and derivatives of TranslationChecker (like StandardChecker) check 
  27  (source, target) translation pairs. 
  28   
  29  When adding a new test here, please document and explain the behaviour on the 
  30  U{wiki <http://translate.sourceforge.net/wiki/toolkit/pofilter_tests>}. 
  31  """ 
  32   
  33  import re 
  34   
  35  from translate.filters import helpers 
  36  from translate.filters import decoration 
  37  from translate.filters import prefilters 
  38  from translate.filters import spelling 
  39  from translate.lang import factory 
  40  from translate.lang import data 
  41   
  42  # These are some regular expressions that are compiled for use in some tests 
  43   
  44  # printf syntax based on http://en.wikipedia.org/wiki/Printf which doens't 
  45  # cover everything we leave \w instead of specifying the exact letters as 
  46  # this should capture printf types defined in other platforms. 
  47  # extended to support Python named format specifiers 
  48  printf_pat = re.compile(''' 
  49          %(                          # initial % 
  50                (?:(?P<ord>\d+)\$|    # variable order, like %1$s 
  51                \((?P<key>\w+)\))?    # Python style variables, like %(var)s 
  52          (?P<fullvar> 
  53              [+#-]*                  # flags 
  54              (?:\d+)?                # width 
  55              (?:\.\d+)?              # precision 
  56              (hh\|h\|l\|ll)?         # length formatting 
  57              (?P<type>[\w%]))        # type (%s, %d, etc.) 
  58          )''', re.VERBOSE) 
  59   
  60  # The name of the XML tag 
  61  tagname_re = re.compile("<[\s]*([\w\/]*)") 
  62   
  63  # We allow escaped quotes, probably for old escaping style of OOo helpcontent 
  64  #TODO: remove escaped strings once usage is audited 
  65  property_re = re.compile(" (\w*)=((\\\\?\".*?\\\\?\")|(\\\\?'.*?\\\\?'))") 
  66   
  67  # The whole tag 
  68  tag_re = re.compile("<[^>]+>") 
  69   
  70  gconf_attribute_re = re.compile('"[a-z_]+?"') 
  71   
  72   
def tagname(string):
    """Returns the name of the XML/HTML tag at the start of string.

    tagname_re is matched at the beginning of string.  If it does not match
    (string does not start with "<"), the match object is None and an
    AttributeError is raised.
    """
    found = tagname_re.match(string)
    # Group 1 is the tag name; it may be empty but always takes part in a
    # successful match.
    return found.group(1)
76 77
def intuplelist(pair, list):
    """Looks up the (tag, property, value) triple pair in list.

    None entries in a pattern of list act as wildcards, but only in the first
    (tag) and third (value) positions.  The value is only considered once the
    tag and property have matched.

    Returns the first matching pattern from list, or pair itself when nothing
    matches or when pair is a bare tag name, i.e. (tag, None, None).
    """
    tag, prop, value = pair
    if (prop, value) == (None, None):
        # This is a tagname
        return pair
    for candidate in list:
        cand_tag, cand_prop, cand_value = candidate
        tag_and_prop_match = (cand_tag, cand_prop) in [(tag, prop), (None, prop)]
        if tag_and_prop_match and cand_value in [None, value]:
            return candidate
    return pair
92 93
def tagproperties(strings, ignore):
    """Returns all the properties found in the XML/HTML tag strings as
    (tagname, propertyname, propertyvalue) tuples, leaving out the
    combinations specified in ignore.

    Every tag contributes a (tagname, None, None) entry first, followed by
    one entry per attribute that is kept.
    """
    properties = []
    for string in strings:
        tag = tagname(string)
        properties.append((tag, None, None))
        # Now we isolate the attribute pairs.
        for attribute, quoted, _dquoted, _squoted in property_re.findall(string):
            # Strip the quotes:
            candidate = (tag, attribute, quoted[1:-1])
            if candidate in ignore or intuplelist(candidate, ignore) != candidate:
                # The first ignorable attribute ends the scan of this tag, so
                # any attributes after it are not collected either.
                # NOTE(review): "skip only this attribute" may have been the
                # intent - confirm before changing.
                break
            properties.append(candidate)
    return properties
116 117
class FilterFailure(Exception):
    """Raised by a filter to signal that a check didn't pass.

    The exception carries a list of unicode messages giving an explanation
    or a comment.
    """

    def __init__(self, messages):
        # A single message is shorthand for a one-element list.
        if isinstance(messages, list):
            message_list = messages
        else:
            message_list = [messages]
        assert isinstance(message_list[0], unicode)  # Assumption: all of same type
        self.messages = message_list

    def __unicode__(self):
        joined = u", ".join(self.messages)
        return unicode(joined)

    def __str__(self):
        joined = u", ".join(self.messages)
        return str(joined)
133 134
class SeriousFilterFailure(FilterFailure):
    """A FilterFailure for a bad translation that might break an application
    (so the string will be marked fuzzy)."""
# Each entry is a (tag, attribute, value) tuple naming an attribute that may
# be changed or ignored when it occurs inside tag.  The third element, when
# given, is a property value that can be ignored if present (like defaults,
# for example).
# A None element is a wildcard: the entry is relevant for every value of the
# tag/property given as None.  A non-None "value" means that the value of the
# attribute must be taken into account.
common_ignoretags = [
    (None, "xml-lang", None),
]
common_canchangetags = [
    ("img", "alt", None),
    (None, "title", None),
    (None, "dir", None),
    (None, "lang", None),
]
# Actually the title attribute is allowed on many tags in HTML (but probably
# not all)
class CheckerConfig(object):
    """Object representing the configuration of a checker."""

    def __init__(self, targetlanguage=None, accelmarkers=None, varmatches=None,
                 notranslatewords=None, musttranslatewords=None,
                 validchars=None, punctuation=None, endpunctuation=None,
                 ignoretags=None, canchangetags=None, criticaltests=None,
                 credit_sources=None):
        """Build a configuration; every argument is optional.

        List arguments are copied, so later calls to update() never modify
        the lists the caller passed in.  punctuation and endpunctuation fall
        back to the target language's values, and ignoretags/canchangetags to
        the module-level common_* lists, when they are None.
        """
        # Init lists
        self.accelmarkers = self._init_list(accelmarkers)
        self.varmatches = self._init_list(varmatches)
        self.criticaltests = self._init_list(criticaltests)
        self.credit_sources = self._init_list(credit_sources)
        # Lang data
        self.targetlanguage = targetlanguage
        self.updatetargetlanguage(targetlanguage)
        self.sourcelang = factory.getlanguage('en')
        # Inits with default values
        self.punctuation = self._init_default(data.normalized_unicode(punctuation),
                                              self.lang.punctuation)
        self.endpunctuation = self._init_default(data.normalized_unicode(endpunctuation),
                                                 self.lang.sentenceend)
        self.ignoretags = self._init_default(ignoretags, common_ignoretags)
        self.canchangetags = self._init_default(canchangetags, common_canchangetags)
        # Other data
        # TODO: allow user configuration of untranslatable words
        self.notranslatewords = dict.fromkeys(
            [data.normalized_unicode(key) for key in self._init_list(notranslatewords)])
        self.musttranslatewords = dict.fromkeys(
            [data.normalized_unicode(key) for key in self._init_list(musttranslatewords)])
        validchars = data.normalized_unicode(validchars)
        self.validcharsmap = {}
        self.updatevalidchars(validchars)

    def _init_list(self, list):
        """initialise configuration parameters that are lists

        The items are copied into a new list: update() extends these lists in
        place, and that must not leak into a list owned by the caller.

        @type list: List
        @param list: None (we'll initialise a blank list) or a list parameter
        @rtype: List
        @return: a new list holding the items of the parameter
        """
        if list is None:
            return []
        # The parameter name shadows the builtin, so copy with a comprehension.
        return [item for item in list]

    def _init_default(self, param, default):
        """initialise parameters that can have default options

        @param param: the user supplied parameter value
        @param default: default values when param is not specified
        @return: the parameter as specified by the user or the default settings
        """
        if param is None:
            return default
        return param

    def update(self, otherconfig):
        """combines the info in otherconfig into this config object"""
        self.targetlanguage = otherconfig.targetlanguage or self.targetlanguage
        self.updatetargetlanguage(self.targetlanguage)
        # Only accelerator markers that are not known yet are added.
        self.accelmarkers.extend([c for c in otherconfig.accelmarkers
                                  if c not in self.accelmarkers])
        self.varmatches.extend(otherconfig.varmatches)
        self.notranslatewords.update(otherconfig.notranslatewords)
        self.musttranslatewords.update(otherconfig.musttranslatewords)
        self.validcharsmap.update(otherconfig.validcharsmap)
        self.punctuation += otherconfig.punctuation
        self.endpunctuation += otherconfig.endpunctuation
        #TODO: consider also updating in the following cases:
        # (these are replaced by otherconfig's values, not merged)
        self.ignoretags = otherconfig.ignoretags
        self.canchangetags = otherconfig.canchangetags
        self.criticaltests.extend(otherconfig.criticaltests)
        self.credit_sources = otherconfig.credit_sources

    def updatevalidchars(self, validchars):
        """updates the map that eliminates valid characters

        Returns True without changing anything when validchars is None;
        otherwise maps the ordinal of every character to None in
        validcharsmap.
        """
        if validchars is None:
            return True
        validcharsmap = dict.fromkeys(
            ord(validchar) for validchar in data.normalized_unicode(validchars))
        self.validcharsmap.update(validcharsmap)

    def updatetargetlanguage(self, langcode):
        """Updates the target language in the config to the given target
        language"""
        self.lang = factory.getlanguage(langcode)
238 239
def cache_results(f):
    """Decorator that caches the result of the one-argument method f.

    Results are stored in the instance's results_cache dictionary under the
    key (f.__name__, param1), so the instance must provide that attribute and
    param1 must be hashable.  The wrapper keeps the name and docstring of f.
    """

    def cached_f(self, param1):
        key = (f.__name__, param1)
        res_cache = self.results_cache
        if key not in res_cache:
            res_cache[key] = f(self, param1)
        return res_cache[key]

    # Keep the wrapped method recognisable for introspection and help().
    cached_f.__name__ = f.__name__
    cached_f.__doc__ = f.__doc__
    return cached_f
class UnitChecker(object):
    """Parent Checker class which does the checking based on functions available
    in derived classes.

    Every callable attribute of an instance that is not also an attribute of
    UnitChecker itself is treated as a test (see getfilters), so any method
    added to a subclass is picked up as a test as well.
    """
    # Maps a test name to the names of the tests that run_filters skips once
    # that test has failed.
    preconditions = {}

    def __init__(self, checkerconfig=None, excludefilters=None,
                 limitfilters=None, errorhandler=None):
        """Set up the checker.

        checkerconfig defaults to a fresh CheckerConfig().  excludefilters and
        limitfilters are passed on to getfilters().  errorhandler, when given,
        is called by run_filters() for unexpected exceptions raised by a test.
        """
        self.errorhandler = errorhandler
        if checkerconfig is None:
            self.setconfig(CheckerConfig())
        else:
            self.setconfig(checkerconfig)
        # exclude functions defined in UnitChecker from being treated as tests.
        self.helperfunctions = {}
        for functionname in dir(UnitChecker):
            function = getattr(self, functionname)
            if callable(function):
                self.helperfunctions[functionname] = function
        self.defaultfilters = self.getfilters(excludefilters, limitfilters)
        self.results_cache = {}

    def getfilters(self, excludefilters=None, limitfilters=None):
        """returns dictionary of available filters, including/excluding those in
        the given lists

        The result maps each filter name to the corresponding attribute of
        this instance.  Names in excludefilters, helper functions, the
        "errorhandler" attribute and non-callable attributes are left out.
        """
        filters = {}
        if limitfilters is None:
            # use everything available unless instructed
            limitfilters = dir(self)
        if excludefilters is None:
            excludefilters = {}
        for functionname in limitfilters:
            if functionname in excludefilters:
                continue
            if functionname in self.helperfunctions:
                continue
            if functionname == "errorhandler":
                continue
            filterfunction = getattr(self, functionname, None)
            if not callable(filterfunction):
                continue
            filters[functionname] = filterfunction
        return filters

    def setconfig(self, config):
        """sets the configuration and builds the accelerator and variable
        prefilters from its accelmarkers and varmatches"""
        self.config = config
        self.accfilters = [prefilters.filteraccelerators(accelmarker) for accelmarker in self.config.accelmarkers]
        self.varfilters = [prefilters.filtervariables(startmatch, endmatch, prefilters.varname)
                           for startmatch, endmatch in self.config.varmatches]
        self.removevarfilter = [prefilters.filtervariables(startmatch, endmatch,
                                                           prefilters.varnone)
                                for startmatch, endmatch in self.config.varmatches]

    def setsuggestionstore(self, store):
        """Sets the filename that a checker should use for evaluating
        suggestions."""
        self.suggestion_store = store
        if self.suggestion_store:
            self.suggestion_store.require_index()

    def filtervariables(self, str1):
        """filter out variables from str1"""
        return helpers.multifilter(str1, self.varfilters)
    # Results are cached per unit; run_filters() resets the cache.
    filtervariables = cache_results(filtervariables)

    def removevariables(self, str1):
        """remove variables from str1"""
        return helpers.multifilter(str1, self.removevarfilter)
    removevariables = cache_results(removevariables)

    def filteraccelerators(self, str1):
        """filter out accelerators from str1"""
        return helpers.multifilter(str1, self.accfilters, None)
    filteraccelerators = cache_results(filteraccelerators)

    def filteraccelerators_by_list(self, str1, acceptlist=None):
        """filter out accelerators from str1, passing acceptlist on to the
        accelerator filters (this variant is not cached)"""
        return helpers.multifilter(str1, self.accfilters, acceptlist)

    def filterwordswithpunctuation(self, str1):
        """replaces words with punctuation with their unpunctuated
        equivalents"""
        return prefilters.filterwordswithpunctuation(str1)
    filterwordswithpunctuation = cache_results(filterwordswithpunctuation)

    def filterxml(self, str1):
        """filter out XML from the string so only text remains"""
        return tag_re.sub("", str1)
    filterxml = cache_results(filterxml)

    def run_test(self, test, unit):
        """Runs the given test on the given unit.

        Note that this can raise a FilterFailure as part of normal operation"""
        return test(unit)

    def run_filters(self, unit):
        """run all the tests in this suite, return failures as testname,
        message_or_exception

        The tests named in preconditions run first.  When one of them fails,
        the tests it lists are skipped for this unit.  The result is a
        dictionary that maps the name of each failed test to its message.
        """
        # Cached prefilter results belong to a single unit only.
        self.results_cache = {}
        failures = {}
        # Copy, because the list is extended below for this unit only.
        ignores = self.config.lang.ignoretests[:]
        functionnames = self.defaultfilters.keys()
        priorityfunctionnames = self.preconditions.keys()
        otherfunctionnames = filter(lambda functionname: functionname not in self.preconditions, functionnames)
        # The concatenation relies on keys() and filter() returning lists.
        for functionname in priorityfunctionnames + otherfunctionnames:
            if functionname in ignores:
                continue
            filterfunction = getattr(self, functionname, None)
            # this filterfunction may only be defined on another checker if
            # using TeeChecker
            if filterfunction is None:
                continue
            # The docstring is the message unless the test raises its own.
            filtermessage = filterfunction.__doc__
            try:
                filterresult = self.run_test(filterfunction, unit)
            except FilterFailure, e:
                filterresult = False
                filtermessage = unicode(e)
            except Exception, e:
                # Unexpected errors go to the error handler, whose return
                # value becomes the test result; without a handler they are
                # re-raised as ValueError.
                if self.errorhandler is None:
                    raise ValueError("error in filter %s: %r, %r, %s" % \
                            (functionname, unit.source, unit.target, e))
                else:
                    filterresult = self.errorhandler(functionname, unit.source,
                                                     unit.target, e)
            if not filterresult:
                # we test some preconditions that aren't actually a cause for
                # failure
                if functionname in self.defaultfilters:
                    failures[functionname] = filtermessage
                if functionname in self.preconditions:
                    for ignoredfunctionname in self.preconditions[functionname]:
                        ignores.append(ignoredfunctionname)
        self.results_cache = {}
        return failures
390 391
392 -class TranslationChecker(UnitChecker):
393 """A checker that passes source and target strings to the checks, not the 394 whole unit. 395 396 This provides some speedup and simplifies testing.""" 397
398 - def __init__(self, checkerconfig=None, excludefilters=None, 399 limitfilters=None, errorhandler=None):
400 super(TranslationChecker, self).__init__(checkerconfig, excludefilters, 401 limitfilters, errorhandler)
402
403 - def run_test(self, test, unit):
404 """Runs the given test on the given unit. 405 406 Note that this can raise a FilterFailure as part of normal operation.""" 407 if self.hasplural: 408 filtermessages = [] 409 filterresult = True 410 for pluralform in unit.target.strings: 411 try: 412 if not test(self.str1, unicode(pluralform)): 413 filterresult = False 414 except FilterFailure, e: 415 filterresult = False 416 filtermessages.extend(e.messages) 417 if not filterresult and filtermessages: 418 raise FilterFailure(filtermessages) 419 else: 420 return filterresult 421 else: 422 return test(self.str1, self.str2)
423
424 - def run_filters(self, unit):
425 """Do some optimisation by caching some data of the unit for the benefit 426 of run_test().""" 427 self.str1 = data.normalized_unicode(unit.source) or u"" 428 self.str2 = data.normalized_unicode(unit.target) or u"" 429 self.hasplural = unit.hasplural() 430 self.locations = unit.getlocations() 431 return super(TranslationChecker, self).run_filters(unit)
432 433
class TeeChecker:
    """A Checker that controls multiple checkers."""

    def __init__(self, checkerconfig=None, excludefilters=None,
                 limitfilters=None, checkerclasses=None, errorhandler=None,
                 languagecode=None):
        """construct a TeeChecker from the given checker classes (a single
        StandardChecker when none are given)"""
        self.limitfilters = limitfilters
        if checkerclasses is None:
            checkerclasses = [StandardChecker]
        checkers = []
        for checkerclass in checkerclasses:
            checkers.append(checkerclass(checkerconfig=checkerconfig,
                                         excludefilters=excludefilters,
                                         limitfilters=limitfilters,
                                         errorhandler=errorhandler))
        self.checkers = checkers
        if languagecode:
            for checker in self.checkers:
                checker.config.updatetargetlanguage(languagecode)
            # Let's hook up the language specific checker
            lang_checker = self.checkers[0].config.lang.checker
            if lang_checker:
                self.checkers.append(lang_checker)

        self.combinedfilters = self.getfilters(excludefilters, limitfilters)
        self.config = checkerconfig or self.checkers[0].config

    def getfilters(self, excludefilters=None, limitfilters=None):
        """returns the merged dictionary of the filters of all checkers,
        including/excluding those in the given lists

        A warning is written to stderr for every name in limitfilters that no
        checker provides."""
        if excludefilters is None:
            excludefilters = {}
        per_checker = [checker.getfilters(excludefilters, limitfilters)
                       for checker in self.checkers]
        merged = {}
        for found in per_checker:
            merged.update(found)
        self.combinedfilters = merged
        # TODO: move this somewhere more sensible (a checkfilters method?)
        if limitfilters is not None:
            import sys
            for filtername in limitfilters:
                if filtername not in self.combinedfilters:
                    print >> sys.stderr, "warning: could not find filter %s" % filtername
        return self.combinedfilters

    def run_filters(self, unit):
        """run all the tests in the checker's suites and merge the failures"""
        failures = {}
        for checker in self.checkers:
            checker_failures = checker.run_filters(unit)
            failures.update(checker_failures)
        return failures

    def setsuggestionstore(self, store):
        """Passes the store that is used for evaluating suggestions on to
        every checker."""
        for checker in self.checkers:
            checker.setsuggestionstore(store)
488 489
490 -class StandardChecker(TranslationChecker):
491 """The basic test suite for source -> target translations.""" 492
493 - def untranslated(self, str1, str2):
494 """checks whether a string has been translated at all""" 495 str2 = prefilters.removekdecomments(str2) 496 return not (len(str1.strip()) > 0 and len(str2) == 0)
497
498 - def unchanged(self, str1, str2):
499 """checks whether a translation is basically identical to the original 500 string""" 501 str1 = self.filteraccelerators(self.removevariables(str1)).strip() 502 str2 = self.filteraccelerators(self.removevariables(str2)).strip() 503 if len(str1) < 2: 504 return True 505 # If the whole string is upperase, or nothing in the string can go 506 # towards uppercase, let's assume there is nothing translatable 507 # TODO: reconsider 508 if (str1.isupper() or str1.upper() == str1) and str1 == str2: 509 return True 510 if self.config.notranslatewords: 511 words1 = str1.split() 512 if len(words1) == 1 and [word for word in words1 if word in self.config.notranslatewords]: 513 #currently equivalent to: 514 # if len(words1) == 1 and words1[0] in self.config.notranslatewords: 515 #why do we only test for one notranslate word? 516 return True 517 # we could also check for things like str1.isnumeric(), but the test 518 # above (str1.upper() == str1) makes this unnecessary 519 if str1.lower() == str2.lower(): 520 raise FilterFailure(u"Consider translating") 521 return True
522
523 - def blank(self, str1, str2):
524 """checks whether a translation only contains spaces""" 525 len1 = len(str1.strip()) 526 len2 = len(str2.strip()) 527 if len1 > 0 and len(str2) != 0 and len2 == 0: 528 raise FilterFailure(u"Translation is empty") 529 else: 530 return True
531
532 - def short(self, str1, str2):
533 """checks whether a translation is much shorter than the original 534 string""" 535 len1 = len(str1.strip()) 536 len2 = len(str2.strip()) 537 if (len1 > 0) and (0 < len2 < (len1 * 0.1)) or ((len1 > 1) and (len2 == 1)): 538 raise FilterFailure(u"The translation is much shorter than the original") 539 else: 540 return True
541
542 - def long(self, str1, str2):
543 """checks whether a translation is much longer than the original 544 string""" 545 len1 = len(str1.strip()) 546 len2 = len(str2.strip()) 547 if (len1 > 0) and (0 < len1 < (len2 * 0.1)) or ((len1 == 1) and (len2 > 1)): 548 raise FilterFailure(u"The translation is much longer than the original") 549 else: 550 return True
551
552 - def escapes(self, str1, str2):
553 """checks whether escaping is consistent between the two strings""" 554 if not helpers.countsmatch(str1, str2, (u"\\", u"\\\\")): 555 escapes1 = u", ".join([u"'%s'" % word for word in str1.split() if u"\\" in word]) 556 escapes2 = u", ".join([u"'%s'" % word for word in str2.split() if u"\\" in word]) 557 raise SeriousFilterFailure(u"Escapes in original (%s) don't match " 558 "escapes in translation (%s)" % 559 (escapes1, escapes2)) 560 else: 561 return True
562
563 - def newlines(self, str1, str2):
564 """checks whether newlines are consistent between the two strings""" 565 if not helpers.countsmatch(str1, str2, (u"\n", u"\r")): 566 raise FilterFailure(u"Different line endings") 567 else: 568 return True
569
570 - def tabs(self, str1, str2):
571 """checks whether tabs are consistent between the two strings""" 572 if not helpers.countmatch(str1, str2, "\t"): 573 raise SeriousFilterFailure(u"Different tabs") 574 else: 575 return True
576
577 - def singlequoting(self, str1, str2):
578 """checks whether singlequoting is consistent between the two strings""" 579 str1 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str1))) 580 str1 = self.config.lang.punctranslate(str1) 581 str2 = self.filterwordswithpunctuation(self.filteraccelerators(self.filtervariables(str2))) 582 if helpers.countsmatch(str1, str2, (u"'", u"''", u"\\'")): 583 return True 584 else: 585 raise FilterFailure(u"Different quotation marks")
586
587 - def doublequoting(self, str1, str2):
588 """checks whether doublequoting is consistent between the two strings""" 589 str1 = self.filteraccelerators(self.filtervariables(str1)) 590 str1 = self.filterxml(str1) 591 str1 = self.config.lang.punctranslate(str1) 592 str2 = self.filteraccelerators(self.filtervariables(str2)) 593 str2 = self.filterxml(str2) 594 if helpers.countsmatch(str1, str2, (u'"', u'""', u'\\"', u"«", 595 u"»", u"“", u"”")): 596 return True 597 else: 598 raise FilterFailure(u"Different quotation marks")
599
600 - def doublespacing(self, str1, str2):
601 """checks for bad double-spaces by comparing to original""" 602 str1 = self.filteraccelerators(str1) 603 str2 = self.filteraccelerators(str2) 604 if helpers.countmatch(str1, str2, u" "): 605 return True 606 else: 607 raise FilterFailure(u"Different use of double spaces")
608
609 - def puncspacing(self, str1, str2):
610 """checks for bad spacing after punctuation""" 611 # Convert all nbsp to space, and just check spaces. Useful intermediate 612 # step to stricter nbsp checking? 613 str1 = self.filteraccelerators(self.filtervariables(str1)) 614 str1 = self.config.lang.punctranslate(str1) 615 str1 = str1.replace(u"\u00a0", u" ") 616 if str1.find(u" ") == -1: 617 return True 618 str2 = self.filteraccelerators(self.filtervariables(str2)) 619 str2 = str2.replace(u"\u00a0", u" ") 620 for puncchar in self.config.punctuation: 621 plaincount1 = str1.count(puncchar) 622 if not plaincount1: 623 continue 624 plaincount2 = str2.count(puncchar) 625 if plaincount1 != plaincount2: 626 continue 627 spacecount1 = str1.count(puncchar + u" ") 628 spacecount2 = str2.count(puncchar + u" ") 629 if spacecount1 != spacecount2: 630 # handle extra spaces that are because of transposed punctuation 631 if abs(spacecount1 - spacecount2) == 1 and str1.endswith(puncchar) != str2.endswith(puncchar): 632 continue 633 raise FilterFailure(u"Different spacing around punctuation") 634 return True
635
636 - def printf(self, str1, str2):
637 """checks whether printf format strings match""" 638 count1 = count2 = plural = None 639 # self.hasplural only set by run_filters, not always available 640 if 'hasplural' in self.__dict__: 641 plural = self.hasplural 642 for var_num2, match2 in enumerate(printf_pat.finditer(str2)): 643 count2 = var_num2 + 1 644 str2ord = match2.group('ord') 645 str2key = match2.group('key') 646 if str2ord: 647 str1ord = None 648 for var_num1, match1 in enumerate(printf_pat.finditer(str1)): 649 count1 = var_num1 + 1 650 if match1.group('ord'): 651 if str2ord == match1.group('ord'): 652 str1ord = str2ord 653 if match2.group('fullvar') != match1.group('fullvar'): 654 raise FilterFailure(u"Different printf variable: %s" % match2.group()) 655 elif int(str2ord) == var_num1 + 1: 656 str1ord = str2ord 657 if match2.group('fullvar') != match1.group('fullvar'): 658 raise FilterFailure(u"Different printf variable: %s" % match2.group()) 659 if str1ord == None: 660 raise FilterFailure(u"Added printf variable: %s" % match2.group()) 661 elif str2key: 662 str1key = None 663 for var_num1, match1 in enumerate(printf_pat.finditer(str1)): 664 count1 = var_num1 + 1 665 if match1.group('key') and str2key == match1.group('key'): 666 str1key = match1.group('key') 667 # '%.0s' "placeholder" in plural will match anything 668 if plural and match2.group('fullvar') == '.0s': 669 continue 670 if match1.group('fullvar') != match2.group('fullvar'): 671 raise FilterFailure(u"Different printf variable: %s" % match2.group()) 672 if str1key == None: 673 raise FilterFailure(u"Added printf variable: %s" % match2.group()) 674 else: 675 for var_num1, match1 in enumerate(printf_pat.finditer(str1)): 676 count1 = var_num1 + 1 677 # '%.0s' "placeholder" in plural will match anything 678 if plural and match2.group('fullvar') == '.0s': 679 continue 680 if (var_num1 == var_num2) and (match1.group('fullvar') != match2.group('fullvar')): 681 raise FilterFailure(u"Different printf variable: %s" % match2.group()) 682 683 if 
count2 is None: 684 str1_variables = list(m.group() for m in printf_pat.finditer(str1)) 685 if str1_variables: 686 raise FilterFailure(u"Missing printf variable: %s" % u", ".join(str1_variables)) 687 688 if (count1 or count2) and (count1 != count2): 689 raise FilterFailure(u"Different number of printf variables") 690 return 1
691
692 - def accelerators(self, str1, str2):
693 """checks whether accelerators are consistent between the two strings""" 694 str1 = self.filtervariables(str1) 695 str2 = self.filtervariables(str2) 696 messages = [] 697 for accelmarker in self.config.accelmarkers: 698 counter1 = decoration.countaccelerators(accelmarker, self.config.sourcelang.validaccel) 699 counter2 = decoration.countaccelerators(accelmarker, self.config.lang.validaccel) 700 count1, countbad1 = counter1(str1) 701 count2, countbad2 = counter2(str2) 702 getaccel = decoration.getaccelerators(accelmarker, self.config.lang.validaccel) 703 accel2, bad2 = getaccel(str2) 704 if count1 == count2: 705 continue 706 if count1 == 1 and count2 == 0: 707 if countbad2 == 1: 708 messages.append(u"Accelerator '%s' appears before an invalid " 709 "accelerator character '%s'" % 710 (accelmarker, bad2[0])) 711 else: 712 messages.append(u"Missing accelerator '%s'" % 713 accelmarker) 714 elif count1 == 0: 715 messages.append(u"Added accelerator '%s'" % accelmarker) 716 elif count1 == 1 and count2 > count1: 717 messages.append(u"Accelerator '%s' is repeated in translation" % 718 accelmarker) 719 else: 720 messages.append(u"Accelerator '%s' occurs %d time(s) in original " 721 "and %d time(s) in translation" % 722 (accelmarker, count1, count2)) 723 if messages: 724 if "accelerators" in self.config.criticaltests: 725 raise SeriousFilterFailure(messages) 726 else: 727 raise FilterFailure(messages) 728 return True
729 730 # def acceleratedvariables(self, str1, str2): 731 # """checks that no variables are accelerated""" 732 # messages = [] 733 # for accelerator in self.config.accelmarkers: 734 # for variablestart, variableend in self.config.varmatches: 735 # error = accelerator + variablestart 736 # if str1.find(error) >= 0: 737 # messages.append(u"original has an accelerated variable") 738 # if str2.find(error) >= 0: 739 # messages.append(u"translation has an accelerated variable") 740 # if messages: 741 # raise FilterFailure(messages) 742 # return True 743
744 - def variables(self, str1, str2):
745 """checks whether variables of various forms are consistent between the 746 two strings""" 747 messages = [] 748 mismatch1, mismatch2 = [], [] 749 varnames1, varnames2 = [], [] 750 for startmarker, endmarker in self.config.varmatches: 751 varchecker = decoration.getvariables(startmarker, endmarker) 752 if startmarker and endmarker: 753 if isinstance(endmarker, int): 754 redecorate = lambda var: startmarker + var 755 else: 756 redecorate = lambda var: startmarker + var + endmarker 757 elif startmarker: 758 redecorate = lambda var: startmarker + var 759 else: 760 redecorate = lambda var: var 761 vars1 = varchecker(str1) 762 vars2 = varchecker(str2) 763 if vars1 != vars2: 764 # we use counts to compare so we can handle multiple variables 765 vars1, vars2 = [var for var in vars1 if vars1.count(var) > vars2.count(var)], \ 766 [var for var in vars2 if vars1.count(var) < vars2.count(var)] 767 # filter variable names we've already seen, so they aren't 768 # matched by more than one filter... 769 vars1, vars2 = [var for var in vars1 if var not in varnames1], [var for var in vars2 if var not in varnames2] 770 varnames1.extend(vars1) 771 varnames2.extend(vars2) 772 vars1 = map(redecorate, vars1) 773 vars2 = map(redecorate, vars2) 774 mismatch1.extend(vars1) 775 mismatch2.extend(vars2) 776 if mismatch1: 777 messages.append(u"Do not translate: %s" % u", ".join(mismatch1)) 778 elif mismatch2: 779 messages.append(u"Added variables: %s" % u", ".join(mismatch2)) 780 if messages and mismatch1: 781 raise SeriousFilterFailure(messages) 782 elif messages: 783 raise FilterFailure(messages) 784 return True
785
786 - def functions(self, str1, str2):
787 """checks that function names are not translated""" 788 # We can't just use helpers.funcmatch() since it doesn't ignore order 789 if not set(decoration.getfunctions(str1)).symmetric_difference(set(decoration.getfunctions(str2))): 790 return True 791 else: 792 raise FilterFailure(u"Different functions")
793
794 - def emails(self, str1, str2):
795 """checks that emails are not translated""" 796 if helpers.funcmatch(str1, str2, decoration.getemails): 797 return True 798 else: 799 raise FilterFailure(u"Different e-mails")
800
801 - def urls(self, str1, str2):
802 """checks that URLs are not translated""" 803 if helpers.funcmatch(str1, str2, decoration.geturls): 804 return True 805 else: 806 raise FilterFailure(u"Different URLs")
807
808 - def numbers(self, str1, str2):
809 """checks whether numbers of various forms are consistent between the 810 two strings""" 811 if helpers.countsmatch(str1, str2, decoration.getnumbers(str1)): 812 return True 813 else: 814 raise FilterFailure(u"Different numbers")
815
def startwhitespace(self, str1, str2):
    """Check that the whitespace at the beginning of the strings matches.

    Delegates the comparison to ``helpers.funcmatch`` using
    ``decoration.spacestart``.

    Returns True on a match, otherwise raises FilterFailure.
    """
    if not helpers.funcmatch(str1, str2, decoration.spacestart):
        raise FilterFailure(u"Different whitespace at the start")
    return True
822
def endwhitespace(self, str1, str2):
    """Check that the whitespace at the end of the strings matches.

    The source string is first passed through the target language's
    ``punctranslate`` and the result is compared with the translation
    using ``decoration.spaceend``.

    Returns True on a match, otherwise raises FilterFailure.
    """
    source = self.config.lang.punctranslate(str1)
    if not helpers.funcmatch(source, str2, decoration.spaceend):
        raise FilterFailure(u"Different whitespace at the end")
    return True
830
def startpunc(self, str1, str2):
    """Check that the punctuation at the beginning of the strings matches.

    Both strings are run through the variable, accelerator,
    words-with-punctuation and XML filters (in that order). The source is
    additionally passed through the target language's ``punctranslate``
    before the comparison.

    Returns True on a match, otherwise raises FilterFailure.
    """

    def strip_decoration(text):
        # Same filter chain as the nested call in the original, innermost
        # filter first.
        text = self.filtervariables(text)
        text = self.filteraccelerators(text)
        text = self.filterwordswithpunctuation(text)
        return self.filterxml(text)

    source = self.config.lang.punctranslate(strip_decoration(str1))
    target = strip_decoration(str2)
    if not helpers.funcmatch(source, target, decoration.puncstart,
                             self.config.punctuation):
        raise FilterFailure(u"Different punctuation at the start")
    return True
840
def endpunc(self, str1, str2):
    """Check that the punctuation at the end of the strings matches.

    Variables are filtered from both strings and trailing whitespace is
    stripped; the source is also passed through the target language's
    ``punctranslate``. The comparison uses the configured end punctuation
    with a colon added.

    Returns True on a match, otherwise raises FilterFailure.
    """
    source = self.config.lang.punctranslate(self.filtervariables(str1))
    target = self.filtervariables(str2)
    source = source.rstrip()
    target = target.rstrip()
    punctuation = self.config.endpunctuation + u":"
    if not helpers.funcmatch(source, target, decoration.puncend, punctuation):
        raise FilterFailure(u"Different punctuation at the end")
    return True
852
def purepunc(self, str1, str2):
    """Check that strings that are purely punctuation are not changed.

    If the source is pure punctuation the translation must be identical;
    otherwise the translation must not be pure punctuation either.

    Returns True when that holds, otherwise raises FilterFailure.
    """
    # this test is a subset of startandend
    if decoration.ispurepunctuation(str1):
        if str1 == str2:
            return True
    elif not decoration.ispurepunctuation(str2):
        return True
    raise FilterFailure(u"Consider not translating punctuation")
864
def brackets(self, str1, str2):
    """Check that the number of each kind of bracket matches.

    Variables are filtered out of both strings first. Each of
    ``[ ] { } ( )`` is counted separately; brackets with fewer occurrences
    in the translation are reported as missing, those with more as added.

    Returns True when all counts agree, otherwise raises FilterFailure
    with a list of messages.
    """
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    missing, extra = [], []
    for bracket in u"[]{}()":
        difference = target.count(bracket) - source.count(bracket)
        if difference < 0:
            missing.append(u"'%s'" % bracket)
        elif difference > 0:
            extra.append(u"'%s'" % bracket)
    messages = []
    if missing:
        messages.append(u"Missing %s" % u", ".join(missing))
    if extra:
        messages.append(u"Added %s" % u", ".join(extra))
    if messages:
        raise FilterFailure(messages)
    return True
886
def sentencecount(self, str1, str2):
    """Check that the number of sentences in both strings matches.

    Accelerators are filtered out first. The source is split into
    sentences by the source language object and the translation by the
    target language object.

    Returns True when the counts agree, otherwise raises FilterFailure.
    """
    source = self.filteraccelerators(str1)
    target = self.filteraccelerators(str2)
    source_count = len(self.config.sourcelang.sentences(source))
    target_count = len(self.config.lang.sentences(target))
    if source_count == target_count:
        return True
    raise FilterFailure(u"Different number of sentences: "
                        u"%d ≠ %d" % (source_count, target_count))
897
def options(self, str1, str2):
    """Check that command line options are not translated.

    A word of the (variable-filtered) source counts as an option when it
    starts with ``--``, is not just ``--`` and ends in an alphanumeric
    character. The part before the first ``=`` must appear in the
    translation; the part after it, if present, should not.

    Returns True when no problem is found, otherwise raises FilterFailure.
    """
    source = self.filtervariables(str1)
    for word in source.split():
        looks_like_option = (word != u"--" and word.startswith(u"--")
                             and word[-1].isalnum())
        if not looks_like_option:
            continue
        parts = word.split(u"=")
        option = parts[0]
        if option not in str2:
            raise FilterFailure(u"Missing or translated option '%s'" % option)
        if len(parts) > 1 and parts[1] in str2:
            raise FilterFailure(u"Consider translating parameter "
                                u"'%(param)s' of option '%(option)s'"
                                % {"param": parts[1],
                                   "option": option})
    return True
912
def startcaps(self, str1, str2):
    """Check that the message starts with the correct capitalisation.

    Accelerators are filtered out first. When both strings are longer
    than one character, the ``capsstart`` results of the source and
    target language objects must agree. Otherwise the check only fails
    when exactly one of the strings is empty.

    Returns True on success, otherwise raises FilterFailure.
    """
    source = self.filteraccelerators(str1)
    target = self.filteraccelerators(str2)
    message = u"Different capitalization at the start"
    if len(source) > 1 and len(target) > 1:
        same_start = (self.config.sourcelang.capsstart(source) ==
                      self.config.lang.capsstart(target))
        if not same_start:
            raise FilterFailure(message)
        return True
    source_empty = len(source) == 0
    target_empty = len(target) == 0
    if source_empty and target_empty:
        return True
    if source_empty or target_empty:
        raise FilterFailure(message)
    return True
927
def simplecaps(self, str1, str2):
    """Check that the capitalisation of the two strings isn't wildly
    different.

    Variables are removed from both strings, then upper case and
    alphabetic characters are counted with ``helpers.filtercount``. An
    all-caps source requires an all-caps translation; otherwise a series
    of heuristics based on the counts and string lengths is applied.

    Returns True on success, otherwise raises FilterFailure.
    """
    source = self.removevariables(str1)
    target = self.removevariables(str2)
    # TODO: review this. The 'I' is specific to English, so it probably
    # serves no purpose to get sourcelang.sentenceend
    pronoun_re = u"[^%s]( I )" % self.config.sourcelang.sentenceend
    source = re.sub(pronoun_re, u" i ", source)
    source_caps = helpers.filtercount(source, unicode.isupper)
    target_caps = helpers.filtercount(target, unicode.isupper)
    source_alpha = helpers.filtercount(source, unicode.isalpha)
    target_alpha = helpers.filtercount(target, unicode.isalpha)
    message = u"Different capitalization"
    # Capture the all caps case
    if source_caps == source_alpha:
        if target_caps == target_alpha:
            return True
        raise FilterFailure(message)
    # some heuristic tests to try and see that the style of capitals is
    # vaguely the same
    if source_caps in (0, 1):
        similar = target_caps == source_caps
    elif source_caps < len(source) / 10:
        similar = target_caps <= len(target) / 8
    elif len(source) < 10:
        similar = abs(source_caps - target_caps) < 3
    elif source_caps > len(source) * 6 / 10:
        similar = target_caps > len(target) * 6 / 10
    else:
        similar = (abs(source_caps - target_caps) <
                   (len(source) + len(target)) / 6)
    if not similar:
        raise FilterFailure(message)
    return True
961
def acronyms(self, str1, str2):
    """Check that acronyms that appear in the source are unchanged.

    A word of the source (after filtering variables and accelerators)
    counts as an acronym when it is all upper case and longer than one
    character. Variable names found in the original source and the
    configured must-translate words are exempt. Every remaining acronym
    has to occur somewhere in the filtered translation.

    Returns True on success, otherwise raises FilterFailure.
    """
    allowed = []
    for startmatch, endmatch in self.config.varmatches:
        allowed.extend(decoration.getvariables(startmatch, endmatch)(str1))
    allowed.extend(self.config.musttranslatewords.keys())
    source = self.filteraccelerators(self.filtervariables(str1))
    source_words = self.config.lang.word_iter(source)
    target = self.filteraccelerators(self.filtervariables(str2))
    #TODO: strip XML? - should provide better error messsages
    # see mail/chrome/messanger/smime.properties.po
    #TODO: consider limiting the word length for recognising acronyms to
    #something like 5/6 characters
    changed = [word for word in source_words
               if word.isupper() and len(word) > 1
               and word not in allowed and word not in target]
    if changed:
        raise FilterFailure(u"Consider not translating acronyms: %s" %
                            u", ".join(changed))
    return True
984
def doublewords(self, str1, str2):
    """Check for repeated words in the translation.

    XML, variables and accelerators are stripped from the translation,
    full stops are removed and the text is lower-cased before it is split
    on whitespace. Two identical consecutive words are reported unless the
    word is listed in the target language's ``validdoublewords``.

    Returns True when no repetition is found, otherwise raises
    FilterFailure naming the repeated word.
    """
    # The original re-joined the newline-split text with newlines, which
    # reproduced str2 unchanged; split() below already treats newlines as
    # word separators, so that step is dropped.
    cleaned = self.filteraccelerators(
        self.removevariables(self.filterxml(str2)))
    words = cleaned.replace(u".", u"").lower().split()
    lastword = ""
    for word in words:
        if word == lastword and word not in self.config.lang.validdoublewords:
            raise FilterFailure(u"The word '%s' is repeated" % word)
        lastword = word
    return True
995
def notranslatewords(self, str1, str2):
    """Check that words configured as untranslatable appear in the
    translation too.

    Nothing is checked when no such words are configured. Otherwise
    variables are filtered from both strings, every configured punctuation
    character is replaced by a space, accelerators are filtered and the
    strings are split into words.

    Returns True on success, otherwise raises FilterFailure listing the
    words that are missing from the translation.
    """
    if not self.config.notranslatewords:
        return True
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    #The above is full of strange quotes and things in utf-8 encoding.
    #single apostrophe perhaps problematic in words like "doesn't"
    for separator in self.config.punctuation:
        source = source.replace(separator, u" ")
        target = target.replace(separator, u" ")
    source_words = self.filteraccelerators(source).split()
    target_words = self.filteraccelerators(target).split()
    translated = [word for word in source_words
                  if word in self.config.notranslatewords
                  and word not in target_words]
    if translated:
        raise FilterFailure(u"Do not translate: %s" %
                            (u", ".join(translated)))
    return True
1015
def musttranslatewords(self, str1, str2):
    """Check that words configured as definitely translatable don't
    appear in the translation.

    Nothing is checked when no such words are configured. Otherwise
    variables are removed from both strings, every configured punctuation
    character is replaced by a space, accelerators are filtered and the
    strings are split into words.

    Returns True on success, otherwise raises FilterFailure listing the
    words that were left untranslated.
    """
    if not self.config.musttranslatewords:
        return True
    source = self.removevariables(str1)
    target = self.removevariables(str2)
    # The above is full of strange quotes and things in utf-8 encoding.
    # single apostrophe perhaps problematic in words like "doesn't"
    for separator in self.config.punctuation:
        source = source.replace(separator, u" ")
        target = target.replace(separator, u" ")
    source_words = self.filteraccelerators(source).split()
    target_words = self.filteraccelerators(target).split()
    untranslated = [word for word in source_words
                    if word in self.config.musttranslatewords
                    and word in target_words]
    if untranslated:
        raise FilterFailure(u"Please translate: %s" % (u", ".join(untranslated)))
    return True
1034
def validchars(self, str1, str2):
    """Check that only characters specified as valid appear in the
    translation.

    Nothing is checked when ``validcharsmap`` is not configured. Both
    strings are translated through that map; characters left over in the
    translation are reported unless they are also left over in the source.

    Returns True on success, otherwise raises FilterFailure.
    """
    charmap = self.config.validcharsmap
    if not charmap:
        return True
    source_leftover = str1.translate(charmap)
    target_leftover = str2.translate(charmap)
    offending = [u"'%s' (\\u%04x)" % (char, ord(char))
                 for char in target_leftover
                 if char not in source_leftover]
    if offending:
        raise FilterFailure(u"Invalid characters: %s" % (u", ".join(offending)))
    return True
1046
def filepaths(self, str1, str2):
    """Check that file paths have not been translated.

    Every whitespace-separated word of the accelerator-filtered source
    that starts with ``/`` is treated as a path and compared between the
    unfiltered strings with ``helpers.countsmatch``.

    Returns True on success, otherwise raises FilterFailure.
    """
    for word in self.filteraccelerators(str1).split():
        if not word.startswith(u"/"):
            continue
        if not helpers.countsmatch(str1, str2, (word,)):
            raise FilterFailure(u"Different file paths")
    return True
1054
def xmltags(self, str1, str2):
    """Check that XML/HTML tags have not been translated.

    If the source contains no tags, the translation must not contain any
    either. If the whole source is a single tag without ``=`` the check
    passes. Otherwise the tag properties of both strings (ignoring
    ``ignoretags``, filtered through ``canchangetags``) must be equal.

    Returns True on success, otherwise raises FilterFailure.
    """
    source_tags = tag_re.findall(str1)
    if not len(source_tags) > 0:
        # No tags in str1, let's just check that none were added in str2.
        # This might be useful for fuzzy strings wrongly unfuzzied.
        if len(tag_re.findall(str2)) > 0:
            raise FilterFailure(u"Added XML tags")
        return True
    first_tag = source_tags[0]
    if len(first_tag) == len(str1) and u"=" not in first_tag:
        return True
    target_tags = tag_re.findall(str2)
    source_properties = tagproperties(source_tags, self.config.ignoretags)
    target_properties = tagproperties(target_tags, self.config.ignoretags)
    source_filtered = [intuplelist(prop, self.config.canchangetags)
                       for prop in source_properties]
    target_filtered = [intuplelist(prop, self.config.canchangetags)
                       for prop in target_properties]
    # TODO: consider the consequences of different ordering of
    # attributes/tags
    if source_filtered != target_filtered:
        raise FilterFailure(u"Different XML tags")
    return True
1082
def kdecomments(self, str1, str2):
    """Check that no KDE style comments appear in the translation.

    A KDE style comment is recognised by the marker ``_:`` at the very
    start of the translation or directly after a newline.

    Returns True when no such marker is present, False otherwise.
    """
    has_comment = str2.startswith(u"_:") or u"\n_:" in str2
    return not has_comment
1087
def compendiumconflicts(self, str1, str2):
    """Check for Gettext compendium conflicts (#-#-#-#-#).

    Returns True when the conflict marker does not occur in the
    translation, False otherwise.
    """
    return u"#-#-#-#-#" not in str2
1091
def simpleplurals(self, str1, str2):
    """Check for English style plural(s) for you to review.

    Counts occurrences of the literal text ``(s)`` in both strings. When
    the target language has a single plural form, any ``(s)`` in the
    translation is a failure; otherwise the counts in source and
    translation must be equal.

    Returns True on success, otherwise raises FilterFailure.
    """

    def numberofpatterns(string, patterns):
        """Return the total number of matches of all patterns in string."""
        return sum(len(re.findall(pattern, string)) for pattern in patterns)

    # Raw strings: "\(" is not a valid string escape, so the backslashes
    # must reach the regex engine untouched.
    sourcepatterns = [r"\(s\)"]
    targetpatterns = [r"\(s\)"]
    sourcecount = numberofpatterns(str1, sourcepatterns)
    targetcount = numberofpatterns(str2, targetpatterns)
    if self.config.lang.nplurals == 1:
        if targetcount:
            raise FilterFailure(u"Plural(s) were kept in translation")
        return True
    if sourcecount != targetcount:
        raise FilterFailure(u"The original uses plural(s)")
    return True
1114
def spellcheck(self, str1, str2):
    """Check for words that don't pass a spell check.

    The check is skipped (returns True) when no target language is
    configured or the spelling module is not available. Variables and
    accelerators are filtered from both strings. Words flagged in the
    source (checked as "en") are ignored in the translation, as are
    configured no-translate words and words that appear in their own
    suggestion list.

    Returns True on success, otherwise raises FilterFailure with one
    message per suspicious word.
    """
    if not (self.config.targetlanguage and spelling.available):
        return True
    # TODO: filterxml?
    source = self.filteraccelerators_by_list(self.filtervariables(str1),
                                             self.config.sourcelang.validaccel)
    target = self.filteraccelerators_by_list(self.filtervariables(str2),
                                             self.config.lang.validaccel)
    source_flagged = [word for word, index, suggestions
                      in spelling.check(source, lang="en")]
    messages = []
    for word, index, suggestions in spelling.check(
            target, lang=self.config.targetlanguage):
        # the last test is a hack to ignore hyphenisation rules
        if (word in self.config.notranslatewords
                or word in source_flagged
                or word in suggestions):
            continue
        messages.append(u"Check the spelling of %s (could be %s)" %
                        (word, u" / ".join(suggestions[:5])))
    if messages:
        raise FilterFailure(messages)
    return True
1143
def credits(self, str1, str2):
    """Check for messages containing translation credits instead of
    normal translations.

    Returns True unless the source string is one of the configured
    ``credit_sources``, in which case FilterFailure is raised.
    """
    if str1 not in self.config.credit_sources:
        return True
    raise FilterFailure(u"Don't translate. Just credit the translators.")
# If the precondition filter is run and fails then the other tests listed are
# ignored. Each key is the name of a precondition test; its value is the
# tuple of test names that are skipped when that precondition fails.
preconditions = {
    "untranslated": ("simplecaps", "variables", "startcaps",
                     "accelerators", "brackets", "endpunc",
                     "acronyms", "xmltags", "startpunc",
                     "endwhitespace", "startwhitespace",
                     "escapes", "doublequoting", "singlequoting",
                     "filepaths", "purepunc", "doublespacing",
                     "sentencecount", "numbers", "isfuzzy",
                     "isreview", "notranslatewords", "musttranslatewords",
                     "emails", "simpleplurals", "urls", "printf",
                     "tabs", "newlines", "functions", "options",
                     "blank", "nplurals", "gconf"),
    "blank": ("simplecaps", "variables", "startcaps",
              "accelerators", "brackets", "endpunc",
              "acronyms", "xmltags", "startpunc",
              "endwhitespace", "startwhitespace",
              "escapes", "doublequoting", "singlequoting",
              "filepaths", "purepunc", "doublespacing",
              "sentencecount", "numbers", "isfuzzy",
              "isreview", "notranslatewords", "musttranslatewords",
              "emails", "simpleplurals", "urls", "printf",
              "tabs", "newlines", "functions", "options",
              "gconf"),
    "credits": ("simplecaps", "variables", "startcaps",
                "accelerators", "brackets", "endpunc",
                "acronyms", "xmltags", "startpunc",
                "escapes", "doublequoting", "singlequoting",
                "filepaths", "doublespacing",
                "sentencecount", "numbers",
                "emails", "simpleplurals", "urls", "printf",
                "tabs", "newlines", "functions", "options"),
    "purepunc": ("startcaps", "options"),
    # The "startcaps" precondition below is disabled on purpose.
    # It has been causing problems since Python 2.6, as startcaps is now
    # seen as an important one to always execute and could now be done
    # before it is blocked by a failing "untranslated" or "blank" test.
    # This is probably happening due to slightly different implementation
    # of the internal dict handling since Python 2.6. We should never have
    # relied on this ordering anyway.
    #"startcaps": ("simplecaps",),
    "endwhitespace": ("endpunc",),
    "startwhitespace": ("startpunc",),
    "unchanged": ("doublewords",),
    "compendiumconflicts": ("accelerators", "brackets", "escapes",
                            "numbers", "startpunc", "long", "variables",
                            "startcaps", "sentencecount", "simplecaps",
                            "doublespacing", "endpunc", "xmltags",
                            "startwhitespace", "endwhitespace",
                            "singlequoting", "doublequoting",
                            "filepaths", "purepunc", "doublewords", "printf"),
}

# code to actually run the tests (use unittest?)

# Settings that OpenOfficeChecker merges into its checker configuration.
# varmatches holds (start marker, end marker) pairs; the end marker is a
# string, None or an integer.
openofficeconfig = CheckerConfig(
    accelmarkers=["~"],
    varmatches=[("&", ";"), ("%", "%"), ("%", None), ("%", 0), ("$(", ")"),
                ("$", "$"), ("${", "}"), ("#", "#"), ("#", 1), ("#", 0),
                ("($", ")"), ("$[", "]"), ("[", "]"), ("$", None)],
    ignoretags=[("alt", "xml-lang", None), ("ahelp", "visibility", "visible"),
                ("img", "width", None), ("img", "height", None)],
    canchangetags=[("link", "name", None)],
    )


class OpenOfficeChecker(StandardChecker):
    """StandardChecker whose configuration includes ``openofficeconfig``."""

    def __init__(self, **kwargs):
        """Merge ``openofficeconfig`` into the checker configuration.

        A ``checkerconfig`` passed by the caller is updated in place; when
        it is missing or None a new CheckerConfig is created and stored in
        the keyword arguments before they go to StandardChecker.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(openofficeconfig)
        StandardChecker.__init__(self, **kwargs)

# Settings that MozillaChecker merges into its checker configuration.
mozillaconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[("&", ";"), ("%", "%"), ("%", 1), ("$", "$"), ("$", None),
                ("#", 1), ("${", "}"), ("$(^", ")")],
    criticaltests=["accelerators"],
    )


class MozillaChecker(StandardChecker):
    """StandardChecker whose configuration includes ``mozillaconfig``."""

    def __init__(self, **kwargs):
        """Merge ``mozillaconfig`` into the checker configuration.

        A ``checkerconfig`` passed by the caller is updated in place; when
        it is missing or None a new CheckerConfig is created and stored in
        the keyword arguments before they go to StandardChecker.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(mozillaconfig)
        StandardChecker.__init__(self, **kwargs)

    def credits(self, str1, str2):
        """Check for messages containing translation credits instead of
        normal translations.

        Raises FilterFailure when any entry of ``self.locations`` is one
        of the known credit locations; returns True otherwise.
        """
        credit_locations = ('MOZ_LANGPACK_CONTRIBUTORS', 'credit.translation')
        if any(location in credit_locations for location in self.locations):
            raise FilterFailure(u"Don't translate. Just credit the translators.")
        return True

# Settings that DrupalChecker merges into its checker configuration.
drupalconfig = CheckerConfig(
    varmatches=[("%", None), ("@", None), ("!", None)],
    )


class DrupalChecker(StandardChecker):
    """StandardChecker whose configuration includes ``drupalconfig``."""

    def __init__(self, **kwargs):
        """Merge ``drupalconfig`` into the checker configuration.

        A ``checkerconfig`` passed by the caller is updated in place; when
        it is missing or None a new CheckerConfig is created and stored in
        the keyword arguments before they go to StandardChecker.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(drupalconfig)
        StandardChecker.__init__(self, **kwargs)

# Settings that GnomeChecker merges into its checker configuration.
gnomeconfig = CheckerConfig(
    accelmarkers=["_"],
    varmatches=[("%", 1), ("$(", ")")],
    credit_sources=[u"translator-credits"],
    )


class GnomeChecker(StandardChecker):
    """StandardChecker whose configuration includes ``gnomeconfig``."""

    def __init__(self, **kwargs):
        """Merge ``gnomeconfig`` into the checker configuration.

        A ``checkerconfig`` passed by the caller is updated in place; when
        it is missing or None a new CheckerConfig is created and stored in
        the keyword arguments before they go to StandardChecker.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(gnomeconfig)
        StandardChecker.__init__(self, **kwargs)

    def gconf(self, str1, str2):
        """Check whether any gconf config settings have been translated.

        Only applies when one of ``self.locations`` contains
        ``schemas.in``. Each match of ``gconf_attribute_re`` in the source
        must, with its first and last character removed, occur in the
        translation.

        Returns True on success, otherwise raises FilterFailure.
        """
        for location in self.locations:
            if 'schemas.in' not in location:
                continue
            attributes = gconf_attribute_re.findall(str1)
            translated = [attribute for attribute in attributes
                          if attribute[1:-1] not in str2]
            if translated:
                raise FilterFailure(u"Do not translate gconf attributes: %s" %
                                    (u", ".join(translated)))
        return True

# Settings that KdeChecker merges into its checker configuration.
kdeconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[("%", 1)],
    credit_sources=[u"Your names", u"Your emails", u"ROLES_OF_TRANSLATORS"],
    )


class KdeChecker(StandardChecker):
    """StandardChecker whose configuration includes ``kdeconfig``."""

    def __init__(self, **kwargs):
        """Merge ``kdeconfig`` into the checker configuration.

        A ``checkerconfig`` passed by the caller is updated in place; when
        it is missing or None a new CheckerConfig is created and stored in
        the keyword arguments before they go to StandardChecker.
        """
        # TODO allow setup of KDE plural and translator comments so that they do
        # not create false postives
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(kdeconfig)
        StandardChecker.__init__(self, **kwargs)

# Settings that CCLicenseChecker merges into its checker configuration.
cclicenseconfig = CheckerConfig(varmatches=[("@", "@")])


class CCLicenseChecker(StandardChecker):
    """StandardChecker whose configuration includes ``cclicenseconfig``."""

    def __init__(self, **kwargs):
        """Merge ``cclicenseconfig`` into the checker configuration.

        A ``checkerconfig`` passed by the caller is updated in place; when
        it is missing or None a new CheckerConfig is created and stored in
        the keyword arguments before they go to StandardChecker.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(cclicenseconfig)
        StandardChecker.__init__(self, **kwargs)

# Maps a project name to the checker class used for it.
# Note that "wx" is mapped to the same class as "kde".
projectcheckers = {
    "openoffice": OpenOfficeChecker,
    "mozilla": MozillaChecker,
    "kde": KdeChecker,
    "wx": KdeChecker,
    "gnome": GnomeChecker,
    "creativecommons": CCLicenseChecker,
    "drupal": DrupalChecker,
    }


class StandardUnitChecker(UnitChecker):
    """The standard checks for common checks on translation units."""

    def isfuzzy(self, unit):
        """Check if the unit has been marked fuzzy.

        Returns True when the unit is not fuzzy.
        """
        marked_fuzzy = unit.isfuzzy()
        return not marked_fuzzy

    def isreview(self, unit):
        """Check if the unit has been marked review.

        Returns True when the unit is not marked for review.
        """
        marked_review = unit.isreview()
        return not marked_review

    def nplurals(self, unit):
        """Check for the correct number of noun forms for plural
        translations.

        Units without plurals always pass, as do all units when the target
        language has no positive ``nplurals`` value. Otherwise the number
        of non-empty target strings must equal ``nplurals``.
        """
        if not unit.hasplural():
            return True
        # if we don't have a valid nplurals value, don't run the test
        expected = self.config.lang.nplurals
        if expected > 0:
            translated_forms = [form for form in unit.target.strings if form]
            return len(translated_forms) == expected
        return True

    def hassuggestion(self, unit):
        """Check if there is at least one suggested translation for this
        unit.

        Suggestions come from ``self.suggestion_store`` when one is set,
        otherwise from the unit's ``getalttrans()`` if it has that method.
        As a side effect ``self.suggestion_store`` is set to None when the
        attribute did not exist yet.

        Returns True when there are no suggestions.
        """
        self.suggestion_store = getattr(self, 'suggestion_store', None)
        if self.suggestion_store:
            suggestions = self.suggestion_store.findunits(unit.source)
        elif getattr(unit, "getalttrans", None):
            # TODO: we probably want to filter them somehow
            suggestions = unit.getalttrans()
        else:
            suggestions = []
        return not suggestions
1373 1374
def runtests(str1, str2, ignorelist=()):
    """Run the StandardChecker filters on a pair of strings.

    Both strings are normalised to unicode and wrapped in a
    ``base.TranslationUnit`` (str1 as source, str2 as target). The filters
    named in ignorelist are excluded. Each failure is printed.

    Returns the failures reported by ``run_filters``, keyed by test name.
    """
    from translate.storage import base
    source = data.normalized_unicode(str1)
    target = data.normalized_unicode(str2)
    unit = base.TranslationUnit(source)
    unit.target = target
    checker = StandardChecker(excludefilters=ignorelist)
    failures = checker.run_filters(unit)
    for test in failures:
        report = "failure: %s: %s\n %r\n %r" % (test, failures[test],
                                                source, target)
        print(report)
    return failures
1388 1389
def batchruntests(pairs):
    """Run the tests on a batch of string pairs and print a summary.

    pairs is a sequence of (source, target) string tuples. Each pair is
    passed to runtests(), which prints the individual failures. Finally
    the number of pairs that produced no failures is printed.
    """
    passed, numpairs = 0, len(pairs)
    for str1, str2 in pairs:
        # runtests() returns the failures, so an empty (falsy) result
        # means the pair passed. The original counted truthy results and
        # so reported the failing pairs as passed.
        if not runtests(str1, str2):
            passed += 1
    # print("") emits a blank line under both Python 2 and Python 3.
    print("")
    print("total: %d/%d pairs passed" % (passed, numpairs))


# When run as a script, push a small set of (source, target) sample pairs
# through batchruntests() as a manual smoke test of the standard checks.
if __name__ == '__main__':
    testset = [(r"simple", r"somple"),
               (r"\this equals \that", r"does \this equal \that?"),
               (r"this \'equals\' that", r"this 'equals' that"),
               (r" start and end! they must match.",
                r"start and end! they must match."),
               (r"check for matching %variables marked like %this",
                r"%this %variable is marked"),
               (r"check for mismatching %variables marked like %this",
                r"%that %variable is marked"),
               (r"check for mismatching %variables% too",
                r"how many %variable% are marked"),
               (r"%% %%", r"%%"),
               (r"Row: %1, Column: %2", r"Mothalo: %1, Kholomo: %2"),
               (r"simple lowercase", r"it is all lowercase"),
               (r"simple lowercase", r"It Is All Lowercase"),
               (r"Simple First Letter Capitals", r"First Letters"),
               (r"SIMPLE CAPITALS", r"First Letters"),
               (r"SIMPLE CAPITALS", r"ALL CAPITALS"),
               (r"forgot to translate", r" "),
               ]
    batchruntests(testset)