Package pyparsing :: Module pyparsing
[frames] | no frames]

Source Code for Module pyparsing.pyparsing

   1  # module pyparsing.py 
   2  # 
   3  # Copyright (c) 2003-2009  Paul T. McGuire 
   4  # 
   5  # Permission is hereby granted, free of charge, to any person obtaining 
   6  # a copy of this software and associated documentation files (the 
   7  # "Software"), to deal in the Software without restriction, including 
   8  # without limitation the rights to use, copy, modify, merge, publish, 
   9  # distribute, sublicense, and/or sell copies of the Software, and to 
  10  # permit persons to whom the Software is furnished to do so, subject to 
  11  # the following conditions: 
  12  # 
  13  # The above copyright notice and this permission notice shall be 
  14  # included in all copies or substantial portions of the Software. 
  15  # 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  23  # 
  24  #from __future__ import generators 
  25   
  26  __doc__ = \ 
  27  """ 
  28  pyparsing module - Classes and methods to define and execute parsing grammars 
  29   
  30  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  31  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  32  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  33  provides a library of classes that you use to construct the grammar directly in Python. 
  34   
  35  Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!"):: 
  36   
  37      from pyparsing import Word, alphas 
  38   
  39      # define grammar of a greeting 
  40      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  41   
  42      hello = "Hello, World!" 
  43      print hello, "->", greet.parseString( hello ) 
  44   
  45  The program outputs the following:: 
  46   
  47      Hello, World! -> ['Hello', ',', 'World', '!'] 
  48   
  49  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  50  class names, and the use of '+', '|' and '^' operators. 
  51   
  52  The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an 
  53  object with named attributes. 
  54   
  55  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  56   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  57   - quoted strings 
  58   - embedded comments 
  59  """ 
  60   
  61  __version__ = "1.5.3" 
  62  __versionTime__ = "14 May 2010 22:21" 
  63  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  64   
  65  import string 
  66  from weakref import ref as wkref 
  67  import copy 
  68  import sys 
  69  import warnings 
  70  import re 
  71  import sre_constants 
  72  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 
  73   
  74  __all__ = [ 
  75  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  76  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  77  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  78  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
  79  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
  80  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', 
  81  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
  82  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
  83  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
  84  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', 
  85  'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', 
  86  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
  87  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
  88  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',  
  89  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
  90  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
  91  'indentedBlock', 'originalTextFor', 
  92  ] 
  93   
  94  """ 
  95  Detect if we are running version 3.X and make appropriate changes 
  96  Robert A. Clark 
  97  """ 
  98  _PY3K = sys.version_info[0] > 2 
  99  if _PY3K: 
 100      _MAX_INT = sys.maxsize 
 101      basestring = str 
 102      unichr = chr 
 103      _ustr = str 
 104      alphas = string.ascii_lowercase + string.ascii_uppercase 
 105  else: 
 106      _MAX_INT = sys.maxint 
 107      range = xrange 
 108      set = lambda s : dict( [(c,0) for c in s] ) 
 109      alphas = string.lowercase + string.uppercase 
 110       
111 - def _ustr(obj):
112 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 113 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 114 then < returns the unicode object | encodes it with the default encoding | ... >. 115 """ 116 if isinstance(obj,unicode): 117 return obj 118 119 try: 120 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 121 # it won't break any existing code. 122 return str(obj) 123 124 except UnicodeEncodeError: 125 # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) 126 # state that "The return value must be a string object". However, does a 127 # unicode object (being a subclass of basestring) count as a "string 128 # object"? 129 # If so, then return a unicode object: 130 return unicode(obj)
131 # Else encode it... but how? There are many choices... :) 132 # Replace unprintables with escape codes? 133 #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') 134 # Replace unprintables with question marks? 135 #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') 136 # ... 137 138
def _xml_escape(data):
    """Return data with &, >, <, " and ' replaced by their XML entities."""
    entity_names = ("amp", "gt", "lt", "quot", "apos")
    # '&' comes first so the ampersands introduced by the later
    # replacements are not escaped a second time
    for raw_char, entity in zip('&><"\'', entity_names):
        data = data.replace(raw_char, '&' + entity + ';')
    return data
148
class _Constants(object):
    """Empty class; instances serve as plain attribute containers."""
# Character-set constants exported for building Word() expressions.
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)
# every printable character that is not whitespace
printables = "".join( [ ch for ch in string.printable
                        if ch not in string.whitespace ] )
class ParseBaseException(Exception):
    """Base exception class for all parsing runtime exceptions.

    Instances carry:
     - loc - character offset at which the failure was detected
     - msg - error message
     - pstr - the string being parsed ("" when only a message was given)
     - parserElement - the element passed as elem (None by default)
    The attributes lineno, col/column and line are computed on demand
    from loc and pstr.
    """
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        """When msg is omitted, the first argument is taken to be the
        message and the parsed string is recorded as ""."""
        self.loc = loc
        if msg is None:
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text
           ("column" is accepted as an alias of "col"); any other name
           raises AttributeError.
        """
        if( aname == "lineno" ):
            return lineno( self.loc, self.pstr )
        elif( aname in ("col", "column") ):
            return col( self.loc, self.pstr )
        elif( aname == "line" ):
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
           Returns the marked line with surrounding whitespace stripped;
           an empty markerString leaves the line unmarked.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join( [line_str[:line_column],
                                 markerString, line_str[line_column:]])
        return line_str.strip()

    def __dir__(self):
        # The method is spelled markInputline; the list previously
        # advertised a non-existent "markInputLine".
        return "loc msg pstr parserElement lineno col line " \
               "markInputline __str__ __repr__".split()
205
class ParseException(ParseBaseException):
    """Exception thrown when a parse expression does not match.

    Adds nothing to ParseBaseException; the by-name attributes are the same:
     - lineno - returns the line number of the exception text
     - col - returns the column number of the exception text
     - line - returns the line containing the exception text
    """
214
class ParseFatalException(ParseBaseException):
    """User-throwable exception, raised when inconsistent parse content
       is found; stops all parsing immediately."""
219
class ParseSyntaxException(ParseFatalException):
    """Same as ParseFatalException, but thrown internally when an
       ErrorStop indicates that parsing is to stop immediately because
       an unbacktrackable syntax error has been found."""
    def __init__(self, pe):
        # rebuild this exception from the fields of the one being promoted
        base_init = super(ParseSyntaxException, self).__init__
        base_init( pe.pstr, pe.loc, pe.msg, pe.parserElement )
227 228 #~ class ReparseException(ParseBaseException): 229 #~ """Experimental class - parse actions can raise this exception to cause 230 #~ pyparsing to reparse the input string: 231 #~ - with a modified input string, and/or 232 #~ - with a modified start location 233 #~ Set the values of the ReparseException in the constructor, and raise the 234 #~ exception in a parse action to cause pyparsing to use the new string/location. 235 #~ Setting the values as None causes no change to be made. 236 #~ """ 237 #~ def __init_( self, newstring, restartLoc ): 238 #~ self.newParseText = newstring 239 #~ self.reparseLoc = restartLoc 240
class RecursiveGrammarException(Exception):
    """Exception thrown by validate() if the grammar could be improperly recursive.

    The sequence of parse elements that was passed in is kept in the
    parseElementTrace attribute.
    """
    def __init__( self, parseElementList ):
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Wrap the trace in a 1-tuple: a bare tuple on the right of % is
        # unpacked as the argument list, which raises TypeError (or drops
        # the tuple brackets) when the trace itself is a tuple.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
248
class _ParseResultsWithOffset(object):
    """Pairs a value with an integer offset, stored as the 2-tuple tup.

    Indexing is delegated to the tuple, so an instance can be unpacked
    as (value, offset).
    """
    def __init__(self,p1,p2):
        self.tup = (p1,p2)

    def __getitem__(self,i):
        pair = self.tup
        return pair[i]

    def __repr__(self):
        return repr(self.tup)

    def setOffset(self,i):
        """Replace the offset, keeping the stored value."""
        current_value = self.tup[0]
        self.tup = (current_value,i)
258
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by attribute (results.<resultsName>)

    Internally the tokens live in a list, and named results live in a dict
    mapping each name to a list of (value, position) pairs.
    """
    #~ __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )
    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # Wrapping an existing ParseResults returns that same object;
        # __doinit tells __init__ whether the instance is brand new.
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True, isinstance=isinstance ):
        """Build results from toklist (a list is copied, anything else is
        wrapped in a one-element list) and, when name is given, register
        the tokens under that results name.  modal=False marks the name
        as accumulating all matches rather than only the last one.
        """
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if not toklist in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        """Integer or slice: index the token list.  Any other key: look up
        a named result (the last match, or all matches wrapped in a
        ParseResults when the name accumulates)."""
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v, isinstance=isinstance ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        # nested results keep a weak reference back to their container
        if isinstance(sub,ParseResults):
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( self.__toklist[::-1] )

    def keys( self ):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def pop( self, index=-1 ):
        """Removes and returns item at specified index (default=last).
           Will work with either numeric indices or dict-key indices."""
        ret = self[index]
        del self[index]
        return ret

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given defaultValue or None if no
           defaultValue is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts insStr into the token list at index and shifts the
           recorded positions of named results that lie after index."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,self[k]) for k in self.__tokdict]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        """Attribute-style access to named results; an unknown name
        yields "" rather than raising AttributeError."""
        if name in self.__tokdict:
            if name not in self.__accumNames:
                return self.__tokdict[name][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[name] ])
        else:
            return ""

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            offset = len(self.__toklist)
            def addoffset(a):
                # A negative position maps to the start of the appended
                # tokens.  The former "(a<0 and offset) or (a+offset)"
                # returned the negative value unchanged when offset was 0.
                if a < 0:
                    return offset
                return a + offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __radd__(self, other):
        # Only "0 + results" is handled here; any other left operand
        # falls through and yields None.
        if isinstance(other,int) and other == 0:
            return self.copy()

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        out = "["
        sep = ""
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                out += sep + _ustr(i)
            else:
                out += sep + repr(i)
            sep = ", "
        out += "]"
        return out

    def _asStringList( self, sep='' ):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens.
           Nested ParseResults become nested lists; other tokens are
           included unchanged."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a ParseResults object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
                                                        for v in vlist ] )
        # NOTE(review): literal reproduced as found; runs of spaces may have
        # been collapsed when this file was extracted - confirm the width.
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which sub (by identity) is stored
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # dict views are not subscriptable on Python 3
               list(self.__tokdict.values())[0][0][1] in (0,-1)):
            return list(self.__tokdict.keys())[0]
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a ParseResults.
           Accepts an optional indent argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append( indent+_ustr(self.asList()) )
        keys = self.items()
        keys.sort()
        for k,v in keys:
            if out:
                out.append('\n')
            # NOTE(review): the ' ' literal is reproduced as found; its
            # width may have been collapsed by extraction - confirm.
            out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
            if isinstance(v,ParseResults):
                if v.keys():
                    out.append( v.dump(indent,depth+1) )
                else:
                    out.append(_ustr(v))
            else:
                out.append(_ustr(v))
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        # the weak parent reference is resolved to the parent itself (or None)
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        self.__tokdict, \
        par, \
        inAccumNames, \
        self.__name = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __dir__(self):
        # keys() is a view on Python 3 and cannot be added to a list directly
        return dir(super(ParseResults,self)) + list(self.keys())
609
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
       The first column is number 1.  A location that sits on a newline
       character is reported as column 1.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See ParserElement.parseString for
       more information on parsing strings containing <TAB>s, and suggested
       methods to maintain a consistent view of the parsed string, the parse
       location, and line and column positions within the parsed string.
    """
    on_newline = loc < len(strg) and strg[loc] == '\n'
    if on_newline:
        return 1
    # distance from the last newline before loc (rfind gives -1 if none)
    return loc - strg.rfind("\n", 0, loc)
621
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
       The first line is number 1.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See ParserElement.parseString for
       more information on parsing strings containing <TAB>s, and suggested
       methods to maintain a consistent view of the parsed string, the parse
       location, and line and column positions within the parsed string.
    """
    newlines_before_loc = strg.count("\n",0,loc)
    return 1 + newlines_before_loc
633
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       The returned text does not include the newline characters.
    """
    # rfind gives -1 when no newline precedes loc, so start becomes 0
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        return strg[start:]
    return strg[start:end]
643
def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug action run before a match attempt: prints the
       expression, the location, and its (line,column)."""
    pieces = [ "Match ", _ustr(expr), " at loc ", _ustr(loc),
               "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ) ]
    print ("".join(pieces))
646
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug action run after a successful match: prints the
       expression and the matched tokens as a list."""
    pieces = [ "Matched ", _ustr(expr), " -> ", str(toks.asList()) ]
    print ("".join(pieces))
649
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug action run when a match attempt raises: prints the
       exception."""
    message = "Exception raised:" + _ustr(exc)
    print (message)
652
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.
       Accepts any positional arguments and returns None."""
    return None
656
657 -class ParserElement(object):
658 """Abstract base level parser element class.""" 659 DEFAULT_WHITE_CHARS = " \n\t\r" 660 verbose_stacktrace = False 661
662 - def setDefaultWhitespaceChars( chars ):
663 """Overrides the default whitespace chars 664 """ 665 ParserElement.DEFAULT_WHITE_CHARS = chars
666 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) 667
668 - def __init__( self, savelist=False ):
669 self.parseAction = list() 670 self.failAction = None 671 #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall 672 self.strRepr = None 673 self.resultsName = None 674 self.saveAsList = savelist 675 self.skipWhitespace = True 676 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 677 self.copyDefaultWhiteChars = True 678 self.mayReturnEmpty = False # used when checking for left-recursion 679 self.keepTabs = False 680 self.ignoreExprs = list() 681 self.debug = False 682 self.streamlined = False 683 self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index 684 self.errmsg = "" 685 self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) 686 self.debugActions = ( None, None, None ) #custom debug actions 687 self.re = None 688 self.callPreparse = True # used to avoid redundant calls to preParse 689 self.callDuringTry = False
690
691 - def copy( self ):
692 """Make a copy of this ParserElement. Useful for defining different parse actions 693 for the same parsing pattern, using copies of the original parse element.""" 694 cpy = copy.copy( self ) 695 cpy.parseAction = self.parseAction[:] 696 cpy.ignoreExprs = self.ignoreExprs[:] 697 if self.copyDefaultWhiteChars: 698 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 699 return cpy
700
701 - def setName( self, name ):
702 """Define name for this expression, for use in debugging.""" 703 self.name = name 704 self.errmsg = "Expected " + self.name 705 if hasattr(self,"exception"): 706 self.exception.msg = self.errmsg 707 return self
708
709 - def setResultsName( self, name, listAllMatches=False ):
710 """Define name for referencing matching tokens as a nested attribute 711 of the returned parse results. 712 NOTE: this returns a *copy* of the original ParserElement object; 713 this is so that the client can define a basic element, such as an 714 integer, and reference it in multiple places with different names. 715 """ 716 newself = self.copy() 717 newself.resultsName = name 718 newself.modalResults = not listAllMatches 719 return newself
720
721 - def setBreak(self,breakFlag = True):
722 """Method to invoke the Python pdb debugger when this element is 723 about to be parsed. Set breakFlag to True to enable, False to 724 disable. 725 """ 726 if breakFlag: 727 _parseMethod = self._parse 728 def breaker(instring, loc, doActions=True, callPreParse=True): 729 import pdb 730 pdb.set_trace() 731 return _parseMethod( instring, loc, doActions, callPreParse )
732 breaker._originalParseMethod = _parseMethod 733 self._parse = breaker 734 else: 735 if hasattr(self._parse,"_originalParseMethod"): 736 self._parse = self._parse._originalParseMethod 737 return self
738
    def _normalizeParseActionArgs( f ):
        """Internal method used to decorate parse actions that take fewer than 3 arguments,
           so that all parse actions can be called as f(s,l,t).

           The argument count is read from the callable's code object.  A
           callable that accepts *args, or exactly 3 arguments, is returned
           unchanged; otherwise a wrapper taking (s,l,t) is returned, with
           f's __name__, __doc__ and __dict__ copied onto it where possible.
        """
        # co_flags bit that is set when a code object accepts *args
        STAR_ARGS = 4

        try:
            # first attempt: f is a function, a method, or a class (in which
            # case its __init__ is inspected and the class restored afterwards)
            restore = None
            if isinstance(f,type):
                restore = f
                f = f.__init__
            if not _PY3K:
                codeObj = f.func_code
            else:
                # NOTE(review): functions expose __code__, not code; on
                # Python 3 this line presumably always raises AttributeError
                # and control moves to the handler below - confirm intent.
                codeObj = f.code
            if codeObj.co_flags & STAR_ARGS:
                return f
            numargs = codeObj.co_argcount
            # a bound method's first argument is supplied implicitly
            if not _PY3K:
                if hasattr(f,"im_self"):
                    numargs -= 1
            else:
                if hasattr(f,"__self__"):
                    numargs -= 1
            if restore:
                f = restore
        except AttributeError:
            try:
                if not _PY3K:
                    call_im_func_code = f.__call__.im_func.func_code
                else:
                    call_im_func_code = f.__code__

                # not a function, must be a callable object, get info from the
                # im_func binding of its bound __call__ method
                if call_im_func_code.co_flags & STAR_ARGS:
                    return f
                numargs = call_im_func_code.co_argcount
                if not _PY3K:
                    if hasattr(f.__call__,"im_self"):
                        numargs -= 1
                else:
                    if hasattr(f.__call__,"__self__"):
                        # NOTE(review): subtracts 0, unlike every sibling
                        # branch, which subtracts 1 - confirm intent.
                        numargs -= 0
            except AttributeError:
                if not _PY3K:
                    call_func_code = f.__call__.func_code
                else:
                    call_func_code = f.__call__.__code__
                # not a bound method, get info directly from __call__ method
                if call_func_code.co_flags & STAR_ARGS:
                    return f
                numargs = call_func_code.co_argcount
                if not _PY3K:
                    if hasattr(f.__call__,"im_self"):
                        numargs -= 1
                else:
                    if hasattr(f.__call__,"__self__"):
                        numargs -= 1


        #~ print ("adding function %s with %d args" % (f.func_name,numargs))
        if numargs == 3:
            return f
        else:
            if numargs > 3:
                def tmp(s,l,t):
                    return f(f.__call__.__self__, s,l,t)
            # NOTE(review): this is "if", not "elif", so for numargs > 3 the
            # final else branch below rebinds tmp to the zero-argument
            # wrapper - confirm whether that is intended.
            if numargs == 2:
                def tmp(s,l,t):
                    return f(l,t)
            elif numargs == 1:
                def tmp(s,l,t):
                    return f(t)
            else: #~ numargs == 0:
                def tmp(s,l,t):
                    return f()
            # make the wrapper look like the wrapped callable, best effort
            try:
                tmp.__name__ = f.__name__
            except (AttributeError,TypeError):
                # no need for special handling if attribute doesnt exist
                pass
            try:
                tmp.__doc__ = f.__doc__
            except (AttributeError,TypeError):
                # no need for special handling if attribute doesnt exist
                pass
            try:
                tmp.__dict__.update(f.__dict__)
            except (AttributeError,TypeError):
                # no need for special handling if attribute doesnt exist
                pass
            return tmp
    _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
833 - def setParseAction( self, *fns, **kwargs ):
834 """Define action to perform when successfully matching parse element definition. 835 Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks), 836 fn(loc,toks), fn(toks), or just fn(), where: 837 - s = the original string being parsed (see note below) 838 - loc = the location of the matching substring 839 - toks = a list of the matched tokens, packaged as a ParseResults object 840 If the functions in fns modify the tokens, they can return them as the return 841 value from fn, and the modified list of tokens will replace the original. 842 Otherwise, fn does not need to return any value. 843 844 Note: the default parsing behavior is to expand tabs in the input string 845 before starting the parsing process. See L{I{parseString}<parseString>} for more information 846 on parsing strings containing <TAB>s, and suggested methods to maintain a 847 consistent view of the parsed string, the parse location, and line and column 848 positions within the parsed string. 849 """ 850 self.parseAction = list(map(self._normalizeParseActionArgs, list(fns))) 851 self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 852 return self
853
854 - def addParseAction( self, *fns, **kwargs ):
855 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.""" 856 self.parseAction += list(map(self._normalizeParseActionArgs, list(fns))) 857 self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 858 return self
859
860 - def setFailAction( self, fn ):
861 """Define action to perform if parsing fails at this expression. 862 Fail acton fn is a callable function that takes the arguments 863 fn(s,loc,expr,err) where: 864 - s = string being parsed 865 - loc = location where expression match was attempted and failed 866 - expr = the parse expression that failed 867 - err = the exception thrown 868 The function returns no value. It may throw ParseFatalException 869 if it is desired to stop parsing immediately.""" 870 self.failAction = fn 871 return self
872
873 - def _skipIgnorables( self, instring, loc ):
874 exprsFound = True 875 while exprsFound: 876 exprsFound = False 877 for e in self.ignoreExprs: 878 try: 879 while 1: 880 loc,dummy = e._parse( instring, loc ) 881 exprsFound = True 882 except ParseException: 883 pass 884 return loc
885
886 - def preParse( self, instring, loc ):
887 if self.ignoreExprs: 888 loc = self._skipIgnorables( instring, loc ) 889 890 if self.skipWhitespace: 891 wt = self.whiteChars 892 instrlen = len(instring) 893 while loc < instrlen and instring[loc] in wt: 894 loc += 1 895 896 return loc
897
def parseImpl(self, instring, loc, doActions=True):
    """Base matching hook: consumes nothing and produces no tokens.

    Returns the unchanged location paired with a new empty list."""
    no_tokens = []
    return loc, no_tokens
900
def postParse(self, instring, loc, tokenlist):
    """Post-processing hook for matched tokens; the base version hands
    tokenlist back unchanged."""
    result = tokenlist
    return result
903 904 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Core, uncached parse step for this expression.

    Runs preParse (when callPreParse and self.callPreparse are both set),
    then parseImpl and postParse, wraps the tokens in a ParseResults and,
    if parse actions are defined and doActions or self.callDuringTry is set,
    runs them in order.  Debug actions and the fail action are invoked along
    the way when debugging or a fail action is configured.

    Returns a (loc, ParseResults) tuple.  An IndexError escaping parseImpl is
    converted to a ParseException positioned at the end of the input; other
    ParseBaseExceptions propagate after the debug/fail callbacks have run."""
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        # slow path: callbacks need to observe both the attempt and any failure
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        # location reported to callbacks is the one before preParse skipping
        tokensStart = loc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                # running off the end of the input is reported as a parse failure
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException:
            #~ print ("Exception raised:", err)
            err = None
            if self.debugActions[2]:
                err = sys.exc_info()[1]
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                if err is None:
                    err = sys.exc_info()[1]
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # fast path: no debug or fail callbacks to notify
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = loc
        # only pay for the IndexError guard when this element may index past the end
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a parse action returning None leaves the current results in place
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    err = sys.exc_info()[1]
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
981
def tryParse(self, instring, loc):
    """Attempt a match at loc with parse actions disabled (doActions=False).

    Returns only the end location of the match.  A ParseFatalException is
    converted into an ordinary ParseException at loc."""
    try:
        outcome = self._parse(instring, loc, doActions=False)
    except ParseFatalException:
        raise ParseException(instring, loc, self.errmsg, self)
    return outcome[0]
987 988 # this method gets repeatedly called during backtracking with the same arguments - 989 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """Memoizing wrapper around _parseNoCache.

    Results are keyed on (self, instring, loc, callPreParse, doActions) in
    ParserElement._exprArgCache.  Both successful results and
    ParseBaseExceptions are cached; a cached exception is re-raised.

    Returns a (loc, ParseResults) tuple.  On a cache hit the ParseResults is
    a copy, so a caller that mutates the returned results in place cannot
    corrupt the cached entry (a miss already stores a copy)."""
    lookup = (self, instring, loc, callPreParse, doActions)
    cache = ParserElement._exprArgCache
    if lookup in cache:
        value = cache[lookup]
        if isinstance(value, Exception):
            raise value
        # never hand out the cached ParseResults object itself
        return (value[0], value[1].copy())
    else:
        try:
            value = self._parseNoCache(instring, loc, doActions, callPreParse)
            cache[lookup] = (value[0], value[1].copy())
            return value
        except ParseBaseException:
            pe = sys.exc_info()[1]
            cache[lookup] = pe
            raise
# default parse entry point is the uncached implementation;
# enablePackrat() rebinds ParserElement._parse to _parseCache
_parse = _parseNoCache

# argument cache for optimizing repeated calls when backtracking through recursive expressions;
# a single dict shared at class level, keyed and filled by _parseCache and emptied by resetCache()
_exprArgCache = {}
def resetCache():
    """Empty the shared parse-result cache (ParserElement._exprArgCache)."""
    memo = ParserElement._exprArgCache
    memo.clear()
resetCache = staticmethod(resetCache)

_packratEnabled = False
def enablePackrat():
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.

    With packrat parsing on, a repeated parse attempt at the same string
    location (common in complex grammars) returns a cached value instead of
    re-executing the parsing/validating code.  Both valid results and
    parsing exceptions are memoized.

    This speedup may break existing programs that use parse actions that
    have side-effects.  For this reason, packrat parsing is disabled when
    you first import pyparsing.  To activate the packrat feature, your
    program must call ParserElement.enablePackrat().  If your program uses
    psyco to "compile as you go", you must call enablePackrat before calling
    psyco.full().  If you do not do this, Python will crash.  For best
    results, call enablePackrat() immediately after importing pyparsing.

    Calling this more than once has no further effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
enablePackrat = staticmethod(enablePackrat)
def parseString(self, instring, parseAll=False):
    """Execute the parse expression with the given string.
    This is the main interface to the client code, once the complete
    expression has been built.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set parseAll to True (equivalent to ending
    the grammar with StringEnd()).

    Note: parseString implicitly calls expandtabs() on the input string,
    in order to report proper column numbers in parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the loc argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:
     - calling parseWithTabs on your grammar before calling parseString
       (see L{I{parseWithTabs}<parseWithTabs>})
     - define your parse action using the full (s,loc,toks) signature, and
       reference the input string using the parse action's s argument
     - explicitly expand the tabs in your input string before calling
       parseString

    Returns the tokens produced by the match.  The packrat cache is reset
    before parsing starts.
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        loc, tokens = self._parse(instring, 0)
        if parseAll:
            StringEnd()._parse(instring, loc)
        return tokens
    except ParseBaseException:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            exc = sys.exc_info()[1]
            raise exc
        raise
1082
def scanString( self, instring, maxMatches=_MAX_INT ):
    """Scan the input string for expression matches.  Each match will return the
    matching tokens, start location, and end location.  May be called with optional
    maxMatches argument, to clip scanning after 'n' matches are found.

    This is a generator; each item yielded is a (tokens, start, end) tuple.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on parsing
    strings with embedded tabs."""
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods once, outside the scanning loop
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                # preParse is done here, so the parse call is told not to repeat it
                preloc = preparseFn( instring, loc )
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match at this position: retry one character further on
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    loc = nextLoc
                else:
                    # match did not advance past loc: step forward so the loop terminates
                    loc = preloc+1
    except ParseBaseException:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            exc = sys.exc_info()[1]
            raise exc
1125
def transformString(self, instring):
    """Extension to scanString, to modify matching text with modified tokens that may
    be returned from a parse action.  To use transformString, define a grammar and
    attach a parse action to it that modifies the returned token list.
    Invoking transformString() on a target string will then scan for matches,
    and replace the matched text patterns according to the logic in the parse
    action.  transformString() returns the resulting transformed string.

    Side effect: sets self.keepTabs to True."""
    pieces = []
    prevEnd = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString(instring):
            # untouched text between the previous match and this one
            pieces.append(instring[prevEnd:start])
            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend(toks.asList())
                elif isinstance(toks, list):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)
            prevEnd = end
        pieces.append(instring[prevEnd:])
        return "".join([_ustr(piece) for piece in pieces])
    except ParseBaseException:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            exc = sys.exc_info()[1]
            raise exc
        raise
1158
def searchString(self, instring, maxMatches=_MAX_INT):
    """Another extension to scanString, simplifying the access to the tokens found
    to match the given parse expression.  May be called with optional
    maxMatches argument, to clip searching after 'n' matches are found.

    Returns a ParseResults built from the tokens of every match, discarding
    the start and end locations reported by scanString.
    """
    try:
        found = []
        for toks, _start, _end in self.scanString(instring, maxMatches):
            found.append(toks)
        return ParseResults(found)
    except ParseBaseException:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            exc = sys.exc_info()[1]
            raise exc
        raise
1173
def __add__(self, other):
    """Implementation of + operator - returns And.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result."""
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return And([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1183
def __radd__(self, other):
    """Implementation of + operator when left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result."""
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1193
def __sub__(self, other):
    """Implementation of - operator, returns And with error stop.

    An And._ErrorStop() marker is placed between the two operands.  A string
    operand is wrapped in a Literal.  Any other non-ParserElement operand
    produces a SyntaxWarning and a None result."""
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return And([self, And._ErrorStop(), other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1203
def __rsub__(self, other):
    """Implementation of - operator when left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result."""
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1213
def __mul__(self, other):
    """Implementation of * operator - repeat this expression.

    other may be:
     - an int n: exactly n repetitions
     - a tuple (n, None) or (n,): n or more repetitions
     - a tuple (None, m): zero up to m repetitions
     - a tuple (n, m): at least n and at most m repetitions

    Raises TypeError for any other operand type, and ValueError for a
    negative count, for m < n, or for a total of zero repetitions."""
    if isinstance(other, int):
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        # pad short tuples with None so both slots can be examined
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            # open-ended repetition
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self * other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # from here on optElements counts only the optional extras
            optElements -= minElements
        else:
            # apply the format arguments, rather than passing them as extra exception args
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]), type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if optElements:
        def makeOptionalList(n):
            # nested Optionals: each extra repetition is only tried if the previous one matched
            if n > 1:
                return Optional(self + makeOptionalList(n - 1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self] * minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self] * minElements)
    return ret
def __rmul__(self, other):
    """Implementation of * operator when the ParserElement is the right operand;
    delegates to __mul__ with the same argument."""
    repeated = self.__mul__(other)
    return repeated
1265
def __or__(self, other):
    """Implementation of | operator - returns MatchFirst.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result."""
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1275
def __ror__(self, other):
    """Implementation of | operator when left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result."""
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1285
def __xor__(self, other):
    """Implementation of ^ operator - returns Or.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result."""
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return Or([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1295
def __rxor__(self, other):
    """Implementation of ^ operator when left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result."""
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1305
def __and__(self, other):
    """Implementation of & operator - returns Each.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result."""
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return Each([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1315
def __rand__(self, other):
    """Implementation of & operator when left operand is not a ParserElement.

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand produces a SyntaxWarning and a None result."""
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1325
def __invert__(self):
    """Implementation of ~ operator - wraps this expression in a NotAny."""
    negated = NotAny(self)
    return negated
1329
def __call__(self, name):
    """Shortcut for setResultsName, with listAllMatches=default::
         userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
       could be written as::
         userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")

       Returns whatever setResultsName(name) returns.
       """
    named = self.setResultsName(name)
    return named
1337
def suppress(self):
    """Suppresses the output of this ParserElement; useful to keep punctuation from
       cluttering up returned output.

       Returns a new Suppress wrapping this expression.
    """
    wrapped = Suppress(self)
    return wrapped
1343
def leaveWhitespace(self):
    """Disables the skipping of whitespace before matching the characters in the
       ParserElement's defined pattern.  This is normally only used internally by
       the pyparsing module, but may be needed in some whitespace-sensitive grammars.

       Returns self for chaining.
    """
    setattr(self, "skipWhitespace", False)
    return self
1351
def setWhitespaceChars(self, chars):
    """Overrides the default whitespace chars.

    Turns whitespace skipping on, stores chars as the characters to skip and
    clears copyDefaultWhiteChars.  Returns self for chaining.
    """
    self.skipWhitespace, self.whiteChars, self.copyDefaultWhiteChars = True, chars, False
    return self
1359
def parseWithTabs(self):
    """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
       Must be called before parseString when the input grammar contains elements that
       match <TAB> characters.

       Returns self for chaining."""
    setattr(self, "keepTabs", True)
    return self
1366
def ignore(self, other):
    """Define expression to be ignored (e.g., comments) while doing pattern
       matching; may be called repeatedly, to define multiple comment or other
       ignorable patterns.

       A copy of other is stored, wrapped in Suppress unless it already is one.
       A Suppress that is already in the ignore list is not added again.
       Returns self for chaining.
    """
    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other.copy())
    return self
1378
def setDebugActions(self, startAction, successAction, exceptionAction):
    """Enable display of debugging messages while doing pattern matching.

    Stores the (start, success, exception) callbacks and turns debugging on.
    Any callback that is false (e.g. None) is replaced by the module's
    default debug action for that slot.  Returns self for chaining."""
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
1386
def setDebug(self, flag=True):
    """Enable display of debugging messages while doing pattern matching.
       Set flag to True to enable, False to disable.

       Enabling installs the default start/success/exception debug actions
       through setDebugActions.  Returns self for chaining."""
    if not flag:
        self.debug = False
        return self
    self.setDebugActions(_defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction)
    return self
1395
def __str__(self):
    """Return this expression's name attribute as its string form."""
    label = self.name
    return label
1398
def __repr__(self):
    """Return the same text as the string form, obtained via _ustr(self)."""
    text = _ustr(self)
    return text
1401
def streamline(self):
    """Mark this expression as streamlined and discard its cached string
    representation (strRepr).  Returns self for chaining."""
    self.streamlined, self.strRepr = True, None
    return self
1406
def checkRecursion(self, parseElementList):
    """Recursion-check hook; the base implementation does nothing."""
    return None
1409
def validate(self, validateTrace=None):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    validateTrace is accepted for interface compatibility but is not used by
    this base implementation; its default is None instead of a shared mutable
    list.  The check itself is delegated to checkRecursion, started with a
    fresh empty list.  Returns None."""
    self.checkRecursion([])
1413
def parseFile(self, file_or_filename, parseAll=False):
    """Execute the parse expression on the given file or filename.
       If a filename is specified (instead of a file object),
       the entire file is opened, read, and closed before parsing.

       An object with a read() method is read directly and is not closed.
       Anything else is treated as a path, opened in binary mode, and is
       closed even if reading fails.  The contents are handed to
       parseString together with parseAll, and its result is returned.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # no read() available: assume a filename
        f = open(file_or_filename, "rb")
        try:
            file_contents = f.read()
        finally:
            # release the handle even when read() raises
            f.close()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
1431
def getException(self):
    """Build a ParseException for this expression with an empty source string,
    location 0 and this expression's errmsg."""
    blank = ParseException("", 0, self.errmsg, self)
    return blank
1434
1435 - def __getattr__(self,aname):
1436 if aname == "myException": 1437 self.myException = ret = self.getException(); 1438 return ret; 1439 else: 1440 raise AttributeError("no such attribute " + aname)
1441
def __eq__(self, other):
    """Equality test with three cases.

     - another ParserElement: equal when it is the same object or both
       instance __dict__s compare equal
     - a string: equal when this expression parses the whole string
       (parseString with parseAll=True succeeds)
     - anything else: falls back to comparing the super() proxy with other"""
    if isinstance(other, ParserElement):
        return self is other or self.__dict__ == other.__dict__
    if isinstance(other, basestring):
        try:
            self.parseString(_ustr(other), parseAll=True)
        except ParseBaseException:
            return False
        return True
    return super(ParserElement, self) == other
1453
def __ne__(self, other):
    """Inequality: the logical negation of self == other."""
    same = (self == other)
    return not same
1456
def __hash__(self):
    """Hash on object identity (the hash of id(self))."""
    identity = id(self)
    return hash(identity)
1459
def __req__(self, other):
    """Reflected equality helper: returns the result of self == other."""
    outcome = (self == other)
    return outcome
1462
def __rne__(self, other):
    """Reflected inequality helper: the logical negation of self == other."""
    same = (self == other)
    return not same
1465 1466
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""

    def __init__(self):
        # tokens are initialized with savelist turned off
        super(Token, self).__init__(savelist=False)

    def setName(self, name):
        """Set the name via the base class, then rebuild errmsg as
        "Expected <name>".  Returns what the base setName returned."""
        renamed = super(Token, self).setName(name)
        self.errmsg = "Expected " + self.name
        return renamed
1478 1479
class Empty(Token):
    """An empty token, will always match."""

    def __init__(self):
        super(Empty, self).__init__()
        self.name = "Empty"
        # may match without producing tokens, and is flagged as not raising IndexError
        self.mayReturnEmpty, self.mayIndexError = True, False
1487 1488
class NoMatch(Token):
    """A token that will never match."""

    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.errmsg = "Unmatchable token"

    def parseImpl(self, instring, loc, doActions=True):
        """Always fail: raise this element's exception object, updated with
        the current location and input string."""
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1504 1505
class Literal(Token):
    """Token to exactly match a specified string."""

    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string: warn and turn this instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    def parseImpl(self, instring, loc, doActions=True):
        """Match self.match at loc.

        Returns (end location, matched string) on success; otherwise raises
        this element's exception object, updated with loc and instring."""
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc + self.matchLen, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
_L = Literal
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
       matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
        super(Keyword,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # NOTE(review): only a warning is issued here; firstMatchChar stays unset for an empty keyword
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless matching compares upper-cased text against upper-cased references
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc, requiring that the characters directly
        before and after it (when present) are not in identChars.

        Returns (end location, keyword string) on success; otherwise raises
        this element's exception object, updated with loc and instring."""
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        """Return a copy of this Keyword that keeps its identifier characters.

        The copy gets its own set holding the same characters as this
        instance (custom and upper-cased ones included), rather than being
        reset to the class-wide default string."""
        c = super(Keyword,self).copy()
        c.identChars = set(self.identChars)
        return c

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1599
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """

    def __init__(self, matchString):
        # the base Literal stores the upper-cased form for comparison
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the upper-cased input slice at loc with the stored match.

        Returns (end location, defining literal) on success; otherwise raises
        this element's exception object, updated with loc and instring."""
        end = loc + self.matchLen
        candidate = instring[loc:end]
        if candidate.upper() == self.match:
            return end, self.returnString
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1621
class CaselessKeyword(Keyword):
    """Keyword variant that is always constructed with caseless=True."""

    def __init__(self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS):
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)

    def parseImpl(self, instring, loc, doActions=True):
        """Match the keyword at loc ignoring case, requiring that the
        character directly after it (when present) is not in identChars.

        Returns (end location, keyword string) on success; otherwise raises
        this element's exception object, updated with loc and instring."""
        end = loc + self.matchLen
        if instring[loc:end].upper() == self.caselessmatch:
            if loc >= len(instring) - self.matchLen or instring[end].upper() not in self.identChars:
                return end, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1635
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
        super(Word,self).__init__()
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = set(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = set(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # fast path: with default length limits and no space in either character set,
        # the word is matched with a compiled regular expression
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # a single initial character can be emitted as an escaped literal;
                # this is only valid when initChars (not bodyChars) has length 1
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # fall back to the character-by-character matcher
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at loc.

        Returns (end location, matched text) on success; otherwise raises
        this element's exception object, updated with loc and instring."""
        # NOTE(review): self.re is only assigned in __init__ on the regex fast path;
        # presumably the base class provides a None default - confirm
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc,result.group()

        if not(instring[ loc ] in self.initChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit max, a word that would continue past the limit is rejected
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            # keyword mode: no body character may directly precede or follow the word
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the assigned name if the base __str__ yields one; otherwise
        build (and cache in strRepr) a "W:(...)" summary of the character sets."""
        try:
            return super(Word,self).__str__()
        except Exception:
            # the base __str__ failed; build a description instead
            pass


        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first four characters
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1762 1763
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags.

        pattern may also be an already compiled RE object, which is used
        directly; in that case flags is only recorded, not applied.
        Raises ValueError for any other pattern type, and re-raises the
        compile error (after a SyntaxWarning) for an invalid pattern string."""
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if len(pattern) == 0:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except sre_constants.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # record the pattern's source text, not the str() of the compiled object
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        # the expression may return empty exactly when the pattern matches the empty string
        self.mayReturnEmpty = not not (self.re.match(""))

    def parseImpl( self, instring, loc, doActions=True ):
        """Apply the compiled pattern at loc.

        Returns (end location, ParseResults) where the results hold the whole
        match plus one named entry per named group; otherwise raises this
        element's exception object, updated with loc and instring."""
        result = self.re.match(instring,loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        d = result.groupdict()
        ret = ParseResults(result.group())
        if d:
            for k in d:
                ret[k] = d[k]
        return loc,ret

    def __str__( self ):
        """Return the assigned name if the base __str__ yields one; otherwise
        build (and cache in strRepr) a "Re:(...)" description of the pattern."""
        try:
            return super(Regex,self).__str__()
        except Exception:
            # the base __str__ failed; build a description instead
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1830 1831
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=False)
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

           Raises SyntaxError (after a SyntaxWarning) if quoteChar or endQuoteChar
           is empty once surrounding whitespace has been stripped.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # characters excluded from the "ordinary body character" class: the first
        # end-quote character, the escape character (if any) and, for single-line
        # strings, the line terminators
        escRange = ''
        if escChar is not None:
            escRange = _escapeRegexRangeChars(escChar) or ''
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            lineBreaks = ''
        else:
            self.flags = 0
            lineBreaks = r'\n\r'
        self.pattern = r'%s(?:[^%s%s%s]' % \
            ( re.escape(self.quoteChar),
              _escapeRegexRangeChars(self.endQuoteChar[0]),
              lineBreaks,
              escRange )

        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of a multi-character end quote inside the body,
            # as long as the following character breaks the end-quote sequence
            self.pattern += (
                '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            # used by parseImpl to drop the escape character from matched text
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at loc.

           Returns (end of match, text).  When unquoteResults is set, the
           delimiters are stripped, escChar-escaped characters are replaced by
           the bare character, and escQuote sequences by endQuoteChar.
           On failure, raises self.myException after setting its loc and pstr.
        """
        # cheap first-character test before running the regular expression
        result = None
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring,loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped characters (raw template: "\g" is not a valid
                # string escape and must reach re.sub unchanged)
                if self.escChar:
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the inherited string form, or a cached description of the delimiters if that fails."""
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            # the inherited __str__ could not produce a value; build our own
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
1946 1947
class CharsNotIn(Token):
    """Token matching a run of characters that are all *outside* a given set.
       Constructed from a string of disallowed characters plus optional min,
       max and exact lengths.  min defaults to 1 and may not be smaller;
       max and exact default to 0, which means "no such restriction".
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """Store the excluded characters and derive the length limits.

           Raises ValueError when min < 1.
        """
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min

        # max <= 0 means unbounded
        self.maxLen = _MAX_INT
        if max > 0:
            self.maxLen = max

        # an exact length overrides both limits
        if exact > 0:
            self.minLen = self.maxLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters not in notChars, starting at loc.

           Returns (end location, matched text).  Raises self.myException
           (with loc and pstr updated) if the first character is excluded or
           fewer than minLen characters were consumed.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        begin = loc
        loc += 1
        # never scan past maxLen characters or the end of the input
        limit = min( begin+self.maxLen, len(instring) )
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[begin:loc]

    def __str__( self ):
        """Return the inherited string form, or a cached "!W:(...)" summary if that fails."""
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is None:
            # show at most four of the excluded characters
            if len(self.notChars) <= 4:
                self.strRepr = "!W:(%s)" % self.notChars
            else:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]

        return self.strRepr
2018
class White(Token):
    """Token that matches whitespace explicitly, for grammars in which some
       whitespace is significant.  Constructed with the string of whitespace
       characters to match (default " \\t\\r\\n") and optional min, max and
       exact length arguments."""
    # display names used to build the token's name from its characters
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """Record the characters to match and the length limits.

           The matched characters are removed from this element's own set of
           skippable whitespace.  Every character of ws must be a key of
           whiteStrs, since the name is built by looking each one up.
        """
        super(White,self).__init__()
        self.matchWhite = ws
        # stop skipping the characters we are supposed to match
        stillSkipped = [c for c in self.whiteChars if c not in self.matchWhite]
        self.setWhitespaceChars( "".join(stillSkipped) )
        labels = [White.whiteStrs[c] for c in self.matchWhite]
        self.name = "".join(labels)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min

        # max <= 0 means unbounded
        self.maxLen = _MAX_INT
        if max > 0:
            self.maxLen = max

        # an exact length overrides both limits
        if exact > 0:
            self.minLen = self.maxLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of matchWhite characters starting at loc.

           Returns (end location, matched text).  Raises self.myException
           (with loc and pstr updated) if the first character does not match
           or fewer than minLen characters were consumed.
        """
        if instring[ loc ] not in self.matchWhite:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        begin = loc
        loc += 1
        # never scan past maxLen characters or the end of the input
        limit = min( begin + self.maxLen, len(instring) )
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - begin < self.minLen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[begin:loc]
2075 2076
class _PositionToken(Token):
    """Base class of the position tokens (GoToColumn, LineStart, LineEnd,
       StringStart, StringEnd, WordStart, WordEnd)."""
    def __init__( self ):
        """Name the token after its concrete class and flag it as able to return empty."""
        super(_PositionToken,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = type(self).__name__
2083
class GoToColumn(_PositionToken):
    """Token that advances to a given column of the input text; useful when scraping tabular reports."""
    def __init__( self, colno ):
        """Remember the target column colno."""
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorable expressions, then whitespace, until the target column or a non-space character is reached."""
        if col(loc,instring) == self.col:
            return loc

        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the text between loc and the target column.

           Raises ParseException if loc is already past the target column.
        """
        currentCol = col( loc, instring )
        if currentCol > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        # jump forward by the number of columns still missing
        target = loc + self.col - currentCol
        return target, instring[ loc: target ]
2106
class LineStart(_PositionToken):
    """Matches when the current position is at the beginning of a line of the parse string"""
    def __init__( self ):
        """Treat every default whitespace character except newline as skippable."""
        super(LineStart,self).__init__()
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected start of line"

    def preParse( self, instring, loc ):
        """Return loc advanced by one if the inherited pre-parse stops on a newline, else loc unchanged."""
        afterSkip = super(LineStart,self).preParse(instring,loc)
        if instring[afterSkip] != "\n":
            return loc
        return loc + 1

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list at a line start.

           A line start is loc 0, the position reached by pre-parsing from 0,
           or any position directly after a newline.  Otherwise
           self.myException is raised with loc and pstr updated.
        """
        if ( loc != 0 and
             loc != self.preParse( instring, 0 ) and
             instring[loc-1] != "\n" ):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
2131
class LineEnd(_PositionToken):
    """Matches when the current position is at the end of a line of the parse string"""
    def __init__( self ):
        """Treat every default whitespace character except newline as skippable."""
        super(LineEnd,self).__init__()
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a newline at loc, or the end of the input.

           Returns (loc+1, "\\n") on a newline and (loc+1, []) when loc equals
           the input length; anything else raises self.myException with loc
           and pstr updated.
        """
        instrlen = len(instring)
        if loc < instrlen and instring[loc] == "\n":
            return loc+1, "\n"
        if loc == instrlen:
            return loc+1, []

        # either a non-newline character or a position beyond the input
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2157
class StringStart(_PositionToken):
    """Matches when the current position is at the beginning of the parse string"""
    def __init__( self ):
        """Set the error message used when the match fails."""
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list at loc 0, or at the position that
           pre-parsing from 0 reaches (only whitespace and ignorables before it);
           otherwise raise self.myException with loc and pstr updated."""
        if loc != 0 and loc != self.preParse( instring, 0 ):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
2175
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        """Set the error message used when the match fails."""
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list at or beyond the end of instring.

           Returns (loc+1, []) when loc is exactly the input length and
           (loc, []) when loc is already past it.  While input remains,
           self.myException is raised with loc and pstr updated.
        """
        instrlen = len(instring)
        if loc < instrlen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        if loc == instrlen:
            return loc+1, []
        # loc > instrlen: the only remaining case, so no further error branch is needed
        return loc, []
2199
class WordStart(_PositionToken):
    """Matches when the current position is at the beginning of a Word and
       is not preceded by any character from a given set of wordChars
       (default=printables).  To emulate the \\b behavior of regular
       expressions, use WordStart(alphanums).  WordStart also matches at
       the beginning of the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        """Store wordChars as a set."""
        super(WordStart,self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with an empty token list at loc 0, or where the previous
           character is not a word character and the current one is;
           otherwise raise self.myException with loc and pstr updated."""
        if loc != 0 and (instring[loc-1] in self.wordChars or
                         instring[loc] not in self.wordChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
2221
class WordEnd(_PositionToken):
    """Matches when the current position is at the end of a Word and
       is not followed by any character from a given set of wordChars
       (default=printables).  To emulate the \\b behavior of regular
       expressions, use WordEnd(alphanums).  WordEnd also matches at
       the end of the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        """Store wordChars as a set and disable whitespace skipping."""
        super(WordEnd,self).__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with an empty token list when loc is at or past the end of
           the input, or where the current character is not a word character
           and the previous one is; otherwise raise self.myException with loc
           and pstr updated."""
        instrlen = len(instring)
        insideInput = instrlen>0 and loc<instrlen
        if insideInput and (instring[loc] in self.wordChars or
                            instring[loc-1] not in self.wordChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
2246 2247
class ParseExpression(ParserElement):
    """Abstract ParserElement subclass that holds a list of sub-expressions (self.exprs)."""
    def __init__( self, exprs, savelist = False ):
        """Normalise exprs into the list self.exprs.

           A list is used as-is (not copied), a string becomes a single
           Literal, any other iterable is converted with list(), and a
           non-iterable object is wrapped in a one-element list.
        """
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, list ):
            self.exprs = exprs
        elif isinstance( exprs, basestring ):
            self.exprs = [ Literal( exprs ) ]
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                # not iterable: treat it as a single expression
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the i-th contained expression."""
        return self.exprs[i]

    def append( self, other ):
        """Add other to the contained expressions, drop the cached string form, and return self."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Turn off whitespace skipping for this expression and for copies of
           all contained expressions (the originals are left untouched)."""
        self.skipWhitespace = False
        self.exprs = [ e.copy() for e in self.exprs ]
        for sub in self.exprs:
            sub.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register other as ignorable here and pass the newest ignore
           expression on to every contained expression.  A Suppress that is
           already registered is not added again."""
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self

        super( ParseExpression, self).ignore( other )
        for sub in self.exprs:
            sub.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the inherited string form, or a cached "ClassName:(exprs)" summary if that fails."""
        try:
            return super(ParseExpression,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline the contained expressions and flatten simple nesting.

           Only a two-element expression is flattened: a first or last element
           of the same class is merged into this one, provided it has no parse
           actions, no results name and debugging is off.
        """
        super(ParseExpression,self).streamline()

        for sub in self.exprs:
            sub.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:
            head = self.exprs[0]
            if ( isinstance( head, self.__class__ ) and
                  not head.parseAction and
                  head.resultsName is None and
                  not head.debug ):
                self.exprs = head.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= head.mayReturnEmpty
                self.mayIndexError  |= head.mayIndexError

            # looked up after the merge above, so this is the current last element
            tail = self.exprs[-1]
            if ( isinstance( tail, self.__class__ ) and
                  not tail.parseAction and
                  tail.resultsName is None and
                  not tail.debug ):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= tail.mayReturnEmpty
                self.mayIndexError  |= tail.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate unchanged to the inherited setResultsName."""
        return super(ParseExpression,self).setResultsName(name,listAllMatches)

    def validate( self, validateTrace=[] ):
        """Validate every contained expression, then run the recursion check on this one."""
        trace = validateTrace[:]+[self]
        for sub in self.exprs:
            sub.validate(trace)
        self.checkRecursion( [] )
2343
class And(ParseExpression):
    """Requires all given ParseExpressions to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the '+' operator.
    """

    class _ErrorStop(Empty):
        """Marker element: expressions after it report failures as ParseSyntaxException (see And.parseImpl)."""
        def __init__(self, *args, **kwargs):
            # NOTE(review): super(Empty, ...) resumes the lookup *after* Empty, so
            # Empty.__init__ itself is not run - confirm this is intended
            super(Empty,self).__init__(*args, **kwargs)
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        """Build the sequence; whitespace handling is copied from the first expression."""
        super(And,self).__init__(exprs, savelist)
        # the sequence may return empty only if every element may
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        # use the normalised self.exprs, not the raw argument: exprs may be a
        # string or a one-shot iterable that cannot be indexed meaningfully
        first = self.exprs[0]
        self.setWhitespaceChars( first.whiteChars )
        self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse every contained expression in order and return (loc, combined tokens).

           After an _ErrorStop marker, a ParseBaseException or IndexError from
           a later expression is re-raised as ParseSyntaxException.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException:
                    pe = sys.exc_info()[1]
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
            else:
                loc, exprtokens = e._parse( instring, loc, doActions )
            # keep results that carry tokens or named entries
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Append other (a string becomes a Literal) in place; implements '+='."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on the leading expressions, stopping after
           the first one that cannot return empty."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        """Return the name if one is set, else a cached "{a b c}" summary."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr
2411 2412
class Or(ParseExpression):
    """Requires at least one ParseExpression to be found.
       When several expressions match, the one matching the longest string is used.
       May be constructed using the '^' operator.
    """
    def __init__( self, exprs, savelist = False ):
        """Build the alternation; it may return empty if any alternative may."""
        super(Or,self).__init__(exprs, savelist)
        self.mayReturnEmpty = False
        for alt in self.exprs:
            if alt.mayReturnEmpty:
                self.mayReturnEmpty = True
                break

    def parseImpl( self, instring, loc, doActions=True ):
        """Try every alternative and parse with the one that reaches furthest.

           If none matches, the failure with the greatest location is raised;
           with no recorded failure at all, a ParseException saying
           "no defined alternatives to match" is raised.
        """
        furthestFailLoc = -1
        furthestFail = None
        bestEnd = -1
        for alt in self.exprs:
            try:
                endLoc = alt.tryParse( instring, loc )
            except ParseException:
                err = sys.exc_info()[1]
                if err.loc > furthestFailLoc:
                    furthestFail = err
                    furthestFailLoc = err.loc
            except IndexError:
                # running off the input counts as a failure at its end
                if len(instring) > furthestFailLoc:
                    furthestFail = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestFailLoc = len(instring)
            else:
                # strictly greater: on a tie the earlier alternative wins
                if endLoc > bestEnd:
                    bestEnd = endLoc
                    winner = alt

        if bestEnd < 0:
            if furthestFail is None:
                raise ParseException(instring, loc, "no defined alternatives to match", self)
            raise furthestFail

        return winner._parse( instring, loc, doActions )

    def __ixor__(self, other ):
        """Append other (a string becomes a Literal) in place; implements '^='."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #Or( [ self, other ] )

    def __str__( self ):
        """Return the name if one is set, else a cached "{a ^ b}" summary."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [ _ustr(alt) for alt in self.exprs ]
            self.strRepr = "{" + " ^ ".join( parts ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        subRecCheckList = parseElementList[:] + [ self ]
        for alt in self.exprs:
            alt.checkRecursion( subRecCheckList )
2473 2474
class MatchFirst(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the '|' operator.
    """
    def __init__( self, exprs, savelist = False ):
        """Build the alternation; it may return empty if any alternative may,
           or if no alternatives were given."""
        super(MatchFirst,self).__init__(exprs, savelist)
        if exprs:
            self.mayReturnEmpty = False
            for e in self.exprs:
                if e.mayReturnEmpty:
                    self.mayReturnEmpty = True
                    break
        else:
            self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the result of the first alternative that parses at loc.

           If none matches, the failure with the greatest location is raised;
           with no recorded failure at all, a ParseException saying
           "no defined alternatives to match" is raised.
        """
        maxExcLoc = -1
        maxException = None
        for e in self.exprs:
            try:
                return e._parse( instring, loc, doActions )
            except ParseException:
                # fetched via sys.exc_info() (as Or.parseImpl does) so the same
                # source is valid under both Python 2 and Python 3
                err = sys.exc_info()[1]
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                # running off the input counts as a failure at its end
                if len(instring) > maxExcLoc:
                    maxException = ParseException(instring,len(instring),e.errmsg,self)
                    maxExcLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if maxException is not None:
            raise maxException
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other ):
        """Append other (a string becomes a Literal) in place; implements '|='."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        """Return the name if one is set, else a cached "{a | b}" summary."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
2532 2533
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """
    def __init__( self, exprs, savelist = True ):
        """Build the set of expressions; it may return empty only if every member may."""
        super(Each,self).__init__(exprs, savelist)
        self.mayReturnEmpty = True
        for member in self.exprs:
            if not member.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        # the expression groups are computed on the first parseImpl call
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Find an order in which the expressions match, then parse in that order.

           A trial pass (tryParse) repeatedly attempts the remaining
           expressions until a full round matches nothing; the matched order
           is then replayed with _parse and the results merged.  Raises
           ParseException if a required expression never matched.
        """
        if self.initExprGroups:
            # classify the members once
            unwrapped = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
            emptyOk = [ e for e in self.exprs if e.mayReturnEmpty and e not in unwrapped ]
            self.optionals = unwrapped + emptyOk
            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
            self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False

        trialLoc = loc
        pendingReqd = self.required[:]
        pendingOpt  = self.optionals[:]
        matchOrder = []

        while True:
            candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
            failed = []
            for e in candidates:
                try:
                    trialLoc = e.tryParse( instring, trialLoc )
                except ParseException:
                    failed.append(e)
                else:
                    matchOrder.append(e)
                    # a matched single-use expression is not tried again
                    if e in pendingReqd:
                        pendingReqd.remove(e)
                    elif e in pendingOpt:
                        pendingOpt.remove(e)
            # stop once a whole round produced no match
            if len(failed) == len(candidates):
                break

        if pendingReqd:
            missing = ", ".join( [ _ustr(e) for e in pendingReqd ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOpt]

        resultlist = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            resultlist.append(results)

        finalResults = ParseResults([])
        for r in resultlist:
            # names already present are combined instead of overwritten
            dups = {}
            for k in r.keys():
                if k in finalResults.keys():
                    combined = ParseResults(finalResults[k])
                    combined += ParseResults(r[k])
                    dups[k] = combined
            finalResults += ParseResults(r)
            for k,v in dups.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__( self ):
        """Return the name if one is set, else a cached "{a & b}" summary."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [ _ustr(member) for member in self.exprs ]
            self.strRepr = "{" + " & ".join( parts ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every member expression."""
        subRecCheckList = parseElementList[:] + [ self ]
        for member in self.exprs:
            member.checkRecursion( subRecCheckList )
2620 2621
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement that wraps a single expression (self.expr), which may be None."""
    def __init__( self, expr, savelist=False ):
        """Wrap expr (a string becomes a Literal) and copy its parsing flags,
           whitespace settings and ignore expressions onto this element."""
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse with the wrapped expression (without a second pre-parse).

           Raises ParseException if no expression is wrapped.
        """
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Turn off whitespace skipping here and on a copy of the wrapped expression."""
        self.skipWhitespace = False
        # check for a missing expression *before* touching it, like every other
        # method of this class does
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register other as ignorable here and pass the newest ignore
           expression on to the wrapped expression.  A Suppress that is
           already registered is not added again."""
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseElementEnhance, self).ignore( other )
                if self.expr is not None:
                    self.expr.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseElementEnhance, self).ignore( other )
            if self.expr is not None:
                self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and the wrapped expression; returns self."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise RecursiveGrammarException if this element is already in
           parseElementList; otherwise continue the check in the wrapped expression."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, then run the recursion check on this element."""
        # validateTrace is only read (copied), never mutated
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the inherited string form, or a cached "ClassName:(expr)" summary if that fails."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # the inherited __str__ could not produce a value; build our own
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
2692 2693
class FollowedBy(ParseElementEnhance):
    """Lookahead for the given parse expression.  FollowedBy does *not*
    advance the parsing position within the input string; it only checks
    that the expression matches at the current position, and always
    returns a null token list."""
    def __init__( self, expr ):
        """Wrap expr; a lookahead may always return empty."""
        super(FollowedBy,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Try the wrapped expression at loc and return (loc, []) unchanged;
           any exception from the trial parse propagates to the caller."""
        self.expr.tryParse( instring, loc )
        unchanged = loc
        return unchanged, []
2706 2707
class NotAny(ParseElementEnhance):
    """Negative lookahead for the given parse expression.  NotAny does *not*
    advance the parsing position within the input string; it only checks
    that the expression does *not* match at the current position.  NotAny
    also does *not* skip over leading whitespace, and always returns a null
    token list.  May be constructed using the '~' operator."""
    def __init__( self, expr ):
        """Wrap expr with whitespace skipping disabled on this element only."""
        super(NotAny,self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (loc, []) if the wrapped expression fails at loc; if it
           matches, raise self.myException with loc and pstr updated."""
        try:
            self.expr.tryParse( instring, loc )
        except (ParseException,IndexError):
            # the unwanted expression is absent, which is a success here
            return loc, []

        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def __str__( self ):
        """Return the name if one is set, else a cached "~{expr}" summary."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{%s}" % _ustr(self.expr)

        return self.strRepr
2743 2744
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition: zero or more occurrences of the given expression."""
    def __init__( self, expr ):
        """Wrap expr; zero repetitions are allowed, so the element may return empty."""
        super(ZeroOrMore,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse the wrapped expression repeatedly until it fails.

           Returns (loc, tokens) for everything matched so far; a
           ParseException or IndexError ends the repetition and is swallowed,
           so zero matches yield (loc, []).
        """
        collected = []
        try:
            loc, collected = self.expr._parse( instring, loc, doActions, callPreParse=False )
            skipIgnorables = ( len(self.ignoreExprs) > 0 )
            while True:
                nextLoc = loc
                if skipIgnorables:
                    nextLoc = self._skipIgnorables( instring, loc )
                loc, more = self.expr._parse( instring, nextLoc, doActions )
                # keep results that carry tokens or named entries
                if more or more.keys():
                    collected += more
        except (ParseException,IndexError):
            pass

        return loc, collected

    def __str__( self ):
        """Return the name if one is set, else a cached "[expr]..." summary."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[%s]..." % _ustr(self.expr)

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Set the results name via the inherited method and mark the returned element as saveAsList."""
        named = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2782 2783
class OneOrMore(ParseElementEnhance):
    """Match the wrapped expression one or more times in a row."""

    def parseImpl(self, instring, loc, doActions=True):
        """Require one match, then greedily collect any further matches.

        A failure of the first match propagates to the caller; a failure
        of any later repetition just ends the repetition.
        """
        # the mandatory first occurrence is parsed outside the try block
        loc, collected = self.expr._parse(instring, loc, doActions,
                                          callPreParse=False)
        try:
            skipIgnored = len(self.ignoreExprs) > 0
            while True:
                nextStart = loc
                if skipIgnored:
                    nextStart = self._skipIgnorables(instring, loc)
                loc, more = self.expr._parse(instring, nextStart, doActions)
                # keep results that carry tokens or named values
                if more or more.keys():
                    collected += more
        except (ParseException, IndexError):
            pass
        return loc, collected

    def __str__(self):
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "{%s}..." % _ustr(self.expr)
            return self.strRepr
        return self.name

    def setResultsName(self, name, listAllMatches=False):
        """Name the results; the returned element always saves as a list."""
        named = super(OneOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2817
2818 -class _NullToken(object):
2819 - def __bool__(self):
2820 return False
2821 __nonzero__ = __bool__
2822 - def __str__(self):
2823 return ""
2824 2825 _optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Match the wrapped expression if present, otherwise succeed anyway.

    A default value may be supplied; it is returned as the single token
    when the wrapped expression is not found.
    """

    def __init__(self, exprs, default=_optionalNotMatched):
        super(Optional, self).__init__(exprs, savelist=False)
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Try the wrapped expression; fall back to the default on failure."""
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions,
                                           callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                # no default configured: contribute nothing
                tokens = []
            elif self.expr.resultsName:
                # expose the default under the wrapped expression's name too
                tokens = ParseResults([fallback])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def __str__(self):
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[%s]" % _ustr(self.expr)
            return self.strRepr
        return self.name
2858 2859
class SkipTo(ParseElementEnhance):
    """Skip over arbitrary text until the target expression is found.

    With include=True the target expression is parsed as well, and the
    skipped text and matched tokens are returned together. The ignore
    argument names an expression (typically quoted strings or comments)
    whose matches are jumped over so they cannot produce false hits.
    If failOn is given and matches at a scanned position before the
    target is found, the skip fails with a ParseException.
    """

    def __init__(self, other, include=False, ignore=None, failOn=None):
        super(SkipTo, self).__init__(other)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a plain string failOn is promoted to a Literal
        if isinstance(failOn, basestring):
            failOn = Literal(failOn)
        self.failOn = failOn
        self.errmsg = "No match found for " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Advance one character at a time until the target matches."""
        startLoc = loc
        limit = len(instring)
        target = self.expr
        abort = False
        while loc <= limit:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        # mark the exception so the handler below re-raises
                        abort = True
                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                    abort = False
                if self.ignoreExpr is not None:
                    # jump over consecutive ignorable matches
                    while True:
                        try:
                            loc = self.ignoreExpr.tryParse(instring, loc)
                        except ParseBaseException:
                            break
                # probe without parse actions; raises if no match here
                target._parse(instring, loc, doActions=False, callPreParse=False)
                skipped = instring[startLoc:loc]
                if not self.includeMatch:
                    return loc, [skipped]
                loc, mat = target._parse(instring, loc, doActions, callPreParse=False)
                if not mat:
                    return loc, [skipped]
                combined = ParseResults(skipped)
                combined += mat
                return loc, [combined]
            except (ParseException, IndexError):
                if abort:
                    raise
                loc += 1
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2925
class Forward(ParseElementEnhance):
    """Placeholder for an expression that will be defined later.

    Used for recursive grammars, such as algebraic infix notation. Once
    the real expression is known, attach it with the '<<' operator.

    Note: '|' binds less tightly than '<<', so::
        fwdExpr << a | b | c
    is evaluated as::
        (fwdExpr << a) | b | c
    which leaves b and c out of the Forward. Group the alternatives
    explicitly instead::
        fwdExpr << (a | b | c)
    """

    def __init__(self, other=None):
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """Attach 'other' as the wrapped expression and adopt its settings."""
        if isinstance(other, basestring):
            other = Literal(other)
        self.expr = other
        self.strRepr = None
        inner = self.expr
        self.mayIndexError = inner.mayIndexError
        self.mayReturnEmpty = inner.mayReturnEmpty
        # setWhitespaceChars must run before skipWhitespace is copied
        self.setWhitespaceChars(inner.whiteChars)
        self.skipWhitespace = inner.skipWhitespace
        self.saveAsList = inner.saveAsList
        self.ignoreExprs.extend(inner.ignoreExprs)
        return None

    def leaveWhitespace(self):
        # only this element is changed; nothing is pushed to the wrapped expr
        self.skipWhitespace = False
        return self

    def streamline(self):
        if self.streamlined:
            return self
        # flag is set first so a recursive grammar does not loop forever
        self.streamlined = True
        if self.expr is not None:
            self.expr.streamline()
        return self

    def validate(self, validateTrace=[]):
        # the default list is never mutated: a copy is extended below
        if self not in validateTrace:
            trace = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(trace)
        self.checkRecursion([])

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        # Temporarily swap the class so that a recursive reference back to
        # this element renders as "..." instead of recursing without end.
        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is None:
                retString = "None"
            else:
                retString = _ustr(self.expr)
        finally:
            self.__class__ = self._revertClass
        return "%s: %s" % (self.__class__.__name__, retString)

    def copy(self):
        if self.expr is None:
            # undefined Forward: return a new Forward that refers to this one
            ret = Forward()
            ret << self
            return ret
        return super(Forward, self).copy()
2997
class _ForwardNoRecurse(Forward):
    """Stand-in class whose string form is always an ellipsis."""

    def __str__(self):
        return "..."
3001
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""

    def __init__(self, expr, savelist=False):
        # 'savelist' is accepted but not forwarded to the base class
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
3007
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens.

    Deprecated: constructing an instance emits a DeprecationWarning that
    recommends the upcaseTokens parse action instead.
    """

    def __init__(self, *args):
        super(Upcase, self).__init__(*args)
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                      DeprecationWarning, stacklevel=2)

    def postParse(self, instring, loc, tokenlist):
        """Return the tokens converted to upper case, as a list."""
        # string.upper() no longer exists on Python 3; calling the token's
        # own upper() method gives the same result on Python 2 and 3.
        return [tok.upper() for tok in tokenlist]
3017 3018
class Combine(TokenConverter):
    """Converter that concatenates all matched tokens into one string.

    By default the matched pieces must also be contiguous in the input;
    pass 'adjacent=False' to the constructor to allow gaps between them.
    """

    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # Turn off whitespace skipping in the contained expressions, then
        # turn it back on for the Combine element itself.
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString

    def ignore(self, other):
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            # bypass the intermediate classes' ignore() when adjacent
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the token list with a single joined string token."""
        retToks = tokenlist.copy()
        del retToks[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        retToks += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and len(retToks.keys()) > 0:
            return [retToks]
        return retToks
3049
class Group(TokenConverter):
    """Converter that returns the matched tokens nested in their own list.

    Useful for keeping the tokens of ZeroOrMore and OneOrMore expressions
    together.
    """

    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        # wrap the whole token list as a single element
        grouped = [tokenlist]
        return grouped
3058
class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list and a dict.

    The first token of each element becomes the key under which the rest
    of that element can be looked up. Useful for tabular report scraping
    when the first column can serve as an item key.
    """

    def __init__(self, exprs):
        super(Dict, self).__init__(exprs)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Register each element of tokenlist under its first token."""
        for i, tok in enumerate(tokenlist):
            size = len(tok)
            if size == 0:
                continue
            ikey = tok[0]
            if isinstance(ikey, int):
                # integer keys are stored in their stripped string form
                ikey = _ustr(tok[0]).strip()

            if size == 1:
                # key only: the value is an empty string
                value = ""
            elif size == 2 and not isinstance(tok[1], ParseResults):
                value = tok[1]
            else:
                rest = tok.copy()
                del rest[0]
                if len(rest) != 1 or (isinstance(rest, ParseResults) and rest.keys()):
                    value = rest
                else:
                    value = rest[0]
            tokenlist[ikey] = _ParseResultsWithOffset(value, i)

        if self.resultsName:
            return [tokenlist]
        return tokenlist
3091 3092
class Suppress(TokenConverter):
    """Converter that discards the tokens of the parsed expression."""

    def postParse(self, instring, loc, tokenlist):
        # matched text is consumed but contributes no tokens
        return []

    def suppress(self):
        # already suppressing, so no extra wrapper is needed
        return self
3100 3101
class OnlyOnce(object):
    """Parse-action wrapper that lets the wrapped action run only once.

    After the first successful call, further calls raise ParseException
    until reset() is called.
    """

    def __init__(self, methodCall):
        self.callable = ParserElement._normalizeParseActionArgs(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        if self.called:
            raise ParseException(s, l, "")
        results = self.callable(s, l, t)
        # flagged only after the wrapped action returned without raising
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped action to run again."""
        self.called = False
3115
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes an "entering" line to sys.stderr before
    calling the parse action and a "leaving" line afterwards, showing
    either the return value or the exception (which is re-raised).
    """
    f = ParserElement._normalizeParseActionArgs(f)
    # 'func_name' exists only on Python 2 functions; '__name__' is the
    # portable spelling. Fall back for callables that define neither.
    try:
        funcName = f.__name__
    except AttributeError:
        funcName = getattr(f, "func_name", f.__class__.__name__)

    def z(*paArgs):
        thisFunc = funcName
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is the instance of a bound action
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write(">>entering %s(line: '%s', %d, %s)\n" % (thisFunc, line(l, s), l, t))
        try:
            ret = f(*paArgs)
        except Exception:
            exc = sys.exc_info()[1]
            sys.stderr.write("<<leaving %s (exception: %s)\n" % (thisFunc, exc))
            raise
        sys.stderr.write("<<leaving %s (ret: %s)\n" % (thisFunc, ret))
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z

#
# global helpers
#
def delimitedList(expr, delim=",", combine=False):
    """Helper to define a delimited list of expressions.

    The delimiter defaults to ','. With combine=False (the default) the
    delimiters are suppressed and the elements are returned as separate
    tokens, with whitespace allowed around elements and delimiters. With
    combine=True the whole list, delimiters included, is returned as one
    token string built by Combine.
    """
    exprName = _ustr(expr)
    dlName = exprName + " [" + _ustr(delim) + " " + exprName + "]..."
    if not combine:
        return (expr + ZeroOrMore(Suppress(delim) + expr)).setName(dlName)
    return Combine(expr + ZeroOrMore(delim + expr)).setName(dlName)
3155
def countedArray(expr):
    """Helper to define a counted list of expressions.

    The pattern has the form::
        integer expr expr expr...
    where the leading integer says how many expr items follow. The
    matched expr tokens are returned as a list; the leading count token
    is dropped.
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # redefine the Forward on the fly, once the count has been parsed
        n = int(t[0])
        if n:
            arrayExpr << Group(And([expr] * n))
        else:
            arrayExpr << Group(empty)
        return []

    counter = Word(nums).setName("arrayLen")
    counter = counter.setParseAction(countFieldParseAction, callDuringTry=True)
    return (counter + arrayExpr)
3170 -def _flatten(L):
3171 if type(L) is not list: return [L] 3172 if L == []: return L 3173 return _flatten(L[0]) + _flatten(L[1:])
3174
def matchPreviousLiteral(expr):
    """Helper to define an expression that repeats a previous match.

    The returned expression matches the literal text of the tokens that
    'expr' matched earlier. For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match "1:1", but not "1:2". Because this matches a previous
    literal, it will also match the leading "1:1" in "1:10". If this is
    not desired, use matchPreviousExpr.
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several (possibly nested) tokens: match them one after another
            flatTokens = _flatten(t.asList())
            rep << And([Literal(tok) for tok in flatTokens])

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
def matchPreviousExpr(expr):
    """Helper to define an expression that repeats a previous match.

    The returned expression parses with a copy of 'expr' and then checks
    that its tokens equal those matched earlier. For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match "1:1", but not "1:2". Because this matches by expression,
    it will *not* match the leading "1:1" in "1:10"; the expressions are
    evaluated first and then compared, so "1" is compared with "10".
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep << expr.copy()

    def copyTokenToRepeater(s, l, t):
        expected = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            found = _flatten(t.asList())
            if found != expected:
                raise ParseException("", 0, "")

        # install a fresh comparison against the tokens just matched
        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
def _escapeRegexRangeChars(s):
    """Escape characters that are special inside a regex '[...]' set.

    Backslash, '^', '-' and ']' get a leading backslash; newline and tab
    characters are replaced by the two-character sequences \\n and \\t.
    """
    # the backslash comes first so later escapes are not escaped again
    for special in r"\^-]":
        s = s.replace(special, _bslash + special)
    escaped = s.replace("\n", r"\n").replace("\t", r"\t")
    return _ustr(escaped)
3234
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       If strs is neither a string nor a list/tuple, a SyntaxWarning is issued
       and the argument is treated as an empty set of alternatives.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        symbols = list(strs[:])
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # without this, the code below failed with a NameError on 'symbols'
        symbols = []

    # Remove duplicates, and move any symbol that is masked by an earlier,
    # shorter prefix ahead of that prefix so the longer one is tried first.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # no change at this position, move on to the next symbol
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is one character long: use a character set
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            # best effort: any failure building the Regex falls through to
            # MatchFirst, but KeyboardInterrupt/SystemExit are not swallowed
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3293
def dictOf(key, value):
    """Helper to define a dictionary from a key pattern and a value pattern.

    Takes care of nesting the Dict, ZeroOrMore and Group elements in the
    proper order. The key pattern can include delimiting markers or
    punctuation, as long as they are suppressed, thereby leaving the
    significant key text. The value pattern can include named results,
    so that the Dict results can include named token fields.
    """
    entry = Group(key + value)
    return Dict(ZeroOrMore(entry))
3303
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.

    Useful to restore the parsed fields of an HTML start tag into the raw
    tag text itself, or to revert separate tokens with intervening
    whitespace back to the original matching input text. Simpler to use
    than the parse action keepOriginalText, and does not require the
    inspect module to chase up the call stack. By default, returns a
    string containing the original parsed text.

    If the optional asString argument is passed as False, then the return
    value is a ParseResults containing any results names that were
    originally matched, and a single token containing the original
    matched text from the input string. So if the expression passed to
    originalTextFor contains expressions with defined results names, you
    must set asString to False if you want to preserve those results
    name values.
    """
    # zero-width marker whose single token is the current parse location
    locMarker = Empty().setParseAction(lambda s,loc,t: loc).leaveWhitespace()
    matchExpr = locMarker("_original_start") + expr + locMarker("_original_end")

    def sliceAsString(s, l, t):
        return s[t._original_start:t._original_end]

    def sliceInPlace(s, l, t):
        # replace all tokens with the raw text, then drop the marker names
        del t[:]
        t.insert(0, s[t._original_start:t._original_end])
        del t["_original_start"]
        del t["_original_end"]

    if asString:
        extractText = sliceAsString
    else:
        extractText = sliceInPlace
    matchExpr.setParseAction(extractText)
    return matchExpr

# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# grammar for the '[...]' range syntax accepted by srange()
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# a parsed range (ParseResults pair) expands to every character between its
# endpoints, inclusive; a single character is returned unchanged
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       If the string cannot be parsed or expanded, an empty string is returned.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # best effort: a malformed range yields "", but KeyboardInterrupt
        # and SystemExit are no longer swallowed as with a bare except
        return ""
3368
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at
       a specific column in the input text.

       Returns a parse action that raises ParseException unless the match
       location is at column n.
    """
    def verifyCol(strg, locn, toks):
        if col(locn, strg) == n:
            return
        raise ParseException(strg, locn, "matched token not at column %d" % n)

    return verifyCol
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal
       value. Especially useful when used with transformString().

       The returned parse action ignores its arguments and returns a
       one-element list holding replStr.
    """
    def _replFunc(*args):
        replacement = [replStr]
        return replacement

    return _replFunc
def removeQuotes(s, l, t):
    """Helper parse action for removing quotation marks from parsed quoted
       strings. To use, add this parse action to a quoted string using::
          quotedString.setParseAction( removeQuotes )

       Returns the first token without its first and last characters.
    """
    quoted = t[0]
    return quoted[1:-1]
3392
def upcaseTokens(s, l, t):
    """Helper parse action to convert tokens to upper case."""
    # each token is converted to a string first, then upper-cased
    return [_ustr(tok).upper() for tok in t]
3396
def downcaseTokens(s, l, t):
    """Helper parse action to convert tokens to lower case."""
    # each token is converted to a string first, then lower-cased
    return [_ustr(tok).lower() for tok in t]
3400
def keepOriginalText(s, startLoc, t):
    """DEPRECATED - use new helper method 'originalTextFor'.

       Helper parse action to preserve original parsed text, overriding
       any nested parse actions. The tokens are replaced in place by the
       input text between startLoc and the end location reported by
       getTokensEndLoc().
    """
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    originalText = s[startLoc:endloc]
    del t[:]
    t += ParseResults(originalText)
    return t
3412
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the
       end location of the parsed tokens.

       Raises ParseFatalException when no enclosing "_parseNoCache" frame
       is found on the call stack.
    """
    import inspect
    fstack = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers)
        # for the parsing routine, and read its local 'loc'
        for frameRecord in fstack[2:]:
            if frameRecord[3] != "_parseNoCache":
                continue
            return frameRecord[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the frame references held by the stack list
        del fstack
3428
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name"""
    if isinstance(tagStr, basestring):
        resname = tagStr
        # tag names are matched caselessly for HTML, exactly for XML
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    # optional '/' before '>' sets the boolean "empty" result
    selfClosing = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    if xml:
        # XML: every attribute has a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes)
        attribute = Group(tagAttrName + Suppress("=") + tagAttrValue)
    else:
        # HTML: values may be quoted or bare, or missing altogether, and
        # attribute names are converted to lower case
        printablesLessRAbrack = "".join([c for c in printables if c not in ">"])
        tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word(printablesLessRAbrack)
        attribute = Group(tagAttrName.setParseAction(downcaseTokens) +
                          Optional(Suppress("=") + tagAttrValue))
    openTag = Suppress("<") + tagStr + Dict(ZeroOrMore(attribute)) + selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names look like "startDiv" / "endDiv"; ':' splits into words
    titled = "".join(resname.replace(":", " ").title().split())
    openTag = openTag.setResultsName("start" + titled).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end" + titled).setName("</%s>" % tagStr)

    return openTag, closeTag
3456
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name"""
    htmlTags = _makeTags(tagStr, False)
    return htmlTags
3460
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name"""
    xmlTags = _makeTags(tagStr, True)
    return xmlTags
3464
def withAttribute(*args, **attrDict):
    """Helper to create a validating parse action to be used with start tags
       created with makeXMLTags or makeHTMLTags. Use withAttribute to qualify
       a starting tag with a required attribute value, to avoid false matches
       on common tags such as <TD> or <DIV>.

       Call withAttribute with a series of attribute names and values.
       Specify the list of filter attribute names and values as:
        - keyword arguments, as in (class="Customer",align="right"), or
        - a list of name-value tuples, as in
          ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second
       form. When positional tuples are given, keyword arguments are ignored.

       To verify that the attribute exists, but without specifying a value,
       pass withAttribute.ANY_VALUE as the value.
    """
    # snapshot the requested pairs as a list of 2-tuples
    attrs = [(k, v) for k, v in (args or attrDict.items())]

    def pa(s, l, tokens):
        for attrName, attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s, l, "no matching attribute " + attrName)
            if attrValue == withAttribute.ANY_VALUE:
                # presence alone is enough for this attribute
                continue
            if tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))
    return pa
# unique marker meaning "any value is acceptable"
withAttribute.ANY_VALUE = object()

# associativity markers used by operatorPrecedence
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element for the
          nested expression grammar
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Raises ValueError for a numTerms outside 1-3, for a ternary opExpr
       that is not a pair, or for an unrecognized associativity value.
    """
    ret = Forward()
    # the innermost operand: a base element or a parenthesized full expression
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for i,operDef in enumerate(opList):
        # pad with None so the parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()#.setName("expr%d" % i)
        if rightLeftAssoc == opAssoc.LEFT:
            # each FollowedBy lookahead checks that an operator is really
            # present before the Group is attempted
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: adjacent operands
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            # right-associative forms recurse into thisExpr (this level)
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # this level matches its operator form, or falls back to the operand
        # expression built so far
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret << lastExpr
    return ret
# Quoted-string expressions. Inside the quotes each pattern accepts: any
# character other than the quote, newline, carriage return or backslash;
# a doubled quote character; a backslash-x hex escape; or a backslash
# followed by any single character.
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
# either quoting style, as a single regex
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
# a quoted string with a leading 'u' prefix, combined into one token
unicodeString = Combine(_L('u') + quotedString.copy())
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Build an expression matching nested lists bracketed by opening and
       closing delimiters ("(" and ")" unless told otherwise).

       Parameters:
        - opener - opening delimiter of a nested list (default="("); may also be a pyparsing expression
        - closer - closing delimiter of a nested list (default=")"); may also be a pyparsing expression
        - content - expression for the items inside the nested lists (default=None)
        - ignoreExpr - expression whose matches may contain delimiter characters
          that must not count as nesting (default=quotedString)

       When no content expression is supplied, everything between the
       delimiters is captured as a list of whitespace-delimited values; in
       that case both opener and closer must be strings.

       ignoreExpr is meant for things such as quotedString or a comment
       expression, which can legitimately contain the delimiter characters.
       Combine several such expressions with an Or or MatchFirst.  Pass None
       if nothing should be ignored.

       Raises ValueError if opener and closer compare equal, or if content is
       None and opener/closer are not both strings.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    useIgnore = ignoreExpr is not None

    if content is None:
        # a default content expression can only be derived from string delimiters
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        stripToken = lambda t:t[0].strip()
        whiteChars = ParserElement.DEFAULT_WHITE_CHARS

        if len(opener) == 1 and len(closer)==1:
            # single-character delimiters: simply exclude them (and whitespace)
            # from the characters an item may contain
            excluded = opener+closer+whiteChars
            if useIgnore:
                content = Combine(OneOrMore(~ignoreExpr +
                                            CharsNotIn(excluded,exact=1))
                                 ).setParseAction(stripToken)
            else:
                # note: the parse action is attached to the CharsNotIn term only
                content = empty.copy()+CharsNotIn(excluded).setParseAction(stripToken)
        else:
            # multi-character delimiters: take one non-whitespace character at
            # a time, guarded so that neither delimiter starts at that point
            if useIgnore:
                oneChar = (~ignoreExpr +
                           ~Literal(opener) + ~Literal(closer) +
                           CharsNotIn(whiteChars,exact=1))
            else:
                oneChar = (~Literal(opener) + ~Literal(closer) +
                           CharsNotIn(whiteChars,exact=1))
            content = Combine(OneOrMore(oneChar)).setParseAction(stripToken)

    # the expression refers to itself, so it is declared first and filled in after
    ret = Forward()
    if useIgnore:
        item = ignoreExpr | ret | content
    else:
        item = ret | content
    ret << Group( Suppress(opener) + ZeroOrMore( item ) + Suppress(closer) )
    return ret
3627
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one blockStatement.

       Side effects: indentStack is pushed/popped by the parse actions while
       parsing, and blockStatementExpr.ignore() is called on the expression
       that was passed in.
    """
    def checkPeerIndent(s,l,t):
        # nothing to check once the end of the input has been reached
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            # deeper than the current block is fatal; shallower is an ordinary mismatch
            if curCol > indentStack[-1]:
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # a sub-block must start to the right of the current indentation level;
        # when it does, its column becomes the new top of the stack
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        # at end of input the block is accepted as closed without popping the stack
        if l >= len(s): return
        curCol = col(l,s)
        # Need at least two levels to compare against; otherwise indentStack[-2]
        # would raise IndexError from inside the parse action instead of
        # reporting an ordinary parse failure.
        if not(len(indentStack) > 1 and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER   = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)
    if indent:
        smExpr = Group( Optional(NL) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    # backslash followed by end-of-line is registered as ignorable within statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr

alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
# parse action: look up the replacement character for the matched "entity"
# result; yields None when the entity name is not in the map
replaceHTMLEntity = lambda t : _htmlEntityMap.get(t.entity)

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# every printable character except the comma
_noncomma = "".join( [ c for c in printables if c != "," ] )
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")


if __name__ == "__main__":

    def test( teststring ):
        """Parse teststring with the simpleSQL grammar defined below and print
           either the resulting tokens or the location of the parse error."""
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print (teststring + "->" + str(tokenlist))
            print ("tokens = " + str(tokens))
            print ("tokens.columns = " + str(tokens.columns))
            print ("tokens.tables = " + str(tokens.tables))
            print (tokens.asXML("SQL",True))
        except ParseBaseException:
            # fetched via sys.exc_info() rather than an "except ... as" clause
            err = sys.exc_info()[1]
            print (teststring + "->")
            print (err.line)
            print (" "*(err.column-1) + "^")
            print (err)
        print()

    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) )#.setName("tables")
    simpleSQL = ( selectToken + \
                     ( '*' | columnNameList ).setResultsName( "columns" ) + \
                     fromToken + \
                     tableNameList.setResultsName( "tables" ) )

    test( "SELECT * from XYZZY, ABC" )
    test( "select * from SYS.XYZZY" )
    test( "Select A from Sys.dual" )
    test( "Select AA,BB,CC from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Xelect A, B, C from Sys.dual" )
    test( "Select A, B, C frox Sys.dual" )
    test( "Select" )
    test( "Select ^^^ frox Sys.dual" )
    test( "Select A, B, C from Sys.dual, Table2 " )