home *** CD-ROM | disk | FTP | other *** search
/ Freelog 116 / FreelogNo116-JuilletSeptembre2013.iso / Bureautique / gImageReader / gimagereader_0.9-1_win32.exe / bin / email / feedparser.pyc (.txt) < prev    next >
Python Compiled Bytecode  |  2011-03-24  |  11KB  |  411 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.7)
  3.  
  4. """FeedParser - An email feed parser.
  5.  
  6. The feed parser implements an interface for incrementally parsing an email
  7. message, line by line.  This has advantages for certain applications, such as
  8. those reading email messages off a socket.
  9.  
  10. FeedParser.feed() is the primary interface for pushing new data into the
  11. parser.  It returns when there's nothing more it can do with the available
  12. data.  When you have no more data to push into the parser, call .close().
  13. This completes the parsing and returns the root message object.
  14.  
  15. The other advantage of this parser is that it will never throw a parsing
  16. exception.  Instead, when it finds something unexpected, it adds a 'defect' to
  17. the current message.  Defects are just instances that live on the message
  18. object's .defects attribute.
  19. """
# Only FeedParser is exported by ``from ... import *``.
__all__ = [
    'FeedParser']
import re
from email import errors
from email import message
# One line terminator: CRLF, bare CR or bare LF.  The parser uses
# NLCRE.match() to recognise the blank line that ends a header block.
NLCRE = re.compile('\r\n|\r|\n')
# Same alternation as a capturing group; applied with match(), i.e. anchored
# at the beginning of a string (used on the first epilogue line).
NLCRE_bol = re.compile('(\r\n|\r|\n)')
# A line terminator anchored at the very end of the string (\Z); used to
# strip one trailing newline from preambles, payloads and epilogues.
NLCRE_eol = re.compile('(\r\n|\r|\n)\\Z')
# Capturing form used with split(), so the terminators are kept in the
# result and can be re-attached to the text preceding them.
NLCRE_crack = re.compile('(\r\n|\r|\n)')
# A header-section line: a "From " envelope line, a field name followed by a
# colon (octal 041-071 and 073-176, i.e. printable ASCII without ':'), or a
# continuation line starting with a tab or a space.
headerRE = re.compile('^(From |[\\041-\\071\\073-\\176]{1,}:|[\\t ])')
# Joiner used to glue collected lines back together.
EMPTYSTRING = ''
NL = '\n'
# Unique sentinel meaning "no complete line is buffered yet"; always compared
# by identity.
NeedMoreData = object()
  33.  
  34. class BufferedSubFile(object):
  35.     '''A file-ish object that can have new data loaded into it.
  36.  
  37.     You can also push and pop line-matching predicates onto a stack.  When the
  38.     current predicate matches the current line, a false EOF response
  39.     (i.e. empty string) is returned instead.  This lets the parser adhere to a
  40.     simple abstraction -- it parses until EOF closes the current message.
  41.     '''
  42.     
  43.     def __init__(self):
  44.         self._partial = ''
  45.         self._lines = []
  46.         self._eofstack = []
  47.         self._closed = False
  48.  
  49.     
  50.     def push_eof_matcher(self, pred):
  51.         self._eofstack.append(pred)
  52.  
  53.     
  54.     def pop_eof_matcher(self):
  55.         return self._eofstack.pop()
  56.  
  57.     
  58.     def close(self):
  59.         self._lines.append(self._partial)
  60.         self._partial = ''
  61.         self._closed = True
  62.  
  63.     
  64.     def readline(self):
  65.         if not self._lines:
  66.             if self._closed:
  67.                 return ''
  68.             return None
  69.         line = None._lines.pop()
  70.         for ateof in self._eofstack[::-1]:
  71.             if ateof(line):
  72.                 self._lines.append(line)
  73.                 return ''
  74.         
  75.         return line
  76.  
  77.     
  78.     def unreadline(self, line):
  79.         if not line is not NeedMoreData:
  80.             raise AssertionError
  81.         None._lines.append(line)
  82.  
  83.     
  84.     def push(self, data):
  85.         '''Push some new data into this object.'''
  86.         data = self._partial + data
  87.         self._partial = ''
  88.         parts = NLCRE_crack.split(data)
  89.         self._partial = parts.pop()
  90.         if not (self._partial) and parts and parts[-1].endswith('\r'):
  91.             self._partial = parts.pop(-2) + parts.pop()
  92.         lines = []
  93.         for i in range(len(parts) // 2):
  94.             lines.append(parts[i * 2] + parts[i * 2 + 1])
  95.         
  96.         self.pushlines(lines)
  97.  
  98.     
  99.     def pushlines(self, lines):
  100.         self._lines[:0] = lines[::-1]
  101.  
  102.     
  103.     def is_closed(self):
  104.         return self._closed
  105.  
  106.     
  107.     def __iter__(self):
  108.         return self
  109.  
  110.     
  111.     def next(self):
  112.         line = self.readline()
  113.         if line == '':
  114.             raise StopIteration
  115.         return line
  116.  
  117.  
  118.  
  119. class FeedParser:
  120.     '''A feed-style parser of email.'''
  121.     
  122.     def __init__(self, _factory = message.Message):
  123.         '''_factory is called with no arguments to create a new message obj'''
  124.         self._factory = _factory
  125.         self._input = BufferedSubFile()
  126.         self._msgstack = []
  127.         self._parse = self._parsegen().next
  128.         self._cur = None
  129.         self._last = None
  130.         self._headersonly = False
  131.  
  132.     
  133.     def _set_headersonly(self):
  134.         self._headersonly = True
  135.  
  136.     
  137.     def feed(self, data):
  138.         '''Push more data into the parser.'''
  139.         self._input.push(data)
  140.         self._call_parse()
  141.  
  142.     
  143.     def _call_parse(self):
  144.         
  145.         try:
  146.             self._parse()
  147.         except StopIteration:
  148.             pass
  149.  
  150.  
  151.     
  152.     def close(self):
  153.         '''Parse all remaining data and return the root message object.'''
  154.         self._input.close()
  155.         self._call_parse()
  156.         root = self._pop_message()
  157.         if not not (self._msgstack):
  158.             raise AssertionError
  159.         if None.get_content_maintype() == 'multipart' and not root.is_multipart():
  160.             root.defects.append(errors.MultipartInvariantViolationDefect())
  161.         return root
  162.  
  163.     
  164.     def _new_message(self):
  165.         msg = self._factory()
  166.         if self._cur and self._cur.get_content_type() == 'multipart/digest':
  167.             msg.set_default_type('message/rfc822')
  168.         if self._msgstack:
  169.             self._msgstack[-1].attach(msg)
  170.         self._msgstack.append(msg)
  171.         self._cur = msg
  172.         self._last = msg
  173.  
  174.     
  175.     def _pop_message(self):
  176.         retval = self._msgstack.pop()
  177.         if self._msgstack:
  178.             self._cur = self._msgstack[-1]
  179.         else:
  180.             self._cur = None
  181.         return retval
  182.  
  183.     
  184.     def _parsegen(self):
  185.         self._new_message()
  186.         headers = []
  187.         for line in self._input:
  188.             if line is NeedMoreData:
  189.                 yield NeedMoreData
  190.                 continue
  191.             if not headerRE.match(line):
  192.                 if not NLCRE.match(line):
  193.                     self._input.unreadline(line)
  194.                 break
  195.             headers.append(line)
  196.         
  197.         self._parse_headers(headers)
  198.         if self._headersonly:
  199.             lines = []
  200.             while True:
  201.                 line = self._input.readline()
  202.                 if line is NeedMoreData:
  203.                     yield NeedMoreData
  204.                     continue
  205.                 if line == '':
  206.                     break
  207.                 lines.append(line)
  208.             self._cur.set_payload(EMPTYSTRING.join(lines))
  209.             return None
  210.         if None._cur.get_content_type() == 'message/delivery-status':
  211.             while True:
  212.                 self._input.push_eof_matcher(NLCRE.match)
  213.                 for retval in self._parsegen():
  214.                     if retval is NeedMoreData:
  215.                         yield NeedMoreData
  216.                         continue
  217.                     break
  218.                 
  219.                 msg = self._pop_message()
  220.                 self._input.pop_eof_matcher()
  221.                 while True:
  222.                     line = self._input.readline()
  223.                     if line is NeedMoreData:
  224.                         yield NeedMoreData
  225.                         continue
  226.                     break
  227.                 while True:
  228.                     line = self._input.readline()
  229.                     if line is NeedMoreData:
  230.                         yield NeedMoreData
  231.                         continue
  232.                     break
  233.                 if line == '':
  234.                     break
  235.                 self._input.unreadline(line)
  236.             return None
  237.         if None._cur.get_content_maintype() == 'message':
  238.             for retval in self._parsegen():
  239.                 if retval is NeedMoreData:
  240.                     yield NeedMoreData
  241.                     continue
  242.                 break
  243.             
  244.             self._pop_message()
  245.             return None
  246.         if None._cur.get_content_maintype() == 'multipart':
  247.             boundary = self._cur.get_boundary()
  248.             if boundary is None:
  249.                 self._cur.defects.append(errors.NoBoundaryInMultipartDefect())
  250.                 lines = []
  251.                 for line in self._input:
  252.                     if line is NeedMoreData:
  253.                         yield NeedMoreData
  254.                         continue
  255.                     lines.append(line)
  256.                 
  257.                 self._cur.set_payload(EMPTYSTRING.join(lines))
  258.                 return None
  259.             separator = None + boundary
  260.             boundaryre = re.compile('(?P<sep>' + re.escape(separator) + ')(?P<end>--)?(?P<ws>[ \\t]*)(?P<linesep>\\r\\n|\\r|\\n)?$')
  261.             capturing_preamble = True
  262.             preamble = []
  263.             linesep = False
  264.             while True:
  265.                 line = self._input.readline()
  266.                 if line is NeedMoreData:
  267.                     yield NeedMoreData
  268.                     continue
  269.                 if line == '':
  270.                     break
  271.                 mo = boundaryre.match(line)
  272.                 if mo:
  273.                     if mo.group('end'):
  274.                         linesep = mo.group('linesep')
  275.                         break
  276.                     if capturing_preamble:
  277.                         if preamble:
  278.                             lastline = preamble[-1]
  279.                             eolmo = NLCRE_eol.search(lastline)
  280.                             if eolmo:
  281.                                 preamble[-1] = lastline[:-len(eolmo.group(0))]
  282.                             self._cur.preamble = EMPTYSTRING.join(preamble)
  283.                         capturing_preamble = False
  284.                         self._input.unreadline(line)
  285.                         continue
  286.                     while True:
  287.                         line = self._input.readline()
  288.                         if line is NeedMoreData:
  289.                             yield NeedMoreData
  290.                             continue
  291.                         mo = boundaryre.match(line)
  292.                         if not mo:
  293.                             self._input.unreadline(line)
  294.                             break
  295.                             continue
  296.                         self._input.push_eof_matcher(boundaryre.match)
  297.                         for retval in self._parsegen():
  298.                             if retval is NeedMoreData:
  299.                                 yield NeedMoreData
  300.                                 continue
  301.                             break
  302.                         
  303.                     if self._last.get_content_maintype() == 'multipart':
  304.                         epilogue = self._last.epilogue
  305.                         if epilogue == '':
  306.                             self._last.epilogue = None
  307.                         elif epilogue is not None:
  308.                             mo = NLCRE_eol.search(epilogue)
  309.                             if mo:
  310.                                 end = len(mo.group(0))
  311.                                 self._last.epilogue = epilogue[:-end]
  312.                             
  313.                         
  314.                     else:
  315.                         payload = self._last.get_payload()
  316.                         if isinstance(payload, basestring):
  317.                             mo = NLCRE_eol.search(payload)
  318.                             if mo:
  319.                                 payload = payload[:-len(mo.group(0))]
  320.                                 self._last.set_payload(payload)
  321.                             
  322.                     self._input.pop_eof_matcher()
  323.                     self._pop_message()
  324.                     self._last = self._cur
  325.                     continue
  326.                 if not capturing_preamble:
  327.                     raise AssertionError
  328.                 None.append(line)
  329.             if capturing_preamble:
  330.                 self._cur.defects.append(errors.StartBoundaryNotFoundDefect())
  331.                 self._cur.set_payload(EMPTYSTRING.join(preamble))
  332.                 epilogue = []
  333.                 for line in self._input:
  334.                     if line is NeedMoreData:
  335.                         yield NeedMoreData
  336.                         continue
  337.                         continue
  338.                 self._cur.epilogue = EMPTYSTRING.join(epilogue)
  339.                 return None
  340.             if None:
  341.                 epilogue = [
  342.                     '']
  343.             else:
  344.                 epilogue = []
  345.             for line in self._input:
  346.                 if line is NeedMoreData:
  347.                     yield NeedMoreData
  348.                     continue
  349.                 epilogue.append(line)
  350.             
  351.             if epilogue:
  352.                 firstline = epilogue[0]
  353.                 bolmo = NLCRE_bol.match(firstline)
  354.                 if bolmo:
  355.                     epilogue[0] = firstline[len(bolmo.group(0)):]
  356.                 
  357.             self._cur.epilogue = EMPTYSTRING.join(epilogue)
  358.             return None
  359.         lines = None
  360.         for line in self._input:
  361.             if line is NeedMoreData:
  362.                 yield NeedMoreData
  363.                 continue
  364.             lines.append(line)
  365.         
  366.         self._cur.set_payload(EMPTYSTRING.join(lines))
  367.  
  368.     
  369.     def _parse_headers(self, lines):
  370.         lastheader = ''
  371.         lastvalue = []
  372.         for lineno, line in enumerate(lines):
  373.             if line[0] in ' \t':
  374.                 if not lastheader:
  375.                     defect = errors.FirstHeaderLineIsContinuationDefect(line)
  376.                     self._cur.defects.append(defect)
  377.                     continue
  378.                 lastvalue.append(line)
  379.                 continue
  380.             if lastheader:
  381.                 lhdr = EMPTYSTRING.join(lastvalue)[:-1].rstrip('\r\n')
  382.                 self._cur[lastheader] = lhdr
  383.                 lastheader = ''
  384.                 lastvalue = []
  385.             if line.startswith('From '):
  386.                 if lineno == 0:
  387.                     mo = NLCRE_eol.search(line)
  388.                     if mo:
  389.                         line = line[:-len(mo.group(0))]
  390.                     self._cur.set_unixfrom(line)
  391.                     continue
  392.                 elif lineno == len(lines) - 1:
  393.                     self._input.unreadline(line)
  394.                     return None
  395.                 defect = errors.MisplacedEnvelopeHeaderDefect(line)
  396.                 self._cur.defects.append(defect)
  397.                 continue
  398.             i = line.find(':')
  399.             if i < 0:
  400.                 defect = errors.MalformedHeaderDefect(line)
  401.                 self._cur.defects.append(defect)
  402.                 continue
  403.             lastheader = line[:i]
  404.             lastvalue = [
  405.                 line[i + 1:].lstrip()]
  406.         
  407.         if lastheader:
  408.             self._cur[lastheader] = EMPTYSTRING.join(lastvalue).rstrip('\r\n')
  409.  
  410.  
  411.