Source code for support.http_client

"""A simple HTTP client which mixes httplib with gevent and PayPal protecteds.

It provides convenience functions for the standard set of `HTTP methods`_:

>>> http_client.get('http://example.com/foo') # doctest: +SKIP

which are just shortcuts for the corresponding :py:func:`request` call:

>>> http_client.request("get", "http://example.com/foo") # doctest: +SKIP

.. _HTTP Methods: http://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Request_methods

If you don't intend to read the response's body, you should use a
context manager:

>>> with http_client.get('http://www.example.com') as response: # doctest: +SKIP
...    assert response.status == 200

This will release the underlying socket back to the socket pool.
"""

import httplib
from urlparse import urlparse, urlunparse
import functools
import urllib2
import os
import json

import context
import connection_mgr

import async
from gevent import socket
from gevent import ssl


# TODO: make and use a better HTTP library instead of wrapping httplib.
# hopefully this is at least a pretty stable abstraction that can migrate over
# ... if nothing else, much better than shrugging our shoulders when someone
# asks how to make an http request


class _GHTTPConnection(httplib.HTTPConnection):

    default_port = httplib.HTTP_PORT

    def __init__(self, host, port=None, strict=None,
                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                 protected=None):
        httplib.HTTPConnection.__init__(self, host, port, strict, timeout)
        self.protected = protected

    def connect(self):
        ctx = context.get_context()
        self.sock = ctx.connection_mgr.get_connection((self.host, self.port),
                                                      self.protected)
        if self._tunnel_host:
            self._tunnel()

    def release_sock(self):
        # print self._HTTPConnection__state, self.sock
        if (self._HTTPConnection__state == httplib._CS_IDLE
           and self._HTTPConnection__response is None
           and self.sock):
            context.get_context().connection_mgr.release_connection(self.sock)
            self.sock = None

    def _set_content_length(self, body):
        # Set the content-length based on the body.
        thelen = None
        try:
            thelen = str(len(body))
        except TypeError:
            # If this is a file-like object, try to
            # fstat its file descriptor
            try:
                thelen = str(os.fstat(body.fileno()).st_size)
            except (AttributeError, OSError):
                # TODO
                # Don't send a length if this failed
                if self.debuglevel > 0:
                    print "Cannot stat file-type HTTP body."

        if thelen is not None:
            self.putheader('Content-Length', thelen)

    def __del__(self):
        self.release_sock()


class _GHTTPSConnection(_GHTTPConnection):

    default_port = httplib.HTTPS_PORT

    def __init__(self, host, port=None, strict=None,
                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        _GHTTPConnection.__init__(self, host, port, strict, timeout,
                                  protected=connection_mgr.PLAIN_SSL)

    def connect(self):
        ctx = context.get_context()
        if self._tunnel_host:
            # we need to issue CONNECT *prior* to doing any SSL.  so
            # start off by asking for a plain socket...
            self.sock = ctx.connection_mgr.get_connection((self.host,
                                                           self.port),
                                                          ssl=None)
            # ...then issue the CONNECT...
            self._tunnel()
            # ...finally, replace the underlying socket on the
            # monitored socket with an SSL wrapped socket that matches
            # the kind specified by self.protected.  note that this is
            # copy-pasted from connection_mgr.py
            self.sock._msock = (ssl.wrap_socket(self.sock._msock)
                                if self.protected == connection_mgr.PLAIN_SSL
                                else
                                async.wrap_socket_context(self.sock._msock,
                                                          self.protected))
        else:
            # if we don't need to issue a connect, then the super
            # class will do the right thing
            _GHTTPConnection.connect(self)


def urllib2_request(u2req, timeout=None):
    """Issue a ``urllib2.Request`` through :func:`request`.

    Translate a urllib2.Request to something we can pass to our
    request() function, and translate our Response to a
    urllib2.addinfourl object.

    :param u2req: the ``urllib2.Request`` to send.
    :param timeout: optional timeout; only forwarded to :func:`request`
        when it is not ``None``, so that function's own default applies
        otherwise.
    :returns: a ``urllib2.addinfourl`` whose ``code`` and ``msg`` are the
        response status and reason.
    :raises urllib2.URLError: if a ``ValueError`` is raised while the
        request is issued or the response is wrapped.
    """
    # TODO: proxy support?
    method = u2req.get_method()
    url = u2req._Request__original
    body = u2req.get_data()

    # Unredirected headers take precedence over the regular ones.
    headers = dict(u2req.unredirected_hdrs)
    headers.update((k, v) for k, v in u2req.headers.items()
                   if k not in headers)
    try:
        kwargs = {}
        if timeout is not None:
            kwargs['timeout'] = timeout
        resp = request(method, url, body, headers, **kwargs)

        hr = resp.http_response
        # socket._fileobject reads through ``recv``; point it at ``read``.
        hr.recv = hr.read
        fp = socket._fileobject(hr, close=True)

        aiu = urllib2.addinfourl(fp=fp,
                                 headers=hr.msg,
                                 url=resp.request.url)
        aiu.code = hr.status
        aiu.msg = hr.reason
        return aiu
    except ValueError as e:
        # A plain ValueError has no ``msg`` attribute; reading it raised
        # AttributeError and hid the real problem.  Use the string form.
        raise urllib2.URLError(str(e))
def request(method, url, body=None, headers=None,
            literal=False, use_protected=False,
            timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
    '''A function to issue HTTP requests.

    :param method: the `HTTP method`_ for this request. Case
        insensitive.

    :param url: the URL to request. Must include a protocol
        (e.g. `http`, `https`).

    :param body: the body of the request, if applicable

    :type body: a string or file-like object (i.e, an object that has
        a ``read`` method).  It could also be a dict, in which case it
        is stringified, and the header set to application/json

    :param headers: A dictionary of request headers.  The dictionary is
        copied before any default headers are added, so the caller's
        object is never modified.  A value that is a list is sent as
        one header line per element.

    :type headers: :py:class:`dict`

    :param literal: if true, instruct
        :py:class:`~httplib.HTTPConnection` **not** to set the
        ``Host`` or ``Accept-Encoding`` headers automatically.  Useful
        for testing

    :param use_protected: if true, use the appropriate protected for
        this call.

    :param timeout: connection timeout for this request.

    :returns: a :py:class:`Response` object.

    :raises ValueError: if ``method`` is not a known HTTP method or the
        URL's scheme is neither ``http`` nor ``https``.

    An example, calling up google with a custom host header:

    >>> request('get',
    ...         'http://google.com',
    ...         headers={'Host': 'www.google.com'},
    ...         literal=True)
    <http_client.Response (200) GET http://google.com>

    .. _HTTP Method: http://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Request_methods

    '''
    method = method.upper()
    if method not in _HTTP_METHODS:
        raise ValueError("invalid http method {0}".format(method))

    parsed = urlparse(url)
    if parsed.scheme not in ('http', 'https'):
        raise ValueError('unknown protocol %s' % parsed.scheme)

    # Split "host:port"; a missing or non-numeric port falls back to the
    # scheme's default.
    domain, _, port = parsed.netloc.partition(':')
    try:
        port = int(port)
    except ValueError:
        port = 80 if parsed.scheme == 'http' else 443

    # False for http; for https either True (use_protected) or the
    # "PLAIN_SSL" marker.
    protected = (parsed.scheme == 'https') and (True if use_protected
                                                else "PLAIN_SSL")

    conn = _GHTTPConnection(domain, port, protected=protected,
                            timeout=timeout)

    # Everything after scheme://netloc is what goes on the request line.
    selector = urlunparse(parsed._replace(scheme='', netloc=''))

    skips = {'skip_host': True,
             'skip_accept_encoding': True} if literal else {}

    # Work on a private copy: the defaults added below must not leak into
    # a dictionary the caller may reuse for other requests.
    headers = {} if headers is None else dict(headers)

    if not literal:
        headers.setdefault('User-Agent', 'python')

    if isinstance(body, dict):
        body = json.dumps(body)
        if 'Content-Type' not in headers:
            headers['Content-Type'] = 'application/json'

    conn.putrequest(method, selector, **skips)
    if not literal and body is not None and 'Content-Length' not in headers:
        conn._set_content_length(body)

    for header, value in headers.items():
        if isinstance(value, list):
            # One header line per list element.
            for subvalue in value:
                conn.putheader(header, subvalue)
        else:
            conn.putheader(header, value)

    conn.endheaders()

    if body is not None:
        conn.send(body)

    raw = conn.getresponse()    # does NOT hold a reference to the
                                # HTTPConnection
    raw._connection = conn      # so the finalizer doesn't get called
                                # until the request has died

    return Response(
        Request(method, url, headers, body),
        raw.status, raw.msg, raw)
class Request(object):
    '''A simple wrapper for HTTP Requests

    The constructor stores its four arguments unchanged:

    .. py:attribute:: method

       The method used for this request (e.g., `POST`, `GET`).

    .. py:attribute:: url

       The requested URL.

    .. py:attribute:: headers

       The request headers, exactly as handed to the constructor
       (:py:func:`request` passes the header dictionary it sent).

    .. py:attribute:: body

       The body if present, otherwise `None`.

    '''

    def __init__(self, method, url, headers, body):
        self.method, self.url = method, url
        self.headers, self.body = headers, body

    def __repr__(self):
        template = "<http_client.Request {0} {1}>"
        return template.format(self.method, self.url)
class Response(object):
    r'''A simple wrapper for HTTP responses.

    .. py:attribute:: request

       the :py:class:`Request` object that led to this response

    .. py:attribute:: status

       the numeric status code for this Response

    .. py:attribute:: headers

       an :py:class:`~httplib.HTTPMessage` object containing this
       response's headers.  You can treat this as a dictionary: for
       example, you can get the value for the ``Host`` header with
       ``msg['Host']``.  **You should, however, be careful with
       duplicate headers.**

       Consider the following headers:

       >>> headers = '\r\n'.join(['X-First-Header: First, Value',
       ...                        'X-First-Header: Second, Value',
       ...                        'X-Second-Header: Final, Value',
       ...                        ''])

       Note that the header ``X-First-Header`` appears twice.

       >>> from StringIO import StringIO
       >>> from httplib import HTTPMessage
       >>> msg = HTTPMessage(StringIO(headers))
       >>> msg['X-First-Header']
       'First, Value, Second, Value'

       :py:class:`HTTPMessage` has *concatenated* the two values we
       provided for `X-First-Header` (`First, Value` and `Second,
       Value`) with a comma.  Unfortunately both of these values
       contain a comma.  That means a simple :py:meth:`str.split`
       can't recover the original values:

       >>> msg['X-First-Header'].split(', ')
       ['First', 'Value', 'Second', 'Value']

       The same behavior occurs with :meth:`HTTPMessage.items`:

       >>> msg.items() # doctest: +NORMALIZE_WHITESPACE
       [('x-second-header', 'Final, Value'),
        ('x-first-header', 'First, Value, Second, Value')]

       To correctly recover values from duplicated header fields, use
       :meth:`HTTPMessage.getheaders`:

       >>> msg.getheaders('X-First-Header')
       ['First, Value', 'Second, Value']

    .. py:attribute:: http_response

       the underlying :py:class:`~httplib.HTTPResponse` object for
       this response.

    '''

    def __init__(self, request, status, headers, http_response):
        self.request, self.status = request, status
        self.headers, self.http_response = headers, http_response
        # Filled in lazily by the ``body`` property.
        self._body = None

    def close(self):
        """Release the underlying socket back to the connection pool.

        This is called automatically by :py:attr:`~Response.body` once
        the body has been read.  If you never read the body, arrange to
        have this called yourself, for example by using the response as
        a context manager.  Calling it again later only closes the
        already-closed underlying response.
        """
        raw = self.http_response
        if hasattr(raw, '_connection'):
            # release_sock() decides for itself whether the socket may go
            # back to the pool; afterwards drop our link to the connection.
            raw._connection.release_sock()
            del raw._connection
        raw.close()

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.close()

    @property
    def body(self):
        """the body of the request, if applicable.

        Since this value is lazily loaded, if you never access it the
        response's body will never be downloaded.  Once loaded it's
        stored locally, so repeated accesses won't trigger repeated
        network calls.
        """
        if self._body is not None:
            return self._body
        # Read inside the context manager so close() runs afterwards.
        with self:
            self._body = self.http_response.read()
        return self._body

    def __repr__(self):
        template = "<http_client.Response ({0}) {1} {2}>"
        return template.format(self.status,
                               self.request.method,
                               self.request.url)


#http://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Request_methods
_HTTP_METHODS = ('GET', 'HEAD', 'POST', 'PUT', 'DELETE', 'TRACE', 'OPTIONS',
                 'CONNECT', 'PATCH')


def _init_methods():
    """Install one module-level shortcut per entry in ``_HTTP_METHODS``.

    Each shortcut is named after the lower-cased method (``get``,
    ``post``, ...) and is ``request`` with the method pre-bound.
    """
    shortcuts = dict((verb.lower(), functools.partial(request, verb))
                     for verb in _HTTP_METHODS)
    globals().update(shortcuts)


_init_methods()