aboutsummaryrefslogtreecommitdiffstats
path: root/libbe/util/http.py
blob: 8af97eb83e96ec5987f370721e569b5e6e410df0 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# Copyright

# For urllib2 information, see "urllib2 - The Missing Manual":
#   http://www.voidspace.org.uk/python/articles/urllib2.shtml
#
# A dictionary of response codes is available in
#   httplib.responses
# but it is slow to load.

import urllib
import urllib2

from libbe import TESTING

if TESTING:
    import unittest


# Named HTTP status codes.
HTTP_OK = 200  # "OK": the request succeeded
HTTP_FOUND = 302  # "Found": redirect
HTTP_TEMP_REDIRECT = 307  # "Temporary Redirect"
# BE-specific status code; its meaning is described in the string below.
HTTP_USER_ERROR = 418
"""Status returned to indicate exceptions on the server side.

A BE-specific extension to the HTTP/1.1 protocol (See `RFC 2616`_).

.. _RFC 2616: http://www.w3.org/Protocols/rfc2616/rfc2616-sec6.html#sec6.1.1
"""

# The four status codes named above, collected in one list.
# NOTE(review): nothing in this module reads HTTP_VALID; presumably other BE
# code uses it to decide which response statuses are acceptable -- confirm
# against the callers.
HTTP_VALID = [HTTP_OK, HTTP_FOUND, HTTP_TEMP_REDIRECT, HTTP_USER_ERROR]


# Default User-Agent header value; get_post_url() falls back to it when no
# explicit `agent` is passed.
USER_AGENT = 'BE-agent'


class HTTPError(Exception):
    """Exception describing a failed HTTP transaction.

    Attributes
    ----------
    error
      The underlying error object, or None (get_post_url() stores the
      urllib2 exception it caught here).
    url
      The URL involved in the failure, or None.
    msg
      Human-readable description, or None.  It is also handed to
      `Exception.__init__` as the sole positional argument.
    """

    def __init__(self, error=None, url=None, msg=None):
        Exception.__init__(self, msg)
        self.error, self.url, self.msg = error, url, msg

    def __str__(self):
        # Preference order: explicit message, then the wrapped error's own
        # text, then a generic fallback that names the URL.
        if self.msg is not None:
            return self.msg
        if self.error is not None:
            return str(self.error)
        return 'Unknown HTTP error: {}'.format(self.url)


def get_post_url(url, get=True, data=None, data_dict=None, headers=[],
                 agent=None):
    """Execute a GET or POST transaction.

    Parameters
    ----------
    url : str
      The base URL (query portion added internally, if necessary).
    get : bool
      Use GET if True, otherwise use POST.
    data : str
      Raw data to send by POST (requires POST).
    data_dict : dict
      Data to send, either by URL query (if GET) or by POST (if POST).
      Cannot be given in combination with `data`.
    headers : list
      Extra HTTP headers to add to the request, in any form `dict()`
      accepts (e.g. a list of `(name, value)` pairs).
    agent : str
      User agent string overriding the BE default.

    Returns
    -------
    (page, final_url, info) : tuple
      The response body, the URL reported by the response (which differs
      from `url` if a redirect was followed), and the response's header
      information object.

    Raises
    ------
    ValueError
      If raw `data` is given together with `get` other than False, or
      together with `data_dict`.
    HTTPError
      If urllib2 raises `HTTPError` or `URLError` while opening the URL.
      The original exception is available as the `error` attribute.
    """
    if agent is None:
        agent = USER_AGENT
    if data is None:
        if data_dict is None:
            data_dict = {}
        if get is True:
            if data_dict != {}:
                # encode get parameters in the url
                param_string = urllib.urlencode(data_dict)
                url = '{}?{}'.format(url, param_string)
        else:
            data = urllib.urlencode(data_dict)
    else:
        # Validate explicitly instead of with `assert`: assertions are
        # stripped under `python -O`, which would let raw data be sent by
        # POST even though the caller asked for GET.
        if get is not False:
            raise ValueError(
                'raw data requires POST (get=False): {!r}'.format(
                    (data, get)))
        if data_dict is not None:
            raise ValueError(
                'data and data_dict are mutually exclusive: {!r}'.format(
                    (data, data_dict)))
    # dict() makes a fresh copy, so neither the caller's object nor the
    # shared default list is ever modified.
    headers = dict(headers)
    headers['User-Agent'] = agent
    # urllib2 issues a POST whenever `data` is not None, otherwise a GET.
    req = urllib2.Request(url, data=data, headers=headers)
    try:
        response = urllib2.urlopen(req)
    except urllib2.HTTPError as e:
        # HTTPError is a subclass of URLError, so it must be caught first.
        lines = [
            'We failed to connect to the server (HTTPError).',
            'URL: {}'.format(url),
            ]
        if hasattr(e, 'reason'):
            lines.append('Reason: {}'.format(e.reason))
        lines.append('Error code: {}'.format(e.code))
        msg = '\n'.join(lines)
        raise HTTPError(error=e, url=url, msg=msg)
    except urllib2.URLError as e:
        msg = ('We failed to connect to the server (URLError).\nURL: {}\n'
               'Reason: {}').format(url, e.reason)
        raise HTTPError(error=e, url=url, msg=msg)
    try:
        page = response.read()
        final_url = response.geturl()
        info = response.info()
    finally:
        # Release the connection even if reading the response fails.
        response.close()
    return (page, final_url, info)


if TESTING:
    class GetPostUrlTestCase (unittest.TestCase):
        """Test cases for get_post_url()

        These tests fetch live URLs, so they need network access and the
        remote sites to be reachable.
        """
        def test_get(self):
            """A plain GET should finish at the URL that was requested."""
            url = 'http://bugseverywhere.org/'
            page,final_url,info = get_post_url(url=url)
            # assertEqual replaces the deprecated failUnless alias.
            self.assertEqual(final_url, url,
                'Redirect?\n  Expected: "{}"\n  Got:      "{}"'.format(
                    url, final_url))

        def test_get_redirect(self):
            """A GET of a redirecting URL should report the target URL."""
            url = 'http://physics.drexel.edu/~wking/code/be/redirect'
            expected = 'http://physics.drexel.edu/~wking/'
            page,final_url,info = get_post_url(url=url)
            self.assertEqual(final_url, expected,
                'Redirect?\n  Expected: "{}"\n  Got:      "{}"'.format(
                    expected, final_url))