# Copyright (C) 2012 W. Trevor King
#
# This file is part of Bugs Everywhere.
#
# Bugs Everywhere is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, either version 2 of the License, or (at your option) any
# later version.
#
# Bugs Everywhere is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# Bugs Everywhere.  If not, see <http://www.gnu.org/licenses/>.

# For urllib2 information, see
#   urllib2, from urllib2 - The Missing Manual
#   http://www.voidspace.org.uk/python/articles/urllib2.shtml
#
# A dictionary of response codes is available in
#   httplib.responses
# but it is slow to load.

"""Helpers for executing HTTP GET and POST transactions."""

import urllib.error
import urllib.parse
import urllib.request

from libbe import TESTING

if TESTING:
    import unittest


HTTP_OK = 200
HTTP_FOUND = 302
HTTP_TEMP_REDIRECT = 307
HTTP_USER_ERROR = 418
"""Status returned to indicate exceptions on the server side.

A BE-specific extension to the HTTP/1.1 protocol (See `RFC 2616`_).

.. _RFC 2616:
  http://www.w3.org/Protocols/rfc2616/rfc2616-sec6.html#sec6.1.1
"""

HTTP_VALID = [HTTP_OK, HTTP_FOUND, HTTP_TEMP_REDIRECT, HTTP_USER_ERROR]

USER_AGENT = 'BE-agent'


class HTTPError (Exception):
    """Error raised by `get_post_url` when a transaction fails.

    Parameters
    ----------
    error : Exception
        The underlying exception (e.g. from `urllib.error`), if any.
    url : str
        The URL that was being accessed.
    msg : str
        Human-readable description; used verbatim by `__str__` when given.
    """
    def __init__(self, error=None, url=None, msg=None):
        super().__init__(msg)
        self.url = url
        self.error = error
        self.msg = msg

    def __str__(self):
        # Prefer the explicit message, then the wrapped error, and only
        # fall back to a generic description when neither is available.
        if self.msg is None:
            if self.error is None:
                return 'Unknown HTTP error: {}'.format(self.url)
            return str(self.error)
        return self.msg


def get_post_url(url, get=True, data=None, data_dict=None, headers=None,
                 agent=None):
    """Execute a GET or POST transaction.

    Parameters
    ----------
    url : str
        The base URL (query portion added internally, if necessary).
    get : bool
        Use GET if True, otherwise use POST.
    data : str or bytes
        Raw data to send by POST (requires POST).  A `str` is encoded
        as UTF-8 before sending.
    data_dict : dict
        Data to send, either by URL query (if GET) or by POST (if POST).
        Cannot be given in combination with `data`.
    headers : list
        Extra HTTP headers to add to the request, as `(name, value)`
        pairs (anything accepted by `dict()`).  `None` means no extra
        headers.
    agent : str
        User agent string overriding the BE default.

    Returns
    -------
    (page, final_url, info) : tuple
        The raw response body, the URL of the resource actually
        retrieved (as reported by the response), and the response
        headers/meta-information object.

    Raises
    ------
    HTTPError
        If the server reports an HTTP error or the connection fails.
    """
    if agent is None:
        agent = USER_AGENT
    if data is None:
        if data_dict is None:
            data_dict = {}
        if get is True:
            if data_dict != {}:
                # encode get parameters in the url
                param_string = urllib.parse.urlencode(data_dict)
                url = '{}?{}'.format(url, param_string)
        else:
            data = urllib.parse.urlencode(data_dict)
    else:
        assert get is False, (data, get)
        assert data_dict is None, (data, data_dict)
    if isinstance(data, str):
        # urllib.request rejects str POST bodies; it needs bytes.
        data = data.encode('utf-8')
    # Copy so the caller's header container is never modified.
    headers = dict(headers) if headers is not None else {}
    headers['User-Agent'] = agent
    req = urllib.request.Request(url, data=data, headers=headers)
    try:
        response = urllib.request.urlopen(req)
    except urllib.error.HTTPError as e:
        if e.code == HTTP_USER_ERROR:
            lines = ['The server reported a user error (HTTPError)']
        else:
            lines = ['The server reported an error (HTTPError)']
        lines.append('URL: {}'.format(url))
        if hasattr(e, 'reason'):
            lines.append('Reason: {}'.format(e.reason))
        lines.append('Error code: {}'.format(e.code))
        msg = '\n'.join(lines)
        raise HTTPError(error=e, url=url, msg=msg) from e
    except urllib.error.URLError as e:
        msg = ('We failed to connect to the server (URLError).\nURL: {}\n'
               'Reason: {}').format(url, e.reason)
        raise HTTPError(error=e, url=url, msg=msg) from e
    # Close the response even if reading it fails part-way through.
    with response:
        page = response.read()
        final_url = response.geturl()
        info = response.info()
    return (page, final_url, info)


if TESTING:
    class GetPostUrlTestCase (unittest.TestCase):
        """Test cases for get_post_url()"""
        def test_get(self):
            url = 'http://bugseverywhere.org/'
            page,final_url,info = get_post_url(url=url)
            self.assertTrue(final_url == url,
                'Redirect?\n  Expected: "{}"\n  Got:      "{}"'.format(
                    url, final_url))

        def test_get_redirect(self):
            url = 'http://physics.drexel.edu/~wking/code/be/redirect'
            expected = 'http://physics.drexel.edu/~wking/'
            page,final_url,info = get_post_url(url=url)
            self.assertTrue(final_url == expected,
                'Redirect?\n  Expected: "{}"\n  Got:      "{}"'.format(
                    expected, final_url))