diff options
author | Mathieu Blondel <mathieu@mblondel.org> | 2007-05-15 22:36:20 +0000 |
---|---|---|
committer | Mathieu Blondel <mathieu@mblondel.org> | 2007-05-15 22:36:20 +0000 |
commit | 6857af1197f86797a2482b70c3c0a4f330aa60a2 (patch) | |
tree | 70ad3cd842a9d0d192be2015f4ecf773773130b4 /src | |
parent | 08c5283494a8bce7c17e931e3e09fdfc0cf63932 (diff) | |
download | wikipediafs-6857af1197f86797a2482b70c3c0a4f330aa60a2.tar.gz |
- Moved cookie support code to user.py.
- Created http.py.
- Added support for httpauth, based on a patch from Johannes Wolter.
- Added support for https.
git-svn-id: http://svn.code.sf.net/p/wikipediafs/code/branches/fuse-python-new-api@25 59acd704-e115-0410-a914-e735a229ed7c
Diffstat (limited to 'src')
-rw-r--r-- | src/wikipediafs/article.py | 200 | ||||
-rw-r--r-- | src/wikipediafs/config.py | 36 | ||||
-rw-r--r-- | src/wikipediafs/http.py | 85 | ||||
-rw-r--r-- | src/wikipediafs/user.py | 112 |
4 files changed, 427 insertions, 6 deletions
diff --git a/src/wikipediafs/article.py b/src/wikipediafs/article.py new file mode 100644 index 0000000..593a5da --- /dev/null +++ b/src/wikipediafs/article.py @@ -0,0 +1,200 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# WikipediaFS +# Copyright (C) 2005 - 2007 Mathieu Blondel +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +import urllib, re, os +from sgmllib import SGMLParser +from http import ExtendedHTTPConnection + +class Article(SGMLParser): + """ + Gets and sets an article. 
+ """ + + def __init__(self, + name, + host, + basename, + cookie_str=None, + https=False, + port=None, + httpauth_username=None, + httpauth_password=None + ): + SGMLParser.__init__(self) + + self.name = name + self.host = host + self.basename = basename + self.cookie_str = cookie_str + self.https = https + self.port = port + self.httpauth_username = httpauth_username + self.httpauth_password = httpauth_password + + self.content = "" + self.textarea = False + self.wpEdittime = 0 + self.wpStarttime = 0 + self.wpEditToken = None + self.last_open_time = 0 + + # url patterns + self.edit_page = "%s?title=%s&action=edit" % \ + (self.basename, self.name) + # basename must include a leading / + + self.submit_page = "%s?title=%s&action=submit" % \ + (self.basename, self.name) + + def start_textarea(self,attrs): + """ + Called when a textarea is entered. + """ + self.textarea = True + + def start_input(self,attrs): + """ + Called when an input is entered. + """ + # To set an article, we need to now its wpEdittime first. + + if len(attrs) == 3 and attrs[2][1] == "wpEdittime": + self.wpEdittime = attrs[1][1] + elif len(attrs) == 3 and attrs[2][1] == "wpEditToken": + self.wpEditToken = attrs[1][1] + elif len(attrs) == 3 and attrs[2][1] == "wpStarttime": + self.wpStarttime = attrs[1][1] + + def end_textarea(self): + """ + Called when a textarea is left. + """ + self.textarea = False + + def handle_data(self,data): + """ + Called when data is parsed. + """ + # We add the parsed data to self.content when the data parsed + # is in a textarea + if self.textarea == True: + self.content += data + + def get(self): + """ + Gets the wiki content (not the whole html page). 
+ """ + + headers = {"User-agent" : "WikipediaFS"} + + if self.cookie_str is not None: + headers["Cookie"] = self.cookie_str + + conn = ExtendedHTTPConnection(self.host, self.port, self.https) + + conn.set_proxy() + + if self.httpauth_username and self.httpauth_password: + conn.http_auth(httpauth_username, httpauth_password) + + conn.add_headers(headers) + conn.request(self.edit_page) + #logger.info("HTTP GET %s" % self.edit_page) + response = conn.getresponse() + + # Feeds the SGMLparser + self.feed(response.read()) + conn.close() + + return self.content + + + def set(self, text): + # Looking for a [[Summary:*]] + regexp = '((\[\[)((s|S)ummary:)(.*)(\]\])(( )*\n)?)' + summary = re.search(regexp, text) + if summary is not None: + wpSummary = summary.group(5) + text = text.replace(summary.group(1), '') + else: + wpSummary = " " + + # wpEdittime is empty if the article is a new article + params = { + "wpTextbox1" : text, + "wpSummary" : wpSummary, + "wpEdittime" : self.wpEdittime, + "wpStarttime": self.wpStarttime, + "wpSave" : 1 + } + + # Needed for logged in edition + if self.wpEditToken is not None: + params["wpEditToken"] = self.wpEditToken + + params = urllib.urlencode(params) + + headers = {"Content-type": "application/x-www-form-urlencoded", + "User-agent" : "WikipediaFS"} + + if self.cookie_str is not None: + headers["Cookie"] = self.cookie_str + + conn = ExtendedHTTPConnection(self.host, self.port, self.https) + conn.set_proxy() + + conn.add_headers(headers) + conn.add_data(params) + conn.request(self.submit_page) + #logger.info("HTTP POST %s" % self.submit_page) + response = conn.getresponse() + + # Log http response + #if response.status == 302: + # logger.info("Succesful") + #elif response.status == 200: + # logger.warning("Problems occured %s\n" % response.read()) + #else: + # logger.info("%d \n %s " % (response.status,response.read())) + + conn.close() + + +if __name__ == "__main__": + import random + import sys + from user import User + + params = { + 
"host" : "www.mblondel.org", + "basename" : "/mediawiki/index.php", + "https" : True + } + + # Used username and password if any + if len(sys.argv) == 3: + user = User(sys.argv[1], sys.argv[2], **params) + params["cookie_str"] = user.getCookieString() + + art = Article("Test", **params) + print art.get() + + art.set("Test ! (%s)" % str(random.random())) +
\ No newline at end of file diff --git a/src/wikipediafs/config.py b/src/wikipediafs/config.py index 64b3cc0..5ad5941 100644 --- a/src/wikipediafs/config.py +++ b/src/wikipediafs/config.py @@ -35,13 +35,30 @@ class Config: <article-cache-time>300</article-cache-time> </general> <sites> - <!-- <site> + <!-- + Minimalist site entry sample: + <site> <dirname>wikipedia-fr</dirname> <host>fr.wikipedia.org</host> <basename>/w/index.php</basename> + </site> + And another one with all possible informations: + <site> + <dirname>wikipedia-fr</dirname> + <host>fr.wikipedia.org</host> + <port>443</port> + <basename>/w/index.php</basename> <username>Username</username> <password>Password</password> - </site>--> + <https /> + <httpauth_username>Username</httpauth_username> + <httpauth_password>Password</httpauth_password> + </site> + --> + <!-- + Below a Mediawiki test site. + Feel free to use it! + --> <site> <dirname>mblondel.org</dirname> <host>www.mblondel.org</host> @@ -89,14 +106,19 @@ class Config: sites = self.__config.getElementsByTagName("site") for site in sites: dic = {} - for ele in ('dirname', 'host', 'basename', 'username', - 'password'): + for ele in ("dirname", "host", "basename", "username", + "password", "https", "port", "httpauth_username", + "httpauth_password"): node = site.getElementsByTagName(ele) if node.length == 1: - dic[ele] = node[0].firstChild.nodeValue.encode("utf-8") + if node[0].firstChild: + dic[ele] = node[0].firstChild.nodeValue.encode("utf-8") + else: + dic[ele] = True # for elements like <https /> else: dic[ele] = None - self.sites[dic['dirname']] = dic + + self.sites[dic["dirname"]] = dic def __setCacheTime(self): @@ -129,6 +151,8 @@ if __name__ == "__main__": <dirname>mblondel.org</dirname> <host>www.mblondel.org</host> <basename>/mediawiki/index.php</basename> + <https /> + <port>8080</port> </site> </sites> </wfs-config> diff --git a/src/wikipediafs/http.py b/src/wikipediafs/http.py new file mode 100644 index 0000000..a7124d8 --- 
/dev/null +++ b/src/wikipediafs/http.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# WikipediaFS +# Copyright (C) 2005 - 2007 Mathieu Blondel +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +import os, socket, string, base64 +from httplib import HTTPConnection, HTTPSConnection + +class ExtendedHTTPConnection: + """ + Transparent support for https, proxy, http auth + """ + def __init__(self, host, port=None, https=False): + if https and not port: + port = 443 + elif not port: + port = 80 + else: + port = int(port) + + if https: + self.conn = HTTPSConnection(host, port) + else: + self.conn = HTTPConnection(host, port) + + self.headers = {} + self.data = None + + def add_header(self, header, value): + self.headers[header] = value + + def add_headers(self, headers): + for k, v in headers.items(): + self.add_header(k, v) + + def request(self, url): + if self.data: + method = "POST" + else: + method = "GET" + + self.conn.request(method, url, self.data, self.headers) + + def getresponse(self, *args): + return self.conn.getresponse(*args) + + def close(self): + return self.conn.close() + + def add_data(self, data): + self.data = data + + def set_proxy(self): + """ + Sets proxy if needed. 
+ """ + if os.environ.has_key("http_proxy"): + http_proxy = os.environ["http_proxy"] + http_proxy = http_proxy.replace("http://", "").rstrip("/") + (proxy_host, proxy_port) = http_proxy.split(":") + proxy_port = int(proxy_port) + proxy_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + proxy_sock.connect((proxy_host, proxy_port)) + self.sock = proxy_sock + + def http_auth(self, username, password): + httpbasicauth = "%s:%s" % (username, password) + add_header("Authorization", + "Basic %s" % base64.encodestring(httpbasicauth).strip()) +
\ No newline at end of file diff --git a/src/wikipediafs/user.py b/src/wikipediafs/user.py new file mode 100644 index 0000000..40b45f8 --- /dev/null +++ b/src/wikipediafs/user.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# WikipediaFS +# Copyright (C) 2005 - 2007 Mathieu Blondel +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +import urllib, re +from http import ExtendedHTTPConnection + +class User: + """ + Gets user's cookie string + """ + + def __init__(self, + username, + password, + host, + basename, + https=False, + port=None, + httpauth_username=None, + httpauth_password=None + ): + + self.username = username + self.password = password + self.host = host + self.basename = basename + self.https = https + self.port = port + self.httpauth_username = httpauth_username + self.httpauth_password = httpauth_password + + # url pattern + self.login_page = "%s?title=Special:Userlogin" % self.basename + self.login_page += "&action=submit&returnto=Special:Userlogin" + + def getCookieString(self): + """ + Gets user's cookie string. + It will then have to be passed to an Article. 
+ """ + + params = {"wpName":self.username, "wpPassword":self.password, + "wpLoginattempt":"Identification", "wpRemember":"1"} + + params = urllib.urlencode(params) + + headers = {"Content-type": "application/x-www-form-urlencoded", + "User-agent" : "WikipediaFS"} + + conn = ExtendedHTTPConnection(self.host, self.port, self.https) + + conn.set_proxy() # sets proxy if needed + + if self.httpauth_username and self.httpauth_password: + conn.http_auth(httpauth_username, httpauth_password) + + conn.add_data(params) + conn.add_headers(headers) + conn.request(self.login_page) + response = conn.getresponse() + + cookie_list = [] + in_cookie = re.compile(': (.*?);') + + for cookie_value in response.msg.getallmatchingheaders("set-cookie"): + it_matches = in_cookie.search(cookie_value) + + if it_matches: + cookie_list.append(it_matches.group(1)) + + conn.close() + + if len(cookie_list) == 4: + cookie_list.pop() + #logger.info("; ".join(cookie_list)) + return "; ".join(cookie_list) + else: + return None + + +if __name__ == "__main__": + import sys + + params = { + "host" : "www.mblondel.org", + "basename" : "/mediawiki/index.php", + "https" : True + } + + if(len(sys.argv) != 3): + print "python user.py username password" + else: + user = User(sys.argv[1], sys.argv[2], **params) + print user.getCookieString() +
\ No newline at end of file |