aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorMathieu Blondel <mathieu@mblondel.org>2007-05-15 22:36:20 +0000
committerMathieu BlondelMathieu Blondel <mathieu@mblondel.orgmathieu@mblondel.org>2007-05-15 22:36:20 +0000
commit6857af1197f86797a2482b70c3c0a4f330aa60a2 (patch)
tree70ad3cd842a9d0d192be2015f4ecf773773130b4 /src
parent08c5283494a8bce7c17e931e3e09fdfc0cf63932 (diff)
downloadwikipediafs-6857af1197f86797a2482b70c3c0a4f330aa60a2.tar.gz
- Moved cookie support code to user.py.
- Created http.py. - Added support for httpauth, based on a patch from Johannes Wolter. - Added support for https. git-svn-id: http://svn.code.sf.net/p/wikipediafs/code/branches/fuse-python-new-api@25 59acd704-e115-0410-a914-e735a229ed7c
Diffstat (limited to 'src')
-rw-r--r--src/wikipediafs/article.py200
-rw-r--r--src/wikipediafs/config.py36
-rw-r--r--src/wikipediafs/http.py85
-rw-r--r--src/wikipediafs/user.py112
4 files changed, 427 insertions, 6 deletions
diff --git a/src/wikipediafs/article.py b/src/wikipediafs/article.py
new file mode 100644
index 0000000..593a5da
--- /dev/null
+++ b/src/wikipediafs/article.py
@@ -0,0 +1,200 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# WikipediaFS
+# Copyright (C) 2005 - 2007 Mathieu Blondel
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+import urllib, re, os
+from sgmllib import SGMLParser
+from http import ExtendedHTTPConnection
+
class Article(SGMLParser):
    """
    Gets and sets an article.

    The article's wiki text is obtained by fetching the MediaWiki
    "action=edit" page and scraping the edit form: the text itself comes
    from the <textarea>, while the hidden <input> fields (wpEdittime,
    wpStarttime, wpEditToken) needed to submit an edit are remembered.
    """

    def __init__(self,
                 name,
                 host,
                 basename,
                 cookie_str=None,
                 https=False,
                 port=None,
                 httpauth_username=None,
                 httpauth_password=None
                 ):
        """
        name: article title
        host: wiki host name, e.g. "fr.wikipedia.org"
        basename: path to index.php; must include a leading /
        cookie_str: session cookie string of a logged-in user, if any
        https: use HTTPS instead of HTTP
        port: TCP port (defaults to 80/443 depending on https)
        httpauth_username, httpauth_password: HTTP Basic auth credentials
        """
        SGMLParser.__init__(self)

        self.name = name
        self.host = host
        self.basename = basename
        self.cookie_str = cookie_str
        self.https = https
        self.port = port
        self.httpauth_username = httpauth_username
        self.httpauth_password = httpauth_password

        self.content = ""          # wiki text collected from the textarea
        self.textarea = False      # True while the parser is inside <textarea>
        self.wpEdittime = 0
        self.wpStarttime = 0
        self.wpEditToken = None
        self.last_open_time = 0

        # url patterns (basename must include a leading /)
        self.edit_page = "%s?title=%s&action=edit" % \
                         (self.basename, self.name)

        self.submit_page = "%s?title=%s&action=submit" % \
                           (self.basename, self.name)

    def start_textarea(self, attrs):
        """
        Called when a textarea is entered.
        """
        self.textarea = True

    def start_input(self, attrs):
        """
        Called when an input is entered.

        Remembers the hidden form fields (wpEdittime, wpStarttime,
        wpEditToken) required to submit an edit later on.
        """
        # To set an article, we need to know its wpEdittime first.
        # NOTE(review): this assumes each hidden input carries exactly
        # three attributes ordered (type, value, name), as MediaWiki
        # generates them — a dict-based lookup would be more robust;
        # kept positional to preserve behaviour.
        if len(attrs) == 3 and attrs[2][1] == "wpEdittime":
            self.wpEdittime = attrs[1][1]
        elif len(attrs) == 3 and attrs[2][1] == "wpEditToken":
            self.wpEditToken = attrs[1][1]
        elif len(attrs) == 3 and attrs[2][1] == "wpStarttime":
            self.wpStarttime = attrs[1][1]

    def end_textarea(self):
        """
        Called when a textarea is left.
        """
        self.textarea = False

    def handle_data(self, data):
        """
        Called when data is parsed.
        """
        # Only data inside the edit form's textarea belongs to the
        # article itself.
        if self.textarea == True:
            self.content += data

    def get(self):
        """
        Gets the wiki content (not the whole html page).
        """

        headers = {"User-agent" : "WikipediaFS"}

        if self.cookie_str is not None:
            headers["Cookie"] = self.cookie_str

        conn = ExtendedHTTPConnection(self.host, self.port, self.https)

        conn.set_proxy()

        if self.httpauth_username and self.httpauth_password:
            # Bug fix: the credentials are attributes of self; the bare
            # names raised a NameError as soon as HTTP auth was used.
            conn.http_auth(self.httpauth_username, self.httpauth_password)

        conn.add_headers(headers)
        conn.request(self.edit_page)
        #logger.info("HTTP GET %s" % self.edit_page)
        response = conn.getresponse()

        # Feeds the SGMLparser
        self.feed(response.read())
        conn.close()

        return self.content


    def set(self, text):
        """
        Submits new wiki text for the article.

        An optional "[[Summary:...]]" directive embedded in the text is
        extracted and used as the edit summary.
        """
        # Looking for a [[Summary:*]]
        regexp = r'((\[\[)((s|S)ummary:)(.*)(\]\])(( )*\n)?)'
        summary = re.search(regexp, text)
        if summary is not None:
            wpSummary = summary.group(5)
            text = text.replace(summary.group(1), '')
        else:
            wpSummary = " "

        # wpEdittime is empty if the article is a new article
        params = {
            "wpTextbox1" : text,
            "wpSummary" : wpSummary,
            "wpEdittime" : self.wpEdittime,
            "wpStarttime": self.wpStarttime,
            "wpSave" : 1
        }

        # Needed for logged in edition
        if self.wpEditToken is not None:
            params["wpEditToken"] = self.wpEditToken

        params = urllib.urlencode(params)

        headers = {"Content-type": "application/x-www-form-urlencoded",
                   "User-agent" : "WikipediaFS"}

        if self.cookie_str is not None:
            headers["Cookie"] = self.cookie_str

        conn = ExtendedHTTPConnection(self.host, self.port, self.https)
        conn.set_proxy()

        if self.httpauth_username and self.httpauth_password:
            # Consistency fix: set() did not send the HTTP Basic auth
            # credentials although get() did, so editing an article
            # behind HTTP auth could not work.
            conn.http_auth(self.httpauth_username, self.httpauth_password)

        conn.add_headers(headers)
        conn.add_data(params)
        conn.request(self.submit_page)
        #logger.info("HTTP POST %s" % self.submit_page)
        response = conn.getresponse()

        # Log http response
        #if response.status == 302:
        #    logger.info("Succesful")
        #elif response.status == 200:
        #    logger.warning("Problems occured %s\n" % response.read())
        #else:
        #    logger.info("%d \n %s " % (response.status,response.read()))

        conn.close()
+
+
+if __name__ == "__main__":
+ import random
+ import sys
+ from user import User
+
+ params = {
+ "host" : "www.mblondel.org",
+ "basename" : "/mediawiki/index.php",
+ "https" : True
+ }
+
+ # Used username and password if any
+ if len(sys.argv) == 3:
+ user = User(sys.argv[1], sys.argv[2], **params)
+ params["cookie_str"] = user.getCookieString()
+
+ art = Article("Test", **params)
+ print art.get()
+
+ art.set("Test ! (%s)" % str(random.random()))
+ \ No newline at end of file
diff --git a/src/wikipediafs/config.py b/src/wikipediafs/config.py
index 64b3cc0..5ad5941 100644
--- a/src/wikipediafs/config.py
+++ b/src/wikipediafs/config.py
@@ -35,13 +35,30 @@ class Config:
<article-cache-time>300</article-cache-time>
</general>
<sites>
- <!-- <site>
+ <!--
+ Minimalist site entry sample:
+ <site>
<dirname>wikipedia-fr</dirname>
<host>fr.wikipedia.org</host>
<basename>/w/index.php</basename>
+ </site>
+ And another one with all possible information:
+ <site>
+ <dirname>wikipedia-fr</dirname>
+ <host>fr.wikipedia.org</host>
+ <port>443</port>
+ <basename>/w/index.php</basename>
<username>Username</username>
<password>Password</password>
- </site>-->
+ <https />
+ <httpauth_username>Username</httpauth_username>
+ <httpauth_password>Password</httpauth_password>
+ </site>
+ -->
+ <!--
+ Below is a MediaWiki test site.
+ Feel free to use it!
+ -->
<site>
<dirname>mblondel.org</dirname>
<host>www.mblondel.org</host>
@@ -89,14 +106,19 @@ class Config:
sites = self.__config.getElementsByTagName("site")
for site in sites:
dic = {}
- for ele in ('dirname', 'host', 'basename', 'username',
- 'password'):
+ for ele in ("dirname", "host", "basename", "username",
+ "password", "https", "port", "httpauth_username",
+ "httpauth_password"):
node = site.getElementsByTagName(ele)
if node.length == 1:
- dic[ele] = node[0].firstChild.nodeValue.encode("utf-8")
+ if node[0].firstChild:
+ dic[ele] = node[0].firstChild.nodeValue.encode("utf-8")
+ else:
+ dic[ele] = True # for elements like <https />
else:
dic[ele] = None
- self.sites[dic['dirname']] = dic
+
+ self.sites[dic["dirname"]] = dic
def __setCacheTime(self):
@@ -129,6 +151,8 @@ if __name__ == "__main__":
<dirname>mblondel.org</dirname>
<host>www.mblondel.org</host>
<basename>/mediawiki/index.php</basename>
+ <https />
+ <port>8080</port>
</site>
</sites>
</wfs-config>
diff --git a/src/wikipediafs/http.py b/src/wikipediafs/http.py
new file mode 100644
index 0000000..a7124d8
--- /dev/null
+++ b/src/wikipediafs/http.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# WikipediaFS
+# Copyright (C) 2005 - 2007 Mathieu Blondel
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+import os, socket, string, base64
+from httplib import HTTPConnection, HTTPSConnection
+
class ExtendedHTTPConnection:
    """
    Transparent support for https, proxy, http auth.

    Thin wrapper around httplib's HTTP(S)Connection: headers and POST
    data are accumulated with add_header()/add_data() and sent with a
    single request() call (POST when data was added, GET otherwise).
    """
    def __init__(self, host, port=None, https=False):
        """
        host: server host name
        port: TCP port; defaults to 443 for https, 80 otherwise
        https: use an HTTPSConnection instead of an HTTPConnection
        """
        if https and not port:
            port = 443
        elif not port:
            port = 80
        else:
            # port may come from the XML config as a string
            port = int(port)

        if https:
            self.conn = HTTPSConnection(host, port)
        else:
            self.conn = HTTPConnection(host, port)

        self.headers = {}
        self.data = None

    def add_header(self, header, value):
        """Adds a single HTTP header to the next request."""
        self.headers[header] = value

    def add_headers(self, headers):
        """Adds a dict of HTTP headers to the next request."""
        for k, v in headers.items():
            self.add_header(k, v)

    def request(self, url):
        """
        Sends the request: POST when data was set with add_data(),
        GET otherwise.
        """
        if self.data:
            method = "POST"
        else:
            method = "GET"

        self.conn.request(method, url, self.data, self.headers)

    def getresponse(self, *args):
        """Returns the underlying connection's response."""
        return self.conn.getresponse(*args)

    def close(self):
        """Closes the underlying connection."""
        return self.conn.close()

    def add_data(self, data):
        """Sets the request body; also turns the request into a POST."""
        self.data = data

    def set_proxy(self):
        """
        Sets proxy if needed, from the http_proxy environment variable
        ("http://host:port" or "host:port").
        """
        if os.environ.has_key("http_proxy"):
            http_proxy = os.environ["http_proxy"]
            http_proxy = http_proxy.replace("http://", "").rstrip("/")
            (proxy_host, proxy_port) = http_proxy.split(":")
            proxy_port = int(proxy_port)
            proxy_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            proxy_sock.connect((proxy_host, proxy_port))
            # Bug fix: the socket used to be assigned to self.sock, an
            # attribute nothing ever reads; it must be installed on the
            # underlying httplib connection to have any effect.
            # NOTE(review): a real HTTP proxy also expects absolute URIs
            # in the request line — confirm against a proxied setup.
            self.conn.sock = proxy_sock

    def http_auth(self, username, password):
        """Adds an HTTP Basic Authorization header to the next request."""
        httpbasicauth = "%s:%s" % (username, password)
        # Bug fix: add_header is a method of self; the bare call raised
        # a NameError whenever HTTP auth was actually used.
        # strip() removes the trailing newline base64.encodestring adds.
        self.add_header("Authorization",
                        "Basic %s" % base64.encodestring(httpbasicauth).strip())
+ \ No newline at end of file
diff --git a/src/wikipediafs/user.py b/src/wikipediafs/user.py
new file mode 100644
index 0000000..40b45f8
--- /dev/null
+++ b/src/wikipediafs/user.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+# WikipediaFS
+# Copyright (C) 2005 - 2007 Mathieu Blondel
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+import urllib, re
+from http import ExtendedHTTPConnection
+
class User:
    """
    Gets user's cookie string.

    Logs the user in by POSTing the credentials to the MediaWiki
    Special:Userlogin page and collects the session cookies from the
    response headers.
    """

    def __init__(self,
                 username,
                 password,
                 host,
                 basename,
                 https=False,
                 port=None,
                 httpauth_username=None,
                 httpauth_password=None
                 ):
        """
        username, password: wiki account credentials
        host: wiki host name
        basename: path to index.php; must include a leading /
        https: use HTTPS instead of HTTP
        port: TCP port (defaults to 80/443 depending on https)
        httpauth_username, httpauth_password: HTTP Basic auth credentials
        """
        self.username = username
        self.password = password
        self.host = host
        self.basename = basename
        self.https = https
        self.port = port
        self.httpauth_username = httpauth_username
        self.httpauth_password = httpauth_password

        # url pattern
        self.login_page = "%s?title=Special:Userlogin" % self.basename
        self.login_page += "&action=submit&returnto=Special:Userlogin"

    def getCookieString(self):
        """
        Gets user's cookie string.
        It will then have to be passed to an Article.
        Returns None when the login did not succeed.
        """

        params = {"wpName":self.username, "wpPassword":self.password,
                  "wpLoginattempt":"Identification", "wpRemember":"1"}

        params = urllib.urlencode(params)

        headers = {"Content-type": "application/x-www-form-urlencoded",
                   "User-agent" : "WikipediaFS"}

        conn = ExtendedHTTPConnection(self.host, self.port, self.https)

        conn.set_proxy() # sets proxy if needed

        if self.httpauth_username and self.httpauth_password:
            # Bug fix: the credentials are attributes of self; the bare
            # names raised a NameError as soon as HTTP auth was used.
            conn.http_auth(self.httpauth_username, self.httpauth_password)

        conn.add_data(params)
        conn.add_headers(headers)
        conn.request(self.login_page)
        response = conn.getresponse()

        # Extract the cookie values from every Set-Cookie header.
        cookie_list = []
        in_cookie = re.compile(': (.*?);')

        for cookie_value in response.msg.getallmatchingheaders("set-cookie"):
            it_matches = in_cookie.search(cookie_value)

            if it_matches:
                cookie_list.append(it_matches.group(1))

        conn.close()

        # NOTE(review): a successful login is assumed to set exactly
        # four cookies, the last of which is dropped from the session
        # string — confirm against the MediaWiki version in use.
        if len(cookie_list) == 4:
            cookie_list.pop()
            #logger.info("; ".join(cookie_list))
            return "; ".join(cookie_list)
        else:
            return None
+
+
+if __name__ == "__main__":
+ import sys
+
+ params = {
+ "host" : "www.mblondel.org",
+ "basename" : "/mediawiki/index.php",
+ "https" : True
+ }
+
+ if(len(sys.argv) != 3):
+ print "python user.py username password"
+ else:
+ user = User(sys.argv[1], sys.argv[2], **params)
+ print user.getCookieString()
+ \ No newline at end of file