File distribution.patch of Package python-rest-client (project home:jayvdb:branches:devel:languages:python:misc)
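The diff below deletes the library sources that python-rest-client ships as bundled copies: gae_restful_lib.py (a Google App Engine port of the REST client) and the vendored httplib2 package, presumably so the distribution package can depend on a separately packaged httplib2 instead. For orientation, this is the core httplib2 usage pattern that the removed code implements; a minimal sketch only, with a placeholder URL:

import httplib2

h = httplib2.Http()                      # or Http('.cache') to enable the on-disk response cache
response, content = h.request('http://example.org/', 'GET')
print(response.status)                   # HTTP status code, e.g. 200
print(response.fromcache)                # True when the response was served from the local cache

The Response object returned here is the dict-like class defined near the end of the bundled httplib2/__init__.py removed by this patch.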
Index: python-rest-client/gae_restful_lib.py =================================================================== --- python-rest-client.orig/gae_restful_lib.py 2008-06-03 06:26:04.000000000 -0300 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 @@ -1,456 +0,0 @@ -""" - Copyright (C) 2008 Benjamin O'Steen - - This file is part of python-fedoracommons. - - python-fedoracommons is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - python-fedoracommons is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with python-fedoracommons. If not, see <http://www.gnu.org/licenses/>. -""" - -__license__ = 'GPL http://www.gnu.org/licenses/gpl.txt' -__author__ = "Benjamin O'Steen <bosteen@gmail.com>" -__version__ = '0.1' - -from google.appengine.api import urlfetch - -import urlparse -from urllib import urlencode -import base64 -from base64 import encodestring - -import re -import md5 -import calendar -import time -import random -import sha -import hmac - -from mimeTypes import * - -import mimetypes - -from cStringIO import StringIO - - - -# For Auth implemnentation: Digest (from httplib2) -# TODO: !Important - add proper code attribution for httplib2 parts -USE_WWW_AUTH_STRICT_PARSING = 0 -conn = None -# In regex below: -# [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+ matches a "token" as defined by HTTP -# "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?" matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space -# Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both: -# \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"? 
-WWW_AUTH_STRICT = re.compile(r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$") -WWW_AUTH_RELAXED = re.compile(r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$") -UNQUOTE_PAIRS = re.compile(r'\\(.)') - -def _parse_www_authenticate(headers, headername='www-authenticate'): - """Returns a dictionary of dictionaries, one dict - per auth_scheme.""" - retval = {} - if headers.has_key(headername): - authenticate = headers[headername].strip() - www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED - while authenticate: - # Break off the scheme at the beginning of the line - if headername == 'authentication-info': - (auth_scheme, the_rest) = ('digest', authenticate) - else: - (auth_scheme, the_rest) = authenticate.split(" ", 1) - # Now loop over all the key value pairs that come after the scheme, - # being careful not to roll into the next scheme - match = www_auth.search(the_rest) - auth_params = {} - while match: - if match and len(match.groups()) == 3: - (key, value, the_rest) = match.groups() - auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')]) - match = www_auth.search(the_rest) - retval[auth_scheme.lower()] = auth_params - authenticate = the_rest.strip() - return retval - -def _cnonce(): - dig = md5.new("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest() - return dig[:16] - -def _wsse_username_token(cnonce, iso_now, password): - return base64.encodestring(sha.new("%s%s%s" % (cnonce, iso_now, password)).digest()).strip() - -URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") - -def parse_uri(uri): - """Parses a URI using the regex given in Appendix B of RFC 3986. - - (scheme, authority, path, query, fragment) = parse_uri(uri) - """ - groups = URI.match(uri).groups() - return (groups[1], groups[3], groups[4], groups[6], groups[8]) - -# For credentials we need two things, first -# a pool of credential to try (not necesarily tied to BAsic, Digest, etc.) -# Then we also need a list of URIs that have already demanded authentication -# That list is tricky since sub-URIs can take the same auth, or the -# auth scheme may change as you descend the tree. -# So we also need each Auth instance to be able to tell us -# how close to the 'top' it is. - -class Authentication(object): - def __init__(self, credentials, host, request_uri, headers, response, content, http): - (scheme, authority, path, query, fragment) = parse_uri(request_uri) - self.path = path - self.host = host - self.credentials = credentials - self.http = http - - def depth(self, request_uri): - (scheme, authority, path, query, fragment) = parse_uri(request_uri) - return request_uri[len(self.path):].count("/") - - def inscope(self, host, request_uri): - # XXX Should we normalize the request_uri? - (scheme, authority, path, query, fragment) = parse_uri(request_uri) - return (host == self.host) and path.startswith(self.path) - - def request(self, method, request_uri, headers, content): - """Modify the request headers to add the appropriate - Authorization header. Over-rise this in sub-classes.""" - pass - - def response(self, response, content): - """Gives us a chance to update with new nonces - or such returned from the last authorized response. 
- Over-rise this in sub-classes if necessary. - - Return TRUE is the request is to be retried, for - example Digest may return stale=true. - """ - return False - - - -class BasicAuthentication(Authentication): - def __init__(self, credentials, host, request_uri, headers, response, content, http): - Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) - - def request(self, method, request_uri, headers, content): - """Modify the request headers to add the appropriate - Authorization header.""" - headers['authorization'] = 'Basic ' + base64.encodestring("%s:%s" % self.credentials).strip() - - -class DigestAuthentication(Authentication): - """Only do qop='auth' and MD5, since that - is all Apache currently implements""" - def __init__(self, credentials, host, request_uri, headers, response, content, http): - Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) - challenge = _parse_www_authenticate(response, 'www-authenticate') - self.challenge = challenge['digest'] - qop = self.challenge.get('qop') - self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None - if self.challenge['qop'] is None: - raise UnimplementedDigestAuthOptionError( _("Unsupported value for qop: %s." % qop)) - self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5') - if self.challenge['algorithm'] != 'MD5': - raise UnimplementedDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm'])) - self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]]) - self.challenge['nc'] = 1 - - def request(self, method, request_uri, headers, content, cnonce = None): - """Modify the request headers""" - H = lambda x: md5.new(x).hexdigest() - KD = lambda s, d: H("%s:%s" % (s, d)) - A2 = "".join([method, ":", request_uri]) - self.challenge['cnonce'] = cnonce or _cnonce() - request_digest = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (self.challenge['nonce'], - '%08x' % self.challenge['nc'], - self.challenge['cnonce'], - self.challenge['qop'], H(A2) - )) - headers['Authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % ( - self.credentials[0], - self.challenge['realm'], - self.challenge['nonce'], - request_uri, - self.challenge['algorithm'], - request_digest, - self.challenge['qop'], - self.challenge['nc'], - self.challenge['cnonce'], - ) - self.challenge['nc'] += 1 - - def response(self, response, content): - if not response.has_key('authentication-info'): - challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {}) - if 'true' == challenge.get('stale'): - self.challenge['nonce'] = challenge['nonce'] - self.challenge['nc'] = 1 - return True - else: - updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {}) - - if updated_challenge.has_key('nextnonce'): - self.challenge['nonce'] = updated_challenge['nextnonce'] - self.challenge['nc'] = 1 - return False - - -class HmacDigestAuthentication(Authentication): - """Adapted from Robert Sayre's code and DigestAuthentication above.""" - __author__ = "Thomas Broyer (t.broyer@ltgt.net)" - - def __init__(self, credentials, host, request_uri, headers, response, content, http): - Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) - challenge = _parse_www_authenticate(response, 'www-authenticate') - self.challenge = 
challenge['hmacdigest'] - # TODO: self.challenge['domain'] - self.challenge['reason'] = self.challenge.get('reason', 'unauthorized') - if self.challenge['reason'] not in ['unauthorized', 'integrity']: - self.challenge['reason'] = 'unauthorized' - self.challenge['salt'] = self.challenge.get('salt', '') - if not self.challenge.get('snonce'): - raise UnimplementedHmacDigestAuthOptionError( _("The challenge doesn't contain a server nonce, or this one is empty.")) - self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-1') - if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']: - raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm'])) - self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA-1') - if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']: - raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for pw-algorithm: %s." % self.challenge['pw-algorithm'])) - if self.challenge['algorithm'] == 'HMAC-MD5': - self.hashmod = md5 - else: - self.hashmod = sha - if self.challenge['pw-algorithm'] == 'MD5': - self.pwhashmod = md5 - else: - self.pwhashmod = sha - self.key = "".join([self.credentials[0], ":", - self.pwhashmod.new("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(), - ":", self.challenge['realm'] - ]) - self.key = self.pwhashmod.new(self.key).hexdigest().lower() - - def request(self, method, request_uri, headers, content): - """Modify the request headers""" - keys = _get_end2end_headers(headers) - keylist = "".join(["%s " % k for k in keys]) - headers_val = "".join([headers[k] for k in keys]) - created = time.strftime('%Y-%m-%dT%H:%M:%SZ',time.gmtime()) - cnonce = _cnonce() - request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val) - request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower() - headers['Authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % ( - self.credentials[0], - self.challenge['realm'], - self.challenge['snonce'], - cnonce, - request_uri, - created, - request_digest, - keylist, - ) - - def response(self, response, content): - challenge = _parse_www_authenticate(response, 'www-authenticate').get('hmacdigest', {}) - if challenge.get('reason') in ['integrity', 'stale']: - return True - return False - - -class WsseAuthentication(Authentication): - """This is thinly tested and should not be relied upon. - At this time there isn't any third party server to test against. 
- Blogger and TypePad implemented this algorithm at one point - but Blogger has since switched to Basic over HTTPS and - TypePad has implemented it wrong, by never issuing a 401 - challenge but instead requiring your client to telepathically know that - their endpoint is expecting WSSE profile="UsernameToken".""" - def __init__(self, credentials, host, request_uri, headers, response, content, http): - Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) - - def request(self, method, request_uri, headers, content): - """Modify the request headers to add the appropriate - Authorization header.""" - headers['Authorization'] = 'WSSE profile="UsernameToken"' - iso_now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) - cnonce = _cnonce() - password_digest = _wsse_username_token(cnonce, iso_now, self.credentials[1]) - headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"' % ( - self.credentials[0], - password_digest, - cnonce, - iso_now) - -class Credentials(object): - def __init__(self): - self.credentials = [] - - def add(self, name, password, domain=""): - self.credentials.append((domain.lower(), name, password)) - - def clear(self): - self.credentials = [] - - def iter(self, domain): - for (cdomain, name, password) in self.credentials: - if cdomain == "" or domain == cdomain: - yield (name, password) - -AUTH_SCHEME_CLASSES = { - "basic": BasicAuthentication, - "Basic": BasicAuthentication, - "wsse": WsseAuthentication, - "digest": DigestAuthentication, - "Digest": DigestAuthentication, - "hmacdigest": HmacDigestAuthentication -} - -AUTH_SCHEME_ORDER = ["hmacdigest", "digest", "Digest", "wsse", "basic", "Basic"] - -URLFETCH_METHOD_STRING = {urlfetch.GET:'GET', - urlfetch.PUT:'PUT', - urlfetch.DELETE:'DELETE', - urlfetch.POST:'POST', - urlfetch.HEAD:'HEAD' - } - - -class GAE_Connection: - def __init__(self, base_url, username=None, password=None): - self.base_url = base_url - m = mimeTypes() - self.mimetypes = m.getDictionary() - - # Name/password - self.credentials = Credentials() - - if username and password: - self.add_credentials(username, password, domain="") - - # authorization objects - self.authorizations = [] - - self.url = urlparse.urlparse(base_url) - - (scheme, netloc, path, query, fragment) = urlparse.urlsplit(base_url) - - self.scheme = scheme - self.host = netloc - self.path = path - - def _auth_from_challenge(self, host, request_uri, headers, response, content): - """A generator that creates Authorization objects - that can be applied to requests. 
- """ - challenges = _parse_www_authenticate(response, 'www-authenticate') - for cred in self.credentials.iter(host): - for scheme in AUTH_SCHEME_ORDER: - if challenges.has_key(scheme): - yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self) - - def add_credentials(self, name, password, domain=""): - """Add a name and password that will be used - any time a request requires authentication.""" - self.credentials.add(name, password, domain) - - def clear_credentials(self): - """Remove all the names and passwords - that are used for authentication""" - self.credentials.clear() - self.authorizations = [] - - def request_get(self, resource, args = None, headers={}): - return self.request(resource, urlfetch.GET, args, headers=headers) - - def request_delete(self, resource, args = None, headers={}): - return self.request(resource, urlfetch.DELETE, args, headers=headers) - - def request_post(self, resource, args = None, body = None, filename=None, headers={}): - return self.request(resource, urlfetch.POST, args , body = body, filename=filename, headers=headers) - - def request_put(self, resource, args = None, body = None, filename=None, headers={}): - return self.request(resource, urlfetch.PUT, args , body = body, filename=filename, headers=headers) - - def request_head(self, resource, args = None, body = None, filename=None, headers={}): - return self.request(resource, urlfetch.HEAD, args , body = body, filename=filename, headers=headers) - - def _conn_request(self, conn, request_uri, method, body, headers): - # Shim to allow easy reuse of httplib2 auth methods - conn param is not used - urlfetch_response = urlfetch.fetch(request_uri, method=method, payload=body, headers=headers) - r_headers={'status':urlfetch_response.status_code} - for header_key in urlfetch_response.headers: - r_headers[header_key.lower()] = urlfetch_response.headers[header_key] - - return (r_headers, urlfetch_response.content.decode('UTF-8')) - - def get_content_type(self, filename): - extension = filename.split('.')[-1] - guessed_mimetype = self.mimetypes.get(extension, mimetypes.guess_type(filename)[0]) - return guessed_mimetype or 'application/octet-stream' - - def request(self, resource, method = urlfetch.GET, args = None, body = None, filename=None, headers={}): - params = None - path = resource - headers['User-Agent'] = 'Basic Agent' - - if not headers.get('Content-Type', None): - headers['Content-Type']='text/plain' - - request_path = [] - if self.path != "/": - if self.path.endswith('/'): - request_path.append(self.path[:-1]) - else: - request_path.append(self.path) - if path.startswith('/'): - request_path.append(path[1:]) - else: - request_path.append(path) - full_path = u'/'.join(request_path) - - if args: - full_path += u"?%s" % (urlencode(args)) - - request_uri = u"%s://%s%s" % (self.scheme, self.host, full_path) - - auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)] - auth = auths and sorted(auths)[0][1] or None - if auth: - auth.request(method, request_uri, headers, body) - - (response, content) = self._conn_request(conn, request_uri, method, body, headers) - - if auth: - if auth.response(response, body): - auth.request(URLFETCH_METHOD_STRING[method], request_uri, headers, body) - - (response, content) = self._conn_request(conn, request_uri, method, body, headers) - - if response['status'] == 401: - #return {u"body":u"".join(["%s: %s" % (key, response[key]) for key in response])} - for authorization in 
self._auth_from_challenge(self.host, request_uri, headers, response, content): - authorization.request(URLFETCH_METHOD_STRING[method], request_uri, headers, body) - - (response, content) = self._conn_request(conn, request_uri, method, body, headers) - - if response['status'] != 401: - self.authorizations.append(authorization) - authorization.response(response, body) - break - - return {u'headers':response, u'body':content} - Index: python-rest-client/httplib2/__init__.py =================================================================== --- python-rest-client.orig/httplib2/__init__.py 2007-10-23 12:25:46.000000000 -0300 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 @@ -1,1123 +0,0 @@ -from __future__ import generators -""" -httplib2 - -A caching http interface that supports ETags and gzip -to conserve bandwidth. - -Requires Python 2.3 or later - -Changelog: -2007-08-18, Rick: Modified so it's able to use a socks proxy if needed. - -""" - -__author__ = "Joe Gregorio (joe@bitworking.org)" -__copyright__ = "Copyright 2006, Joe Gregorio" -__contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)", - "James Antill", - "Xavier Verges Farrero", - "Jonathan Feinberg", - "Blair Zajac", - "Sam Ruby", - "Louis Nyffenegger"] -__license__ = "MIT" -__version__ = "$Rev: 259 $" - -import re -import sys -import md5 -import email -import email.Utils -import email.Message -import StringIO -import gzip -import zlib -import httplib -import urlparse -import base64 -import os -import copy -import calendar -import time -import random -import sha -import hmac -from gettext import gettext as _ -import socket - -try: - import socks -except ImportError: - socks = None - -if sys.version_info >= (2,3): - from iri2uri import iri2uri -else: - def iri2uri(uri): - return uri - -__all__ = ['Http', 'Response', 'ProxyInfo', 'HttpLib2Error', - 'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent', - 'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError', - 'debuglevel'] - - -# The httplib debug level, set to a non-zero value to get debug output -debuglevel = 0 - -# Python 2.3 support -if sys.version_info < (2,4): - def sorted(seq): - seq.sort() - return seq - -# Python 2.3 support -def HTTPResponse__getheaders(self): - """Return list of (header, value) tuples.""" - if self.msg is None: - raise httplib.ResponseNotReady() - return self.msg.items() - -if not hasattr(httplib.HTTPResponse, 'getheaders'): - httplib.HTTPResponse.getheaders = HTTPResponse__getheaders - -# All exceptions raised here derive from HttpLib2Error -class HttpLib2Error(Exception): pass - -# Some exceptions can be caught and optionally -# be turned back into responses. -class HttpLib2ErrorWithResponse(HttpLib2Error): - def __init__(self, desc, response, content): - self.response = response - self.content = content - HttpLib2Error.__init__(self, desc) - -class RedirectMissingLocation(HttpLib2ErrorWithResponse): pass -class RedirectLimit(HttpLib2ErrorWithResponse): pass -class FailedToDecompressContent(HttpLib2ErrorWithResponse): pass -class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse): pass -class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse): pass - -class RelativeURIError(HttpLib2Error): pass -class ServerNotFoundError(HttpLib2Error): pass - -# Open Items: -# ----------- -# Proxy support - -# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?) - -# Pluggable cache storage (supports storing the cache in -# flat files by default. 
We need a plug-in architecture -# that can support Berkeley DB and Squid) - -# == Known Issues == -# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator. -# Does not handle Cache-Control: max-stale -# Does not use Age: headers when calculating cache freshness. - - -# The number of redirections to follow before giving up. -# Note that only GET redirects are automatically followed. -# Will also honor 301 requests by saving that info and never -# requesting that URI again. -DEFAULT_MAX_REDIRECTS = 5 - -# Which headers are hop-by-hop headers by default -HOP_BY_HOP = ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers', 'transfer-encoding', 'upgrade'] - -def _get_end2end_headers(response): - hopbyhop = list(HOP_BY_HOP) - hopbyhop.extend([x.strip() for x in response.get('connection', '').split(',')]) - return [header for header in response.keys() if header not in hopbyhop] - -URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") - -def parse_uri(uri): - """Parses a URI using the regex given in Appendix B of RFC 3986. - - (scheme, authority, path, query, fragment) = parse_uri(uri) - """ - groups = URI.match(uri).groups() - return (groups[1], groups[3], groups[4], groups[6], groups[8]) - -def urlnorm(uri): - (scheme, authority, path, query, fragment) = parse_uri(uri) - if not scheme or not authority: - raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri) - authority = authority.lower() - scheme = scheme.lower() - if not path: - path = "/" - # Could do syntax based normalization of the URI before - # computing the digest. See Section 6.2.2 of Std 66. - request_uri = query and "?".join([path, query]) or path - scheme = scheme.lower() - defrag_uri = scheme + "://" + authority + request_uri - return scheme, authority, request_uri, defrag_uri - - -# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/) -re_url_scheme = re.compile(r'^\w+://') -re_slash = re.compile(r'[?/:|]+') - -def safename(filename): - """Return a filename suitable for the cache. - - Strips dangerous and common characters to create a filename we - can use to store the cache in. 
- """ - - try: - if re_url_scheme.match(filename): - if isinstance(filename,str): - filename = filename.decode('utf-8') - filename = filename.encode('idna') - else: - filename = filename.encode('idna') - except UnicodeError: - pass - if isinstance(filename,unicode): - filename=filename.encode('utf-8') - filemd5 = md5.new(filename).hexdigest() - filename = re_url_scheme.sub("", filename) - filename = re_slash.sub(",", filename) - - # limit length of filename - if len(filename)>200: - filename=filename[:200] - return ",".join((filename, filemd5)) - -NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+') -def _normalize_headers(headers): - return dict([ (key.lower(), NORMALIZE_SPACE.sub(value, ' ').strip()) for (key, value) in headers.iteritems()]) - -def _parse_cache_control(headers): - retval = {} - if headers.has_key('cache-control'): - parts = headers['cache-control'].split(',') - parts_with_args = [tuple([x.strip() for x in part.split("=")]) for part in parts if -1 != part.find("=")] - parts_wo_args = [(name.strip(), 1) for name in parts if -1 == name.find("=")] - retval = dict(parts_with_args + parts_wo_args) - return retval - -# Whether to use a strict mode to parse WWW-Authenticate headers -# Might lead to bad results in case of ill-formed header value, -# so disabled by default, falling back to relaxed parsing. -# Set to true to turn on, usefull for testing servers. -USE_WWW_AUTH_STRICT_PARSING = 0 - -# In regex below: -# [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+ matches a "token" as defined by HTTP -# "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?" matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space -# Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both: -# \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"? 
-WWW_AUTH_STRICT = re.compile(r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$") -WWW_AUTH_RELAXED = re.compile(r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$") -UNQUOTE_PAIRS = re.compile(r'\\(.)') -def _parse_www_authenticate(headers, headername='www-authenticate'): - """Returns a dictionary of dictionaries, one dict - per auth_scheme.""" - retval = {} - if headers.has_key(headername): - authenticate = headers[headername].strip() - www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED - while authenticate: - # Break off the scheme at the beginning of the line - if headername == 'authentication-info': - (auth_scheme, the_rest) = ('digest', authenticate) - else: - (auth_scheme, the_rest) = authenticate.split(" ", 1) - # Now loop over all the key value pairs that come after the scheme, - # being careful not to roll into the next scheme - match = www_auth.search(the_rest) - auth_params = {} - while match: - if match and len(match.groups()) == 3: - (key, value, the_rest) = match.groups() - auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')]) - match = www_auth.search(the_rest) - retval[auth_scheme.lower()] = auth_params - authenticate = the_rest.strip() - return retval - - -def _entry_disposition(response_headers, request_headers): - """Determine freshness from the Date, Expires and Cache-Control headers. - - We don't handle the following: - - 1. Cache-Control: max-stale - 2. Age: headers are not used in the calculations. - - Not that this algorithm is simpler than you might think - because we are operating as a private (non-shared) cache. - This lets us ignore 's-maxage'. We can also ignore - 'proxy-invalidate' since we aren't a proxy. - We will never return a stale document as - fresh as a design decision, and thus the non-implementation - of 'max-stale'. This also lets us safely ignore 'must-revalidate' - since we operate as if every server has sent 'must-revalidate'. - Since we are private we get to ignore both 'public' and - 'private' parameters. We also ignore 'no-transform' since - we don't do any transformations. - The 'no-store' parameter is handled at a higher level. 
- So the only Cache-Control parameters we look at are: - - no-cache - only-if-cached - max-age - min-fresh - """ - - retval = "STALE" - cc = _parse_cache_control(request_headers) - cc_response = _parse_cache_control(response_headers) - - if request_headers.has_key('pragma') and request_headers['pragma'].lower().find('no-cache') != -1: - retval = "TRANSPARENT" - if 'cache-control' not in request_headers: - request_headers['cache-control'] = 'no-cache' - elif cc.has_key('no-cache'): - retval = "TRANSPARENT" - elif cc_response.has_key('no-cache'): - retval = "STALE" - elif cc.has_key('only-if-cached'): - retval = "FRESH" - elif response_headers.has_key('date'): - date = calendar.timegm(email.Utils.parsedate_tz(response_headers['date'])) - now = time.time() - current_age = max(0, now - date) - if cc_response.has_key('max-age'): - try: - freshness_lifetime = int(cc_response['max-age']) - except ValueError: - freshness_lifetime = 0 - elif response_headers.has_key('expires'): - expires = email.Utils.parsedate_tz(response_headers['expires']) - if None == expires: - freshness_lifetime = 0 - else: - freshness_lifetime = max(0, calendar.timegm(expires) - date) - else: - freshness_lifetime = 0 - if cc.has_key('max-age'): - try: - freshness_lifetime = int(cc['max-age']) - except ValueError: - freshness_lifetime = 0 - if cc.has_key('min-fresh'): - try: - min_fresh = int(cc['min-fresh']) - except ValueError: - min_fresh = 0 - current_age += min_fresh - if freshness_lifetime > current_age: - retval = "FRESH" - return retval - -def _decompressContent(response, new_content): - content = new_content - try: - encoding = response.get('content-encoding', None) - if encoding in ['gzip', 'deflate']: - if encoding == 'gzip': - content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).read() - if encoding == 'deflate': - content = zlib.decompress(content) - response['content-length'] = str(len(content)) - del response['content-encoding'] - except IOError: - content = "" - raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'), response, content) - return content - -def _updateCache(request_headers, response_headers, content, cache, cachekey): - if cachekey: - cc = _parse_cache_control(request_headers) - cc_response = _parse_cache_control(response_headers) - if cc.has_key('no-store') or cc_response.has_key('no-store'): - cache.delete(cachekey) - else: - info = email.Message.Message() - for key, value in response_headers.iteritems(): - if key not in ['status','content-encoding','transfer-encoding']: - info[key] = value - - status = response_headers.status - if status == 304: - status = 200 - - status_header = 'status: %d\r\n' % response_headers.status - - header_str = info.as_string() - - header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str) - text = "".join([status_header, header_str, content]) - - cache.set(cachekey, text) - -def _cnonce(): - dig = md5.new("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest() - return dig[:16] - -def _wsse_username_token(cnonce, iso_now, password): - return base64.encodestring(sha.new("%s%s%s" % (cnonce, iso_now, password)).digest()).strip() - - -# For credentials we need two things, first -# a pool of credential to try (not necesarily tied to BAsic, Digest, etc.) 
-# Then we also need a list of URIs that have already demanded authentication -# That list is tricky since sub-URIs can take the same auth, or the -# auth scheme may change as you descend the tree. -# So we also need each Auth instance to be able to tell us -# how close to the 'top' it is. - -class Authentication(object): - def __init__(self, credentials, host, request_uri, headers, response, content, http): - (scheme, authority, path, query, fragment) = parse_uri(request_uri) - self.path = path - self.host = host - self.credentials = credentials - self.http = http - - def depth(self, request_uri): - (scheme, authority, path, query, fragment) = parse_uri(request_uri) - return request_uri[len(self.path):].count("/") - - def inscope(self, host, request_uri): - # XXX Should we normalize the request_uri? - (scheme, authority, path, query, fragment) = parse_uri(request_uri) - return (host == self.host) and path.startswith(self.path) - - def request(self, method, request_uri, headers, content): - """Modify the request headers to add the appropriate - Authorization header. Over-rise this in sub-classes.""" - pass - - def response(self, response, content): - """Gives us a chance to update with new nonces - or such returned from the last authorized response. - Over-rise this in sub-classes if necessary. - - Return TRUE is the request is to be retried, for - example Digest may return stale=true. - """ - return False - - - -class BasicAuthentication(Authentication): - def __init__(self, credentials, host, request_uri, headers, response, content, http): - Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) - - def request(self, method, request_uri, headers, content): - """Modify the request headers to add the appropriate - Authorization header.""" - headers['authorization'] = 'Basic ' + base64.encodestring("%s:%s" % self.credentials).strip() - - -class DigestAuthentication(Authentication): - """Only do qop='auth' and MD5, since that - is all Apache currently implements""" - def __init__(self, credentials, host, request_uri, headers, response, content, http): - Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) - challenge = _parse_www_authenticate(response, 'www-authenticate') - self.challenge = challenge['digest'] - qop = self.challenge.get('qop') - self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None - if self.challenge['qop'] is None: - raise UnimplementedDigestAuthOptionError( _("Unsupported value for qop: %s." % qop)) - self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5') - if self.challenge['algorithm'] != 'MD5': - raise UnimplementedDigestAuthOptionError( _("Unsupported value for algorithm: %s." 
% self.challenge['algorithm'])) - self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]]) - self.challenge['nc'] = 1 - - def request(self, method, request_uri, headers, content, cnonce = None): - """Modify the request headers""" - H = lambda x: md5.new(x).hexdigest() - KD = lambda s, d: H("%s:%s" % (s, d)) - A2 = "".join([method, ":", request_uri]) - self.challenge['cnonce'] = cnonce or _cnonce() - request_digest = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (self.challenge['nonce'], - '%08x' % self.challenge['nc'], - self.challenge['cnonce'], - self.challenge['qop'], H(A2) - )) - headers['Authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % ( - self.credentials[0], - self.challenge['realm'], - self.challenge['nonce'], - request_uri, - self.challenge['algorithm'], - request_digest, - self.challenge['qop'], - self.challenge['nc'], - self.challenge['cnonce'], - ) - self.challenge['nc'] += 1 - - def response(self, response, content): - if not response.has_key('authentication-info'): - challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {}) - if 'true' == challenge.get('stale'): - self.challenge['nonce'] = challenge['nonce'] - self.challenge['nc'] = 1 - return True - else: - updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {}) - - if updated_challenge.has_key('nextnonce'): - self.challenge['nonce'] = updated_challenge['nextnonce'] - self.challenge['nc'] = 1 - return False - - -class HmacDigestAuthentication(Authentication): - """Adapted from Robert Sayre's code and DigestAuthentication above.""" - __author__ = "Thomas Broyer (t.broyer@ltgt.net)" - - def __init__(self, credentials, host, request_uri, headers, response, content, http): - Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) - challenge = _parse_www_authenticate(response, 'www-authenticate') - self.challenge = challenge['hmacdigest'] - # TODO: self.challenge['domain'] - self.challenge['reason'] = self.challenge.get('reason', 'unauthorized') - if self.challenge['reason'] not in ['unauthorized', 'integrity']: - self.challenge['reason'] = 'unauthorized' - self.challenge['salt'] = self.challenge.get('salt', '') - if not self.challenge.get('snonce'): - raise UnimplementedHmacDigestAuthOptionError( _("The challenge doesn't contain a server nonce, or this one is empty.")) - self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-1') - if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']: - raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm'])) - self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA-1') - if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']: - raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for pw-algorithm: %s." 
% self.challenge['pw-algorithm'])) - if self.challenge['algorithm'] == 'HMAC-MD5': - self.hashmod = md5 - else: - self.hashmod = sha - if self.challenge['pw-algorithm'] == 'MD5': - self.pwhashmod = md5 - else: - self.pwhashmod = sha - self.key = "".join([self.credentials[0], ":", - self.pwhashmod.new("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(), - ":", self.challenge['realm'] - ]) - self.key = self.pwhashmod.new(self.key).hexdigest().lower() - - def request(self, method, request_uri, headers, content): - """Modify the request headers""" - keys = _get_end2end_headers(headers) - keylist = "".join(["%s " % k for k in keys]) - headers_val = "".join([headers[k] for k in keys]) - created = time.strftime('%Y-%m-%dT%H:%M:%SZ',time.gmtime()) - cnonce = _cnonce() - request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val) - request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower() - headers['Authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % ( - self.credentials[0], - self.challenge['realm'], - self.challenge['snonce'], - cnonce, - request_uri, - created, - request_digest, - keylist, - ) - - def response(self, response, content): - challenge = _parse_www_authenticate(response, 'www-authenticate').get('hmacdigest', {}) - if challenge.get('reason') in ['integrity', 'stale']: - return True - return False - - -class WsseAuthentication(Authentication): - """This is thinly tested and should not be relied upon. - At this time there isn't any third party server to test against. - Blogger and TypePad implemented this algorithm at one point - but Blogger has since switched to Basic over HTTPS and - TypePad has implemented it wrong, by never issuing a 401 - challenge but instead requiring your client to telepathically know that - their endpoint is expecting WSSE profile="UsernameToken".""" - def __init__(self, credentials, host, request_uri, headers, response, content, http): - Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) - - def request(self, method, request_uri, headers, content): - """Modify the request headers to add the appropriate - Authorization header.""" - headers['Authorization'] = 'WSSE profile="UsernameToken"' - iso_now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) - cnonce = _cnonce() - password_digest = _wsse_username_token(cnonce, iso_now, self.credentials[1]) - headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"' % ( - self.credentials[0], - password_digest, - cnonce, - iso_now) - -class GoogleLoginAuthentication(Authentication): - def __init__(self, credentials, host, request_uri, headers, response, content, http): - from urllib import urlencode - Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) - challenge = _parse_www_authenticate(response, 'www-authenticate') - service = challenge['googlelogin'].get('service', 'xapi') - # Bloggger actually returns the service in the challenge - # For the rest we guess based on the URI - if service == 'xapi' and request_uri.find("calendar") > 0: - service = "cl" - # No point in guessing Base or Spreadsheet - #elif request_uri.find("spreadsheets") > 0: - # service = "wise" - - auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent']) - resp, content = 
self.http.request("https://www.google.com/accounts/ClientLogin", method="POST", body=urlencode(auth), headers={'Content-Type': 'application/x-www-form-urlencoded'}) - lines = content.split('\n') - d = dict([tuple(line.split("=", 1)) for line in lines if line]) - if resp.status == 403: - self.Auth = "" - else: - self.Auth = d['Auth'] - - def request(self, method, request_uri, headers, content): - """Modify the request headers to add the appropriate - Authorization header.""" - headers['authorization'] = 'GoogleLogin Auth=' + self.Auth - - -AUTH_SCHEME_CLASSES = { - "basic": BasicAuthentication, - "wsse": WsseAuthentication, - "digest": DigestAuthentication, - "hmacdigest": HmacDigestAuthentication, - "googlelogin": GoogleLoginAuthentication -} - -AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"] - -def _md5(s): - return - -class FileCache(object): - """Uses a local directory as a store for cached files. - Not really safe to use if multiple threads or processes are going to - be running on the same cache. - """ - def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior - self.cache = cache - self.safe = safe - if not os.path.exists(cache): - os.makedirs(self.cache) - - def get(self, key): - retval = None - cacheFullPath = os.path.join(self.cache, self.safe(key)) - try: - f = file(cacheFullPath, "r") - retval = f.read() - f.close() - except IOError: - pass - return retval - - def set(self, key, value): - cacheFullPath = os.path.join(self.cache, self.safe(key)) - f = file(cacheFullPath, "w") - f.write(value) - f.close() - - def delete(self, key): - cacheFullPath = os.path.join(self.cache, self.safe(key)) - if os.path.exists(cacheFullPath): - os.remove(cacheFullPath) - -class Credentials(object): - def __init__(self): - self.credentials = [] - - def add(self, name, password, domain=""): - self.credentials.append((domain.lower(), name, password)) - - def clear(self): - self.credentials = [] - - def iter(self, domain): - for (cdomain, name, password) in self.credentials: - if cdomain == "" or domain == cdomain: - yield (name, password) - -class KeyCerts(Credentials): - """Identical to Credentials except that - name/password are mapped to key/cert.""" - pass - - -class ProxyInfo(object): - """Collect information required to use a proxy.""" - def __init__(self, proxy_type, proxy_host, proxy_port, proxy_rdns=None, proxy_user=None, proxy_pass=None): - """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX - constants. For example: - -p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost', proxy_port=8000) - """ - self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns, self.proxy_user, self.proxy_pass = proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass - - def astuple(self): - return (self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns, - self.proxy_user, self.proxy_pass) - - def isgood(self): - return socks and (self.proxy_host != None) and (self.proxy_port != None) - - -class HTTPConnectionWithTimeout(httplib.HTTPConnection): - """HTTPConnection subclass that supports timeouts""" - - def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None): - httplib.HTTPConnection.__init__(self, host, port, strict) - self.timeout = timeout - self.proxy_info = proxy_info - - def connect(self): - """Connect to the host and port specified in __init__.""" - # Mostly verbatim from httplib.py. 
- msg = "getaddrinfo returns an empty list" - for res in socket.getaddrinfo(self.host, self.port, 0, - socket.SOCK_STREAM): - af, socktype, proto, canonname, sa = res - try: - if self.proxy_info and self.proxy_info.isgood(): - self.sock = socks.socksocket(af, socktype, proto) - self.sock.setproxy(*self.proxy_info.astuple()) - else: - self.sock = socket.socket(af, socktype, proto) - # Different from httplib: support timeouts. - if self.timeout is not None: - self.sock.settimeout(self.timeout) - # End of difference from httplib. - if self.debuglevel > 0: - print "connect: (%s, %s)" % (self.host, self.port) - self.sock.connect(sa) - except socket.error, msg: - if self.debuglevel > 0: - print 'connect fail:', (self.host, self.port) - if self.sock: - self.sock.close() - self.sock = None - continue - break - if not self.sock: - raise socket.error, msg - -class HTTPSConnectionWithTimeout(httplib.HTTPSConnection): - "This class allows communication via SSL." - - def __init__(self, host, port=None, key_file=None, cert_file=None, - strict=None, timeout=None, proxy_info=None): - self.timeout = timeout - self.proxy_info = proxy_info - httplib.HTTPSConnection.__init__(self, host, port=port, key_file=key_file, - cert_file=cert_file, strict=strict) - - def connect(self): - "Connect to a host on a given (SSL) port." - - if self.proxy_info and self.proxy_info.isgood(): - self.sock.setproxy(*self.proxy_info.astuple()) - sock.setproxy(*self.proxy_info.astuple()) - else: - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - if self.timeout is not None: - sock.settimeout(self.timeout) - sock.connect((self.host, self.port)) - ssl = socket.ssl(sock, self.key_file, self.cert_file) - self.sock = httplib.FakeSocket(sock, ssl) - - - -class Http(object): - """An HTTP client that handles: -- all methods -- caching -- ETags -- compression, -- HTTPS -- Basic -- Digest -- WSSE - -and more. - """ - def __init__(self, cache=None, timeout=None, proxy_info=None): - """The value of proxy_info is a ProxyInfo instance. - -If 'cache' is a string then it is used as a directory name -for a disk cache. Otherwise it must be an object that supports -the same interface as FileCache.""" - self.proxy_info = proxy_info - # Map domain name to an httplib connection - self.connections = {} - # The location of the cache, for now a directory - # where cached responses are held. - if cache and isinstance(cache, str): - self.cache = FileCache(cache) - else: - self.cache = cache - - # Name/password - self.credentials = Credentials() - - # Key/cert - self.certificates = KeyCerts() - - # authorization objects - self.authorizations = [] - - # If set to False then no redirects are followed, even safe ones. - self.follow_redirects = True - - # If 'follow_redirects' is True, and this is set to True then - # all redirecs are followed, including unsafe ones. - self.follow_all_redirects = False - - self.ignore_etag = False - - self.force_exception_to_status_code = False - - self.timeout = timeout - - def _auth_from_challenge(self, host, request_uri, headers, response, content): - """A generator that creates Authorization objects - that can be applied to requests. 
- """ - challenges = _parse_www_authenticate(response, 'www-authenticate') - for cred in self.credentials.iter(host): - for scheme in AUTH_SCHEME_ORDER: - if challenges.has_key(scheme): - yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self) - - def add_credentials(self, name, password, domain=""): - """Add a name and password that will be used - any time a request requires authentication.""" - self.credentials.add(name, password, domain) - - def add_certificate(self, key, cert, domain): - """Add a key and cert that will be used - any time a request requires authentication.""" - self.certificates.add(key, cert, domain) - - def clear_credentials(self): - """Remove all the names and passwords - that are used for authentication""" - self.credentials.clear() - self.authorizations = [] - - def _conn_request(self, conn, request_uri, method, body, headers): - for i in range(2): - try: - conn.request(method, request_uri, body, headers) - response = conn.getresponse() - except socket.gaierror: - conn.close() - raise ServerNotFoundError("Unable to find the server at %s" % conn.host) - except httplib.HTTPException, e: - if i == 0: - conn.close() - conn.connect() - continue - else: - raise - else: - content = response.read() - response = Response(response) - if method != "HEAD": - content = _decompressContent(response, content) - - break; - return (response, content) - - - def _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey): - """Do the actual request using the connection object - and also follow one level of redirects if necessary""" - - auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)] - auth = auths and sorted(auths)[0][1] or None - if auth: - auth.request(method, request_uri, headers, body) - - (response, content) = self._conn_request(conn, request_uri, method, body, headers) - - if auth: - if auth.response(response, body): - auth.request(method, request_uri, headers, body) - (response, content) = self._conn_request(conn, request_uri, method, body, headers ) - response._stale_digest = 1 - - if response.status == 401: - for authorization in self._auth_from_challenge(host, request_uri, headers, response, content): - authorization.request(method, request_uri, headers, body) - (response, content) = self._conn_request(conn, request_uri, method, body, headers, ) - if response.status != 401: - self.authorizations.append(authorization) - authorization.response(response, body) - break - - if (self.follow_all_redirects or (method in ["GET", "HEAD"]) or response.status == 303): - if self.follow_redirects and response.status in [300, 301, 302, 303, 307]: - # Pick out the location header and basically start from the beginning - # remembering first to strip the ETag header and decrement our 'depth' - if redirections: - if not response.has_key('location') and response.status != 300: - raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."), response, content) - # Fix-up relative redirects (which violate an RFC 2616 MUST) - if response.has_key('location'): - location = response['location'] - (scheme, authority, path, query, fragment) = parse_uri(location) - if authority == None: - response['location'] = urlparse.urljoin(absolute_uri, location) - if response.status == 301 and method in ["GET", "HEAD"]: - response['-x-permanent-redirect-url'] = response['location'] - if not response.has_key('content-location'): - 
response['content-location'] = absolute_uri - _updateCache(headers, response, content, self.cache, cachekey) - if headers.has_key('if-none-match'): - del headers['if-none-match'] - if headers.has_key('if-modified-since'): - del headers['if-modified-since'] - if response.has_key('location'): - location = response['location'] - old_response = copy.deepcopy(response) - if not old_response.has_key('content-location'): - old_response['content-location'] = absolute_uri - redirect_method = ((response.status == 303) and (method not in ["GET", "HEAD"])) and "GET" or method - (response, content) = self.request(location, redirect_method, body=body, headers = headers, redirections = redirections - 1) - response.previous = old_response - else: - raise RedirectLimit( _("Redirected more times than rediection_limit allows."), response, content) - elif response.status in [200, 203] and method == "GET": - # Don't cache 206's since we aren't going to handle byte range requests - if not response.has_key('content-location'): - response['content-location'] = absolute_uri - _updateCache(headers, response, content, self.cache, cachekey) - - return (response, content) - - -# Need to catch and rebrand some exceptions -# Then need to optionally turn all exceptions into status codes -# including all socket.* and httplib.* exceptions. - - - def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None): - """ Performs a single HTTP request. -The 'uri' is the URI of the HTTP resource and can begin -with either 'http' or 'https'. The value of 'uri' must be an absolute URI. - -The 'method' is the HTTP method to perform, such as GET, POST, DELETE, etc. -There is no restriction on the methods allowed. - -The 'body' is the entity body to be sent with the request. It is a string -object. - -Any extra headers that are to be sent with the request should be provided in the -'headers' dictionary. - -The maximum number of redirect to follow before raising an -exception is 'redirections. The default is 5. - -The return value is a tuple of (response, content), the first -being and instance of the 'Response' class, the second being -a string that contains the response entity body. 
- """ - try: - if headers is None: - headers = {} - else: - headers = _normalize_headers(headers) - - if not headers.has_key('user-agent'): - headers['user-agent'] = "Python-httplib2/%s" % __version__ - - uri = iri2uri(uri) - - (scheme, authority, request_uri, defrag_uri) = urlnorm(uri) - - conn_key = scheme+":"+authority - if conn_key in self.connections: - conn = self.connections[conn_key] - else: - if not connection_type: - connection_type = (scheme == 'https') and HTTPSConnectionWithTimeout or HTTPConnectionWithTimeout - certs = list(self.certificates.iter(authority)) - if scheme == 'https' and certs: - conn = self.connections[conn_key] = connection_type(authority, key_file=certs[0][0], - cert_file=certs[0][1], timeout=self.timeout, proxy_info=self.proxy_info) - else: - conn = self.connections[conn_key] = connection_type(authority, timeout=self.timeout, proxy_info=self.proxy_info) - conn.set_debuglevel(debuglevel) - - if method in ["GET", "HEAD"] and 'range' not in headers: - headers['accept-encoding'] = 'compress, gzip' - - info = email.Message.Message() - cached_value = None - if self.cache: - cachekey = defrag_uri - cached_value = self.cache.get(cachekey) - if cached_value: - info = email.message_from_string(cached_value) - try: - content = cached_value.split('\r\n\r\n', 1)[1] - except IndexError: - self.cache.delete(cachekey) - cachekey = None - cached_value = None - else: - cachekey = None - - if method in ["PUT"] and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers: - # http://www.w3.org/1999/04/Editing/ - headers['if-match'] = info['etag'] - - if method not in ["GET", "HEAD"] and self.cache and cachekey: - # RFC 2616 Section 13.10 - self.cache.delete(cachekey) - - if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers: - if info.has_key('-x-permanent-redirect-url'): - # Should cached permanent redirects be counted in our redirection count? For now, yes. - (response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1) - response.previous = Response(info) - response.previous.fromcache = True - else: - # Determine our course of action: - # Is the cached entry fresh or stale? - # Has the client requested a non-cached response? - # - # There seems to be three possible answers: - # 1. [FRESH] Return the cache entry w/o doing a GET - # 2. [STALE] Do the GET (but add in cache validators if available) - # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request - entry_disposition = _entry_disposition(info, headers) - - if entry_disposition == "FRESH": - if not cached_value: - info['status'] = '504' - content = "" - response = Response(info) - if cached_value: - response.fromcache = True - return (response, content) - - if entry_disposition == "STALE": - if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers: - headers['if-none-match'] = info['etag'] - if info.has_key('last-modified') and not 'last-modified' in headers: - headers['if-modified-since'] = info['last-modified'] - elif entry_disposition == "TRANSPARENT": - pass - - (response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey) - - if response.status == 304 and method == "GET": - # Rewrite the cache entry with the new end-to-end headers - # Take all headers that are in response - # and overwrite their values in info. 
- # unless they are hop-by-hop, or are listed in the connection header. - - for key in _get_end2end_headers(response): - info[key] = response[key] - merged_response = Response(info) - if hasattr(response, "_stale_digest"): - merged_response._stale_digest = response._stale_digest - _updateCache(headers, merged_response, content, self.cache, cachekey) - response = merged_response - response.status = 200 - response.fromcache = True - - elif response.status == 200: - content = new_content - else: - self.cache.delete(cachekey) - content = new_content - else: - (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey) - except Exception, e: - if self.force_exception_to_status_code: - if isinstance(e, HttpLib2ErrorWithResponse): - response = e.response - content = e.content - response.status = 500 - response.reason = str(e) - elif isinstance(e, socket.timeout): - content = "Request Timeout" - response = Response( { - "content-type": "text/plain", - "status": "408", - "content-length": len(content) - }) - response.reason = "Request Timeout" - else: - content = str(e) - response = Response( { - "content-type": "text/plain", - "status": "400", - "content-length": len(content) - }) - response.reason = "Bad Request" - else: - raise - - - return (response, content) - - - -class Response(dict): - """An object more like email.Message than httplib.HTTPResponse.""" - - """Is this response from our local cache""" - fromcache = False - - """HTTP protocol version used by server. 10 for HTTP/1.0, 11 for HTTP/1.1. """ - version = 11 - - "Status code returned by server. " - status = 200 - - """Reason phrase returned by server.""" - reason = "Ok" - - previous = None - - def __init__(self, info): - # info is either an email.Message or - # an httplib.HTTPResponse object. - if isinstance(info, httplib.HTTPResponse): - for key, value in info.getheaders(): - self[key] = value - self.status = info.status - self['status'] = str(self.status) - self.reason = info.reason - self.version = info.version - elif isinstance(info, email.Message.Message): - for key, value in info.items(): - self[key] = value - self.status = int(self['status']) - else: - for key, value in info.iteritems(): - self[key] = value - self.status = int(self.get('status', self.status)) - - - def __getattr__(self, name): - if name == 'dict': - return self - else: - raise AttributeError, name Index: python-rest-client/httplib2/iri2uri.py =================================================================== --- python-rest-client.orig/httplib2/iri2uri.py 2007-09-04 01:02:06.000000000 -0300 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 @@ -1,110 +0,0 @@ -""" -iri2uri - -Converts an IRI to a URI. 
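The Response class removed in the hunk above (and re-added later in this patch) behaves like a dict of lower-cased headers with status, reason and fromcache attributes. A small sketch, assuming Response is in scope from that module and building it from a plain dict for illustration:

# Sketch of the Response object: a dict of headers plus status/fromcache attributes.
info = {"status": "304", "content-type": "text/plain", "etag": '"xyz"'}
r = Response(info)
r.status           # -> 304 (int, taken from the 'status' key)
r["content-type"]  # -> 'text/plain'
r.fromcache        # -> False until the caching layer marks it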
- -""" -__author__ = "Joe Gregorio (joe@bitworking.org)" -__copyright__ = "Copyright 2006, Joe Gregorio" -__contributors__ = [] -__version__ = "1.0.0" -__license__ = "MIT" -__history__ = """ -""" - -import urlparse - - -# Convert an IRI to a URI following the rules in RFC 3987 -# -# The characters we need to enocde and escape are defined in the spec: -# -# iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD -# ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF -# / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD -# / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD -# / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD -# / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD -# / %xD0000-DFFFD / %xE1000-EFFFD - -escape_range = [ - (0xA0, 0xD7FF ), - (0xE000, 0xF8FF ), - (0xF900, 0xFDCF ), - (0xFDF0, 0xFFEF), - (0x10000, 0x1FFFD ), - (0x20000, 0x2FFFD ), - (0x30000, 0x3FFFD), - (0x40000, 0x4FFFD ), - (0x50000, 0x5FFFD ), - (0x60000, 0x6FFFD), - (0x70000, 0x7FFFD ), - (0x80000, 0x8FFFD ), - (0x90000, 0x9FFFD), - (0xA0000, 0xAFFFD ), - (0xB0000, 0xBFFFD ), - (0xC0000, 0xCFFFD), - (0xD0000, 0xDFFFD ), - (0xE1000, 0xEFFFD), - (0xF0000, 0xFFFFD ), - (0x100000, 0x10FFFD) -] - -def encode(c): - retval = c - i = ord(c) - for low, high in escape_range: - if i < low: - break - if i >= low and i <= high: - retval = "".join(["%%%2X" % ord(o) for o in c.encode('utf-8')]) - break - return retval - - -def iri2uri(uri): - """Convert an IRI to a URI. Note that IRIs must be - passed in a unicode strings. That is, do not utf-8 encode - the IRI before passing it into the function.""" - if isinstance(uri ,unicode): - (scheme, authority, path, query, fragment) = urlparse.urlsplit(uri) - authority = authority.encode('idna') - # For each character in 'ucschar' or 'iprivate' - # 1. encode as utf-8 - # 2. 
then %-encode each octet of that utf-8 - uri = urlparse.urlunsplit((scheme, authority, path, query, fragment)) - uri = "".join([encode(c) for c in uri]) - return uri - -if __name__ == "__main__": - import unittest - - class Test(unittest.TestCase): - - def test_uris(self): - """Test that URIs are invariant under the transformation.""" - invariant = [ - u"ftp://ftp.is.co.za/rfc/rfc1808.txt", - u"http://www.ietf.org/rfc/rfc2396.txt", - u"ldap://[2001:db8::7]/c=GB?objectClass?one", - u"mailto:John.Doe@example.com", - u"news:comp.infosystems.www.servers.unix", - u"tel:+1-816-555-1212", - u"telnet://192.0.2.16:80/", - u"urn:oasis:names:specification:docbook:dtd:xml:4.1.2" ] - for uri in invariant: - self.assertEqual(uri, iri2uri(uri)) - - def test_iri(self): - """ Test that the right type of escaping is done for each part of the URI.""" - self.assertEqual("http://xn--o3h.com/%E2%98%84", iri2uri(u"http://\N{COMET}.com/\N{COMET}")) - self.assertEqual("http://bitworking.org/?fred=%E2%98%84", iri2uri(u"http://bitworking.org/?fred=\N{COMET}")) - self.assertEqual("http://bitworking.org/#%E2%98%84", iri2uri(u"http://bitworking.org/#\N{COMET}")) - self.assertEqual("#%E2%98%84", iri2uri(u"#\N{COMET}")) - self.assertEqual("/fred?bar=%E2%98%9A#%E2%98%84", iri2uri(u"/fred?bar=\N{BLACK LEFT POINTING INDEX}#\N{COMET}")) - self.assertEqual("/fred?bar=%E2%98%9A#%E2%98%84", iri2uri(iri2uri(u"/fred?bar=\N{BLACK LEFT POINTING INDEX}#\N{COMET}"))) - self.assertNotEqual("/fred?bar=%E2%98%9A#%E2%98%84", iri2uri(u"/fred?bar=\N{BLACK LEFT POINTING INDEX}#\N{COMET}".encode('utf-8'))) - - unittest.main() - - Index: python-rest-client/mimeTypes.py =================================================================== --- python-rest-client.orig/mimeTypes.py 2008-05-14 13:58:40.000000000 -0300 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 @@ -1,57 +0,0 @@ -""" - Copyright (C) 2008 Benjamin O'Steen - - This file is part of python-fedoracommons. - - python-fedoracommons is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - python-fedoracommons is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with python-fedoracommons. If not, see <http://www.gnu.org/licenses/>. 
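A usage sketch for the iri2uri() helper deleted in the hunk above, using the same values as its own unit tests; the input must be a unicode string, and the import path depends on how the old package was laid out:

# IRI -> URI: IDNA-encode the host, percent-encode non-ASCII path/query/fragment octets.
from iri2uri import iri2uri   # import path is an assumption

iri2uri(u"http://\N{COMET}.com/\N{COMET}")
# -> 'http://xn--o3h.com/%E2%98%84'
iri2uri(u"http://bitworking.org/?fred=\N{COMET}")
# -> 'http://bitworking.org/?fred=%E2%98%84'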
-""" - -__license__ = 'GPL http://www.gnu.org/licenses/gpl.txt' -__author__ = "Benjamin O'Steen <bosteen@gmail.com>" -__version__ = '0.1' - -class mimeTypes(object): - def getDictionary(self): - mimetype_to_extension = {} - extension_to_mimetype = {} - mimetype_to_extension['text/plain'] = 'txt' - mimetype_to_extension['text/xml'] = 'xml' - mimetype_to_extension['text/css'] = 'css' - mimetype_to_extension['text/javascript'] = 'js' - mimetype_to_extension['text/rtf'] = 'rtf' - mimetype_to_extension['text/calendar'] = 'ics' - mimetype_to_extension['application/msword'] = 'doc' - mimetype_to_extension['application/msexcel'] = 'xls' - mimetype_to_extension['application/x-msword'] = 'doc' - mimetype_to_extension['application/vnd.ms-excel'] = 'xls' - mimetype_to_extension['application/vnd.ms-powerpoint'] = 'ppt' - mimetype_to_extension['application/pdf'] = 'pdf' - mimetype_to_extension['text/comma-separated-values'] = 'csv' - - - mimetype_to_extension['image/jpeg'] = 'jpg' - mimetype_to_extension['image/gif'] = 'gif' - mimetype_to_extension['image/jpg'] = 'jpg' - mimetype_to_extension['image/tiff'] = 'tiff' - mimetype_to_extension['image/png'] = 'png' - - # And hacky reverse lookups - for mimetype in mimetype_to_extension: - extension_to_mimetype[mimetype_to_extension[mimetype]] = mimetype - - mimetype_extension_mapping = {} - mimetype_extension_mapping.update(mimetype_to_extension) - mimetype_extension_mapping.update(extension_to_mimetype) - - return mimetype_extension_mapping Index: python-rest-client/rest_client/gae_restful_lib.py =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ python-rest-client/rest_client/gae_restful_lib.py 2009-11-10 13:26:57.000000000 -0400 @@ -0,0 +1,456 @@ +""" + Copyright (C) 2008 Benjamin O'Steen + + This file is part of python-fedoracommons. + + python-fedoracommons is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + python-fedoracommons is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with python-fedoracommons. If not, see <http://www.gnu.org/licenses/>. +""" + +__license__ = 'GPL http://www.gnu.org/licenses/gpl.txt' +__author__ = "Benjamin O'Steen <bosteen@gmail.com>" +__version__ = '0.1' + +from google.appengine.api import urlfetch + +import urlparse +from urllib import urlencode +import base64 +from base64 import encodestring + +import re +import md5 +import calendar +import time +import random +import sha +import hmac + +from mimeTypes import * + +import mimetypes + +from cStringIO import StringIO + + + +# For Auth implemnentation: Digest (from httplib2) +# TODO: !Important - add proper code attribution for httplib2 parts +USE_WWW_AUTH_STRICT_PARSING = 0 +conn = None +# In regex below: +# [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+ matches a "token" as defined by HTTP +# "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?" 
matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space +# Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both: +# \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"? +WWW_AUTH_STRICT = re.compile(r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$") +WWW_AUTH_RELAXED = re.compile(r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$") +UNQUOTE_PAIRS = re.compile(r'\\(.)') + +def _parse_www_authenticate(headers, headername='www-authenticate'): + """Returns a dictionary of dictionaries, one dict + per auth_scheme.""" + retval = {} + if headers.has_key(headername): + authenticate = headers[headername].strip() + www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED + while authenticate: + # Break off the scheme at the beginning of the line + if headername == 'authentication-info': + (auth_scheme, the_rest) = ('digest', authenticate) + else: + (auth_scheme, the_rest) = authenticate.split(" ", 1) + # Now loop over all the key value pairs that come after the scheme, + # being careful not to roll into the next scheme + match = www_auth.search(the_rest) + auth_params = {} + while match: + if match and len(match.groups()) == 3: + (key, value, the_rest) = match.groups() + auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')]) + match = www_auth.search(the_rest) + retval[auth_scheme.lower()] = auth_params + authenticate = the_rest.strip() + return retval + +def _cnonce(): + dig = md5.new("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest() + return dig[:16] + +def _wsse_username_token(cnonce, iso_now, password): + return base64.encodestring(sha.new("%s%s%s" % (cnonce, iso_now, password)).digest()).strip() + +URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") + +def parse_uri(uri): + """Parses a URI using the regex given in Appendix B of RFC 3986. + + (scheme, authority, path, query, fragment) = parse_uri(uri) + """ + groups = URI.match(uri).groups() + return (groups[1], groups[3], groups[4], groups[6], groups[8]) + +# For credentials we need two things, first +# a pool of credential to try (not necesarily tied to BAsic, Digest, etc.) +# Then we also need a list of URIs that have already demanded authentication +# That list is tricky since sub-URIs can take the same auth, or the +# auth scheme may change as you descend the tree. +# So we also need each Auth instance to be able to tell us +# how close to the 'top' it is. + +class Authentication(object): + def __init__(self, credentials, host, request_uri, headers, response, content, http): + (scheme, authority, path, query, fragment) = parse_uri(request_uri) + self.path = path + self.host = host + self.credentials = credentials + self.http = http + + def depth(self, request_uri): + (scheme, authority, path, query, fragment) = parse_uri(request_uri) + return request_uri[len(self.path):].count("/") + + def inscope(self, host, request_uri): + # XXX Should we normalize the request_uri? 
+ (scheme, authority, path, query, fragment) = parse_uri(request_uri) + return (host == self.host) and path.startswith(self.path) + + def request(self, method, request_uri, headers, content): + """Modify the request headers to add the appropriate + Authorization header. Over-rise this in sub-classes.""" + pass + + def response(self, response, content): + """Gives us a chance to update with new nonces + or such returned from the last authorized response. + Over-rise this in sub-classes if necessary. + + Return TRUE is the request is to be retried, for + example Digest may return stale=true. + """ + return False + + + +class BasicAuthentication(Authentication): + def __init__(self, credentials, host, request_uri, headers, response, content, http): + Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) + + def request(self, method, request_uri, headers, content): + """Modify the request headers to add the appropriate + Authorization header.""" + headers['authorization'] = 'Basic ' + base64.encodestring("%s:%s" % self.credentials).strip() + + +class DigestAuthentication(Authentication): + """Only do qop='auth' and MD5, since that + is all Apache currently implements""" + def __init__(self, credentials, host, request_uri, headers, response, content, http): + Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) + challenge = _parse_www_authenticate(response, 'www-authenticate') + self.challenge = challenge['digest'] + qop = self.challenge.get('qop') + self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None + if self.challenge['qop'] is None: + raise UnimplementedDigestAuthOptionError( _("Unsupported value for qop: %s." % qop)) + self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5') + if self.challenge['algorithm'] != 'MD5': + raise UnimplementedDigestAuthOptionError( _("Unsupported value for algorithm: %s." 
% self.challenge['algorithm'])) + self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]]) + self.challenge['nc'] = 1 + + def request(self, method, request_uri, headers, content, cnonce = None): + """Modify the request headers""" + H = lambda x: md5.new(x).hexdigest() + KD = lambda s, d: H("%s:%s" % (s, d)) + A2 = "".join([method, ":", request_uri]) + self.challenge['cnonce'] = cnonce or _cnonce() + request_digest = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (self.challenge['nonce'], + '%08x' % self.challenge['nc'], + self.challenge['cnonce'], + self.challenge['qop'], H(A2) + )) + headers['Authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % ( + self.credentials[0], + self.challenge['realm'], + self.challenge['nonce'], + request_uri, + self.challenge['algorithm'], + request_digest, + self.challenge['qop'], + self.challenge['nc'], + self.challenge['cnonce'], + ) + self.challenge['nc'] += 1 + + def response(self, response, content): + if not response.has_key('authentication-info'): + challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {}) + if 'true' == challenge.get('stale'): + self.challenge['nonce'] = challenge['nonce'] + self.challenge['nc'] = 1 + return True + else: + updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {}) + + if updated_challenge.has_key('nextnonce'): + self.challenge['nonce'] = updated_challenge['nextnonce'] + self.challenge['nc'] = 1 + return False + + +class HmacDigestAuthentication(Authentication): + """Adapted from Robert Sayre's code and DigestAuthentication above.""" + __author__ = "Thomas Broyer (t.broyer@ltgt.net)" + + def __init__(self, credentials, host, request_uri, headers, response, content, http): + Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) + challenge = _parse_www_authenticate(response, 'www-authenticate') + self.challenge = challenge['hmacdigest'] + # TODO: self.challenge['domain'] + self.challenge['reason'] = self.challenge.get('reason', 'unauthorized') + if self.challenge['reason'] not in ['unauthorized', 'integrity']: + self.challenge['reason'] = 'unauthorized' + self.challenge['salt'] = self.challenge.get('salt', '') + if not self.challenge.get('snonce'): + raise UnimplementedHmacDigestAuthOptionError( _("The challenge doesn't contain a server nonce, or this one is empty.")) + self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-1') + if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']: + raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm'])) + self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA-1') + if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']: + raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for pw-algorithm: %s." 
% self.challenge['pw-algorithm'])) + if self.challenge['algorithm'] == 'HMAC-MD5': + self.hashmod = md5 + else: + self.hashmod = sha + if self.challenge['pw-algorithm'] == 'MD5': + self.pwhashmod = md5 + else: + self.pwhashmod = sha + self.key = "".join([self.credentials[0], ":", + self.pwhashmod.new("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(), + ":", self.challenge['realm'] + ]) + self.key = self.pwhashmod.new(self.key).hexdigest().lower() + + def request(self, method, request_uri, headers, content): + """Modify the request headers""" + keys = _get_end2end_headers(headers) + keylist = "".join(["%s " % k for k in keys]) + headers_val = "".join([headers[k] for k in keys]) + created = time.strftime('%Y-%m-%dT%H:%M:%SZ',time.gmtime()) + cnonce = _cnonce() + request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val) + request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower() + headers['Authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % ( + self.credentials[0], + self.challenge['realm'], + self.challenge['snonce'], + cnonce, + request_uri, + created, + request_digest, + keylist, + ) + + def response(self, response, content): + challenge = _parse_www_authenticate(response, 'www-authenticate').get('hmacdigest', {}) + if challenge.get('reason') in ['integrity', 'stale']: + return True + return False + + +class WsseAuthentication(Authentication): + """This is thinly tested and should not be relied upon. + At this time there isn't any third party server to test against. + Blogger and TypePad implemented this algorithm at one point + but Blogger has since switched to Basic over HTTPS and + TypePad has implemented it wrong, by never issuing a 401 + challenge but instead requiring your client to telepathically know that + their endpoint is expecting WSSE profile="UsernameToken".""" + def __init__(self, credentials, host, request_uri, headers, response, content, http): + Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) + + def request(self, method, request_uri, headers, content): + """Modify the request headers to add the appropriate + Authorization header.""" + headers['Authorization'] = 'WSSE profile="UsernameToken"' + iso_now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + cnonce = _cnonce() + password_digest = _wsse_username_token(cnonce, iso_now, self.credentials[1]) + headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"' % ( + self.credentials[0], + password_digest, + cnonce, + iso_now) + +class Credentials(object): + def __init__(self): + self.credentials = [] + + def add(self, name, password, domain=""): + self.credentials.append((domain.lower(), name, password)) + + def clear(self): + self.credentials = [] + + def iter(self, domain): + for (cdomain, name, password) in self.credentials: + if cdomain == "" or domain == cdomain: + yield (name, password) + +AUTH_SCHEME_CLASSES = { + "basic": BasicAuthentication, + "Basic": BasicAuthentication, + "wsse": WsseAuthentication, + "digest": DigestAuthentication, + "Digest": DigestAuthentication, + "hmacdigest": HmacDigestAuthentication +} + +AUTH_SCHEME_ORDER = ["hmacdigest", "digest", "Digest", "wsse", "basic", "Basic"] + +URLFETCH_METHOD_STRING = {urlfetch.GET:'GET', + urlfetch.PUT:'PUT', + urlfetch.DELETE:'DELETE', + urlfetch.POST:'POST', + urlfetch.HEAD:'HEAD' + } + 
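The two module-level tables just added drive scheme selection: _auth_from_challenge (defined below on GAE_Connection) walks AUTH_SCHEME_ORDER and instantiates the first AUTH_SCHEME_CLASSES entry whose scheme appears in the parsed challenge, so HMACDigest is preferred over Digest, which is preferred over Basic. A small sketch of that ordering with an illustrative challenge dict:

# Scheme preference: the first AUTH_SCHEME_ORDER entry present in the challenge wins.
challenges = {'digest': {'realm': 'fedora', 'nonce': 'abc', 'qop': 'auth'},
              'basic': {'realm': 'fedora'}}
chosen = [s for s in AUTH_SCHEME_ORDER if s in challenges]
# chosen -> ['digest', 'basic'], so AUTH_SCHEME_CLASSES['digest'] is tried first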
+ +class GAE_Connection: + def __init__(self, base_url, username=None, password=None): + self.base_url = base_url + m = mimeTypes() + self.mimetypes = m.getDictionary() + + # Name/password + self.credentials = Credentials() + + if username and password: + self.add_credentials(username, password, domain="") + + # authorization objects + self.authorizations = [] + + self.url = urlparse.urlparse(base_url) + + (scheme, netloc, path, query, fragment) = urlparse.urlsplit(base_url) + + self.scheme = scheme + self.host = netloc + self.path = path + + def _auth_from_challenge(self, host, request_uri, headers, response, content): + """A generator that creates Authorization objects + that can be applied to requests. + """ + challenges = _parse_www_authenticate(response, 'www-authenticate') + for cred in self.credentials.iter(host): + for scheme in AUTH_SCHEME_ORDER: + if challenges.has_key(scheme): + yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self) + + def add_credentials(self, name, password, domain=""): + """Add a name and password that will be used + any time a request requires authentication.""" + self.credentials.add(name, password, domain) + + def clear_credentials(self): + """Remove all the names and passwords + that are used for authentication""" + self.credentials.clear() + self.authorizations = [] + + def request_get(self, resource, args = None, headers={}): + return self.request(resource, urlfetch.GET, args, headers=headers) + + def request_delete(self, resource, args = None, headers={}): + return self.request(resource, urlfetch.DELETE, args, headers=headers) + + def request_post(self, resource, args = None, body = None, filename=None, headers={}): + return self.request(resource, urlfetch.POST, args , body = body, filename=filename, headers=headers) + + def request_put(self, resource, args = None, body = None, filename=None, headers={}): + return self.request(resource, urlfetch.PUT, args , body = body, filename=filename, headers=headers) + + def request_head(self, resource, args = None, body = None, filename=None, headers={}): + return self.request(resource, urlfetch.HEAD, args , body = body, filename=filename, headers=headers) + + def _conn_request(self, conn, request_uri, method, body, headers): + # Shim to allow easy reuse of httplib2 auth methods - conn param is not used + urlfetch_response = urlfetch.fetch(request_uri, method=method, payload=body, headers=headers) + r_headers={'status':urlfetch_response.status_code} + for header_key in urlfetch_response.headers: + r_headers[header_key.lower()] = urlfetch_response.headers[header_key] + + return (r_headers, urlfetch_response.content.decode('UTF-8')) + + def get_content_type(self, filename): + extension = filename.split('.')[-1] + guessed_mimetype = self.mimetypes.get(extension, mimetypes.guess_type(filename)[0]) + return guessed_mimetype or 'application/octet-stream' + + def request(self, resource, method = urlfetch.GET, args = None, body = None, filename=None, headers={}): + params = None + path = resource + headers['User-Agent'] = 'Basic Agent' + + if not headers.get('Content-Type', None): + headers['Content-Type']='text/plain' + + request_path = [] + if self.path != "/": + if self.path.endswith('/'): + request_path.append(self.path[:-1]) + else: + request_path.append(self.path) + if path.startswith('/'): + request_path.append(path[1:]) + else: + request_path.append(path) + full_path = u'/'.join(request_path) + + if args: + full_path += u"?%s" % (urlencode(args)) + + request_uri = 
u"%s://%s%s" % (self.scheme, self.host, full_path) + + auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)] + auth = auths and sorted(auths)[0][1] or None + if auth: + auth.request(method, request_uri, headers, body) + + (response, content) = self._conn_request(conn, request_uri, method, body, headers) + + if auth: + if auth.response(response, body): + auth.request(URLFETCH_METHOD_STRING[method], request_uri, headers, body) + + (response, content) = self._conn_request(conn, request_uri, method, body, headers) + + if response['status'] == 401: + #return {u"body":u"".join(["%s: %s" % (key, response[key]) for key in response])} + for authorization in self._auth_from_challenge(self.host, request_uri, headers, response, content): + authorization.request(URLFETCH_METHOD_STRING[method], request_uri, headers, body) + + (response, content) = self._conn_request(conn, request_uri, method, body, headers) + + if response['status'] != 401: + self.authorizations.append(authorization) + authorization.response(response, body) + break + + return {u'headers':response, u'body':content} + Index: python-rest-client/rest_client/httplib2/__init__.py =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ python-rest-client/rest_client/httplib2/__init__.py 2009-11-10 13:26:57.000000000 -0400 @@ -0,0 +1,1123 @@ +from __future__ import generators +""" +httplib2 + +A caching http interface that supports ETags and gzip +to conserve bandwidth. + +Requires Python 2.3 or later + +Changelog: +2007-08-18, Rick: Modified so it's able to use a socks proxy if needed. + +""" + +__author__ = "Joe Gregorio (joe@bitworking.org)" +__copyright__ = "Copyright 2006, Joe Gregorio" +__contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)", + "James Antill", + "Xavier Verges Farrero", + "Jonathan Feinberg", + "Blair Zajac", + "Sam Ruby", + "Louis Nyffenegger"] +__license__ = "MIT" +__version__ = "$Rev: 259 $" + +import re +import sys +import md5 +import email +import email.Utils +import email.Message +import StringIO +import gzip +import zlib +import httplib +import urlparse +import base64 +import os +import copy +import calendar +import time +import random +import sha +import hmac +from gettext import gettext as _ +import socket + +try: + import socks +except ImportError: + socks = None + +if sys.version_info >= (2,3): + from iri2uri import iri2uri +else: + def iri2uri(uri): + return uri + +__all__ = ['Http', 'Response', 'ProxyInfo', 'HttpLib2Error', + 'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent', + 'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError', + 'debuglevel'] + + +# The httplib debug level, set to a non-zero value to get debug output +debuglevel = 0 + +# Python 2.3 support +if sys.version_info < (2,4): + def sorted(seq): + seq.sort() + return seq + +# Python 2.3 support +def HTTPResponse__getheaders(self): + """Return list of (header, value) tuples.""" + if self.msg is None: + raise httplib.ResponseNotReady() + return self.msg.items() + +if not hasattr(httplib.HTTPResponse, 'getheaders'): + httplib.HTTPResponse.getheaders = HTTPResponse__getheaders + +# All exceptions raised here derive from HttpLib2Error +class HttpLib2Error(Exception): pass + +# Some exceptions can be caught and optionally +# be turned back into responses. 
+class HttpLib2ErrorWithResponse(HttpLib2Error): + def __init__(self, desc, response, content): + self.response = response + self.content = content + HttpLib2Error.__init__(self, desc) + +class RedirectMissingLocation(HttpLib2ErrorWithResponse): pass +class RedirectLimit(HttpLib2ErrorWithResponse): pass +class FailedToDecompressContent(HttpLib2ErrorWithResponse): pass +class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse): pass +class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse): pass + +class RelativeURIError(HttpLib2Error): pass +class ServerNotFoundError(HttpLib2Error): pass + +# Open Items: +# ----------- +# Proxy support + +# Are we removing the cached content too soon on PUT (only delete on 200 Maybe?) + +# Pluggable cache storage (supports storing the cache in +# flat files by default. We need a plug-in architecture +# that can support Berkeley DB and Squid) + +# == Known Issues == +# Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator. +# Does not handle Cache-Control: max-stale +# Does not use Age: headers when calculating cache freshness. + + +# The number of redirections to follow before giving up. +# Note that only GET redirects are automatically followed. +# Will also honor 301 requests by saving that info and never +# requesting that URI again. +DEFAULT_MAX_REDIRECTS = 5 + +# Which headers are hop-by-hop headers by default +HOP_BY_HOP = ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers', 'transfer-encoding', 'upgrade'] + +def _get_end2end_headers(response): + hopbyhop = list(HOP_BY_HOP) + hopbyhop.extend([x.strip() for x in response.get('connection', '').split(',')]) + return [header for header in response.keys() if header not in hopbyhop] + +URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") + +def parse_uri(uri): + """Parses a URI using the regex given in Appendix B of RFC 3986. + + (scheme, authority, path, query, fragment) = parse_uri(uri) + """ + groups = URI.match(uri).groups() + return (groups[1], groups[3], groups[4], groups[6], groups[8]) + +def urlnorm(uri): + (scheme, authority, path, query, fragment) = parse_uri(uri) + if not scheme or not authority: + raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri) + authority = authority.lower() + scheme = scheme.lower() + if not path: + path = "/" + # Could do syntax based normalization of the URI before + # computing the digest. See Section 6.2.2 of Std 66. + request_uri = query and "?".join([path, query]) or path + scheme = scheme.lower() + defrag_uri = scheme + "://" + authority + request_uri + return scheme, authority, request_uri, defrag_uri + + +# Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/) +re_url_scheme = re.compile(r'^\w+://') +re_slash = re.compile(r'[?/:|]+') + +def safename(filename): + """Return a filename suitable for the cache. + + Strips dangerous and common characters to create a filename we + can use to store the cache in. 
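A sketch of the URI helpers defined above: parse_uri() splits a URI per RFC 3986 Appendix B, and urlnorm() lower-cases the scheme and authority and builds the defragmented URI used as the cache key. The import path assumes the package layout added by this patch:

from rest_client.httplib2 import parse_uri, urlnorm

parse_uri("http://example.org/a/b?x=1#frag")
# -> ('http', 'example.org', '/a/b', 'x=1', 'frag')
urlnorm("HTTP://Example.ORG/a/b?x=1")
# -> ('http', 'example.org', '/a/b?x=1', 'http://example.org/a/b?x=1')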
+ """ + + try: + if re_url_scheme.match(filename): + if isinstance(filename,str): + filename = filename.decode('utf-8') + filename = filename.encode('idna') + else: + filename = filename.encode('idna') + except UnicodeError: + pass + if isinstance(filename,unicode): + filename=filename.encode('utf-8') + filemd5 = md5.new(filename).hexdigest() + filename = re_url_scheme.sub("", filename) + filename = re_slash.sub(",", filename) + + # limit length of filename + if len(filename)>200: + filename=filename[:200] + return ",".join((filename, filemd5)) + +NORMALIZE_SPACE = re.compile(r'(?:\r\n)?[ \t]+') +def _normalize_headers(headers): + return dict([ (key.lower(), NORMALIZE_SPACE.sub(value, ' ').strip()) for (key, value) in headers.iteritems()]) + +def _parse_cache_control(headers): + retval = {} + if headers.has_key('cache-control'): + parts = headers['cache-control'].split(',') + parts_with_args = [tuple([x.strip() for x in part.split("=")]) for part in parts if -1 != part.find("=")] + parts_wo_args = [(name.strip(), 1) for name in parts if -1 == name.find("=")] + retval = dict(parts_with_args + parts_wo_args) + return retval + +# Whether to use a strict mode to parse WWW-Authenticate headers +# Might lead to bad results in case of ill-formed header value, +# so disabled by default, falling back to relaxed parsing. +# Set to true to turn on, usefull for testing servers. +USE_WWW_AUTH_STRICT_PARSING = 0 + +# In regex below: +# [^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+ matches a "token" as defined by HTTP +# "(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?" matches a "quoted-string" as defined by HTTP, when LWS have already been replaced by a single space +# Actually, as an auth-param value can be either a token or a quoted-string, they are combined in a single pattern which matches both: +# \"?((?<=\")(?:[^\0-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x08\x0A-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"? 
+WWW_AUTH_STRICT = re.compile(r"^(?:\s*(?:,\s*)?([^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+)\s*=\s*\"?((?<=\")(?:[^\0-\x08\x0A-\x1f\x7f-\xff\\\"]|\\[\0-\x7f])*?(?=\")|(?<!\")[^\0-\x1f\x7f-\xff()<>@,;:\\\"/[\]?={} \t]+(?!\"))\"?)(.*)$") +WWW_AUTH_RELAXED = re.compile(r"^(?:\s*(?:,\s*)?([^ \t\r\n=]+)\s*=\s*\"?((?<=\")(?:[^\\\"]|\\.)*?(?=\")|(?<!\")[^ \t\r\n,]+(?!\"))\"?)(.*)$") +UNQUOTE_PAIRS = re.compile(r'\\(.)') +def _parse_www_authenticate(headers, headername='www-authenticate'): + """Returns a dictionary of dictionaries, one dict + per auth_scheme.""" + retval = {} + if headers.has_key(headername): + authenticate = headers[headername].strip() + www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED + while authenticate: + # Break off the scheme at the beginning of the line + if headername == 'authentication-info': + (auth_scheme, the_rest) = ('digest', authenticate) + else: + (auth_scheme, the_rest) = authenticate.split(" ", 1) + # Now loop over all the key value pairs that come after the scheme, + # being careful not to roll into the next scheme + match = www_auth.search(the_rest) + auth_params = {} + while match: + if match and len(match.groups()) == 3: + (key, value, the_rest) = match.groups() + auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')]) + match = www_auth.search(the_rest) + retval[auth_scheme.lower()] = auth_params + authenticate = the_rest.strip() + return retval + + +def _entry_disposition(response_headers, request_headers): + """Determine freshness from the Date, Expires and Cache-Control headers. + + We don't handle the following: + + 1. Cache-Control: max-stale + 2. Age: headers are not used in the calculations. + + Not that this algorithm is simpler than you might think + because we are operating as a private (non-shared) cache. + This lets us ignore 's-maxage'. We can also ignore + 'proxy-invalidate' since we aren't a proxy. + We will never return a stale document as + fresh as a design decision, and thus the non-implementation + of 'max-stale'. This also lets us safely ignore 'must-revalidate' + since we operate as if every server has sent 'must-revalidate'. + Since we are private we get to ignore both 'public' and + 'private' parameters. We also ignore 'no-transform' since + we don't do any transformations. + The 'no-store' parameter is handled at a higher level. 
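A standalone sketch of the cache policy the docstring above describes and the function body that follows implements: a request-side no-cache forces revalidation, only-if-cached forces the cached copy, and otherwise max-age is compared with the entry's age. Header values are illustrative, and the import path assumes the layout added by this patch:

import time
from email.Utils import formatdate            # Python 2 spelling, as used by this module
from rest_client.httplib2 import _entry_disposition

response_headers = {'date': formatdate(time.time() - 60),
                    'cache-control': 'max-age=3600'}
_entry_disposition(response_headers, {})                              # -> 'FRESH'
_entry_disposition(response_headers, {'cache-control': 'no-cache'})   # -> 'TRANSPARENT'
_entry_disposition({'cache-control': 'no-cache'},
                   {'cache-control': 'only-if-cached'})               # -> 'STALE'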
+ So the only Cache-Control parameters we look at are: + + no-cache + only-if-cached + max-age + min-fresh + """ + + retval = "STALE" + cc = _parse_cache_control(request_headers) + cc_response = _parse_cache_control(response_headers) + + if request_headers.has_key('pragma') and request_headers['pragma'].lower().find('no-cache') != -1: + retval = "TRANSPARENT" + if 'cache-control' not in request_headers: + request_headers['cache-control'] = 'no-cache' + elif cc.has_key('no-cache'): + retval = "TRANSPARENT" + elif cc_response.has_key('no-cache'): + retval = "STALE" + elif cc.has_key('only-if-cached'): + retval = "FRESH" + elif response_headers.has_key('date'): + date = calendar.timegm(email.Utils.parsedate_tz(response_headers['date'])) + now = time.time() + current_age = max(0, now - date) + if cc_response.has_key('max-age'): + try: + freshness_lifetime = int(cc_response['max-age']) + except ValueError: + freshness_lifetime = 0 + elif response_headers.has_key('expires'): + expires = email.Utils.parsedate_tz(response_headers['expires']) + if None == expires: + freshness_lifetime = 0 + else: + freshness_lifetime = max(0, calendar.timegm(expires) - date) + else: + freshness_lifetime = 0 + if cc.has_key('max-age'): + try: + freshness_lifetime = int(cc['max-age']) + except ValueError: + freshness_lifetime = 0 + if cc.has_key('min-fresh'): + try: + min_fresh = int(cc['min-fresh']) + except ValueError: + min_fresh = 0 + current_age += min_fresh + if freshness_lifetime > current_age: + retval = "FRESH" + return retval + +def _decompressContent(response, new_content): + content = new_content + try: + encoding = response.get('content-encoding', None) + if encoding in ['gzip', 'deflate']: + if encoding == 'gzip': + content = gzip.GzipFile(fileobj=StringIO.StringIO(new_content)).read() + if encoding == 'deflate': + content = zlib.decompress(content) + response['content-length'] = str(len(content)) + del response['content-encoding'] + except IOError: + content = "" + raise FailedToDecompressContent(_("Content purported to be compressed with %s but failed to decompress.") % response.get('content-encoding'), response, content) + return content + +def _updateCache(request_headers, response_headers, content, cache, cachekey): + if cachekey: + cc = _parse_cache_control(request_headers) + cc_response = _parse_cache_control(response_headers) + if cc.has_key('no-store') or cc_response.has_key('no-store'): + cache.delete(cachekey) + else: + info = email.Message.Message() + for key, value in response_headers.iteritems(): + if key not in ['status','content-encoding','transfer-encoding']: + info[key] = value + + status = response_headers.status + if status == 304: + status = 200 + + status_header = 'status: %d\r\n' % response_headers.status + + header_str = info.as_string() + + header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str) + text = "".join([status_header, header_str, content]) + + cache.set(cachekey, text) + +def _cnonce(): + dig = md5.new("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).hexdigest() + return dig[:16] + +def _wsse_username_token(cnonce, iso_now, password): + return base64.encodestring(sha.new("%s%s%s" % (cnonce, iso_now, password)).digest()).strip() + + +# For credentials we need two things, first +# a pool of credential to try (not necesarily tied to BAsic, Digest, etc.) 
+# Then we also need a list of URIs that have already demanded authentication +# That list is tricky since sub-URIs can take the same auth, or the +# auth scheme may change as you descend the tree. +# So we also need each Auth instance to be able to tell us +# how close to the 'top' it is. + +class Authentication(object): + def __init__(self, credentials, host, request_uri, headers, response, content, http): + (scheme, authority, path, query, fragment) = parse_uri(request_uri) + self.path = path + self.host = host + self.credentials = credentials + self.http = http + + def depth(self, request_uri): + (scheme, authority, path, query, fragment) = parse_uri(request_uri) + return request_uri[len(self.path):].count("/") + + def inscope(self, host, request_uri): + # XXX Should we normalize the request_uri? + (scheme, authority, path, query, fragment) = parse_uri(request_uri) + return (host == self.host) and path.startswith(self.path) + + def request(self, method, request_uri, headers, content): + """Modify the request headers to add the appropriate + Authorization header. Over-rise this in sub-classes.""" + pass + + def response(self, response, content): + """Gives us a chance to update with new nonces + or such returned from the last authorized response. + Over-rise this in sub-classes if necessary. + + Return TRUE is the request is to be retried, for + example Digest may return stale=true. + """ + return False + + + +class BasicAuthentication(Authentication): + def __init__(self, credentials, host, request_uri, headers, response, content, http): + Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) + + def request(self, method, request_uri, headers, content): + """Modify the request headers to add the appropriate + Authorization header.""" + headers['authorization'] = 'Basic ' + base64.encodestring("%s:%s" % self.credentials).strip() + + +class DigestAuthentication(Authentication): + """Only do qop='auth' and MD5, since that + is all Apache currently implements""" + def __init__(self, credentials, host, request_uri, headers, response, content, http): + Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) + challenge = _parse_www_authenticate(response, 'www-authenticate') + self.challenge = challenge['digest'] + qop = self.challenge.get('qop') + self.challenge['qop'] = ('auth' in [x.strip() for x in qop.split()]) and 'auth' or None + if self.challenge['qop'] is None: + raise UnimplementedDigestAuthOptionError( _("Unsupported value for qop: %s." % qop)) + self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5') + if self.challenge['algorithm'] != 'MD5': + raise UnimplementedDigestAuthOptionError( _("Unsupported value for algorithm: %s." 
% self.challenge['algorithm'])) + self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]]) + self.challenge['nc'] = 1 + + def request(self, method, request_uri, headers, content, cnonce = None): + """Modify the request headers""" + H = lambda x: md5.new(x).hexdigest() + KD = lambda s, d: H("%s:%s" % (s, d)) + A2 = "".join([method, ":", request_uri]) + self.challenge['cnonce'] = cnonce or _cnonce() + request_digest = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (self.challenge['nonce'], + '%08x' % self.challenge['nc'], + self.challenge['cnonce'], + self.challenge['qop'], H(A2) + )) + headers['Authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % ( + self.credentials[0], + self.challenge['realm'], + self.challenge['nonce'], + request_uri, + self.challenge['algorithm'], + request_digest, + self.challenge['qop'], + self.challenge['nc'], + self.challenge['cnonce'], + ) + self.challenge['nc'] += 1 + + def response(self, response, content): + if not response.has_key('authentication-info'): + challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {}) + if 'true' == challenge.get('stale'): + self.challenge['nonce'] = challenge['nonce'] + self.challenge['nc'] = 1 + return True + else: + updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {}) + + if updated_challenge.has_key('nextnonce'): + self.challenge['nonce'] = updated_challenge['nextnonce'] + self.challenge['nc'] = 1 + return False + + +class HmacDigestAuthentication(Authentication): + """Adapted from Robert Sayre's code and DigestAuthentication above.""" + __author__ = "Thomas Broyer (t.broyer@ltgt.net)" + + def __init__(self, credentials, host, request_uri, headers, response, content, http): + Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) + challenge = _parse_www_authenticate(response, 'www-authenticate') + self.challenge = challenge['hmacdigest'] + # TODO: self.challenge['domain'] + self.challenge['reason'] = self.challenge.get('reason', 'unauthorized') + if self.challenge['reason'] not in ['unauthorized', 'integrity']: + self.challenge['reason'] = 'unauthorized' + self.challenge['salt'] = self.challenge.get('salt', '') + if not self.challenge.get('snonce'): + raise UnimplementedHmacDigestAuthOptionError( _("The challenge doesn't contain a server nonce, or this one is empty.")) + self.challenge['algorithm'] = self.challenge.get('algorithm', 'HMAC-SHA-1') + if self.challenge['algorithm'] not in ['HMAC-SHA-1', 'HMAC-MD5']: + raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm'])) + self.challenge['pw-algorithm'] = self.challenge.get('pw-algorithm', 'SHA-1') + if self.challenge['pw-algorithm'] not in ['SHA-1', 'MD5']: + raise UnimplementedHmacDigestAuthOptionError( _("Unsupported value for pw-algorithm: %s." 
% self.challenge['pw-algorithm'])) + if self.challenge['algorithm'] == 'HMAC-MD5': + self.hashmod = md5 + else: + self.hashmod = sha + if self.challenge['pw-algorithm'] == 'MD5': + self.pwhashmod = md5 + else: + self.pwhashmod = sha + self.key = "".join([self.credentials[0], ":", + self.pwhashmod.new("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(), + ":", self.challenge['realm'] + ]) + self.key = self.pwhashmod.new(self.key).hexdigest().lower() + + def request(self, method, request_uri, headers, content): + """Modify the request headers""" + keys = _get_end2end_headers(headers) + keylist = "".join(["%s " % k for k in keys]) + headers_val = "".join([headers[k] for k in keys]) + created = time.strftime('%Y-%m-%dT%H:%M:%SZ',time.gmtime()) + cnonce = _cnonce() + request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val) + request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower() + headers['Authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % ( + self.credentials[0], + self.challenge['realm'], + self.challenge['snonce'], + cnonce, + request_uri, + created, + request_digest, + keylist, + ) + + def response(self, response, content): + challenge = _parse_www_authenticate(response, 'www-authenticate').get('hmacdigest', {}) + if challenge.get('reason') in ['integrity', 'stale']: + return True + return False + + +class WsseAuthentication(Authentication): + """This is thinly tested and should not be relied upon. + At this time there isn't any third party server to test against. + Blogger and TypePad implemented this algorithm at one point + but Blogger has since switched to Basic over HTTPS and + TypePad has implemented it wrong, by never issuing a 401 + challenge but instead requiring your client to telepathically know that + their endpoint is expecting WSSE profile="UsernameToken".""" + def __init__(self, credentials, host, request_uri, headers, response, content, http): + Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) + + def request(self, method, request_uri, headers, content): + """Modify the request headers to add the appropriate + Authorization header.""" + headers['Authorization'] = 'WSSE profile="UsernameToken"' + iso_now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + cnonce = _cnonce() + password_digest = _wsse_username_token(cnonce, iso_now, self.credentials[1]) + headers['X-WSSE'] = 'UsernameToken Username="%s", PasswordDigest="%s", Nonce="%s", Created="%s"' % ( + self.credentials[0], + password_digest, + cnonce, + iso_now) + +class GoogleLoginAuthentication(Authentication): + def __init__(self, credentials, host, request_uri, headers, response, content, http): + from urllib import urlencode + Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) + challenge = _parse_www_authenticate(response, 'www-authenticate') + service = challenge['googlelogin'].get('service', 'xapi') + # Bloggger actually returns the service in the challenge + # For the rest we guess based on the URI + if service == 'xapi' and request_uri.find("calendar") > 0: + service = "cl" + # No point in guessing Base or Spreadsheet + #elif request_uri.find("spreadsheets") > 0: + # service = "wise" + + auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers['user-agent']) + resp, content = 
self.http.request("https://www.google.com/accounts/ClientLogin", method="POST", body=urlencode(auth), headers={'Content-Type': 'application/x-www-form-urlencoded'}) + lines = content.split('\n') + d = dict([tuple(line.split("=", 1)) for line in lines if line]) + if resp.status == 403: + self.Auth = "" + else: + self.Auth = d['Auth'] + + def request(self, method, request_uri, headers, content): + """Modify the request headers to add the appropriate + Authorization header.""" + headers['authorization'] = 'GoogleLogin Auth=' + self.Auth + + +AUTH_SCHEME_CLASSES = { + "basic": BasicAuthentication, + "wsse": WsseAuthentication, + "digest": DigestAuthentication, + "hmacdigest": HmacDigestAuthentication, + "googlelogin": GoogleLoginAuthentication +} + +AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"] + +def _md5(s): + return + +class FileCache(object): + """Uses a local directory as a store for cached files. + Not really safe to use if multiple threads or processes are going to + be running on the same cache. + """ + def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior + self.cache = cache + self.safe = safe + if not os.path.exists(cache): + os.makedirs(self.cache) + + def get(self, key): + retval = None + cacheFullPath = os.path.join(self.cache, self.safe(key)) + try: + f = file(cacheFullPath, "r") + retval = f.read() + f.close() + except IOError: + pass + return retval + + def set(self, key, value): + cacheFullPath = os.path.join(self.cache, self.safe(key)) + f = file(cacheFullPath, "w") + f.write(value) + f.close() + + def delete(self, key): + cacheFullPath = os.path.join(self.cache, self.safe(key)) + if os.path.exists(cacheFullPath): + os.remove(cacheFullPath) + +class Credentials(object): + def __init__(self): + self.credentials = [] + + def add(self, name, password, domain=""): + self.credentials.append((domain.lower(), name, password)) + + def clear(self): + self.credentials = [] + + def iter(self, domain): + for (cdomain, name, password) in self.credentials: + if cdomain == "" or domain == cdomain: + yield (name, password) + +class KeyCerts(Credentials): + """Identical to Credentials except that + name/password are mapped to key/cert.""" + pass + + +class ProxyInfo(object): + """Collect information required to use a proxy.""" + def __init__(self, proxy_type, proxy_host, proxy_port, proxy_rdns=None, proxy_user=None, proxy_pass=None): + """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX + constants. For example: + +p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost', proxy_port=8000) + """ + self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns, self.proxy_user, self.proxy_pass = proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass + + def astuple(self): + return (self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns, + self.proxy_user, self.proxy_pass) + + def isgood(self): + return socks and (self.proxy_host != None) and (self.proxy_port != None) + + +class HTTPConnectionWithTimeout(httplib.HTTPConnection): + """HTTPConnection subclass that supports timeouts""" + + def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None): + httplib.HTTPConnection.__init__(self, host, port, strict) + self.timeout = timeout + self.proxy_info = proxy_info + + def connect(self): + """Connect to the host and port specified in __init__.""" + # Mostly verbatim from httplib.py. 
+ msg = "getaddrinfo returns an empty list" + for res in socket.getaddrinfo(self.host, self.port, 0, + socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + try: + if self.proxy_info and self.proxy_info.isgood(): + self.sock = socks.socksocket(af, socktype, proto) + self.sock.setproxy(*self.proxy_info.astuple()) + else: + self.sock = socket.socket(af, socktype, proto) + # Different from httplib: support timeouts. + if self.timeout is not None: + self.sock.settimeout(self.timeout) + # End of difference from httplib. + if self.debuglevel > 0: + print "connect: (%s, %s)" % (self.host, self.port) + self.sock.connect(sa) + except socket.error, msg: + if self.debuglevel > 0: + print 'connect fail:', (self.host, self.port) + if self.sock: + self.sock.close() + self.sock = None + continue + break + if not self.sock: + raise socket.error, msg + +class HTTPSConnectionWithTimeout(httplib.HTTPSConnection): + "This class allows communication via SSL." + + def __init__(self, host, port=None, key_file=None, cert_file=None, + strict=None, timeout=None, proxy_info=None): + self.timeout = timeout + self.proxy_info = proxy_info + httplib.HTTPSConnection.__init__(self, host, port=port, key_file=key_file, + cert_file=cert_file, strict=strict) + + def connect(self): + "Connect to a host on a given (SSL) port." + + if self.proxy_info and self.proxy_info.isgood(): + self.sock.setproxy(*self.proxy_info.astuple()) + sock.setproxy(*self.proxy_info.astuple()) + else: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + if self.timeout is not None: + sock.settimeout(self.timeout) + sock.connect((self.host, self.port)) + ssl = socket.ssl(sock, self.key_file, self.cert_file) + self.sock = httplib.FakeSocket(sock, ssl) + + + +class Http(object): + """An HTTP client that handles: +- all methods +- caching +- ETags +- compression, +- HTTPS +- Basic +- Digest +- WSSE + +and more. + """ + def __init__(self, cache=None, timeout=None, proxy_info=None): + """The value of proxy_info is a ProxyInfo instance. + +If 'cache' is a string then it is used as a directory name +for a disk cache. Otherwise it must be an object that supports +the same interface as FileCache.""" + self.proxy_info = proxy_info + # Map domain name to an httplib connection + self.connections = {} + # The location of the cache, for now a directory + # where cached responses are held. + if cache and isinstance(cache, str): + self.cache = FileCache(cache) + else: + self.cache = cache + + # Name/password + self.credentials = Credentials() + + # Key/cert + self.certificates = KeyCerts() + + # authorization objects + self.authorizations = [] + + # If set to False then no redirects are followed, even safe ones. + self.follow_redirects = True + + # If 'follow_redirects' is True, and this is set to True then + # all redirecs are followed, including unsafe ones. + self.follow_all_redirects = False + + self.ignore_etag = False + + self.force_exception_to_status_code = False + + self.timeout = timeout + + def _auth_from_challenge(self, host, request_uri, headers, response, content): + """A generator that creates Authorization objects + that can be applied to requests. 
+ """ + challenges = _parse_www_authenticate(response, 'www-authenticate') + for cred in self.credentials.iter(host): + for scheme in AUTH_SCHEME_ORDER: + if challenges.has_key(scheme): + yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self) + + def add_credentials(self, name, password, domain=""): + """Add a name and password that will be used + any time a request requires authentication.""" + self.credentials.add(name, password, domain) + + def add_certificate(self, key, cert, domain): + """Add a key and cert that will be used + any time a request requires authentication.""" + self.certificates.add(key, cert, domain) + + def clear_credentials(self): + """Remove all the names and passwords + that are used for authentication""" + self.credentials.clear() + self.authorizations = [] + + def _conn_request(self, conn, request_uri, method, body, headers): + for i in range(2): + try: + conn.request(method, request_uri, body, headers) + response = conn.getresponse() + except socket.gaierror: + conn.close() + raise ServerNotFoundError("Unable to find the server at %s" % conn.host) + except httplib.HTTPException, e: + if i == 0: + conn.close() + conn.connect() + continue + else: + raise + else: + content = response.read() + response = Response(response) + if method != "HEAD": + content = _decompressContent(response, content) + + break; + return (response, content) + + + def _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey): + """Do the actual request using the connection object + and also follow one level of redirects if necessary""" + + auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)] + auth = auths and sorted(auths)[0][1] or None + if auth: + auth.request(method, request_uri, headers, body) + + (response, content) = self._conn_request(conn, request_uri, method, body, headers) + + if auth: + if auth.response(response, body): + auth.request(method, request_uri, headers, body) + (response, content) = self._conn_request(conn, request_uri, method, body, headers ) + response._stale_digest = 1 + + if response.status == 401: + for authorization in self._auth_from_challenge(host, request_uri, headers, response, content): + authorization.request(method, request_uri, headers, body) + (response, content) = self._conn_request(conn, request_uri, method, body, headers, ) + if response.status != 401: + self.authorizations.append(authorization) + authorization.response(response, body) + break + + if (self.follow_all_redirects or (method in ["GET", "HEAD"]) or response.status == 303): + if self.follow_redirects and response.status in [300, 301, 302, 303, 307]: + # Pick out the location header and basically start from the beginning + # remembering first to strip the ETag header and decrement our 'depth' + if redirections: + if not response.has_key('location') and response.status != 300: + raise RedirectMissingLocation( _("Redirected but the response is missing a Location: header."), response, content) + # Fix-up relative redirects (which violate an RFC 2616 MUST) + if response.has_key('location'): + location = response['location'] + (scheme, authority, path, query, fragment) = parse_uri(location) + if authority == None: + response['location'] = urlparse.urljoin(absolute_uri, location) + if response.status == 301 and method in ["GET", "HEAD"]: + response['-x-permanent-redirect-url'] = response['location'] + if not response.has_key('content-location'): + 
response['content-location'] = absolute_uri + _updateCache(headers, response, content, self.cache, cachekey) + if headers.has_key('if-none-match'): + del headers['if-none-match'] + if headers.has_key('if-modified-since'): + del headers['if-modified-since'] + if response.has_key('location'): + location = response['location'] + old_response = copy.deepcopy(response) + if not old_response.has_key('content-location'): + old_response['content-location'] = absolute_uri + redirect_method = ((response.status == 303) and (method not in ["GET", "HEAD"])) and "GET" or method + (response, content) = self.request(location, redirect_method, body=body, headers = headers, redirections = redirections - 1) + response.previous = old_response + else: + raise RedirectLimit( _("Redirected more times than rediection_limit allows."), response, content) + elif response.status in [200, 203] and method == "GET": + # Don't cache 206's since we aren't going to handle byte range requests + if not response.has_key('content-location'): + response['content-location'] = absolute_uri + _updateCache(headers, response, content, self.cache, cachekey) + + return (response, content) + + +# Need to catch and rebrand some exceptions +# Then need to optionally turn all exceptions into status codes +# including all socket.* and httplib.* exceptions. + + + def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None): + """ Performs a single HTTP request. +The 'uri' is the URI of the HTTP resource and can begin +with either 'http' or 'https'. The value of 'uri' must be an absolute URI. + +The 'method' is the HTTP method to perform, such as GET, POST, DELETE, etc. +There is no restriction on the methods allowed. + +The 'body' is the entity body to be sent with the request. It is a string +object. + +Any extra headers that are to be sent with the request should be provided in the +'headers' dictionary. + +The maximum number of redirect to follow before raising an +exception is 'redirections. The default is 5. + +The return value is a tuple of (response, content), the first +being and instance of the 'Response' class, the second being +a string that contains the response entity body. 
+ """ + try: + if headers is None: + headers = {} + else: + headers = _normalize_headers(headers) + + if not headers.has_key('user-agent'): + headers['user-agent'] = "Python-httplib2/%s" % __version__ + + uri = iri2uri(uri) + + (scheme, authority, request_uri, defrag_uri) = urlnorm(uri) + + conn_key = scheme+":"+authority + if conn_key in self.connections: + conn = self.connections[conn_key] + else: + if not connection_type: + connection_type = (scheme == 'https') and HTTPSConnectionWithTimeout or HTTPConnectionWithTimeout + certs = list(self.certificates.iter(authority)) + if scheme == 'https' and certs: + conn = self.connections[conn_key] = connection_type(authority, key_file=certs[0][0], + cert_file=certs[0][1], timeout=self.timeout, proxy_info=self.proxy_info) + else: + conn = self.connections[conn_key] = connection_type(authority, timeout=self.timeout, proxy_info=self.proxy_info) + conn.set_debuglevel(debuglevel) + + if method in ["GET", "HEAD"] and 'range' not in headers: + headers['accept-encoding'] = 'compress, gzip' + + info = email.Message.Message() + cached_value = None + if self.cache: + cachekey = defrag_uri + cached_value = self.cache.get(cachekey) + if cached_value: + info = email.message_from_string(cached_value) + try: + content = cached_value.split('\r\n\r\n', 1)[1] + except IndexError: + self.cache.delete(cachekey) + cachekey = None + cached_value = None + else: + cachekey = None + + if method in ["PUT"] and self.cache and info.has_key('etag') and not self.ignore_etag and 'if-match' not in headers: + # http://www.w3.org/1999/04/Editing/ + headers['if-match'] = info['etag'] + + if method not in ["GET", "HEAD"] and self.cache and cachekey: + # RFC 2616 Section 13.10 + self.cache.delete(cachekey) + + if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers: + if info.has_key('-x-permanent-redirect-url'): + # Should cached permanent redirects be counted in our redirection count? For now, yes. + (response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1) + response.previous = Response(info) + response.previous.fromcache = True + else: + # Determine our course of action: + # Is the cached entry fresh or stale? + # Has the client requested a non-cached response? + # + # There seems to be three possible answers: + # 1. [FRESH] Return the cache entry w/o doing a GET + # 2. [STALE] Do the GET (but add in cache validators if available) + # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request + entry_disposition = _entry_disposition(info, headers) + + if entry_disposition == "FRESH": + if not cached_value: + info['status'] = '504' + content = "" + response = Response(info) + if cached_value: + response.fromcache = True + return (response, content) + + if entry_disposition == "STALE": + if info.has_key('etag') and not self.ignore_etag and not 'if-none-match' in headers: + headers['if-none-match'] = info['etag'] + if info.has_key('last-modified') and not 'last-modified' in headers: + headers['if-modified-since'] = info['last-modified'] + elif entry_disposition == "TRANSPARENT": + pass + + (response, new_content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey) + + if response.status == 304 and method == "GET": + # Rewrite the cache entry with the new end-to-end headers + # Take all headers that are in response + # and overwrite their values in info. 
+ # unless they are hop-by-hop, or are listed in the connection header. + + for key in _get_end2end_headers(response): + info[key] = response[key] + merged_response = Response(info) + if hasattr(response, "_stale_digest"): + merged_response._stale_digest = response._stale_digest + _updateCache(headers, merged_response, content, self.cache, cachekey) + response = merged_response + response.status = 200 + response.fromcache = True + + elif response.status == 200: + content = new_content + else: + self.cache.delete(cachekey) + content = new_content + else: + (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey) + except Exception, e: + if self.force_exception_to_status_code: + if isinstance(e, HttpLib2ErrorWithResponse): + response = e.response + content = e.content + response.status = 500 + response.reason = str(e) + elif isinstance(e, socket.timeout): + content = "Request Timeout" + response = Response( { + "content-type": "text/plain", + "status": "408", + "content-length": len(content) + }) + response.reason = "Request Timeout" + else: + content = str(e) + response = Response( { + "content-type": "text/plain", + "status": "400", + "content-length": len(content) + }) + response.reason = "Bad Request" + else: + raise + + + return (response, content) + + + +class Response(dict): + """An object more like email.Message than httplib.HTTPResponse.""" + + """Is this response from our local cache""" + fromcache = False + + """HTTP protocol version used by server. 10 for HTTP/1.0, 11 for HTTP/1.1. """ + version = 11 + + "Status code returned by server. " + status = 200 + + """Reason phrase returned by server.""" + reason = "Ok" + + previous = None + + def __init__(self, info): + # info is either an email.Message or + # an httplib.HTTPResponse object. + if isinstance(info, httplib.HTTPResponse): + for key, value in info.getheaders(): + self[key] = value + self.status = info.status + self['status'] = str(self.status) + self.reason = info.reason + self.version = info.version + elif isinstance(info, email.Message.Message): + for key, value in info.items(): + self[key] = value + self.status = int(self['status']) + else: + for key, value in info.iteritems(): + self[key] = value + self.status = int(self.get('status', self.status)) + + + def __getattr__(self, name): + if name == 'dict': + return self + else: + raise AttributeError, name Index: python-rest-client/rest_client/httplib2/iri2uri.py =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ python-rest-client/rest_client/httplib2/iri2uri.py 2009-11-10 13:26:57.000000000 -0400 @@ -0,0 +1,110 @@ +""" +iri2uri + +Converts an IRI to a URI. 
+ +""" +__author__ = "Joe Gregorio (joe@bitworking.org)" +__copyright__ = "Copyright 2006, Joe Gregorio" +__contributors__ = [] +__version__ = "1.0.0" +__license__ = "MIT" +__history__ = """ +""" + +import urlparse + + +# Convert an IRI to a URI following the rules in RFC 3987 +# +# The characters we need to enocde and escape are defined in the spec: +# +# iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD +# ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF +# / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD +# / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD +# / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD +# / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD +# / %xD0000-DFFFD / %xE1000-EFFFD + +escape_range = [ + (0xA0, 0xD7FF ), + (0xE000, 0xF8FF ), + (0xF900, 0xFDCF ), + (0xFDF0, 0xFFEF), + (0x10000, 0x1FFFD ), + (0x20000, 0x2FFFD ), + (0x30000, 0x3FFFD), + (0x40000, 0x4FFFD ), + (0x50000, 0x5FFFD ), + (0x60000, 0x6FFFD), + (0x70000, 0x7FFFD ), + (0x80000, 0x8FFFD ), + (0x90000, 0x9FFFD), + (0xA0000, 0xAFFFD ), + (0xB0000, 0xBFFFD ), + (0xC0000, 0xCFFFD), + (0xD0000, 0xDFFFD ), + (0xE1000, 0xEFFFD), + (0xF0000, 0xFFFFD ), + (0x100000, 0x10FFFD) +] + +def encode(c): + retval = c + i = ord(c) + for low, high in escape_range: + if i < low: + break + if i >= low and i <= high: + retval = "".join(["%%%2X" % ord(o) for o in c.encode('utf-8')]) + break + return retval + + +def iri2uri(uri): + """Convert an IRI to a URI. Note that IRIs must be + passed in a unicode strings. That is, do not utf-8 encode + the IRI before passing it into the function.""" + if isinstance(uri ,unicode): + (scheme, authority, path, query, fragment) = urlparse.urlsplit(uri) + authority = authority.encode('idna') + # For each character in 'ucschar' or 'iprivate' + # 1. encode as utf-8 + # 2. 
then %-encode each octet of that utf-8 + uri = urlparse.urlunsplit((scheme, authority, path, query, fragment)) + uri = "".join([encode(c) for c in uri]) + return uri + +if __name__ == "__main__": + import unittest + + class Test(unittest.TestCase): + + def test_uris(self): + """Test that URIs are invariant under the transformation.""" + invariant = [ + u"ftp://ftp.is.co.za/rfc/rfc1808.txt", + u"http://www.ietf.org/rfc/rfc2396.txt", + u"ldap://[2001:db8::7]/c=GB?objectClass?one", + u"mailto:John.Doe@example.com", + u"news:comp.infosystems.www.servers.unix", + u"tel:+1-816-555-1212", + u"telnet://192.0.2.16:80/", + u"urn:oasis:names:specification:docbook:dtd:xml:4.1.2" ] + for uri in invariant: + self.assertEqual(uri, iri2uri(uri)) + + def test_iri(self): + """ Test that the right type of escaping is done for each part of the URI.""" + self.assertEqual("http://xn--o3h.com/%E2%98%84", iri2uri(u"http://\N{COMET}.com/\N{COMET}")) + self.assertEqual("http://bitworking.org/?fred=%E2%98%84", iri2uri(u"http://bitworking.org/?fred=\N{COMET}")) + self.assertEqual("http://bitworking.org/#%E2%98%84", iri2uri(u"http://bitworking.org/#\N{COMET}")) + self.assertEqual("#%E2%98%84", iri2uri(u"#\N{COMET}")) + self.assertEqual("/fred?bar=%E2%98%9A#%E2%98%84", iri2uri(u"/fred?bar=\N{BLACK LEFT POINTING INDEX}#\N{COMET}")) + self.assertEqual("/fred?bar=%E2%98%9A#%E2%98%84", iri2uri(iri2uri(u"/fred?bar=\N{BLACK LEFT POINTING INDEX}#\N{COMET}"))) + self.assertNotEqual("/fred?bar=%E2%98%9A#%E2%98%84", iri2uri(u"/fred?bar=\N{BLACK LEFT POINTING INDEX}#\N{COMET}".encode('utf-8'))) + + unittest.main() + + Index: python-rest-client/rest_client/mimeTypes.py =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ python-rest-client/rest_client/mimeTypes.py 2009-11-10 13:26:57.000000000 -0400 @@ -0,0 +1,57 @@ +""" + Copyright (C) 2008 Benjamin O'Steen + + This file is part of python-fedoracommons. + + python-fedoracommons is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + python-fedoracommons is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with python-fedoracommons. If not, see <http://www.gnu.org/licenses/>. 
+""" + +__license__ = 'GPL http://www.gnu.org/licenses/gpl.txt' +__author__ = "Benjamin O'Steen <bosteen@gmail.com>" +__version__ = '0.1' + +class mimeTypes(object): + def getDictionary(self): + mimetype_to_extension = {} + extension_to_mimetype = {} + mimetype_to_extension['text/plain'] = 'txt' + mimetype_to_extension['text/xml'] = 'xml' + mimetype_to_extension['text/css'] = 'css' + mimetype_to_extension['text/javascript'] = 'js' + mimetype_to_extension['text/rtf'] = 'rtf' + mimetype_to_extension['text/calendar'] = 'ics' + mimetype_to_extension['application/msword'] = 'doc' + mimetype_to_extension['application/msexcel'] = 'xls' + mimetype_to_extension['application/x-msword'] = 'doc' + mimetype_to_extension['application/vnd.ms-excel'] = 'xls' + mimetype_to_extension['application/vnd.ms-powerpoint'] = 'ppt' + mimetype_to_extension['application/pdf'] = 'pdf' + mimetype_to_extension['text/comma-separated-values'] = 'csv' + + + mimetype_to_extension['image/jpeg'] = 'jpg' + mimetype_to_extension['image/gif'] = 'gif' + mimetype_to_extension['image/jpg'] = 'jpg' + mimetype_to_extension['image/tiff'] = 'tiff' + mimetype_to_extension['image/png'] = 'png' + + # And hacky reverse lookups + for mimetype in mimetype_to_extension: + extension_to_mimetype[mimetype_to_extension[mimetype]] = mimetype + + mimetype_extension_mapping = {} + mimetype_extension_mapping.update(mimetype_to_extension) + mimetype_extension_mapping.update(extension_to_mimetype) + + return mimetype_extension_mapping Index: python-rest-client/rest_client/restful_lib.py =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ python-rest-client/rest_client/restful_lib.py 2009-11-10 13:26:57.000000000 -0400 @@ -0,0 +1,129 @@ +""" + Copyright (C) 2008 Benjamin O'Steen + + This file is part of python-fedoracommons. + + python-fedoracommons is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + python-fedoracommons is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with python-fedoracommons. If not, see <http://www.gnu.org/licenses/>. 
+""" + +__license__ = 'GPL http://www.gnu.org/licenses/gpl.txt' +__author__ = "Benjamin O'Steen <bosteen@gmail.com>" +__version__ = '0.1' + +import httplib2 +import urlparse +import urllib +import base64 +from base64 import encodestring + +from mimeTypes import * + +import mimetypes + +from cStringIO import StringIO + +class Connection: + def __init__(self, base_url, username=None, password=None): + self.base_url = base_url + self.username = username + m = mimeTypes() + self.mimetypes = m.getDictionary() + + self.url = urlparse.urlparse(base_url) + + (scheme, netloc, path, query, fragment) = urlparse.urlsplit(base_url) + + self.scheme = scheme + self.host = netloc + self.path = path + + # Create Http class with support for Digest HTTP Authentication, if necessary + self.h = httplib2.Http(".cache") + self.h.follow_all_redirects = True + if username and password: + self.h.add_credentials(username, password) + + def request_get(self, resource, args = None, headers={}): + return self.request(resource, "get", args, headers=headers) + + def request_delete(self, resource, args = None, headers={}): + return self.request(resource, "delete", args, headers=headers) + + def request_head(self, resource, args = None, headers={}): + return self.request(resource, "head", args, headers=headers) + + def request_post(self, resource, args = None, body = None, filename=None, headers={}): + return self.request(resource, "post", args , body = body, filename=filename, headers=headers) + + def request_put(self, resource, args = None, body = None, filename=None, headers={}): + return self.request(resource, "put", args , body = body, filename=filename, headers=headers) + + def get_content_type(self, filename): + extension = filename.split('.')[-1] + guessed_mimetype = self.mimetypes.get(extension, mimetypes.guess_type(filename)[0]) + return guessed_mimetype or 'application/octet-stream' + + def request(self, resource, method = "get", args = None, body = None, filename=None, headers={}): + params = None + path = resource + headers['User-Agent'] = 'Basic Agent' + + BOUNDARY = u'00hoYUXOnLD5RQ8SKGYVgLLt64jejnMwtO7q8XE1' + CRLF = u'\r\n' + + if filename and body: + #fn = open(filename ,'r') + #chunks = fn.read() + #fn.close() + + # Attempt to find the Mimetype + content_type = self.get_content_type(filename) + headers['Content-Type']='multipart/form-data; boundary='+BOUNDARY + encode_string = StringIO() + encode_string.write(CRLF) + encode_string.write(u'--' + BOUNDARY + CRLF) + encode_string.write(u'Content-Disposition: form-data; name="file"; filename="%s"' % filename) + encode_string.write(CRLF) + encode_string.write(u'Content-Type: %s' % content_type + CRLF) + encode_string.write(CRLF) + encode_string.write(body) + encode_string.write(CRLF) + encode_string.write(u'--' + BOUNDARY + u'--' + CRLF) + + body = encode_string.getvalue() + headers['Content-Length'] = str(len(body)) + elif body: + if not headers.get('Content-Type', None): + headers['Content-Type']='text/xml' + headers['Content-Length'] = str(len(body)) + else: + headers['Content-Type']='text/xml' + + if args: + path += u"?" 
+ urllib.urlencode(args) + + request_path = [] + if self.path != "/": + if self.path.endswith('/'): + request_path.append(self.path[:-1]) + else: + request_path.append(self.path) + if path.startswith('/'): + request_path.append(path[1:]) + else: + request_path.append(path) + + resp, content = self.h.request(u"%s://%s%s" % (self.scheme, self.host, u'/'.join(request_path)), method.upper(), body=body, headers=headers ) + + return {u'headers':resp, u'body':content.decode('UTF-8')} Index: python-rest-client/rest_client/talis.py =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ python-rest-client/rest_client/talis.py 2009-11-10 13:26:57.000000000 -0400 @@ -0,0 +1,262 @@ +import urllib2 + +from urllib2 import urlparse + +from restful_lib import Connection + +from gae_restful_lib import GAE_Connection + +from datetime import datetime + +from StringIO import StringIO + +from xml.etree import ElementTree as ET + +SPARQL_ENDPOINT = "/services/sparql" +META_ENDPOINT = "/meta" +CONTENT_ENDPOINT = "/items" +JOB_REQUESTS = "/jobs" +SNAPSHOTS = "/snapshots" +SNAPSHOT_TEMPLATE = "/snapshots/%s" + +RESET_STORE_TEMPLATE = u"""<rdf:RDF + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" + xmlns:bf="http://schemas.talis.com/2006/bigfoot/configuration#" > + <bf:JobRequest> + <rdfs:label>%s</rdfs:label> + <bf:jobType rdf:resource="http://schemas.talis.com/2006/bigfoot/configuration#ResetDataJob"/> + <bf:startTime>%sZ</bf:startTime> + </bf:JobRequest> + </rdf:RDF>""" + +SNAPSHOT_STORE_TEMPLATE = """<rdf:RDF + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" + xmlns:bf="http://schemas.talis.com/2006/bigfoot/configuration#" > + <bf:JobRequest> + <rdfs:label>%s</rdfs:label> + <bf:jobType rdf:resource="http://schemas.talis.com/2006/bigfoot/configuration#SnapshotJob"/> + <bf:startTime>%sZ</bf:startTime> + </bf:JobRequest> + </rdf:RDF>""" + +SNAPSHOT_RESTORE_TEMPLATE = """<rdf:RDF + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" + xmlns:bf="http://schemas.talis.com/2006/bigfoot/configuration#" > + <bf:JobRequest> + <rdfs:label>%s</rdfs:label> + <bf:jobType rdf:resource="http://schemas.talis.com/2006/bigfoot/configuration#RestoreJob"/> + <bf:snapshotUri rdf:resource="%s" /> + <bf:startTime>%sZ</bf:startTime> + </bf:JobRequest> + </rdf:RDF>""" + +class RDFFormatException(Exception): + def __init__(self, value): + self.value = value + def __str__(self): + return repr(self.value) + +class Store(): + def __init__(self, base_store_url, username=None, password=None): + """ Base URL for the store should be pretty self-explanatory. E.g. 
something like + "http://api.talis.com/stores/store_name" + Only needs to enter the username/password if this class is going to tinker + with things.""" + if base_store_url.endswith('/'): + base_store_url = base_store_url[:-1] + + self.base_store_url = base_store_url + # Split the given URL + if base_store_url: + self.conn = Connection(base_store_url, username=username, password=password) + + def does_snapshot_exist(self, snapshot_filename): + # Test to see if snapshot exists: + snapshot_path = SNAPSHOT_TEMPLATE % snapshot_filename + + response = self.conn.request(snapshot_path, method = "HEAD") + + if response.get('headers') and response.get('headers').get('status'): + status = response.get('headers').get('status') + + if status in ['200', '204']: + return True + elif status.startswith('4'): + return False + # else: raise Error? + + return False + + def schedule_reset_data(self, label, at_time=None): + """Will request that the store is emptied, and label the request. + If a time is given as an ISO8601 formatted string, this will be + the scheduled time for the snapshot. Otherwise, it will use the current time.""" + if not at_time: + at_time=datetime.utcnow().isoformat().split('.')[0] + + snapshot_request = RESET_STORE_TEMPLATE % (label, at_time) + + return self.conn.request_post(JOB_REQUESTS, body = snapshot_request, headers={'Content-Type':'application/rdf+xml'}) + + def schedule_snapshot_data(self, label, at_time=None): + """Will request a snapshot be made of the store. + If a time is given as an ISO8601 formatted string, this will be + the scheduled time for the snapshot. Otherwise, it will use the current time.""" + if not at_time: + at_time=datetime.utcnow().isoformat().split('.')[0] + + snapshot_request = SNAPSHOT_STORE_TEMPLATE % (label, at_time) + + return self.conn.request_post(JOB_REQUESTS, body = snapshot_request, headers={'Content-Type':'application/rdf+xml'}) + + def schedule_snapshot_restore(self, label, snapshot_filename, at_time=None): + """Will request that the store is restored from a snapshot. If a time is given as + an ISO8601 formatted string, this will be the scheduled time for + the recovery. Otherwise, it will use the current time.""" + if not at_time: + at_time=datetime.utcnow().isoformat().split('.')[0] + + # Test to see if snapshot exists: + snapshot_path = SNAPSHOT_TEMPLATE % snapshot_filename + + if self.does_snapshot_exist(snapshot_filename): + snapshot_uri = "%s%s" % (self.base_store_url, snapshot_path) + snapshot_request = SNAPSHOT_RESTORE_TEMPLATE % (label, snapshot_uri, at_time) + return self.conn.request_post(JOB_REQUESTS, body = snapshot_request, headers={'Content-Type':'application/rdf+xml'}) + + def submit_rdfxml(self, rdf_text): + """Puts the given RDF/XML into the Talis Store""" + return self._put_rdf(rdf_text, mimetype="application/rdf+xml") + + def _put_rdf(self, rdf_text, mimetype="application/rdf+xml"): + """Placeholder for allowing other serialisation types to be put into a + Talis store, whether the conversion takes place here, or if the Talis + store starts to accept other formats.""" + if rdf_text: + request_headers = {} + if mimetype not in ['application/rdf+xml']: + raise RDFFormatException("%s is not an allowed RDF serialisation format" % mimetype) + request_headers['Content-Type'] = mimetype + return self.conn.request_post(META_ENDPOINT, body=rdf_text, headers=request_headers) + + def _query_sparql_service(self, query, args={}): + """Low-level SPARQL query - returns the message and response headers from the server. 
+ You may be looking for Store.sparql instead of this.""" + passed_args = {'query':query} + passed_args.update(args) + return self.conn.request_get(SPARQL_ENDPOINT, args=passed_args, headers={'Content-type':'application/x-www-form-urlencoded'}) + + def _query_search_service(self, query, args={}): + """Low-level content box query - returns the message and response headers from the server. + You may be looking for Store.search instead of this.""" + + passed_args = {'query':query} + passed_args.update(args) + + return self.conn.request_get(CONTENT_ENDPOINT, args=passed_args, headers={'Content-type':'application/x-www-form-urlencoded'} ) + + def _list_snapshots(self, passed_args={}): + return self.conn.request_get(SNAPSHOTS, args=passed_args, headers={}) + +############################################################################## +# Convenience Functions +############################################################################## + + def submit_rdfxml_from_url(self, url_to_file, headers={"Accept":"application/rdf+xml"}): + """Convenience method - downloads the file from a given url, and then pushes that + into the meta store. Currently, it doesn't put it through a parse-> reserialise + step, so that it could handle more than rdf/xml on the way it but it is a + future possibility.""" + import_rdf_connection = Connection(url_to_file) + response = import_rdf_connection.request_get("", headers=headers) + + if response.get('headers') and response.get('headers').get('status') in ['200', '204']: + request_headers = {} + + # Lowercase all response header fields, to make matching easier. + # According to HTTP spec, they should be case-insensitive + response_headers = response['headers'] + for header in response_headers: + response_headers[header.lower()] = response_headers[header] + + # Set the body content + body = response.get('body').encode('UTF-8') + + # Get the response mimetype + rdf_type = response_headers.get('content-type', None) + + return self._put_rdf(body, mimetype=rdf_type) + + def sparql(self, query, args={}): + """Performs a SPARQL query and simply returns the body of the response if successful + - if there is an issue, such as a code 404 or 500, this method will return False. + + Use the _query_sparql_service method to get hold of + the complete response in this case.""" + response = self._query_sparql_service(query, args) + headers = response.get('headers') + + status = headers.get('status', headers.get('Status')) + + if status in ['200', 200, '204', 204]: + return response.get('body').encode('UTF-8') + else: + return False + + def search(self, query, args={}): + """Performs a search query and simply returns the body of the response if successful + - if there is an issue, such as a code 404 or 500, this method will return False. 
+ + Use the _query_search_service method to get hold of + the complete response in this case.""" + response = self._query_search_service(query, args) + headers = response.get('headers') + + status = headers.get('status', headers.get('Status')) + + if status in ['200', 200, '204', 204]: + parsed_atom = Atom_Search_Results(response.get('body').encode('UTF-8')) + return parsed_atom.get_item_list() + else: + return False + +class Item(): + def __init__(self): + self.title = None + self.link = None + +class Atom_Search_Results(): + def __init__(self, atom_text): + self.load_atom_search(atom_text) + + def load_atom_search(self, atom_text): + self.atom = ET.fromstring(atom_text) + + def get_item_list(self): + if self.atom: + items = [] + for item in self.atom.findall('{http://purl.org/rss/1.0/}item'): + item_fields = Item() + item_fields.title = item.find('{http://purl.org/rss/1.0/}title').text + item_fields.link = item.find('{http://purl.org/rss/1.0/}link').text + items.append(item_fields) + + return items + +class GAE_Store(Store): + def __init__(self, base_store_url, username=None, password=None): + """ Base URL for the store should be pretty self-explanatory. E.g. something like + "http://api.talis.com/stores/store_name" + The username and password will not do anything, until the Google app engine's + fetch library handles authentication, if ever.""" + if base_store_url.endswith('/'): + base_store_url = base_store_url[:-1] + + self.base_store_url = base_store_url + # Split the given URL + if base_store_url: + self.conn = GAE_Connection(base_store_url, username, password) + Index: python-rest-client/restful_lib.py =================================================================== --- python-rest-client.orig/restful_lib.py 2008-05-15 06:37:57.000000000 -0300 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 @@ -1,129 +0,0 @@ -""" - Copyright (C) 2008 Benjamin O'Steen - - This file is part of python-fedoracommons. - - python-fedoracommons is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - python-fedoracommons is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with python-fedoracommons. If not, see <http://www.gnu.org/licenses/>. 
-""" - -__license__ = 'GPL http://www.gnu.org/licenses/gpl.txt' -__author__ = "Benjamin O'Steen <bosteen@gmail.com>" -__version__ = '0.1' - -import httplib2 -import urlparse -import urllib -import base64 -from base64 import encodestring - -from mimeTypes import * - -import mimetypes - -from cStringIO import StringIO - -class Connection: - def __init__(self, base_url, username=None, password=None): - self.base_url = base_url - self.username = username - m = mimeTypes() - self.mimetypes = m.getDictionary() - - self.url = urlparse.urlparse(base_url) - - (scheme, netloc, path, query, fragment) = urlparse.urlsplit(base_url) - - self.scheme = scheme - self.host = netloc - self.path = path - - # Create Http class with support for Digest HTTP Authentication, if necessary - self.h = httplib2.Http(".cache") - self.h.follow_all_redirects = True - if username and password: - self.h.add_credentials(username, password) - - def request_get(self, resource, args = None, headers={}): - return self.request(resource, "get", args, headers=headers) - - def request_delete(self, resource, args = None, headers={}): - return self.request(resource, "delete", args, headers=headers) - - def request_head(self, resource, args = None, headers={}): - return self.request(resource, "head", args, headers=headers) - - def request_post(self, resource, args = None, body = None, filename=None, headers={}): - return self.request(resource, "post", args , body = body, filename=filename, headers=headers) - - def request_put(self, resource, args = None, body = None, filename=None, headers={}): - return self.request(resource, "put", args , body = body, filename=filename, headers=headers) - - def get_content_type(self, filename): - extension = filename.split('.')[-1] - guessed_mimetype = self.mimetypes.get(extension, mimetypes.guess_type(filename)[0]) - return guessed_mimetype or 'application/octet-stream' - - def request(self, resource, method = "get", args = None, body = None, filename=None, headers={}): - params = None - path = resource - headers['User-Agent'] = 'Basic Agent' - - BOUNDARY = u'00hoYUXOnLD5RQ8SKGYVgLLt64jejnMwtO7q8XE1' - CRLF = u'\r\n' - - if filename and body: - #fn = open(filename ,'r') - #chunks = fn.read() - #fn.close() - - # Attempt to find the Mimetype - content_type = self.get_content_type(filename) - headers['Content-Type']='multipart/form-data; boundary='+BOUNDARY - encode_string = StringIO() - encode_string.write(CRLF) - encode_string.write(u'--' + BOUNDARY + CRLF) - encode_string.write(u'Content-Disposition: form-data; name="file"; filename="%s"' % filename) - encode_string.write(CRLF) - encode_string.write(u'Content-Type: %s' % content_type + CRLF) - encode_string.write(CRLF) - encode_string.write(body) - encode_string.write(CRLF) - encode_string.write(u'--' + BOUNDARY + u'--' + CRLF) - - body = encode_string.getvalue() - headers['Content-Length'] = str(len(body)) - elif body: - if not headers.get('Content-Type', None): - headers['Content-Type']='text/xml' - headers['Content-Length'] = str(len(body)) - else: - headers['Content-Type']='text/xml' - - if args: - path += u"?" 
+ urllib.urlencode(args) - - request_path = [] - if self.path != "/": - if self.path.endswith('/'): - request_path.append(self.path[:-1]) - else: - request_path.append(self.path) - if path.startswith('/'): - request_path.append(path[1:]) - else: - request_path.append(path) - - resp, content = self.h.request(u"%s://%s%s" % (self.scheme, self.host, u'/'.join(request_path)), method.upper(), body=body, headers=headers ) - - return {u'headers':resp, u'body':content.decode('UTF-8')} Index: python-rest-client/talis.py =================================================================== --- python-rest-client.orig/talis.py 2008-06-03 06:08:58.000000000 -0300 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 @@ -1,262 +0,0 @@ -import urllib2 - -from urllib2 import urlparse - -from restful_lib import Connection - -from gae_restful_lib import GAE_Connection - -from datetime import datetime - -from StringIO import StringIO - -from xml.etree import ElementTree as ET - -SPARQL_ENDPOINT = "/services/sparql" -META_ENDPOINT = "/meta" -CONTENT_ENDPOINT = "/items" -JOB_REQUESTS = "/jobs" -SNAPSHOTS = "/snapshots" -SNAPSHOT_TEMPLATE = "/snapshots/%s" - -RESET_STORE_TEMPLATE = u"""<rdf:RDF - xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" - xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" - xmlns:bf="http://schemas.talis.com/2006/bigfoot/configuration#" > - <bf:JobRequest> - <rdfs:label>%s</rdfs:label> - <bf:jobType rdf:resource="http://schemas.talis.com/2006/bigfoot/configuration#ResetDataJob"/> - <bf:startTime>%sZ</bf:startTime> - </bf:JobRequest> - </rdf:RDF>""" - -SNAPSHOT_STORE_TEMPLATE = """<rdf:RDF - xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" - xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" - xmlns:bf="http://schemas.talis.com/2006/bigfoot/configuration#" > - <bf:JobRequest> - <rdfs:label>%s</rdfs:label> - <bf:jobType rdf:resource="http://schemas.talis.com/2006/bigfoot/configuration#SnapshotJob"/> - <bf:startTime>%sZ</bf:startTime> - </bf:JobRequest> - </rdf:RDF>""" - -SNAPSHOT_RESTORE_TEMPLATE = """<rdf:RDF - xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" - xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" - xmlns:bf="http://schemas.talis.com/2006/bigfoot/configuration#" > - <bf:JobRequest> - <rdfs:label>%s</rdfs:label> - <bf:jobType rdf:resource="http://schemas.talis.com/2006/bigfoot/configuration#RestoreJob"/> - <bf:snapshotUri rdf:resource="%s" /> - <bf:startTime>%sZ</bf:startTime> - </bf:JobRequest> - </rdf:RDF>""" - -class RDFFormatException(Exception): - def __init__(self, value): - self.value = value - def __str__(self): - return repr(self.value) - -class Store(): - def __init__(self, base_store_url, username=None, password=None): - """ Base URL for the store should be pretty self-explanatory. E.g. 
something like - "http://api.talis.com/stores/store_name" - Only needs to enter the username/password if this class is going to tinker - with things.""" - if base_store_url.endswith('/'): - base_store_url = base_store_url[:-1] - - self.base_store_url = base_store_url - # Split the given URL - if base_store_url: - self.conn = Connection(base_store_url, username=username, password=password) - - def does_snapshot_exist(self, snapshot_filename): - # Test to see if snapshot exists: - snapshot_path = SNAPSHOT_TEMPLATE % snapshot_filename - - response = self.conn.request(snapshot_path, method = "HEAD") - - if response.get('headers') and response.get('headers').get('status'): - status = response.get('headers').get('status') - - if status in ['200', '204']: - return True - elif status.startswith('4'): - return False - # else: raise Error? - - return False - - def schedule_reset_data(self, label, at_time=None): - """Will request that the store is emptied, and label the request. - If a time is given as an ISO8601 formatted string, this will be - the scheduled time for the snapshot. Otherwise, it will use the current time.""" - if not at_time: - at_time=datetime.utcnow().isoformat().split('.')[0] - - snapshot_request = RESET_STORE_TEMPLATE % (label, at_time) - - return self.conn.request_post(JOB_REQUESTS, body = snapshot_request, headers={'Content-Type':'application/rdf+xml'}) - - def schedule_snapshot_data(self, label, at_time=None): - """Will request a snapshot be made of the store. - If a time is given as an ISO8601 formatted string, this will be - the scheduled time for the snapshot. Otherwise, it will use the current time.""" - if not at_time: - at_time=datetime.utcnow().isoformat().split('.')[0] - - snapshot_request = SNAPSHOT_STORE_TEMPLATE % (label, at_time) - - return self.conn.request_post(JOB_REQUESTS, body = snapshot_request, headers={'Content-Type':'application/rdf+xml'}) - - def schedule_snapshot_restore(self, label, snapshot_filename, at_time=None): - """Will request that the store is restored from a snapshot. If a time is given as - an ISO8601 formatted string, this will be the scheduled time for - the recovery. Otherwise, it will use the current time.""" - if not at_time: - at_time=datetime.utcnow().isoformat().split('.')[0] - - # Test to see if snapshot exists: - snapshot_path = SNAPSHOT_TEMPLATE % snapshot_filename - - if self.does_snapshot_exist(snapshot_filename): - snapshot_uri = "%s%s" % (self.base_store_url, snapshot_path) - snapshot_request = SNAPSHOT_RESTORE_TEMPLATE % (label, snapshot_uri, at_time) - return self.conn.request_post(JOB_REQUESTS, body = snapshot_request, headers={'Content-Type':'application/rdf+xml'}) - - def submit_rdfxml(self, rdf_text): - """Puts the given RDF/XML into the Talis Store""" - return self._put_rdf(rdf_text, mimetype="application/rdf+xml") - - def _put_rdf(self, rdf_text, mimetype="application/rdf+xml"): - """Placeholder for allowing other serialisation types to be put into a - Talis store, whether the conversion takes place here, or if the Talis - store starts to accept other formats.""" - if rdf_text: - request_headers = {} - if mimetype not in ['application/rdf+xml']: - raise RDFFormatException("%s is not an allowed RDF serialisation format" % mimetype) - request_headers['Content-Type'] = mimetype - return self.conn.request_post(META_ENDPOINT, body=rdf_text, headers=request_headers) - - def _query_sparql_service(self, query, args={}): - """Low-level SPARQL query - returns the message and response headers from the server. 
- You may be looking for Store.sparql instead of this.""" - passed_args = {'query':query} - passed_args.update(args) - return self.conn.request_get(SPARQL_ENDPOINT, args=passed_args, headers={'Content-type':'application/x-www-form-urlencoded'}) - - def _query_search_service(self, query, args={}): - """Low-level content box query - returns the message and response headers from the server. - You may be looking for Store.search instead of this.""" - - passed_args = {'query':query} - passed_args.update(args) - - return self.conn.request_get(CONTENT_ENDPOINT, args=passed_args, headers={'Content-type':'application/x-www-form-urlencoded'} ) - - def _list_snapshots(self, passed_args={}): - return self.conn.request_get(SNAPSHOTS, args=passed_args, headers={}) - -############################################################################## -# Convenience Functions -############################################################################## - - def submit_rdfxml_from_url(self, url_to_file, headers={"Accept":"application/rdf+xml"}): - """Convenience method - downloads the file from a given url, and then pushes that - into the meta store. Currently, it doesn't put it through a parse-> reserialise - step, so that it could handle more than rdf/xml on the way it but it is a - future possibility.""" - import_rdf_connection = Connection(url_to_file) - response = import_rdf_connection.request_get("", headers=headers) - - if response.get('headers') and response.get('headers').get('status') in ['200', '204']: - request_headers = {} - - # Lowercase all response header fields, to make matching easier. - # According to HTTP spec, they should be case-insensitive - response_headers = response['headers'] - for header in response_headers: - response_headers[header.lower()] = response_headers[header] - - # Set the body content - body = response.get('body').encode('UTF-8') - - # Get the response mimetype - rdf_type = response_headers.get('content-type', None) - - return self._put_rdf(body, mimetype=rdf_type) - - def sparql(self, query, args={}): - """Performs a SPARQL query and simply returns the body of the response if successful - - if there is an issue, such as a code 404 or 500, this method will return False. - - Use the _query_sparql_service method to get hold of - the complete response in this case.""" - response = self._query_sparql_service(query, args) - headers = response.get('headers') - - status = headers.get('status', headers.get('Status')) - - if status in ['200', 200, '204', 204]: - return response.get('body').encode('UTF-8') - else: - return False - - def search(self, query, args={}): - """Performs a search query and simply returns the body of the response if successful - - if there is an issue, such as a code 404 or 500, this method will return False. 
- - Use the _query_search_service method to get hold of - the complete response in this case.""" - response = self._query_search_service(query, args) - headers = response.get('headers') - - status = headers.get('status', headers.get('Status')) - - if status in ['200', 200, '204', 204]: - parsed_atom = Atom_Search_Results(response.get('body').encode('UTF-8')) - return parsed_atom.get_item_list() - else: - return False - -class Item(): - def __init__(self): - self.title = None - self.link = None - -class Atom_Search_Results(): - def __init__(self, atom_text): - self.load_atom_search(atom_text) - - def load_atom_search(self, atom_text): - self.atom = ET.fromstring(atom_text) - - def get_item_list(self): - if self.atom: - items = [] - for item in self.atom.findall('{http://purl.org/rss/1.0/}item'): - item_fields = Item() - item_fields.title = item.find('{http://purl.org/rss/1.0/}title').text - item_fields.link = item.find('{http://purl.org/rss/1.0/}link').text - items.append(item_fields) - - return items - -class GAE_Store(Store): - def __init__(self, base_store_url, username=None, password=None): - """ Base URL for the store should be pretty self-explanatory. E.g. something like - "http://api.talis.com/stores/store_name" - The username and password will not do anything, until the Google app engine's - fetch library handles authentication, if ever.""" - if base_store_url.endswith('/'): - base_store_url = base_store_url[:-1] - - self.base_store_url = base_store_url - # Split the given URL - if base_store_url: - self.conn = GAE_Connection(base_store_url, username, password) - Index: python-rest-client/MANIFEST.in =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ python-rest-client/MANIFEST.in 2009-11-10 13:26:57.000000000 -0400 @@ -0,0 +1,4 @@ +include MANIFEST +include MANIFEST.in +include setup.py +global-exclude .svn Index: python-rest-client/rest_client/__init__.py =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ python-rest-client/rest_client/__init__.py 2009-11-10 13:26:57.000000000 -0400 @@ -0,0 +1 @@ +from restful_lib import * Index: python-rest-client/setup.py =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ python-rest-client/setup.py 2009-11-10 13:31:39.000000000 -0400 @@ -0,0 +1,18 @@ +#!/usr/bin/env python +from distutils.core import setup + +long_description = """A REST Client for use in python, using httplib2 and urllib2. + +Includes a version that is suitable for use in the Google App Engine environment. +""" + +setup(name = "python-rest-client", + version = "0.2", + description = "A REST Client for Python", + long_description = long_description, + author = "Benjamin O'Steen", + author_email = "bosteen@gmail.com", + url = "http://code.google.com/p/python-rest-client/", + license = "GPL v3", + packages = ['rest_client', 'rest_client.httplib2'], + )