Rev 2526: Finish http refactoring. Test suite passing. in file:///v/home/vila/src/experimental/reuse.transports/

Thu Jun 7 12:36:29 BST 2007

At file:///v/home/vila/src/experimental/reuse.transports/

------------------------------------------------------------
revno: 2526
revision-id: v.ladeuil+lp at free.fr-20070607113627-2fudc24suivry84k
parent: v.ladeuil+lp at free.fr-20070607112936-xkiqdgbkjibjjkh6
committer: Vincent Ladeuil <v.ladeuil+lp at free.fr>
branch nick: reuse.transports
timestamp: Thu 2007-06-07 13:36:27 +0200
message:
  Finish http refactoring. Test suite passing.
  
  * bzrlib/transport/http/_urllib.py:
  (HttpTransport_urllib.__init__): Simplified.
  (HttpTransport_urllib._remote_path): New method. Get rid of
  authinfo only for urllib2 purposes.
  (HttpTransport_urllib._perform): Rewrite the connection sync
  between transport and request.
  (HttpTransport_urllib._get, HttpTransport_urllib._head,
  HttpTransport_urllib._post): Updated for the new connection handling.
  
  * bzrlib/transport/http/_pycurl.py:
  (PyCurlTransport.__init__): Simplified.
  (PyCurlTransport._get_curl): New method.
  (PyCurlTransport.has, PyCurlTransport._get_full)
  (PyCurlTransport._get_ranged, PyCurlTransport._post): Use _get_curl().
modified:
  bzrlib/transport/http/_pycurl.py pycurlhttp.py-20060110060940-4e2a705911af77a6
  bzrlib/transport/http/_urllib.py _urlgrabber.py-20060113083826-0bbf7d992fbf090c
-------------- next part --------------
=== modified file 'bzrlib/transport/http/_pycurl.py'

--- a/bzrlib/transport/http/_pycurl.py	2007-06-01 20:26:46 +0000
+++ b/bzrlib/transport/http/_pycurl.py	2007-06-07 11:36:27 +0000
@@ -88,7 +88,7 @@
     """
 
     def __init__(self, base, from_transport=None):
-        super(PyCurlTransport, self).__init__(base)
+        super(PyCurlTransport, self).__init__(base, from_transport)
         if base.startswith('https'):
             # Check availability of https into pycurl supported
             # protocols
@@ -96,11 +96,17 @@
             if 'https' not in supported:
                 raise DependencyNotPresent('pycurl', 'no https support')
         self.cabundle = ca_bundle.get_ca_path()
-        if from_transport is not None:
-            self._curl = from_transport._curl
-        else:
-            mutter('using pycurl %s' % pycurl.version)
-            self._curl = pycurl.Curl()
+
+    def _get_curl(self):
+        connection = self._get_connection()
+        if connection is None:
+            # First connection ever. There are no credentials for pycurl:
+            # either the password was embedded in the URL or it's not needed.
+            # The connection for pycurl is just the Curl object; it will not
+            # connect until the first request.
+            connection = pycurl.Curl()
+            self._set_connection(connection, None)
+        return connection
 
     def should_cache(self):
         """Return True if the data pulled across should be cached locally.
@@ -111,7 +117,7 @@
         """See Transport.has()"""
         # We set NO BODY=0 in _get_full, so it should be safe
         # to re-use the non-range curl object
-        curl = self._curl
+        curl = self._get_curl()
         abspath = self._remote_path(relpath)
         curl.setopt(pycurl.URL, abspath)
         self._set_curl_options(curl)
@@ -174,7 +180,7 @@
 
     def _get_full(self, relpath):
         """Make a request for the entire file"""
-        curl = self._curl
+        curl = self._get_curl()
         abspath, data, header = self._setup_get_request(curl, relpath)
         self._curl_perform(curl, header)
 
@@ -191,7 +197,7 @@
 
     def _get_ranged(self, relpath, ranges, tail_amount):
         """Make a request for just part of the file."""
-        curl = self._curl
+        curl = self._get_curl()
         abspath, data, header = self._setup_get_request(curl, relpath)
 
         range_header = self.attempted_range_header(ranges, tail_amount)
@@ -212,10 +218,10 @@
 
     def _post(self, body_bytes):
         fake_file = StringIO(body_bytes)
-        curl = self._curl
-        # Other places that use _base_curl for GET requests explicitly set
-        # HTTPGET, so it should be safe to re-use the same object for both GETs
-        # and POSTs.
+        curl = self._get_curl()
+        # Other places that use the Curl object (returned by _get_curl)
+        # for GET requests explicitly set HTTPGET, so it should be safe to
+        # re-use the same object for both GETs and POSTs.
         curl.setopt(pycurl.POST, 1)
         curl.setopt(pycurl.POSTFIELDSIZE, len(body_bytes))
         curl.setopt(pycurl.READFUNCTION, fake_file.read)

=== modified file 'bzrlib/transport/http/_urllib.py'
--- a/bzrlib/transport/http/_urllib.py	2007-06-01 20:26:46 +0000
+++ b/bzrlib/transport/http/_urllib.py	2007-06-07 11:36:27 +0000
@@ -18,7 +18,10 @@
 import urllib
 import urlparse
 
-from bzrlib import errors
+from bzrlib import (
+    errors,
+    urlutils,
+    )
 from bzrlib.trace import mutter
 from bzrlib.transport import register_urlparse_netloc_protocol
 from bzrlib.transport.http import HttpTransportBase
@@ -45,59 +48,61 @@
     _opener_class = Opener
 
     def __init__(self, base, from_transport=None):
-        """Set the base path where files will be stored."""
+        super(HttpTransport_urllib, self).__init__(base, from_transport)
         if from_transport is not None:
-            super(HttpTransport_urllib, self).__init__(base, from_transport)
-            self._connection = from_transport._connection
-            self._auth = from_transport._auth
-            self._proxy_auth = from_transport._proxy_auth
-
             self._opener = from_transport._opener
         else:
-            # urllib2 will be confused if it find authentication
-            # info in the urls. So we handle them separatly.
-            # Note: we don't need to when cloning because it was
-            # already done.
-            clean_base, user, password = extract_credentials(base)
-            super(HttpTransport_urllib, self).__init__(clean_base,
-                                                       from_transport)
-            self._connection = None
             self._opener = self._opener_class()
 
-            authuri = extract_authentication_uri(self._remote_path(self._path))
-            self._auth = {'user': user, 'password': password,
-                          'authuri': authuri}
+    def _remote_path(self, relpath):
+        """Produce absolute path, adjusting protocol."""
+        relative = urlutils.unescape(relpath).encode('utf-8')
+        path = self._combine_paths(self._path, relative)
+        # urllib2 will be confused if it finds authentication
+        # info (user, password) in the urls. So we handle them separately.
+        return self._unsplit_url(self._unqualified_scheme,
+                                 None, None, self._host, self._port, path)
+
+    def _perform(self, request):
+        """Send the request to the server and handles common errors.
+
+        :returns: urllib2 Response object
+        """
+        connection = self._get_connection()
+        if connection is not None:
+            # Give back shared info
+            request.connection = connection
+            (auth, proxy_auth) = self._get_credentials()
+        else:
+            # First request, initialize credentials
+            user = self._user
+            password = self._password
+            authuri = self._remote_path('.')
+            auth = {'user': user, 'password': password, 'authuri': authuri}
+
             if user and password is not None: # '' is a valid password
                 # Make the (user, password) available to urllib2
                 # We default to a realm of None to catch them all.
                 self._opener.password_manager.add_password(None, authuri,
                                                            user, password)
-            self._proxy_auth = {}
-
-    def _perform(self, request):
-        """Send the request to the server and handles common errors.
-
-        :returns: urllib2 Response object
-        """
-        if self._connection is not None:
-            # Give back shared info
-            request.connection = self._connection
+            proxy_auth = {}
         # Ensure authentication info is provided
-        request.auth = self._auth
-        request.proxy_auth = self._proxy_auth
+        request.auth = auth
+        request.proxy_auth = proxy_auth
 
         mutter('%s: [%s]' % (request.method, request.get_full_url()))
         if self._debuglevel > 0:
             print 'perform: %s base: %s, url: %s' % (request.method, self.base,
                                                      request.get_full_url())
         response = self._opener.open(request)
-        if self._connection is None:
-            # Acquire connection when the first request is able
-            # to connect to the server
-            self._connection = request.connection
-        # Always get auth parameters, they may change
-        self._auth = request.auth
-        self._proxy_auth = request.proxy_auth
+        if self._get_connection() is not request.connection:
+            # First connection or reconnection
+            self._set_connection(request.connection,
+                                 (request.auth, request.proxy_auth))
+        else:
+            # http may change the credentials while keeping the
+            # connection open
+            self._update_credentials((request.auth, request.proxy_auth))
 
         code = response.code
         if request.follow_redirections is False \
@@ -129,12 +134,12 @@
 
         code = response.code
         if code == 404: # not found
-            self._connection.fake_close()
+            self._get_connection().fake_close()
             raise errors.NoSuchFile(abspath)
 
         data = handle_response(abspath, code, response.headers, response)
         # Close response to free the httplib.HTTPConnection pipeline
-        self._connection.fake_close()
+        self._get_connection().fake_close()
         return code, data
 
     def _post(self, body_bytes):
@@ -143,7 +148,7 @@
         code = response.code
         data = handle_response(abspath, code, response.headers, response)
         # Close response to free the httplib.HTTPConnection pipeline
-        self._connection.fake_close()
+        self._get_connection().fake_close()
         return code, data
 
     def should_cache(self):
@@ -160,7 +165,7 @@
         request = Request('HEAD', abspath)
         response = self._perform(request)
 
-        self._connection.fake_close()
+        self._get_connection().fake_close()
         return response
 
     def has(self, relpath):