[PATCH][MERGE] Improvements to is_ignored, take 2
Jan Hudec
bulb at ucw.cz
Thu Jan 12 22:47:41 GMT 2006
Hello,
I have improved on the is_ignored enhancements. The is_ignored_by method
should now work even when there are more than 100 patterns. No test cases
have been added to check that yet, though.
The updated code is at <http://www.ucw.cz/~bulb/bzr/bzr.ignore/>. Please
review and comment. The diff follows again.
=== modified file 'bzrlib/add.py'
--- bzrlib/add.py
+++ bzrlib/add.py
@@ -155,7 +155,7 @@
if subf == bzrlib.BZRDIR:
mutter("skip control directory %r", subp)
else:
- ignore_glob = tree.is_ignored(subp)
+ ignore_glob = tree.is_ignored_by(subp)
if ignore_glob is not None:
mutter("skip ignored sub-file %r", subp)
if ignore_glob not in ignored:
=== modified file 'bzrlib/builtins.py'
--- bzrlib/builtins.py
+++ bzrlib/builtins.py
@@ -1135,7 +1135,7 @@
if file_class != 'I':
continue
## XXX: Slightly inefficient since this was already calculated
- pat = tree.is_ignored(path)
+ pat = tree.is_ignored_by(path)
print '%-50s %s' % (path, pat)
=== modified file 'bzrlib/workingtree.py'
--- bzrlib/workingtree.py
+++ bzrlib/workingtree.py
@@ -113,6 +113,45 @@
return gen_file_id('TREE_ROOT')
+def _glob_to_regex(pat):
+ r"""Convert a unix glob to regular expression.
+
+ Patterns containing '/' or '\' need to match whole path; others match
+ against only the last component - as per requirement of
+ WorkingTree.is_ignored().
+
+ Pattern is returned as string.
+ """
+ # TODO: For now we use fnmatch.translate, which is Broken(tm). New,
+ # correct, translator that would handle '**' for matching paths and other
+ # extended globbing stuff from cvs/rsync should be implemented.
+
+ # XXX: Shouldn't the globs be actually UNICODE?
+
+ # XXX: fnmatch is actually not quite what we want: it's only
+ # approximately the same as real Unix fnmatch, and doesn't
+ # treat dotfiles correctly and allows * to match /.
+ # Eventually it should be replaced with something more
+ # accurate.
+
+ if '/' in pat or '\\' in pat:
+ if (pat[:2] == './') or (pat[:2] == '.\\'):
+ pat = pat[2:]
+ return fnmatch.translate(pat)
+ else:
+ # XXX: Is the path normalized? Should we match [/\\] ?
+ return '(?:.*/)?' + fnmatch.translate(pat)
+
+
+def _glob_list_to_regex(pats, wrap='(?:%s)'):
+ """Convert a list of unix globs to a regular expression.
+
+ The pattern is returned as string. The wrap is % format applied to each
+ individual glob pattern. It has to apply group.
+ """
+ return '|'.join([wrap % _glob_to_regex(x) for x in pats])
+
+
class TreeEntry(object):
"""An entry that implements the minium interface used by commands.
@@ -712,11 +751,10 @@
def ignored_files(self):
- """Yield list of PATH, IGNORE_PATTERN"""
+ """Yield list of paths"""
for subp in self.extras():
- pat = self.is_ignored(subp)
- if pat != None:
- yield subp, pat
+ if self.is_ignored(subp):
+ yield subp
def get_ignore_list(self):
@@ -734,6 +772,36 @@
self._ignorelist = l
return l
+ def _get_ignore_regex(self):
+ """Return a regular expression composed of ignore patterns.
+
+ Cached in the Tree object after the first call.
+ """
+ import re
+ if not hasattr(self, '_ignoreregex'):
+ self._ignoreregex = re.compile(
+ _glob_list_to_regex(self.get_ignore_list()))
+ return self._ignoreregex
+
+ def _get_ignore_by_regex_list(self):
+ """Return regex list for is_ignored_by method.
+
+ Cached in the Tree object after the first call.
+
+ The return is a list of lists, each having pattern as the first
+ element, followed by list of globs it is composed from.
+ """
+ import re
+ if not hasattr(self, '_ignore_by_regex_list'):
+ pats = list(self.get_ignore_list()) # So we can shift...
+ self._ignore_by_regex_list = []
+ while pats:
+ self._ignore_by_regex_list.append(
+ [re.compile(_glob_list_to_regex(pats[0:50],
+ wrap='(%s)'))]
+ + pats[0:50])
+ pats[0:50] = ()
+ return self._ignore_by_regex_list
def is_ignored(self, filename):
r"""Check whether the filename matches an ignore pattern.
@@ -741,37 +809,27 @@
Patterns containing '/' or '\' need to match the whole path;
others match against only the last component.
- If the file is ignored, returns the pattern which caused it to
- be ignored, otherwise None. So this can simply be used as a
- boolean if desired."""
-
- # TODO: Use '**' to match directories, and other extended
- # globbing stuff from cvs/rsync.
-
- # XXX: fnmatch is actually not quite what we want: it's only
- # approximately the same as real Unix fnmatch, and doesn't
- # treat dotfiles correctly and allows * to match /.
- # Eventually it should be replaced with something more
- # accurate.
-
- for pat in self.get_ignore_list():
- if '/' in pat or '\\' in pat:
-
- # as a special case, you can put ./ at the start of a
- # pattern; this is good to match in the top-level
- # only;
-
- if (pat[:2] == './') or (pat[:2] == '.\\'):
- newpat = pat[2:]
- else:
- newpat = pat
- if fnmatch.fnmatchcase(filename, newpat):
- return pat
- else:
- if fnmatch.fnmatchcase(splitpath(filename)[-1], pat):
- return pat
- else:
- return None
+ If the file is ignored, returns a match object, otherwise None. So
+ this can simply be used as a boolean if desired. The match object is
+ really not very useful, because the individual patterns are not
+ captured.
+ """
+ pat = self._get_ignore_regex()
+ return pat.match(filename)
+
+ def is_ignored_by(self, filename):
+ r"""Check whether the filename matches and return the pattern it matches.
+
+ This method is similar to is_ignored, but makes the extra effort to
+ return the pattern that matched.
+ """
+
+ pats = self._get_ignore_by_regex_list()
+ for pat in pats:
+ m = pat[0].match(filename)
+ if m:
+ return pat[m.lastindex]
+ return None
def kind(self, file_id):
return file_kind(self.id2abspath(file_id))
--
Jan 'Bulb' Hudec <bulb at ucw.cz>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
Url : https://lists.ubuntu.com/archives/bazaar/attachments/20060112/b7a6efce/attachment.pgp
More information about the bazaar
mailing list