Ben Fidge said:
Daniele,
Thanks for the help. I should have specified my criteria
a little more clearly.
Basically, I've written a small backup utility that
allows the backing up of a web-sites content via a Web
Service running on that site.
The site is large (100,000+ aspx, xml, xsl etc files) and
the web service is taking too long to compile the list of
files to download.
The backup utility needs to specify filemasks for
including and excluding in the final result set. So,
hypothetically speaking, a backup set could be as follwos:
include inetpub\wwwroot\*.aspx recursively
include inetpub\wwwroot\*.xml recursively
include inetpub\wwwroot\*.xsl recursively
exclude inetpub\wwwroot\data\*.xml
exclude inetpub\wwwroot\data\savepage.aspx
Your method is certainly very fast, but is it possible to
tailor it for unlimited arbitrary include and exclude
filemasks?
The method I currently use is taking about 90 seconds to
execute on our web server and the Web Service call is
timing out, hence the need for the most efficient file
gathering algorithm.
Thanks for your time, you'll be on my Christmas Card list
if you can solve this one!!
Ben Fidge
I'm not to fond of this, but it seems to work to an extent. This code runs
across my C: drive(80k files) in 12 seconds. Perhaps filtering out to scan
each folder one time and using regex to match both includes and excludes
will be faster, but I don't have any more time to spend on this today, I
hope it helps:
string[] GetDirectories(string startingPoint, params string[]
searchPatterns)
{
ArrayList directories = new ArrayList();
if (startingPoint == "c:\\System Volume Information")
return (string[])directories.ToArray(typeof(string));
foreach (string directory in Directory.GetDirectories(startingPoint))
{
string[] results = GetDirectories(directory,searchPatterns);
if (directory == "c:\\System Volume Information")
continue;
foreach (string searchPattern in searchPatterns)
directories.Add(directory + "\\" + searchPattern);
directories.AddRange(results);
}
return (string[])directories.ToArray(typeof(string));
}
/// <summary>
/// Returns a fileset based on a list of include and exclude patterns
/// </summary>
/// <param name="include">An IList of wildcard patterns to include</param>
/// <param name="exclude">An IList of wildcard patterns to exclude</param>
/// <returns>An array of strings containing the resultant set</returns>
string[] LoadFiles(IList include, IList exclude)
{
ArrayList files = new ArrayList();
PrepareFilter(exclude);
foreach (string mask in include)
{
string directory = Path.GetDirectoryName(mask);
string fileMask = Path.GetFileName(mask);
string[] filesFound = Directory.GetFiles(directory,fileMask);
filesFound = Filter(filesFound,exclude);
files.AddRange(filesFound);
}
return (string[])files.ToArray(typeof(string));
}
/// <summary>
/// Filters out files based on a list of exclusions
/// </summary>
/// <param name="files">The files to filter</param>
/// <param name="exclude">An IList of patterns to use for exclusion
matching</param>
/// <returns>An array of strings that passed the filter</returns>
string[] Filter(string[] files, IList exclude)
{
ArrayList results = new ArrayList();
foreach (string result in files)
{
bool include = true;
foreach (string filter in exclude)
{
Regex regex;
regex = regexCache[filter] as Regex;
if (regex == null)
{
regex = PrepareFilter(filter,WildcardToRegex(filter));
}
if (regex.IsMatch(result))
{
include = false;
break;
}
}
if (include)
results.Add(result);
}
return (string[])results.ToArray(typeof(string));
}
/// <summary>
/// Prepares filtration by compiling and caching Regex statements
/// </summary>
/// <param name="exclude">A list of Wildcard patterns to prepare</param>
void PrepareFilter(IList patterns)
{
foreach (string path in patterns)
{
string result = WildcardToRegex(path);
PrepareFilter(path,result);
}
}
/// <summary>
/// Prepares and adds a single regex pattern to the regex cache
/// </summary>
/// <param name="name">the pattern name</param>
/// <param name="pattern">The pattern</param>
/// <returns>A Regex object to perform filtration for that
pattern</returns>
Regex PrepareFilter(string name, string pattern)
{
if (regexCache.Contains(name))
return regexCache[name] as Regex;
Regex regex = new Regex(pattern,RegexOptions.Compiled |
RegexOptions.IgnoreCase );
PrepareFilter(name,regex);
return regex;
}
/// <summary>
/// Adds a Regex to the regexcache.
/// </summary>
/// <param name="name">The regex name</param>
/// <param name="regex">The Regex object to cache</param>
/// <returns>the Regex passed in as regex, this is done simply to provide
return type compatability with PrepareFilter(string,string)</returns>
Regex PrepareFilter(string name, Regex regex)
{
if (regexCache.Contains(name))
return regexCache[name] as Regex;
regexCache.Add(name,regex);
return regex;
}
/// <summary>
/// Converts dos wildcard(*) string into regex in a generic manner
/// </summary>
/// <param name="pattern">The pattern to convert</param>
/// <returns>The converted regex string.</returns>
/// <remarks>
/// This method does not handle ? in patterns, if this support is needed
it should be added here.
/// </remarks>
string WildcardToRegex(string pattern)
{
pattern = pattern.Replace(@"\",@"\\");
pattern = pattern.Replace("^",@"\^");
pattern = pattern.Replace(".",@"\.");
pattern = pattern.Replace("*",".*");
pattern = pattern + "$";
return pattern;
}
IDictionary regexCache = new Hashtable();