J
John H.
I needed to parse a user search string and then create an SQL where-
clause by applying the user’s tokenized search criteria to a list of
fields from a table or view. I fully admit that I may have missed
something and reinvented a wheel here, but neither the string.split()
method or the RegEx() object seemed to be able to accomplish what I
wanted as far as search string parsing. If this can be accomplished
by functionality already in the .Net framework I’d be interested in
find out how.
My solution consists of a class with three methods
-------------------------------------------------------------------------
- string[] ParseSearchString(string p_InputString)
- bool IsSearchTermToken(string token)
- string CreateWhereClause(string[] fields, string[] tokens)
Basic rules of parsing search string
----------------------------------------------------
- A space between two search terms implies an ‘and’. For example,
‘John Smith’ would mean ‘John’ and ‘Smith’
- Use double quotes to create a search term with imbedded parentheses
or spaces. For example, “(c) John Smith” would be interpreted as a
single search term.
- Parentheses can be used to group search terms.
- Parentheses may or may not be separated from other tokens with
spaces.
================
The Code
================
static public class StringParser
{
public static string[] ParseSearchString(string p_InputString)
{
char[] delimiters = new char[] { ' ', '(', ')' };
ArrayList searchTokens = new ArrayList();
int parensOpen = 0;
int parensClose = 0;
string token = "";
int doubleQuoteChars = 0;
Char[] chars = p_InputString.ToCharArray();
for (int c = 0; c < chars.Count(); c++)
{ if (chars[c] == '\"')
{ if (doubleQuoteChars % 2 == 0) // 0 = opening
quote char
//
1 = closing quote char
{ // start tracking a new quoted search term
doubleQuoteChars++;
}
else
{ // mark end of quoted search term
doubleQuoteChars++;
searchTokens.Add(token);
token = "";
}
}
else if (doubleQuoteChars % 2 == 0 &&
delimiters.Contains(chars[c]))
{ if (token != "")
{ searchTokens.Add(token);
token = "";
}
if (chars[c] == '(')
{ searchTokens.Add("(");
parensOpen++;
}
if (chars[c] == ')')
{ searchTokens.Add(")");
parensClose++;
}
}
else
{ token += chars[c];
}
}
if (token != "")
{ searchTokens.Add(token); // add the trailing token
}
if (doubleQuoteChars % 2 == 1)
{ throw new Exception("Invalid search string: Unmatched
quotes. "
+ doubleQuoteChars.ToString() + " double quote
characters found in search string.");
}
if (parensOpen != parensClose)
{ throw new Exception("Invalid search string: Unmatched
parentheses"
+ Environment.NewLine + Environment.NewLine
+ "The search string has " + parensOpen.ToString()
+ " open parentheses, and "
+ parensClose.ToString() + " close parentheses.");
}
for (int t = 1; t < searchTokens.Count; t++)
{ /* Replace implied "and" with explicit "and"
*
* 1st-Token 2nd-Token Insert "and"
* --------- --------- ------------
* SrchTerm SrchTerm Yes
* ')' SrchTerm Yes
* SrchTerm '(' Yes
* '(' SrchTerm No
* SrchTerm ')' No
*/
bool IsToken1SearchTerm =
(IsSearchTermToken(searchTokens[t - 1].ToString()));
bool IsToken2SearchTerm =
(IsSearchTermToken(searchTokens[t].ToString()));
if ( (IsToken1SearchTerm == true &&
(IsToken2SearchTerm == true ||
searchTokens[t].ToString() == "(") ) ||
(searchTokens[t-1].ToString() == ")" &&
IsToken2SearchTerm == true ))
{
searchTokens.Insert(t, "and");
}
}
return (string[])searchTokens.ToArray(typeof(string));
}
public static bool IsSearchTermToken(string token)
{
if (token.ToLower() == "and" || token.ToLower() == "or"||
token == "(" || token == ")")
{ return false;
}
return true;
}
public static string CreateWhereClause(string[] fields,
string[] tokens)
{
string whereClause = "";
for (int t = 0; t < tokens.Count(); t++)
{ if
(JrccWinLib.StringParser.IsSearchTermToken(tokens[t]))
{ whereClause += " (";
for (int f = 0; f < fields.Count(); f++)
{ if (f > 0)
{ whereClause += " OR ";
}
whereClause += "LOWER(" + fields[f] + ") LIKE
'%" + tokens[t].ToLower() + "%'";
}
whereClause += ")";
}
else
{ // token is a '(', ')', "and", "or"
whereClause += " " + tokens[t];
}
}
if (whereClause != "")
{ whereClause = "WHERE" + whereClause;
}
return whereClause;
}
}
clause by applying the user’s tokenized search criteria to a list of
fields from a table or view. I fully admit that I may have missed
something and reinvented a wheel here, but neither the string.split()
method or the RegEx() object seemed to be able to accomplish what I
wanted as far as search string parsing. If this can be accomplished
by functionality already in the .Net framework I’d be interested in
find out how.
My solution consists of a class with three methods
-------------------------------------------------------------------------
- string[] ParseSearchString(string p_InputString)
- bool IsSearchTermToken(string token)
- string CreateWhereClause(string[] fields, string[] tokens)
Basic rules of parsing search string
----------------------------------------------------
- A space between two search terms implies an ‘and’. For example,
‘John Smith’ would mean ‘John’ and ‘Smith’
- Use double quotes to create a search term with imbedded parentheses
or spaces. For example, “(c) John Smith” would be interpreted as a
single search term.
- Parentheses can be used to group search terms.
- Parentheses may or may not be separated from other tokens with
spaces.
================
The Code
================
static public class StringParser
{
public static string[] ParseSearchString(string p_InputString)
{
char[] delimiters = new char[] { ' ', '(', ')' };
ArrayList searchTokens = new ArrayList();
int parensOpen = 0;
int parensClose = 0;
string token = "";
int doubleQuoteChars = 0;
Char[] chars = p_InputString.ToCharArray();
for (int c = 0; c < chars.Count(); c++)
{ if (chars[c] == '\"')
{ if (doubleQuoteChars % 2 == 0) // 0 = opening
quote char
//
1 = closing quote char
{ // start tracking a new quoted search term
doubleQuoteChars++;
}
else
{ // mark end of quoted search term
doubleQuoteChars++;
searchTokens.Add(token);
token = "";
}
}
else if (doubleQuoteChars % 2 == 0 &&
delimiters.Contains(chars[c]))
{ if (token != "")
{ searchTokens.Add(token);
token = "";
}
if (chars[c] == '(')
{ searchTokens.Add("(");
parensOpen++;
}
if (chars[c] == ')')
{ searchTokens.Add(")");
parensClose++;
}
}
else
{ token += chars[c];
}
}
if (token != "")
{ searchTokens.Add(token); // add the trailing token
}
if (doubleQuoteChars % 2 == 1)
{ throw new Exception("Invalid search string: Unmatched
quotes. "
+ doubleQuoteChars.ToString() + " double quote
characters found in search string.");
}
if (parensOpen != parensClose)
{ throw new Exception("Invalid search string: Unmatched
parentheses"
+ Environment.NewLine + Environment.NewLine
+ "The search string has " + parensOpen.ToString()
+ " open parentheses, and "
+ parensClose.ToString() + " close parentheses.");
}
for (int t = 1; t < searchTokens.Count; t++)
{ /* Replace implied "and" with explicit "and"
*
* 1st-Token 2nd-Token Insert "and"
* --------- --------- ------------
* SrchTerm SrchTerm Yes
* ')' SrchTerm Yes
* SrchTerm '(' Yes
* '(' SrchTerm No
* SrchTerm ')' No
*/
bool IsToken1SearchTerm =
(IsSearchTermToken(searchTokens[t - 1].ToString()));
bool IsToken2SearchTerm =
(IsSearchTermToken(searchTokens[t].ToString()));
if ( (IsToken1SearchTerm == true &&
(IsToken2SearchTerm == true ||
searchTokens[t].ToString() == "(") ) ||
(searchTokens[t-1].ToString() == ")" &&
IsToken2SearchTerm == true ))
{
searchTokens.Insert(t, "and");
}
}
return (string[])searchTokens.ToArray(typeof(string));
}
public static bool IsSearchTermToken(string token)
{
if (token.ToLower() == "and" || token.ToLower() == "or"||
token == "(" || token == ")")
{ return false;
}
return true;
}
public static string CreateWhereClause(string[] fields,
string[] tokens)
{
string whereClause = "";
for (int t = 0; t < tokens.Count(); t++)
{ if
(JrccWinLib.StringParser.IsSearchTermToken(tokens[t]))
{ whereClause += " (";
for (int f = 0; f < fields.Count(); f++)
{ if (f > 0)
{ whereClause += " OR ";
}
whereClause += "LOWER(" + fields[f] + ") LIKE
'%" + tokens[t].ToLower() + "%'";
}
whereClause += ")";
}
else
{ // token is a '(', ')', "and", "or"
whereClause += " " + tokens[t];
}
}
if (whereClause != "")
{ whereClause = "WHERE" + whereClause;
}
return whereClause;
}
}