P
Polanski24
Hello!
During my app testing I discovered the following bug in .NET v2.0 (I have
not tested 1.1 yet).
The StreamReader constructors that are supposed to detect the byte order
mark (BOM) fail to do so.
A simple test case is below: just feed it files with different BOMs,
and you can see that the StreamReader encoding is always the default
UTF8Encoding, regardless of the file's BOM.
In case someone needs BOM detection, use the code below instead of the
StreamReader constructors.
// Compares the encoding found by a manual byte-order-mark (BOM) check with the
// encoding that StreamReader reports for the same file, and traces both.
// Expects `path` to hold the name of the file to inspect.
StreamReader reader = null;
System.IO.FileStream file = null;
Encoding enc = null;
try
{
    file = new System.IO.FileStream(path,
        FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
    if (file.CanSeek)
    {
        // Read up to four bytes. Stream.Read may return fewer bytes than requested
        // (and the file may be shorter than four bytes), so only the first
        // bomLength bytes of the buffer are meaningful.
        byte[] bom = new byte[4];
        int bomLength = 0;
        while (bomLength < bom.Length)
        {
            int read = file.Read(bom, bomLength, bom.Length - bomLength);
            if (read <= 0)
            {
                break;
            }
            bomLength += read;
        }
        file.Close();

        if (bomLength >= 3 && bom[0] == 0xef && bom[1] == 0xbb && bom[2] == 0xbf)
        {
            enc = Encoding.UTF8; // UTF-8
        }
        else if (bomLength >= 4 && bom[0] == 0xff && bom[1] == 0xfe && bom[2] == 0 && bom[3] == 0)
        {
            // UTF-32 little-endian. Must be tested before UTF-16 LE because both
            // BOMs start with FF FE.
            enc = Encoding.UTF32;
        }
        else if (bomLength >= 4 && bom[0] == 0 && bom[1] == 0 && bom[2] == 0xfe && bom[3] == 0xff)
        {
            // UTF-32 big-endian. Encoding.UTF32 is little-endian, so build the
            // big-endian variant explicitly.
            enc = new UTF32Encoding(true, true);
        }
        else if (bomLength >= 2 && bom[0] == 0xff && bom[1] == 0xfe)
        {
            enc = Encoding.Unicode; // UTF-16 little-endian
        }
        else if (bomLength >= 2 && bom[0] == 0xfe && bom[1] == 0xff)
        {
            enc = Encoding.BigEndianUnicode; // UTF-16 big-endian
        }
        else
        {
            enc = System.Text.Encoding.ASCII; // no recognized BOM
        }
    }

    reader = new StreamReader(path, true);
    // StreamReader performs BOM detection lazily: CurrentEncoding only reflects the
    // detected encoding after the first read operation, so force one here.
    reader.Peek();
    Trace.WriteLine("StreamReader encoding: " + reader.CurrentEncoding);
    // enc is still null when the stream was not seekable and the manual check was skipped.
    Trace.WriteLine("BOM detected encoding: " + (enc == null ? "(not determined)" : enc.ToString()));
}
catch (Exception ex)
{
    Trace.WriteLine(ex.ToString());
}
finally
{
    if (reader != null)
    {
        reader.Close();
    }
    // Closing an already closed stream is harmless; this covers the error paths.
    if (file != null)
    {
        file.Close();
    }
}
Cheers,
http://sourceforge.net/projects/ngmp
During my app testing I discovered the following bug in .NET v2.0 (I have
not tested 1.1 yet).
The StreamReader constructors that are supposed to detect the byte order
mark (BOM) fail to do so.
A simple test case is below: just feed it files with different BOMs,
and you can see that the StreamReader encoding is always the default
UTF8Encoding, regardless of the file's BOM.
In case someone needs BOM detection, use the code below instead of the
StreamReader constructors.
// Compares the encoding found by a manual byte-order-mark (BOM) check with the
// encoding that StreamReader reports for the same file, and traces both.
// Expects `path` to hold the name of the file to inspect.
StreamReader reader = null;
System.IO.FileStream file = null;
Encoding enc = null;
try
{
    file = new System.IO.FileStream(path,
        FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
    if (file.CanSeek)
    {
        // Read up to four bytes. Stream.Read may return fewer bytes than requested
        // (and the file may be shorter than four bytes), so only the first
        // bomLength bytes of the buffer are meaningful.
        byte[] bom = new byte[4];
        int bomLength = 0;
        while (bomLength < bom.Length)
        {
            int read = file.Read(bom, bomLength, bom.Length - bomLength);
            if (read <= 0)
            {
                break;
            }
            bomLength += read;
        }
        file.Close();

        if (bomLength >= 3 && bom[0] == 0xef && bom[1] == 0xbb && bom[2] == 0xbf)
        {
            enc = Encoding.UTF8; // UTF-8
        }
        else if (bomLength >= 4 && bom[0] == 0xff && bom[1] == 0xfe && bom[2] == 0 && bom[3] == 0)
        {
            // UTF-32 little-endian. Must be tested before UTF-16 LE because both
            // BOMs start with FF FE.
            enc = Encoding.UTF32;
        }
        else if (bomLength >= 4 && bom[0] == 0 && bom[1] == 0 && bom[2] == 0xfe && bom[3] == 0xff)
        {
            // UTF-32 big-endian. Encoding.UTF32 is little-endian, so build the
            // big-endian variant explicitly.
            enc = new UTF32Encoding(true, true);
        }
        else if (bomLength >= 2 && bom[0] == 0xff && bom[1] == 0xfe)
        {
            enc = Encoding.Unicode; // UTF-16 little-endian
        }
        else if (bomLength >= 2 && bom[0] == 0xfe && bom[1] == 0xff)
        {
            enc = Encoding.BigEndianUnicode; // UTF-16 big-endian
        }
        else
        {
            enc = System.Text.Encoding.ASCII; // no recognized BOM
        }
    }

    reader = new StreamReader(path, true);
    // StreamReader performs BOM detection lazily: CurrentEncoding only reflects the
    // detected encoding after the first read operation, so force one here.
    reader.Peek();
    Trace.WriteLine("StreamReader encoding: " + reader.CurrentEncoding);
    // enc is still null when the stream was not seekable and the manual check was skipped.
    Trace.WriteLine("BOM detected encoding: " + (enc == null ? "(not determined)" : enc.ToString()));
}
catch (Exception ex)
{
    Trace.WriteLine(ex.ToString());
}
finally
{
    if (reader != null)
    {
        reader.Close();
    }
    // Closing an already closed stream is harmless; this covers the error paths.
    if (file != null)
    {
        file.Close();
    }
}
Cheers,
http://sourceforge.net/projects/ngmp