(fix) fall back to UTF-8/Latin-1 in rare decoding error cases - this is ugly

This commit is contained in:
Ludovic Marcotte
2015-07-23 10:22:36 -04:00
parent a8dd698425
commit 12875539bb

View File

@@ -872,6 +872,19 @@ static NSData* _sanitizeContent(NSData *theData)
usingEncodingNamed: [[bodyInfo objectForKey:@"parameterList"]
objectForKey: @"charset"]];
// In some rare cases (like #3276), we can get utterly broken email messages where
// HTML parts are wrongly encoded. When decoding with the declared charset fails, we
// fall back to UTF-8 and, if that fails too, to ISO-Latin-1.
if (!s)
{
s = [[NSString alloc] initWithData: preparsedContent encoding: NSUTF8StringEncoding];
if (!s)
s = [[NSString alloc] initWithData: preparsedContent encoding: NSISOLatin1StringEncoding];
AUTORELEASE(s);
}
#if BYTE_ORDER == BIG_ENDIAN
preparsedContent = [s dataUsingEncoding: NSUTF16BigEndianStringEncoding];
enc = XML_CHAR_ENCODING_UTF16BE;