diff --git a/OpenChange/RTFHandler.m b/OpenChange/RTFHandler.m index e170cccf6..b18754e51 100644 --- a/OpenChange/RTFHandler.m +++ b/OpenChange/RTFHandler.m @@ -25,9 +25,9 @@ // // Useful macros // -#define ADVANCE _bytes++; _current_pos++; -#define ADVANCE_N(N) _bytes += (N); _current_pos += (N); -#define REWIND _bytes--; _current_pos--; +#define ADVANCE self->_bytes++; self->_current_pos++; +#define ADVANCE_N(N) self->_bytes += (N); self->_current_pos += (N); +#define REWIND self->_bytes--; self->_current_pos--; #define DEFAULT_CHARSET 1 #define FONTNAME_LEN_MAX 100 @@ -408,16 +408,47 @@ const unsigned short ansicpg874[256] = { // @implementation RTFHandler -- (id) initWithData: (NSData *) theData +static NSMapTable *_charsets = nil; +static NSMapTable *_cws = nil; +typedef enum { + CW_UNKNOWN = 0, + CW_ANSICPG, + CW_B, + CW_CF, + CW_COLORTBL, + CW_F, + CW_FONTTBL, + CW_I, + CW_PAR, + CW_PICT, + CW_SOFTLINE, + CW_STRIKE, + CW_STYLESHEET, + CW_TAB, + CW_U, + CW_UL, + CW_ULNONE +} commandWordId; + +static NSMapTable *_fontCws = nil; +typedef enum { + FONTCW_UNKNOWN = 0, + FONTCW_F, + FONTCW_FBIDI, + FONTCW_FCHARSET, + FONTCW_FDECOR, + FONTCW_FMODERN, + FONTCW_FNIL, + FONTCW_FPRQ, + FONTCW_FROMAN, + FONTCW_FSCRIPT, + FONTCW_FSWISS, + FONTCW_FTECH +} fontCommandWordId; + +static void _init_charsets_table() { - if ((self = [super init])) - { - ASSIGN(_data, theData); - _bytes = (char *)[_data bytes]; - _len = [_data length]; - _current_pos = 0; - - _charsets = NSCreateMapTable(NSObjectMapKeyCallBacks, NSNonOwnedPointerMapValueCallBacks, 10); + _charsets = NSCreateMapTable(NSObjectMapKeyCallBacks, NSNonOwnedPointerMapValueCallBacks, 23); // 238 — Eastern European - cpg1250 NSMapInsert(_charsets, @"ansicpg1250", ansicpg1250); NSMapInsert(_charsets, [NSNumber numberWithUnsignedChar: 238], ansicpg1250); @@ -467,6 +498,59 @@ const unsigned short ansicpg874[256] = { // 136 — Big5 // 254 — PC 437 // 255 — OEM +} + +static void _init_cws_table() +{ + _cws = NSCreateMapTable(NSObjectMapKeyCallBacks, NSNonOwnedPointerMapValueCallBacks, 16); + NSMapInsert(_cws, @"ansicpg", (void *) CW_ANSICPG); + NSMapInsert(_cws, @"b", (void *) CW_B); + NSMapInsert(_cws, @"cf", (void *) CW_CF); + NSMapInsert(_cws, @"colortbl", (void *) CW_COLORTBL); + NSMapInsert(_cws, @"f", (void *) CW_F); + NSMapInsert(_cws, @"fonttbl", (void *) CW_FONTTBL); + NSMapInsert(_cws, @"i", (void *) CW_I); + NSMapInsert(_cws, @"par", (void *) CW_PAR); + NSMapInsert(_cws, @"pict", (void *) CW_PICT); + NSMapInsert(_cws, @"softline", (void *) CW_SOFTLINE); + NSMapInsert(_cws, @"strike", (void *) CW_STRIKE); + NSMapInsert(_cws, @"stylesheet", (void *) CW_STYLESHEET); + NSMapInsert(_cws, @"tab", (void *) CW_TAB); + NSMapInsert(_cws, @"u", (void *) CW_U); + NSMapInsert(_cws, @"ul", (void *) CW_UL); + NSMapInsert(_cws, @"ulnone", (void *) CW_ULNONE); +} + +static void _init_fontCws_table() +{ + _fontCws = NSCreateMapTable(NSObjectMapKeyCallBacks, NSNonOwnedPointerMapValueCallBacks, 23); + NSMapInsert(_fontCws, @"f", (void *) FONTCW_F); + NSMapInsert(_fontCws, @"fbidi", (void *) FONTCW_FBIDI); + NSMapInsert(_fontCws, @"fcharset", (void *) FONTCW_FCHARSET); + NSMapInsert(_fontCws, @"fdecor", (void *) FONTCW_FDECOR); + NSMapInsert(_fontCws, @"fmodern", (void *) FONTCW_FMODERN); + NSMapInsert(_fontCws, @"fnil", (void *) FONTCW_FNIL); + NSMapInsert(_fontCws, @"fprq", (void *) FONTCW_FPRQ); + NSMapInsert(_fontCws, @"froman", (void *) FONTCW_FROMAN); + NSMapInsert(_fontCws, @"fscript", (void *) FONTCW_FSCRIPT); + NSMapInsert(_fontCws, @"fswiss", (void *) FONTCW_FSWISS); + NSMapInsert(_fontCws, @"ftech", (void *) FONTCW_FTECH); +} + +- (id) initWithData: (NSData *) theData +{ + if ((self = [super init])) + { + ASSIGN(_data, theData); + _bytes = (char *)[_data bytes]; + _len = [_data length]; + _current_pos = 0; + if (_charsets == nil) + _init_charsets_table(); + if (_cws == nil) + _init_cws_table(); + if (_fontCws == nil) + _init_fontCws_table(); } return self; @@ -600,8 +684,6 @@ const unsigned short ansicpg874[256] = { /* In other cases, the delimiting character terminates the control word and is not part of the control word. */ - - *len = end - start + 1; return start; } @@ -716,6 +798,8 @@ const unsigned short ansicpg874[256] = { unsigned int len; BOOL hasArg; int arg; + NSString *cwKey; + fontCommandWordId cwId; cw = [self parseControlWordAndSetLenIn: &len setHasIntArgumentIn: &hasArg @@ -725,76 +809,57 @@ const unsigned short ansicpg874[256] = { else if (cw == NULL) continue; - if (len == 1) - { - if (strncmp((const char*) cw, "f", len) == 0) - { - if (hasArg) - fontInfo->index = arg; - } - - } - else if (len == 4) - { - if (strncmp((const char*) cw, "fnil", len) == 0) - { - fontInfo->family = @"nil"; - } - else if (strncmp((const char*) cw, "fprq", len) == 0) - { - if (hasArg) - fontInfo->pitch = arg; - } - } - else if (len == 5) - { - if (strncmp((const char*) cw, "fbidi", len) == 0) - { - fontInfo->family = @"bidi"; - } - else if (strncmp((const char*) cw, "ftech", len) == 0) - { - fontInfo->family = @"tech"; - } - } - else if (len == 6) - { - if (strncmp((const char*) cw, "froman", len) == 0) - { - fontInfo->family = @"roman"; - } - else if (strncmp((const char*) cw, "fswiss", len) == 0) - { - fontInfo->family = @"swiss"; - } - else if (strncmp((const char*) cw, "fdecor", len) == 0) - { - fontInfo->family = @"decor"; - } - } - else if (len == 7) - { - if (strncmp((const char*) cw, "fmodern", len) == 0) - { - fontInfo->family = @"modern"; - } - } - else if (len == 8) - { - if (strncmp((const char* ) cw, "fcharset", len) == 0) - { - if (hasArg) - { - fontInfo->charset = arg; - } + cwKey= [[NSString alloc] initWithBytesNoCopy: (void *)cw + length: len + encoding: NSASCIIStringEncoding + freeWhenDone: NO]; + [cwKey autorelease]; - } - else if (strncmp((const char*) cw, "fscript", len) == 0) - { - fontInfo->family = @"fscript"; - } + cwId = (fontCommandWordId) NSMapGet(_fontCws, cwKey); + switch (cwId) + { + case FONTCW_F: + if (hasArg) + fontInfo->index = arg; + break; + case FONTCW_FBIDI: + fontInfo->family = @"bidi"; + break; + case FONTCW_FCHARSET: + if (hasArg) + fontInfo->charset = arg; + break; + case FONTCW_FDECOR: + fontInfo->family = @"decor"; + break; + case FONTCW_FMODERN: + fontInfo->family = @"modern"; + break; + case FONTCW_FNIL: + fontInfo->family = @"nil"; + break; + case FONTCW_FPRQ: + if (hasArg) + fontInfo->pitch = arg; + break; + case FONTCW_FROMAN: + fontInfo->family = @"roman"; + break; + case FONTCW_FSCRIPT: + fontInfo->family = @"script"; + break; + case FONTCW_FSWISS: + fontInfo->family = @"swiss"; + break; + case FONTCW_FTECH: + fontInfo->family = @"tech"; + break; + case FONTCW_UNKNOWN: + default: + // do nothing + break; } - } + } else // no char { if (level == 2 && isalnum(*_bytes)) @@ -900,9 +965,238 @@ const unsigned short ansicpg874[256] = { [self parseIgnoringEverything]; } -// -// -// + +// todo: This keyword is only valid in the RTF header section right after the \ansi, \mac, \pc or \pca keyword. +inline static void parseAnsicpg (BOOL hasArg, int arg, const unsigned short **out_default_char) +{ + NSString *key; + const unsigned short *res; + + if (!hasArg) + return; + key = [NSString stringWithFormat: @"anscicpg%i", arg]; + res = NSMapGet(_charsets, key); + if (res) + *out_default_char = res; +} + +inline static void parseB(RTFHandler *self, BOOL hasArg, int arg, RTFFormattingOptions *formattingOptions) +{ + if (!formattingOptions) + return; + if (hasArg && arg == 0) + { + [self->_html appendBytes: "" length: 4]; + formattingOptions->bold = NO; + } + else + { + [self->_html appendBytes: "" length: 3]; + formattingOptions->bold = YES; + } +} + +inline static void parseCf(RTFHandler *self, BOOL hasArg, int arg, RTFFormattingOptions *formattingOptions, RTFColorTable *colorTable) +{ + RTFColorDef *colorDef; + char *v; + + if (!hasArg) + return; + if (!formattingOptions) + return; + + colorDef = [colorTable colorDefAtIndex: arg]; + if (!colorDef) + return; + + if (formattingOptions->color_index >= 0) + { + [self->_html appendBytes: "" length: 7]; + } + + formattingOptions->color_index = arg; + + v = calloc(23, sizeof(char)); + sprintf(v, "", colorDef->red, colorDef->green, colorDef->blue); + [self->_html appendBytes: v length: strlen(v)]; + free(v); +} + + +inline static void parseColorTableWrapper(RTFHandler *self, RTFColorTable **colorTable) +{ + *colorTable = [self parseColorTable]; +} + +inline static void parseF(RTFHandler *self, BOOL hasArg, int arg, RTFFormattingOptions *formattingOptions, RTFFontTable *fontTable) +{ + RTFFontInfo *fontInfo; + + if (!hasArg) + return; + if (!formattingOptions) + return; + + if (formattingOptions->font_index >= 0 && arg != formattingOptions->font_index) + { + [self->_html appendBytes: "" length: 7]; + } + + formattingOptions->font_index = arg; + + fontInfo = [fontTable fontInfoAtIndex: arg]; + char *v = NULL; + if (fontInfo && fontInfo->name) + { + if ([fontInfo->name length] < 128) + { + int tag_size = 15 + [fontInfo->name length]; + v = calloc(tag_size, sizeof(char)); + snprintf(v, tag_size, "", [fontInfo->name UTF8String]); + } + else + { + NSLog(@"RTFHandler: Font %u has %d chars length, parse error? " + "Ignored", arg, [fontInfo->name length]); + v = calloc(7, sizeof(char)); + sprintf(v, ""); + } + } + else + { + // RTF badformed? We don't know about that font (arg index not found). + // Anyhow, we still open the html tag because in the future + // we will close it (e.g. when new font is used). + v = calloc(7, sizeof(char)); + sprintf(v, ""); + } + + if (fontInfo && fontInfo->charset) + { + if (fontInfo->charset == DEFAULT_CHARSET) + /* charset 1 is default charset */ + formattingOptions->charset = NULL; + else { + NSNumber *key = [NSNumber numberWithUnsignedChar: fontInfo->charset]; + formattingOptions->charset = NSMapGet(_charsets, key); + } + } + + [self->_html appendBytes: v length: strlen(v)]; + free(v); +} + +inline static void parseFontTableWrapper(RTFHandler *self, const char * cw, RTFFontTable **fontTable) +{ + // We rewind our buffer so we start at the beginning of {\fonttbl... + self->_bytes = cw-2; + self->_current_pos -= 9; // Length: {\fonttbl + *fontTable = [self parseFontTable]; + + // We go back 1 byte in order to end our section properly ('}' character) + REWIND; +} + +inline static void parseI(RTFHandler *self, BOOL hasArg, int arg, RTFFormattingOptions *formattingOptions) +{ + if (!formattingOptions) + return; + if (hasArg && arg == 0) + { + [self->_html appendBytes: "" length: 4]; + formattingOptions->italic = NO; + } + else + { + [self->_html appendBytes: "" length: 3]; + formattingOptions->italic = YES; + } +} + +inline static void parsePar(RTFHandler *self) +{ + [self->_html appendBytes: "
" length: 4]; +} + +inline static void parsePictureWrapper(RTFHandler *self, const char * cw) +{ + self->_bytes = cw-2; + self->_current_pos -= 6; // Length: {\pict + [self parsePicture]; + REWIND; +} + +// same implementation that /par +inline static void parseSoftline(RTFHandler *self) +{ + [self->_html appendBytes: "
" length: 4]; +} + +inline static void parseStrike(RTFHandler *self, BOOL hasArg, int arg, RTFFormattingOptions *formattingOptions) +{ + if (!formattingOptions) + return; + if (hasArg && arg == 0) + { + [self->_html appendBytes: "" length: 9]; + formattingOptions->strikethrough = NO; + } + else + { + [self->_html appendBytes: "" length: 8]; + formattingOptions->strikethrough = YES; + } +} + +inline static void parseStyleSheetWrapper(RTFHandler *self, const char * cw) +{ + self->_bytes = cw-2; + self->_current_pos -= 12; // Length: {\stylesheet + [self parseStyleSheet]; + REWIND; +} + +inline static void parseTab(RTFHandler *self) +{ + [self->_html appendBytes: "  " length: 12]; +} + +inline static void parseU(RTFHandler *self, BOOL hasArg, int arg) +{ + unichar uch; + NSString *s; + NSData *d; + + if (!hasArg) + return; + if (arg < 0) + // a negative value means a value greater than 32767 + arg = 32767 - arg; + + uch = (unichar) arg; + s = [NSString stringWithCharacters: &uch length: 1]; + d = [s dataUsingEncoding: NSUTF8StringEncoding]; + [self->_html appendData: d]; +} + +inline static void parseUl(RTFHandler *self, BOOL hasArg, int arg, RTFFormattingOptions *formattingOptions) +{ + if (!formattingOptions) + return; + if (hasArg && arg ==0) + { + [self->_html appendBytes: "" length: 4]; + formattingOptions->underline = NO; + } + else + { + [self->_html appendBytes: "" length: 3]; + formattingOptions->underline = YES; + } +} + + - (NSMutableData *) parse { RTFFormattingOptions *formattingOptions; @@ -910,7 +1204,7 @@ const unsigned short ansicpg874[256] = { RTFFontTable *fontTable; RTFStack *stack; - const unsigned short *default_charset; + const unsigned short *defaultCharset; // convenience variables for parsing unsigned char c; @@ -921,7 +1215,7 @@ const unsigned short ansicpg874[256] = { stack = [[RTFStack alloc] init]; fontTable = nil; colorTable = nil; - default_charset = ansicpg1252; + defaultCharset = ansicpg1252; formattingOptions = nil; _html = [[NSMutableData alloc] init]; @@ -942,6 +1236,10 @@ const unsigned short ansicpg874[256] = { { unsigned int len; const char *cw; + BOOL hasArg; + int arg; + NSString *cwKey; + commandWordId cwId; char nextByte = *(_bytes+1); if (nextByte == '\'') @@ -955,7 +1253,7 @@ const unsigned short ansicpg874[256] = { if (formattingOptions && formattingOptions->charset) active_charset = formattingOptions->charset; else - active_charset = default_charset; + active_charset = defaultCharset; ADVANCE; @@ -998,213 +1296,73 @@ const unsigned short ansicpg874[256] = { } - cw = [self parseControlWord: &len]; + cw = [self parseControlWordAndSetLenIn: &len + setHasIntArgumentIn: &hasArg + setIntArgumentIn: &arg]; + if (cw == NULL) + continue; + + cwKey= [[NSString alloc] initWithBytesNoCopy: (void *)cw + length: len + encoding: NSASCIIStringEncoding + freeWhenDone: NO]; + [cwKey autorelease]; - s = [[NSString alloc] initWithBytesNoCopy: (void *)cw - length: len - encoding: NSASCIIStringEncoding - freeWhenDone: NO]; - [s autorelease]; - - // todo: This keyword should be emitted in the RTF header section right after the \ansi, \mac, \pc or \pca keyword. - if (strncmp(cw, "ansicpg", 7) == 0) + cwId = (commandWordId) NSMapGet(_cws, cwKey); + switch (cwId) { - default_charset = NSMapGet(_charsets, s); - } - else if (strncmp(cw, "fonttbl", 7) == 0) - { - // We rewind our buffer so we start at the beginning of {\fonttbl... - _bytes = cw-2; - _current_pos -= 9; // Length: {\fonttbl - fontTable = [self parseFontTable]; - - // We go back 1 byte in order to end our section properly ('}' character) - REWIND; - } - else if (strncmp(cw, "stylesheet", 10) == 0) - { - _bytes = cw-2; - _current_pos -= 12; // Length: {\stylesheet - [self parseStyleSheet]; - REWIND; - } - else if (strncmp(cw, "colortbl", 8) == 0) - { - colorTable = [self parseColorTable]; - } - else if (strncmp(cw, "pict", 4) == 0) - { - _bytes = cw-2; - _current_pos -= 6; // Length: {\pict - [self parsePicture]; - REWIND; - } - else if ([s isEqualToString: @"b"] && formattingOptions) - { - [_html appendBytes: "" length: 3]; - formattingOptions->bold = YES; - } - else if ([s isEqualToString: @"b0"] && formattingOptions) - { - [_html appendBytes: "" length: 4]; - formattingOptions->bold = NO; - } - else if ([s hasPrefix: @"cf"] && [s length] > 2) - { - RTFColorDef *colorDef; - int color_index; - char *v; - - if (!formattingOptions) continue; - - color_index = [[s substringFromIndex: 2] intValue]; - colorDef = [colorTable colorDefAtIndex: color_index]; - if (!colorDef) continue; - - if (formattingOptions->color_index >= 0) - { - [_html appendBytes: "
" length: 7]; - } - - formattingOptions->color_index = color_index; - - v = malloc(23*sizeof(char)); - memset(v, 0, 23); - sprintf(v, "", colorDef->red, colorDef->green, colorDef->blue); - [_html appendBytes: v length: strlen(v)]; - free(v); - } - // else if ([s hasPrefix: @"fcs"]) - // { - // // ignore - // } - // else if ([s hasPrefix: @"fs"]) - // { - // // ignore - // } - // else if ([s hasPrefix: @"fbidis"]) - // { - // // ignore - // } - // else if ([s hasPrefix: @"fromhtml"]) - // { - // // ignore - // } - // else if ([s hasPrefix: @"fromtext"]) - // { - // // ignore - // } - else if ([s hasPrefix: @"f"] && [s length] > 1 && isdigit([s characterAtIndex: 1])) - { - RTFFontInfo *fontInfo; - int font_index; - - font_index = [[s substringFromIndex: 1] intValue]; - - if (!formattingOptions) - continue; - - if (formattingOptions->font_index >= 0 && - font_index != formattingOptions->font_index) - { - [_html appendBytes: "" length: 7]; - } - - formattingOptions->font_index = font_index; - - fontInfo = [fontTable fontInfoAtIndex: font_index]; - char *v = NULL; - if (fontInfo && fontInfo->name) - { - if ([fontInfo->name length] < 128) - { - int tag_size = 15 + [fontInfo->name length]; - v = calloc(tag_size, sizeof(char)); - snprintf(v, tag_size, "", [fontInfo->name UTF8String]); - } - else - { - NSLog(@"RTFHandler: Font %u has %d chars length, parse error? " - "Ignored", font_index, [fontInfo->name length]); - v = calloc(7, sizeof(char)); - sprintf(v, ""); - } - } - else - { - // RTF badformed? We don't know about that font (font_index). - // Anyhow, we still open the html tag because in the future - // we will close it (e.g. when new font is used). - v = calloc(7, sizeof(char)); - sprintf(v, ""); - } - - if (fontInfo && fontInfo->charset) - { - if (fontInfo->charset == 1) - /* charset 1 is default charset */ - formattingOptions->charset = NULL; - else { - NSNumber *key = [NSNumber numberWithUnsignedChar: fontInfo->charset]; - formattingOptions->charset = NSMapGet(_charsets, key); - } - } - - [_html appendBytes: v length: strlen(v)]; - free(v); - } - else if ([s isEqualToString: @"i"] && formattingOptions) - { - [_html appendBytes: "" length: 3]; - formattingOptions->italic = YES; - } - else if ([s isEqualToString: @"i0"] && formattingOptions) - { - [_html appendBytes: "" length: 4]; - formattingOptions->italic = NO; - } - else if ([s isEqualToString: @"tab"]) - { - [_html appendBytes: "  " length: 12]; - } - else if ([s isEqualToString: @"softline"] || [s isEqualToString: @"par"]) - { - [_html appendBytes: "
" length: 4]; - } - else if ([s isEqualToString: @"strike"] && formattingOptions) - { - [_html appendBytes: "" length: 8]; - formattingOptions->strikethrough = YES; - } - else if ([s isEqualToString: @"strike0"] && formattingOptions) - { - [_html appendBytes: "" length: 9]; - formattingOptions->strikethrough = NO; - } - else if ([s hasPrefix: @"u"] && [s length] > 1 && - (isdigit([s characterAtIndex: 1]) || '-' == [s characterAtIndex: 1])) - { - int arg; - arg = [[s substringFromIndex: 1] intValue]; - if (arg < 0) - // a negative value means a value greater than 32767 - arg = 32767 - arg; - - uch = (unichar) arg; - s = [NSString stringWithCharacters: &uch length: 1]; - d = [s dataUsingEncoding: NSUTF8StringEncoding]; - [_html appendData: d]; - } - else if ([s isEqualToString: @"ul"] && formattingOptions) - { - [_html appendBytes: "" length: 3]; - formattingOptions->underline = YES; - } - else if (([s isEqualToString: @"ul0"] || [s isEqualToString: @"ulnone"]) - && formattingOptions) - { - [_html appendBytes: "" length: 4]; - formattingOptions->underline = NO; + case CW_ANSICPG: + parseAnsicpg(hasArg, arg, &defaultCharset); + break; + case CW_B: + parseB(self, hasArg, arg, formattingOptions); + break; + case CW_CF: + parseCf(self, hasArg, arg, formattingOptions, colorTable); + break; + case CW_COLORTBL: + parseColorTableWrapper(self, &colorTable); + break; + case CW_F: + parseF(self, hasArg, arg, formattingOptions, fontTable); + break; + case CW_FONTTBL: + parseFontTableWrapper(self, cw, &fontTable); + break; + case CW_I: + parseI(self, hasArg, arg, formattingOptions); + break; + case CW_PAR: + parsePar(self); + break; + case CW_PICT: + parsePictureWrapper(self, cw); + break; + case CW_SOFTLINE: + parseSoftline(self); + break; + case CW_STRIKE: + parseStrike(self, hasArg, arg, formattingOptions); + break; + case CW_STYLESHEET: + parseStyleSheetWrapper(self, cw); + break; + case CW_TAB: + parseTab(self); + break; + case CW_U: + parseU(self, hasArg, arg); + break; + case CW_UL: + parseUl(self, hasArg, arg, formattingOptions); + break; + case CW_ULNONE: + parseUl(self, YES, 0, formattingOptions); + break; + case CW_UNKNOWN: + default: + // do nothing + break; } // If a space delimits the control word, the space does not appear in the document. @@ -1225,7 +1383,7 @@ const unsigned short ansicpg874[256] = { formattingOptions->font_index = -1; formattingOptions->color_index = -1; formattingOptions->start_pos = [_html length]; - formattingOptions->charset = default_charset; + formattingOptions->charset = defaultCharset; [stack push: formattingOptions]; ADVANCE; }