Numeric Character Reference转NSString

在处理数据的时候,有时会遇到以 &# 开头的数据,例如 `&#931;`(即字符 Σ)。这种写法就是 Numeric Character Reference(NCR)编码。
NCR编码由一个与号(&)、一个井号(#)、该字符的Unicode编码值以及结尾的一个分号组成, 如:

    &#nnnn; 或 &#xhhhh;

其中, nnnn是字符编码的十进制表示, 而hhhh是字符的16进制表示.

/**
 * Decodes HTML/XML Numeric Character References in the receiver.
 *
 * Both the decimal form ("&#931;") and the hexadecimal form ("&#x3A3;") are
 * recognised. Text that is not part of a reference is copied through
 * unchanged. A malformed reference (no digits, missing ';', value 0, a
 * surrogate value, or a value above U+10FFFF) is left in the output exactly
 * as written.
 *
 * @return The receiver itself when it contains no "&#" sequence; otherwise a
 *         new string with every well-formed reference replaced by the
 *         character it denotes.
 */
- (NSString *)toUnicodeString {
    if (![self containsString:@"&#"]) {
        return self;
    }

    NSUInteger length = self.length;
    NSMutableString *decoded = [NSMutableString stringWithCapacity:length];
    NSUInteger cursor = 0;

    while (cursor < length) {
        NSRange marker = [self rangeOfString:@"&#"
                                     options:NSLiteralSearch
                                       range:NSMakeRange(cursor, length - cursor)];
        if (marker.location == NSNotFound) {
            // No further references: copy the remaining literal text.
            [decoded appendString:[self substringFromIndex:cursor]];
            break;
        }

        // Copy the literal text that precedes the reference.
        [decoded appendString:[self substringWithRange:NSMakeRange(cursor, marker.location - cursor)]];

        NSUInteger digitsStart = NSMaxRange(marker);
        UTF32Char radix = 10;
        if (digitsStart < length) {
            unichar prefix = [self characterAtIndex:digitsStart];
            if (prefix == 'x' || prefix == 'X') {
                radix = 16;
                digitsStart++;
            }
        }

        UTF32Char codePoint = 0;
        NSUInteger index = digitsStart;
        while (index < length) {
            unichar ch = [self characterAtIndex:index];
            UTF32Char digit;
            if (ch >= '0' && ch <= '9') {
                digit = (UTF32Char)(ch - '0');
            } else if (radix == 16 && ch >= 'a' && ch <= 'f') {
                digit = (UTF32Char)(ch - 'a') + 10;
            } else if (radix == 16 && ch >= 'A' && ch <= 'F') {
                digit = (UTF32Char)(ch - 'A') + 10;
            } else {
                break;
            }
            // Stop accumulating once the value is out of Unicode range so an
            // arbitrarily long digit run cannot overflow; the value then
            // stays above 0x10FFFF and is rejected below.
            if (codePoint <= 0x10FFFF) {
                codePoint = codePoint * radix + digit;
            }
            index++;
        }

        BOOL terminated = (index > digitsStart) &&
                          (index < length) &&
                          ([self characterAtIndex:index] == ';');
        BOOL isSurrogate = (codePoint >= 0xD800 && codePoint <= 0xDFFF);
        BOOL isScalar = (codePoint > 0) && (codePoint <= 0x10FFFF) && !isSurrogate;

        if (!terminated || !isScalar) {
            // Not a well-formed reference: keep the "&#" literally and
            // resume scanning right after it.
            [decoded appendString:@"&#"];
            cursor = NSMaxRange(marker);
            continue;
        }

        unichar units[2];
        NSUInteger unitCount;
        if (codePoint <= 0xFFFF) {
            units[0] = (unichar)codePoint;
            unitCount = 1;
        } else {
            // Code points beyond the BMP are stored as a UTF-16 surrogate pair.
            UTF32Char offset = codePoint - 0x10000;
            units[0] = (unichar)(0xD800 + (offset >> 10));
            units[1] = (unichar)(0xDC00 + (offset & 0x3FF));
            unitCount = 2;
        }
        [decoded appendString:[NSString stringWithCharacters:units length:unitCount]];

        cursor = index + 1; // Skip past the terminating ';'.
    }

    return decoded;
}

/**
 * Decodes the receiver by running it through the system HTML importer.
 *
 * The receiver is encoded as UTF-8, parsed as an HTML document by
 * NSAttributedString, and the resulting plain text is returned. Because the
 * input is treated as HTML, any markup it contains is interpreted as well,
 * not only numeric character references.
 *
 * NOTE(review): Apple's documentation says the HTML importer should not be
 * called from a background thread — confirm callers invoke this on the main
 * thread.
 *
 * @return The plain-text content produced by the importer, or the receiver
 *         itself when it cannot be encoded or parsed.
 */
- (NSString *)toUnicodeString2 {
    NSData *encodedData = [self dataUsingEncoding:NSUTF8StringEncoding];
    if (encodedData == nil) {
        return self;
    }

    // State the encoding explicitly; otherwise the importer has to guess the
    // charset of the raw bytes and can mis-decode non-ASCII literal text.
    NSDictionary *options = @{
        NSDocumentTypeDocumentAttribute : NSHTMLTextDocumentType,
        NSCharacterEncodingDocumentAttribute : @(NSUTF8StringEncoding),
    };

    NSAttributedString *attributedString = [[NSAttributedString alloc] initWithData:encodedData
                                                                            options:options
                                                                 documentAttributes:nil
                                                                              error:nil];
    if (attributedString == nil) {
        return self;
    }

    return attributedString.string;
}
