I am trying to convert an html to pdf using ITextSharp library version 5.5.13.2. For some reason, the Romanian special characters (ș, ț, ă, î, â) that exists inside the html are omitted and does not appear in the pdf file. Could you please help me?
var htmlString = "Some html string containing romanian characters";
byte[] byteArray = Encoding.Unicode.GetBytes(Format.Invariant(htmlString));
Stream reader = new MemoryStream(byteArray);
Document document = new Document(PageSize.A4, 30, 30, 30, 30);
PdfWriter writer = PdfWriter.GetInstance(document, msOutput);
document.AddTitle(PdfTitle);
document.AddSubject(PdfSubject);
document.AddAuthor(PdfAuthor);
document.AddCreator(PdfCreator);
document.AddKeywords(PdfKeyWords);
document.Open();
XMLWorkerHelper.GetInstance().ParseXHtml(writer, document, reader, null, Encoding.Unicode, new TimesNewRomanUnicodeFontFactory());
document.Close();
And my TimesNewRomanUnicodeFontFactory is like this:
private class TimesNewRomanUnicodeFontFactory : FontFactoryImp
{
private readonly BaseFont _baseFont;
public TimesNewRomanUnicodeFontFactory()
{
_baseFont = BaseFont.CreateFont(FontFactory.TIMES_ROMAN, BaseFont.CP1250, BaseFont.EMBEDDED);
}
}
Could you please help me?