I am trying to convert a PDF document that contains Tamil Unicode characters into a Word document while retaining all the formatting. I am not able to read the Unicode characters from the PDF; they appear as junk characters in the Word document. I am using the code below. Can someone please help?
/**
 * Extracts the text of every page of a hard-coded input PDF and writes it to a
 * hard-coded .docx file, one paragraph per PDF page.
 *
 * <p>Only plain text is transferred: each page's text comes from
 * {@code SimpleTextExtractionStrategy} and is placed in a single run with font
 * size 14. Line breaks found in the extracted text are written as explicit
 * run breaks so the line structure of the page is kept.
 *
 * <p>NOTE(review): if the extracted text already prints as junk on the console,
 * the problem is presumably in the PDF's font encoding (for example a missing
 * Unicode mapping) rather than in the Word-writing code; confirm against the
 * source PDF. This method cannot repair such text.
 *
 * @param args unused
 * @throws IOException if the PDF cannot be read or the .docx cannot be written
 */
public static void main(String[] args) throws IOException {
    System.out.println("Document converted started");
    XWPFDocument doc = new XWPFDocument();
    String pdf = "D:\\sample1.pdf";
    PdfReader reader = new PdfReader(pdf);
    // try/finally guarantees the reader is released even when extraction or writing fails.
    try {
        PdfReaderContentParser parser = new PdfReaderContentParser(reader);
        int pageCount = reader.getNumberOfPages();
        // PDF page numbers are 1-based.
        for (int page = 1; page <= pageCount; page++) {
            TextExtractionStrategy strategy = parser.processContent(page,
                    new SimpleTextExtractionStrategy());
            String text = strategy.getResultantText();
            System.out.println(text);

            XWPFParagraph paragraph = doc.createParagraph();
            XWPFRun run = paragraph.createRun();
            run.setFontSize(14);

            // setText stores the string as-is, so embedded newlines would not show up
            // as line breaks in Word; emit an explicit break between lines instead.
            String[] lines = text.split("\r?\n", -1);
            for (int j = 0; j < lines.length; j++) {
                if (j > 0) {
                    run.addBreak();
                }
                run.setText(lines[j]);
            }
        }
        // try-with-resources closes the output file even if doc.write throws.
        try (FileOutputStream out = new FileOutputStream("D:\\tamildoc.docx")) {
            doc.write(out);
        }
    } finally {
        reader.close();
    }
    System.out.println("Document converted successfully");
}