I am trying to index each word in a text file using Java.
By "index" I mean a word index: each word listed together with the poem number and line number where it occurs.
This is my sample file: https://pastebin.com/hxB8t56p (the actual file I want to index is much larger).
This is the code I have tried so far:
// Builds a word index for a poem file.
//  1. Split the whole file into whitespace-separated tokens (ar).
//  2. Re-read the text line by line and label every line "<song>.<line>(<text>)" (sen).
//  3. Pair every token with the label of every line that contains it (fin / word).
//  4. De-duplicate the pairs, sort them with a Tamil collator and print them.
ArrayList<String> ar = new ArrayList<>();   // every token produced by splitting the file on whitespace
ArrayList<String> sen = new ArrayList<>();  // one "<song>.<line>(<text>)" entry per line read
ArrayList<String> fin = new ArrayList<>();  // "<token> - <song>.<line>" index entries
ArrayList<String> word = new ArrayList<>(); // first whitespace-delimited piece of each index entry

String content = new String(Files.readAllBytes(Paths.get("D:\\folder\\poem.txt")), StandardCharsets.UTF_8);
// Splitting on a single "\\s" yields empty tokens for runs of whitespace;
// they are kept here and skipped when matching below.
String[] split = content.split("\\s");
Collections.addAll(ar, split);

int count = 1;    // line number within the current song; restarts on every line containing a digit
int songnum = 0;  // number taken from the most recent line that starts with digits

// NOTE(review): this path ("poemt.txt") differs from the one read above ("poem.txt");
// confirm which file name is intended.
// The reader is closed by try-with-resources and decodes as UTF-8, the same charset
// used for the token pass above, so both passes see identical characters.
try (BufferedReader br = new BufferedReader(new InputStreamReader(
        new FileInputStream("D:\\folder\\poemt.txt"), StandardCharsets.UTF_8))) {
    String strLine;
    while ((strLine = br.readLine()) != null) {
        String text = strLine.replaceAll("[0-9]", ""); // line text without any ASCII digits

        // Take only the leading run of ASCII digits as the song number. Splitting on
        // "(?=\\D)" could return a piece such as " 12" (a non-digit followed by digits),
        // which made Integer.parseInt throw NumberFormatException.
        int digitsEnd = 0;
        while (digitsEnd < strLine.length()
                && strLine.charAt(digitsEnd) >= '0'
                && strLine.charAt(digitsEnd) <= '9') {
            digitsEnd++;
        }
        if (digitsEnd > 0) {
            songnum = Integer.parseInt(strLine.substring(0, digitsEnd));
        }

        // Any digit anywhere in the line restarts the per-song line counter.
        if (strLine.matches(".*\\d+.*")) {
            count = 1;
        }

        sen.add(songnum + "." + count + "(" + text + ")");
        count++;
    }
}

// Pair each non-empty token with every labelled line that contains it.
// NOTE(review): contains() is a substring test against the whole label, so a token such
// as "the" also matches a line containing "then", and a token can match the
// "<song>.<line>(" prefix itself; confirm whether whole-word matching is wanted.
for (String entry : sen) {
    for (String token : ar) {
        if (token.isEmpty() || !entry.contains(token)) {
            continue;
        }
        // Drop the "(<text>)" part so only "<token> - <song>.<line>" remains.
        String x = (token + " - " + entry).replaceAll("\\(.*\\)", "");
        String[] sp = x.split("\\s+");
        word.add(sp[0]);
        fin.add(x);
    }
}

// Remove duplicate entries while keeping first-seen order, then sort.
Set<String> listWithoutDuplicates = new LinkedHashSet<>(fin);
fin.clear();
fin.addAll(listWithoutDuplicates);

// "ta" is the ISO 639 language code for Tamil.
Locale tamil = new Locale("ta");
Collator tamilCollator = Collator.getInstance(tamil);
Collections.sort(fin, tamilCollator);
System.out.println(fin);
    (change in blossom. - 0.2,1.2, &  the - 0.1,1.2, & then - 0.1,1.2)
 
    