I used tm package from R for text mining. This is what my code looks like:
library(tm)
Load the data in R
pathToData = "R/group_data"
 newsCorpus = Corpus(DirSource(pathToData, recursive = TRUE), 
                readerControl = list(reader = readPlain))
Length of news corpus
      length(newsCorpus)
Pre-processsing the corpus data
newsCorpus = tm_map(newsCorpus,removePunctuation)
newsCorpus[["103806"]]
newsCorpus = tm_map(newsCorpus,removeNumbers)
newsCorpus[["103806"]]
newsCorpus = tm_map(newsCorpus, content_transformer(tolower))
newsCorpus[["103806"]]
newsCorpus = tm_map(newsCorpus, removeWords, stopwords("english"))
newsCorpus[["103806"]]
newsCorpus = tm_map(newsCorpus, stripWhitespace)
newsCorpus[["103806"]]
Corpus elements to plain text
newsCorpus = Corpus(VectorSource(newsCorpus))
Document Term matrix with TFIDF weights
docTermMatrix = DocumentTermMatrix(newsCorpus, 
                               control = list(weighting = weightTfIdf, 
                                              minWordLength = 1,
                                              minDocFreq = 1))                                                  
                                              
Dimensions of resulting matrix
dim(docTermMatrix)
The docTermMatrix looks like this:
<<DocumentTermMatrix (documents: 1986, terms: 22213)>>
 Non-/sparse entries: 173995/43941023
 Sparsity           : 100%
 Maximal term length: 163
 Weighting          : term frequency - inverse document frequency (normalized) (tf-idf)
Now I want to inspect the docTermMatrix for the document "101287" and look for the terms "textmining", "clustering". But since the document term matrix has changed the document names(rows) to 1,2,3,4... , I can no longer find the document named "101287" and look for the columns "textmining", "clustering". Is there a way I can preserve the document name ? Apologies if I am missing on something..
Output from R for the above code
> library(tm)
  > pathToData = "R/group_data"
  > newsCorpus = Corpus(DirSource(pathToData, recursive = TRUE), 
              readerControl = list(reader = readPlain))
 > length(newsCorpus)
    [1] 1986
 > newsCorpus[["103806"]]
  <<PlainTextDocument (metadata: 7)>>
  From: cheekeen@tartarus.uwa.edu.au (Desmond Chan)
  Subject: Re: Honda clutch chatter
  Organization: The University of Western Australia
  Lines: 8
  NNTP-Posting-Host: tartarus.uwa.edu.au
  X-Newsreader: NN version 6.4.19 #1
  I also experience this kinda problem in my 89 BMW 318. During cold
  start ups, the clutch seems to be sticky and everytime i drive out, for
  about 5km, the clutch seems to stick onto somewhere that if i depress
  the clutch, the whole chassis moves along. But after preheating, it
  becomes smooth again. I think that your suggestion of being some
  humudity is right but there should be some remedy. I also found out that
  my clutch is already thin but still alright for a couple grand more!
 > newsCorpus = tm_map(newsCorpus,removePunctuation)
 > newsCorpus = tm_map(newsCorpus,removeNumbers) 
 > newsCorpus = tm_map(newsCorpus, content_transformer(tolower))
 > newsCorpus = tm_map(newsCorpus, removeWords, stopwords("english")) 
 > newsCorpus = tm_map(newsCorpus, stripWhitespace)
 > newsCorpus = Corpus(VectorSource(newsCorpus)) 
 > docTermMatrix = DocumentTermMatrix(newsCorpus, control = list(weighting =     weightTfIdf,minWordLength = 1,minDocFreq = 1))  
                                                                                              
                                              
 > dim(docTermMatrix)
 [1]  1986 22213
>inspect(docTermMatrix["1","bmw"])
<<DocumentTermMatrix (documents: 1, terms: 1)>>
Non-/sparse entries: 0/1
Sparsity           : 100%
Maximal term length: 3
Weighting          : term frequency - inverse document frequency (normalized) (tf-idf)
    Terms
Docs bmw
  1   0
>inspect(docTermMatrix["103806", "bmw"])
Error in `[.simple_triplet_matrix`(docTermMatrix, "103806", "bmw") : 
Subscript out of bounds.
 
     
    