I have an array of nearly 1,000,000 records, and each record has a "filename" field.
Many of the records have exactly the same filename.
My goal is to reduce the memory footprint by deduplicating the string instances (the filename instances, not the records).
.NET Framework 2.0 is a constraint, so no LINQ here.
I wrote a generic (and thread-safe) class for the deduplication:
using System.Collections.Generic;

public class Deduplication<T>
    where T : class
{
    private static Deduplication<T> _global = new Deduplication<T>();
    public static Deduplication<T> Global
    {
        get { return _global; }
    }
    // Maps each value to its canonical (first-seen) instance; created lazily in GetInstance.
    private Dictionary<T, T> _dic;
    private object _dicLocker = new object();
    // Returns the canonical instance equal to 'instance', registering it on first sight.
    public T GetInstance(T instance)
    {
        lock (_dicLocker)
        {
            if (_dic == null)
            {
                _dic = new Dictionary<T, T>();
            }
            T savedInstance;
            if (_dic.TryGetValue(instance, out savedInstance))
            {
                return savedInstance;
            }
            else
            {
                _dic.Add(instance, instance);
                return instance;
            }
        }
    }
    // Drops the dictionary so its entries can be collected.
    public void Clear()
    {
        lock (_dicLocker)
        {
            _dic = null;
        }
    }
}
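For context, here is a minimal sketch of how I call it while loading the records (the Record class, its FileName field and the DeduplicateFileNames method are just placeholder names for illustration; my real types differ):

public class Record
{
    public string FileName;
}

public static void DeduplicateFileNames(Record[] records)
{
    foreach (Record record in records)
    {
        // Point every record at the canonical string instance,
        // so the duplicate strings become unreachable.
        record.FileName = Deduplication<string>.Global.GetInstance(record.FileName);
    }

    // Drop the dictionary once all records have been processed.
    Deduplication<string>.Global.Clear();
}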
The problem with this class is that it adds a lot of extra memory usage, and that memory stays around until the next GC.
I'm searching for a way to reduce the memory footprint without adding a lot of extra memory usage and without waiting for the next GC. I also don't want to use GC.Collect(), because it freezes the GUI for a couple of seconds.