I have a relatively large file that I needed to ensure contained only unique lines. The file is only 500MB. I understand that there is plenty of overhead, but I was seeing nearly 5GB of RAM usage. I could have done this with an external merge sort and kept the memory usage small (a rough sketch of that alternative is included below), but the hash-set approach seemed faster to code.
I am using VC++14.
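For reference, the external approach I had in mind would look roughly like this. It is only an untested sketch, not part of the code I'm asking about; the chunk size and the temporary file names are placeholders.

#include <algorithm>
#include <cstddef>
#include <fstream>
#include <queue>
#include <string>
#include <vector>

// Sort fixed-size chunks in memory and spill each one to a temporary run file.
std::vector<std::string> make_runs(const std::string &in, std::size_t max_lines) {
    std::ifstream fin(in);
    std::vector<std::string> run_names;
    std::vector<std::string> chunk;
    std::string line;
    auto flush = [&] {
        if (chunk.empty()) return;
        std::sort(chunk.begin(), chunk.end());
        run_names.push_back("run" + std::to_string(run_names.size()) + ".tmp");
        std::ofstream fout(run_names.back());
        for (const auto &l : chunk)
            fout << l << "\n";
        chunk.clear();
    };
    while (std::getline(fin, line)) {
        if (!line.empty())
            chunk.push_back(line);
        if (chunk.size() >= max_lines)
            flush();
    }
    flush();
    return run_names;
}

// K-way merge of the sorted runs, writing each distinct line exactly once.
void merge_runs(const std::vector<std::string> &runs, const std::string &out) {
    struct entry { std::string line; std::size_t idx; };
    auto cmp = [](const entry &a, const entry &b) { return a.line > b.line; };
    std::priority_queue<entry, std::vector<entry>, decltype(cmp)> heap(cmp);
    std::vector<std::ifstream> files;
    files.reserve(runs.size());
    for (std::size_t i = 0; i < runs.size(); ++i) {
        files.emplace_back(runs[i]);
        std::string line;
        if (std::getline(files.back(), line))
            heap.push({line, i});
    }
    std::ofstream fout(out);
    std::string last;
    bool first = true;
    while (!heap.empty()) {
        entry e = heap.top();
        heap.pop();
        if (first || e.line != last) {  // equal lines come out consecutively
            fout << e.line << "\n";
            last = e.line;
            first = false;
        }
        std::string next;
        if (std::getline(files[e.idx], next))
            heap.push({next, e.idx});
    }
}

int main() {
    auto runs = make_runs("file.txt", 1000000);  // placeholder chunk size
    merge_runs(runs, "output_file.txt");
}

Anyway, here is the hash-set version I actually wrote: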
#include <string>
#include <vector>
#include <fstream>
#include <iostream>
#include <algorithm>
#include <unordered_set>
using std::vector;
using std::string;
using std::unordered_set;
class uniqify {
    unordered_set<string> s;
public:
    auto exists(const string &filename) const -> bool {
        std::ifstream fin(filename);
        bool good = fin.good();
        return fin.close(), good;
    }
    void read(const string &filename) {
        std::ifstream input(filename);
        string line;
        while (std::getline(input, line))
            if (line.size())
                s.insert(line);
    }
    void write(const string &filename) const {
        std::ofstream fout(filename);
        for (auto line : s)
            fout << line << "\n";
        fout.close();
    }
};
int main(int argc, char **argv) {
    uniqify u;
    string file("file.txt");
    if(u.exists(file))
        u.read(file);
    u.write("output_file.txt");
    return 0;
}
What causes the RAM usage to skyrocket to roughly ten times the size of the file?
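For context, the fixed per-entry costs on a given toolchain can be inspected with a quick check like the one below. This is just a minimal sketch; the exact numbers depend on the standard library, the architecture, and the build settings.

#include <iostream>
#include <string>
#include <unordered_set>

int main() {
    // Size of the string object itself, before any heap buffer it may own.
    std::cout << "sizeof(std::string): " << sizeof(std::string) << "\n";

    // Each element of an unordered_set sits in its own heap-allocated node
    // (the string object plus per-node bookkeeping such as link pointers and,
    // in many implementations, the cached hash), and the container keeps a
    // separate bucket array on top of that.
    std::unordered_set<std::string> s{ "example" };
    std::cout << "bucket_count: " << s.bucket_count() << "\n";
    std::cout << "max_load_factor: " << s.max_load_factor() << "\n";
}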