In an algorithm I have to calculate the 75th percentile of a data set whenever I add a value. Right now I am doing this:
- Get value x
- Insert x into an already sorted array at the back
- Swap x down until the array is sorted
- Read the element at position array[array.size * 3/4]
The third step (swapping x down) is O(n), and the rest is O(1), but this is still quite slow, especially as the array gets larger. Is there any way to optimize this?
UPDATE
Thanks Nikita! Since I am using C++ this is the solution easiest to implement. Here is the code:
/// Maintains a running percentile over a growing collection of values.
///
/// The values are partitioned across two binary heaps kept in vectors:
///  - `_lower` is a max-heap holding the smallest
///    `floor(n * percentile) + 1` values (or all `n` values when that
///    target exceeds `n`), so its top element is the requested percentile;
///  - `_upper` is a min-heap holding the remaining, larger values.
///
/// `T` must be copyable and support `<`, `>` and `<=`.
template<class T>
class IterativePercentile {
public:
  /// @param percentile Fraction in the range [0, 1], e.g. 0.75 for the
  ///        75th percentile. 0 tracks the minimum, 1 tracks the maximum.
  ///        Values below 0 (and NaN) are clamped to 0, values above 1 are
  ///        clamped to 1, so the size computation in add() never converts
  ///        a negative or NaN double to an unsigned integer.
  IterativePercentile(double percentile)
    : _percentile(percentile >= 0.0 ? (percentile <= 1.0 ? percentile : 1.0)
                                    : 0.0)
  { }

  /// Adds a value in O(log(n)) heap operations (amortized, because the
  /// underlying vectors may reallocate).
  void add(const T& x) {
    // Route the new value to the heap it belongs to by value.
    if (_lower.empty() || x <= _lower.front()) {
      pushLower(x);
    } else {
      pushUpper(x);
    }

    // Number of elements the lower heap should hold for the current total.
    const size_type total = _lower.size() + _upper.size();
    const size_type size_lower =
        static_cast<size_type>(total * _percentile) + 1;

    // One insertion changes both the actual and the target size of the
    // lower heap by at most one, so a single transfer restores the balance.
    if (_lower.size() > size_lower) {
      // lower to upper: hand the largest "small" value over.
      std::pop_heap(_lower.begin(), _lower.end(), std::less<T>());
      pushUpper(_lower.back());
      _lower.pop_back();
    } else if (_lower.size() < size_lower && !_upper.empty()) {
      // upper to lower: hand the smallest "large" value over.
      // The emptiness guard matters when the target exceeds the number of
      // stored values (percentile == 1, or n * percentile rounding up to n):
      // without it the heap operations below would run on an empty vector.
      std::pop_heap(_upper.begin(), _upper.end(), std::greater<T>());
      pushLower(_upper.back());
      _upper.pop_back();
    }
  }

  /// Access the percentile in O(1).
  ///
  /// Precondition: at least one value has been added since construction or
  /// the last clear(). Calling this on an empty instance is undefined
  /// behaviour because it reads the front of an empty vector.
  const T& get() const {
    return _lower.front();
  }

  /// Discards all stored values; the configured percentile is kept.
  void clear() {
    _lower.clear();
    _upper.clear();
  }

private:
  typedef typename std::vector<T>::size_type size_type;

  /// Appends `value` to the max-heap of small values and restores the heap.
  void pushLower(const T& value) {
    _lower.push_back(value);
    std::push_heap(_lower.begin(), _lower.end(), std::less<T>());
  }

  /// Appends `value` to the min-heap of large values and restores the heap.
  void pushUpper(const T& value) {
    _upper.push_back(value);
    std::push_heap(_upper.begin(), _upper.end(), std::greater<T>());
  }

  double _percentile;      ///< Requested fraction, clamped to [0, 1].
  std::vector<T> _lower;   ///< Max-heap; front() is the current percentile.
  std::vector<T> _upper;   ///< Min-heap of the values above the percentile.
};
 
     
     
     
     
     
     
    