I am working on a small piece of code which transports a density from one timestep to another iteratively until the destination time is reached.
The code I am working on has the following form:
- initialize
- ping-pong between two buffers now and next - compute new refinement level for next
- for each value of the now vector some quantity gets added to next.
 
My question: is there a general pattern how to speed up such a piece of code?
Side-question: can I implement this in a nicer/better way using stl algorithms?
// Ping-pong time stepping: `now` points at the buffer holding the current
// timestep and `next` at the buffer receiving the following one. The two
// pointers are exchanged after every step.
std::vector<double> ping;
std::vector<double> pong;
ping.reserve(1000000);
pong.reserve(1000000);
std::vector<double>* now = &ping;
std::vector<double>* next = &pong;
Initialize(now);  // Fill first timestep
for (size_t t = 0; t < 1000; t++)  // timesteps
{
    const size_t M = now->size();                // element count of the current step
    const size_t N = calcNewRefinement(t, now);  // element count of the next step

    // Start every step from zero. resize() alone would keep whatever this
    // buffer held two timesteps ago, and the += below would add onto it.
    next->assign(N, 0.0);

    for (size_t i = 0; i < N; i++)  // all elements of the next step
    {
        for (size_t j = 0; j < M; j++)  // all elements of the current step
        {
            // The boundary tests are made against M (the size of *now), not N,
            // so that j-1 and j+1 always stay inside *now.
            if (j == 0)
            {
                // Left boundary: only a right neighbour exists. With a single
                // element there is no neighbour at all, so nothing is added.
                // NOTE(review): confirm the desired handling of M == 1.
                if (M > 1)
                    (*next)[i] += ExpensiveFunction2((*now)[j], (*now)[j+1], (*next)[i]);
            }
            else if (j + 1 == M)
            {
                // Right boundary: only a left neighbour exists.
                (*next)[i] += ExpensiveFunction3((*now)[j-1], (*now)[j], (*next)[i]);
            }
            else
            {
                // Interior element: both neighbours exist.
                (*next)[i] += ExpensiveFunction((*now)[j-1], (*now)[j], (*now)[j+1], (*next)[i]);
            }
        }
    }

    // Exchange the roles of the two buffers for the following timestep.
    std::vector<double>* intermediate = now;
    now = next;
    next = intermediate;
}
 
    