I'd like to offer another solution that utilizes the speed and efficiency of the programming paradigm at the very core of Node: events.
Everything you can do with Promises or modules designed to manage flow-control, like async, can be accomplished using events and a simple state-machine, which I believe offers a methodology that is, perhaps, easier to understand than other options.
For example, assume you wish to sum the lengths of multiple files in parallel:
const EventEmitter = require('events').EventEmitter;
const fs = require('fs');
// simple event-driven state machine: the handlers registered on this
// emitter act as the states, and emitting an event moves to that state
const sm = new EventEmitter();
// running state shared by the handlers; declared const because the object is
// only ever mutated in place, never rebound
const context = {
  tasks:    0,    // number of total tasks
  active:   0,    // number of active tasks
  results:  []    // task results
};
/**
 * Marks one task chain as finished. Must be called when each task chain
 * completes.
 *
 * @param {*} [result] - Value produced by the chain. It is appended to
 *   `context.results` unless it is `undefined` or `null`.
 */
const next = (result) => {
  // Preserve every real result, including falsy ones such as 0 (the length
  // of an empty file); only a missing result is skipped.
  if (result !== undefined && result !== null) {
    context.results.push(result);
  }
  // decrement the number of running tasks
  context.active -= 1;
  // when all tasks complete, trigger done state
  if (context.active === 0) {
    sm.emit('done');
  }
};
// operational states
// start state - records the task count in the shared context, then hands
// the path list on to the forEachPath state
sm.on('start', (paths) => {
  const { length: count } = paths;
  console.log(`start: beginning processing of ${count} paths`);
  // total and still-active task counts both begin at the number of paths
  Object.assign(context, { tasks: count, active: count });
  sm.emit('forEachPath', paths);    // go to next state
});
// forEachPath state - kicks off one readPath chain per entry in the list
sm.on('forEachPath', (paths) => {
  const count = paths.length;
  console.log(`forEachPath: starting ${count} process chains`);
  const launch = (path) => {
    sm.emit('readPath', path);
  };
  paths.forEach(launch);
});
// readPath state - loads the file at `path`, then moves to processContent
// with its text, or to the error state if the read fails
sm.on('readPath', (path) => {
  console.log(`  readPath: ${path}`);
  const onRead = (err, buf) => {
    if (!err) {
      sm.emit('processContent', buf.toString(), path);
      return;
    }
    sm.emit('error', err);
  };
  fs.readFile(path, onRead);
});
// processContent state - reports the length of the content read from `path`
// as this chain's result
sm.on('processContent', (str, path) => {
  console.log(`  processContent: ${path}`);
  const { length } = str;
  next(length);
});
// done state - runs when processing is complete; sums the collected results
// and prints the total
sm.on('done', () => {
  // Seed the fold with 0 so that an empty result list yields a total of 0
  // instead of reduce() throwing a TypeError.
  const total = context.results.reduce((sum, n) => sum + n, 0);
  console.log(`The total of ${context.tasks} files is ${total}`);
});
// error state - rethrows whatever error was emitted
sm.on('error', (failure) => {
  throw failure;
});
// ======================================================
// start processing - ok, let's go
// ======================================================
// entering the start state with the list of paths to process
const inputPaths = ['file1', 'file2', 'file3', 'file4'];
sm.emit('start', inputPaths);
Running this produces output like the following:
start: beginning processing of 4 paths
forEachPath: starting 4 process chains
  readPath: file1
  readPath: file2
  processContent: file1
  readPath: file3
  processContent: file2
  processContent: file3
  readPath: file4
  processContent: file4
The total of 4 files is 4021
Note that the ordering of the process chain tasks is dependent upon system load.
You can envision the program flow as:
start -> forEachPath -+-> readPath1 -> processContent1 -+-> done
                      +-> readPath2 -> processContent2 -+
                      +-> readPath3 -> processContent3 -+
                      +-> readPath4 -> processContent4 -+
For reuse, it would be trivial to create a module to support the various flow-control patterns, i.e. series, parallel, batch, while, until, etc.