There are works on extending ImageDataGenerator to be more flexible for exactly these type of cases (see in this issue on Github for examples).
Additionally, as mentioned by Mikael Rousson in the comments, you can easily create your own version of ImageDataGenerator yourself, while leveraging many of its built-in functions to make it easier. Here is an example code I've used for an image denoising problem, where I use random crops + additive noise to generate clean and noisy image pairs on the fly. You could easily modify this to add other types of augmentations. After which, you can use Model.fit_generator to train using these methods.
from keras.preprocessing.image import load_img, img_to_array, list_pictures
def random_crop(image, crop_size):
    height, width = image.shape[1:]
    dy, dx = crop_size
    if width < dx or height < dy:
        return None
    x = np.random.randint(0, width - dx + 1)
    y = np.random.randint(0, height - dy + 1)
    return image[:, y:(y+dy), x:(x+dx)]
def image_generator(list_of_files, crop_size, to_grayscale=True, scale=1, shift=0):
    while True:
        filename = np.random.choice(list_of_files)
        try:
            img = img_to_array(load_img(filename, to_grayscale))
        except:
            return
        cropped_img = random_crop(img, crop_size)
        if cropped_img is None:
            continue
        yield scale * cropped_img - shift
def corrupted_training_pair(images, sigma):
    for img in images:
        target = img
        if sigma > 0:
            source = img + np.random.normal(0, sigma, img.shape)/255.0
        else:
            source = img
        yield (source, target)
def group_by_batch(dataset, batch_size):
    while True:
        try:
            sources, targets = zip(*[next(dataset) for i in xrange(batch_size)])
            batch = (np.stack(sources), np.stack(targets))
            yield batch
        except:
            return
def load_dataset(directory, crop_size, sigma, batch_size):
    files = list_pictures(directory)
    generator = image_generator(files, crop_size, scale=1/255.0, shift=0.5)
    generator = corrupted_training_pair(generator, sigma)
    generator = group_by_batch(generator, batch_size)
    return generator
You can then use the above like so:
train_set = load_dataset('images/train', (patch_height, patch_width), noise_sigma, batch_size)
val_set = load_dataset('images/val', (patch_height, patch_width), noise_sigma, batch_size)
model.fit_generator(train_set, samples_per_epoch=batch_size * 1000, nb_epoch=nb_epoch, validation_data=val_set, nb_val_samples=1000)