I think this is a reasonable starting point if you favour performance over readability.
attempt 1
Basically I am avoiding any memory allocations.
#include <string>
#include <vector>
#include <bitset>
#include <iostream>
#include <iterator>
#include <tuple>
#include <array>
template<class From, std::size_t N>
auto
select(From const& from, std::bitset<N> const& bits)
{
    std::array<const std::string*, N> result { nullptr };
    auto i = std::begin(result);
    std::size_t found;
    std::size_t count = found = bits.count();
    std::size_t index = 0;
    while (count)
    {
        if (bits.test(index)) {
            *i++ = &from[index];
            --count;
        }
        ++index;
    }
    return std::make_tuple(found, result);
}
int main()
{
    std::vector<std::string> alphabet = { "a", "b", "c", "d", "e", "f", "g", "h" };
    for (unsigned x = 0 ; x < 256 ; ++x)
    {
        auto info = select(alphabet, std::bitset<8>(x));
        auto ptrs = std::get<1>(info).data();
        auto size = std::get<0>(info);
        while(size--)
        {
            std::cout << *(*ptrs++) << ", ";
        }
        std::cout << '\n';
    }
}
attempt 2 - runtime lookup is in the order of nanoseconds...
Here I am pre-computing all possible alphabets at compile time.
The run-time is of course, blindingly fast. However, an alphabet of more than say, 14 characters could take a while to compile...
update: warning! when I set the alphabet size to 16 clang to consumed 32GB of memory, stopped every other application on the desktop and required a reboot of my macbook before I could do anything else. You have been warned.
#include <string>
#include <vector>
#include <bitset>
#include <iostream>
#include <iterator>
#include <tuple>
#include <array>
template<class From, std::size_t N>
auto
select(From const& from, std::bitset<N> const& bits)
{
    std::array<const std::string*, N> result { nullptr };
    auto i = std::begin(result);
    std::size_t found;
    std::size_t count = found = bits.count();
    std::size_t index = 0;
    while (count)
    {
        if (bits.test(index)) {
            *i++ = &from[index];
            --count;
        }
        ++index;
    }
    return std::make_tuple(found, result);
}
template<std::size_t Limit>
struct alphabet
{
    constexpr alphabet(std::size_t mask)
    : size(0)
    , data { }
    {
        for (std::size_t i = 0 ; i < Limit ; ++i)
        {
            if (mask & (1 << i))
                data[size++] = char('a' + i);
        }
    }
    std::size_t size;
    char data[Limit];
    friend decltype(auto) operator<<(std::ostream& os, alphabet const& a)
    {
        auto sep = "";
        for (std::size_t i = 0 ; i < a.size; ++i)
        {
            std::cout << sep << a.data[i];
            sep = ", ";
        }
        return os;
    }
};
template<std::size_t Limit>
constexpr alphabet<Limit> make_iteration(std::size_t mask)
{
    alphabet<Limit> result { mask };
    return result;
}
template<std::size_t Limit, std::size_t...Is>
constexpr auto make_iterations(std::index_sequence<Is...>)
{
    constexpr auto result_space_size = sizeof...(Is);
    std::array<alphabet<Limit>, result_space_size> result
    {
        make_iteration<Limit>(Is)...
    };
    return result;
}
template<std::size_t Limit>
constexpr auto make_iterations()
{
    return make_iterations<Limit>(std::make_index_sequence<std::size_t(1 << Limit) - 1>());
}
int main()
{
    static constexpr auto alphabets = make_iterations<8>();
    for(const auto& alphabet : alphabets)
    {
        std::cout << alphabet << std::endl;
    }
}
attempt 3
Using a very rudimentary fixed capacity container of pointers to matching elements. I've added constexpr. This won't improve the majority of cases but could certainly improve statically allocated selections.
#include <vector>
#include <bitset>
#include <iostream>
#include <iterator>
#include <tuple>
#include <array>
namespace quick_and_dirty {
template<class T, std::size_t Capacity>
struct fixed_capacity_vector
{
    using value_type = T;
    constexpr fixed_capacity_vector()
    : store_()
    , size_(0)
    {}
    constexpr auto push_back(value_type v)
    {
        store_[size_] = std::move(v);
        ++ size_;
    }
    constexpr auto begin() const { return store_.begin(); }
    constexpr auto end() const { return begin() + size_; }
private:
    std::array<T, Capacity> store_;
    std::size_t size_;
};
} // namespace quick_and_dirty
template<class From, std::size_t N>
constexpr
auto
select(From const& from, std::bitset<N> const& bits)
{
    using value_type = typename From::value_type;
    using ptr_type = std::add_pointer_t<std::add_const_t<value_type>>;
    auto result = quick_and_dirty::fixed_capacity_vector<ptr_type, N>();
    std::size_t count = bits.count();
    for (std::size_t index = 0 ; count ; ++index)
    {
        if (bits.test(index)) {
            result.push_back(&from[index]);
            --count;
        }
    }
    return result;
}
int main()
{
    std::vector<std::string> alphabet = { "a", "b", "c", "d", "e", "f", "g", "h" };
    for (unsigned x = 0 ; x < 256 ; ++x)
    {
        for(auto p : select(alphabet, std::bitset<8>(x)))
        {
            std::cout << (*p) << ", ";
        }
        std::cout << '\n';
    }
}