I want to serialize a struct on a 32-bit aligned platform (ARMv7). The idea is to serialize the struct members (which I extract with some metaprogramming) into a std::byte array and then copy that into a std::uint32_t array (the output buffer).
The two serializers look like this:
// Serialize to a std::byte range.
//
// Visits every member of `value` via visit_struct::for_each and copies each
// member's object representation, back to back, into the destination that
// starts at `iterator`. This overload participates only when the iterator's
// value_type is std::byte.
//
// Returns the total number of bytes written.
template <typename T, class OutputIterator, 
  typename std::enable_if_t<
    std::is_same<typename std::iterator_traits<OutputIterator>::value_type, 
      std::byte>::value, int> = 0>
std::size_t serialize(const T& value, OutputIterator iterator)
{
  std::size_t written = 0;
  visit_struct::for_each(value,
  [&](const char*, const auto& member)
  {
    // sizeof on a reference yields the size of the referenced member type.
    const std::size_t member_size = sizeof(member);
    const auto* first = reinterpret_cast<const std::byte*>(&member);
    // Place this member directly after everything written so far.
    std::copy_n(first, member_size, std::next(iterator, written));
    written += member_size;
  });
  return written;
}
// serialize to std::uint32_t array
template <typename T, class OutputIterator, 
  typename std::enable_if_t<
    std::is_same<typename std::iterator_traits<OutputIterator>::value_type, 
      std::uint32_t>::value, int> = 0>
std::size_t serialize(const T& value, OutputIterator iterator)
{
  constexpr std::size_t type_size = ext::mock_maker<T>::size;
  constexpr std::size_t aligned_type_size = (type_size + 4 - 1) / 4;
  std::array<std::byte, type_size> raw;
  serialize(value, raw.begin()); 
  auto raw_aligned = reinterpret_cast<std::uint32_t const*>(raw.data());
  std::copy(raw_aligned, std::next(raw_aligned, aligned_type_size), iterator);
  return aligned_type_size;
}
My hope was that the compiler could somehow optimize away the intermediate std::byte array representation, but my test implementation suggests otherwise. Is there a way to achieve this elegantly?
