This question is related to another post of mine: why allocate_shared and make_shared so slow
Here I can describe the question more clearly.
Consider the following code:
// Plain aggregate wrapping a 0x10000-byte (64 KiB) char buffer.
// It declares no constructor of its own.
struct A {
    char data_[0x10000];
};
// Holder for a single A whose constructor names the member with an
// explicit, empty parenthesized initializer.
class C {
public:
    A a_;

    // `a_()` requests value-initialization of the member; this is the
    // construct the question is about.
    C()
        : a_()
    {
    }
};
// Entry point of the minimal example: creates one local C (running C's
// constructor) and returns 0.
int main() {
    C c;
    return 0;
}
I found that for the initializer C() : a_(), the compiler emits memset(addr,0,0x10000) to construct A. But if the type A has an empty constructor, the generated asm code is as expected.
To describe the issue more clearly, I wrote some test code:
#include <stdlib.h>
#include <utility>
// Test aggregate wrapping a 0x10000-byte char buffer. No constructor is
// declared; the commented-out line below is the toggle used for comparison.
struct A {
    //A() {}
    char data_[0x10000];

    // Writes 1 to a rand()-selected slot so the compiler cannot discard
    // the object as unused.
    void dummy() {
        const auto slot = rand() % sizeof(data_);
        data_[slot] = 1;
    }

    // Reads the first byte back, again so the compiler cannot discard
    // the object as unused.
    int dummy2() {
        const int first = data_[0];
        return first;
    }
};
// Wrapper that constructs its A member from whatever arguments the caller
// supplies. When constructed with no arguments the pack is empty and the
// member initializer is just `a_()`.
class B {
public:
    // Forwarding constructor. The parameters are forwarding references
    // (T&&...) so that std::forward<T> preserves each argument's value
    // category. With a `T&...` signature T deduces to a non-reference type,
    // which makes std::forward<T> cast every lvalue argument to an rvalue,
    // and rvalue arguments cannot bind at all.
    // std::forward is declared in <utility>.
    template<class... T> B(T&&... t)
        : a_(std::forward<T>(t)...) {
    }
    A a_;
};
// Holder for a single A whose constructor names the member with an
// explicit, empty parenthesized initializer.
class C {
public:
    A a_;

    // `a_()` requests value-initialization of the member; this is the
    // construct being compared against the other test cases.
    C()
        : a_()
    {
    }
};
// Builds a local A from the argument pack, touches it through dummy(), and
// returns the value read back by dummy2().
// When called with no arguments the pack is empty, so the initializer of `a`
// is an empty `()`.
template<class ... T>
int test(T&...t) {
    A a(t...);
    a.dummy();
    return a.dummy2();
}
// Exercises four ways of creating an A and sums the values read back so that
// none of the objects is unused.
int main() {
    // Case 1: a local A declared with no initializer.
    A a;
    a.dummy();
    auto r1 = a.dummy2();
    // Case 2: a local A built inside test() from an empty argument pack.
    auto r2 = test();
    // Case 3: A as a member of B, initialized from an empty forwarded pack.
    B b;
    b.a_.dummy();
    auto r3 = b.a_.dummy2();
    // Case 4: A as a member of C, initialized with `a_()`.
    C c;
    c.a_.dummy();
    auto r4 = c.a_.dummy2();
    return r1 + r2 + r3 + r4;
}
I compiled the code with Visual Studio 2017 on Windows 10 as an x86 release build. Then I checked the asm code:
template<class ... T>
int test(T&...t) {
00E510B8  call        _chkstk (0E51CE0h)  
00E510BD  mov         eax,dword ptr [__security_cookie (0E53004h)]  
00E510C2  xor         eax,ebp  
00E510C4  mov         dword ptr [ebp-4],eax  
    A a(t...);
00E510C7  push        10000h  
00E510CC  lea         eax,[a]  
00E510D2  push        0  
00E510D4  push        eax  
00E510D5  call        _memset (0E51C3Ah)  
00E510DA  add         esp,0Ch  
    a.dummy();
00E510DD  call        dword ptr [__imp__rand (0E520B4h)]  
}
00E510E3  mov         ecx,dword ptr [ebp-4]  
It is very clear that the function test() calls memset(p, 0, 0x10000).
And if I add an empty constructor to A (the commented-out line A() {}), the compiler removes the memset.
So why does the code call memset when type A does not have a constructor, but not when A has a constructor?
Is it part of the C++ standard, or just a compiler bug?
Obviously, the memset(p, 0, sizeof(T)) is useless and harmful because it slows down the program. How do I work around it?