Extracting bitmap images from videos can be a complex task, especially if you're developing a commercial app and cannot use patented codecs like H.264 or H.265 with FFmpeg. However, there is a solution: the MediaCodec API and OpenGL ES. This approach offers several advantages:
- Suitable for commercial apps.
 
- No external library dependencies.
 
- Supports decoding of patented codecs and any codec supported by the Android device.
 
- Hardware decoding is supported.
 
The answer to the question looks like the following.

We create a media extractor and a decoder for the video input. Using a SurfaceTexture, we capture frames from the decoder as OpenGL ES textures. After releaseOutputBuffer is called with rendering enabled, the frame is rendered onto this SurfaceTexture. The rendering and the frame-available callback occur on a separate thread, so the decoding loop must wait until the frame has been captured as an OpenGL ES texture. Once captured, the frame is drawn onto an offscreen pixel buffer (pbuffer) allocated by EGL. We can then read the pixel data from this buffer into a ByteBuffer, which is subsequently copied into a bitmap.
/**
 * Decodes a video into bitmaps using [MediaExtractor], [MediaCodec] and an offscreen
 * [DecoderOutputSurface] that the decoder renders into.
 */
class VideoExtractor {
    /**
     * Decodes the first video track of [srcUri] and returns every rendered frame as a [Bitmap].
     *
     * The call is blocking and keeps all decoded frames in memory at the same time, so it is
     * only suitable for short clips. The [DecoderOutputSurface] created here makes its EGL
     * context current on the calling thread, so the whole extraction runs on that thread.
     *
     * @param context context used to resolve [srcUri].
     * @param srcUri location of the video to decode.
     * @return decoded frames in the order the decoder produced them.
     * @throws RuntimeException if no video track is found, the track has no mime type, or an
     * input buffer cannot be obtained.
     */
    fun extract(context: Context, srcUri: Uri): List<Bitmap> {
        val frames = mutableListOf<Bitmap>()
        var extractor: MediaExtractor? = null
        var decoder: MediaCodec? = null
        var decoderStarted = false
        var outputSurface: DecoderOutputSurface? = null
        try {
            // create extractor
            extractor = MediaExtractor()
            extractor.setDataSource(context, srcUri, null)
            val videoTrackIndex = findVideoTrackIndex(extractor)
            extractor.selectTrack(videoTrackIndex)
            // create decoder
            val format = extractor.getTrackFormat(videoTrackIndex)
            val mime = format.getString(MediaFormat.KEY_MIME)
                ?: throw RuntimeException("Could not retrieve video mime type")
            decoder = MediaCodec.createDecoderByType(mime)
            // create output surface with the same size as the video frame
            val width = format.getInteger(MediaFormat.KEY_WIDTH)
            val height = format.getInteger(MediaFormat.KEY_HEIGHT)
            outputSurface = DecoderOutputSurface(width, height)
            // configure and start decoder
            decoder.configure(format, outputSurface.surface, null, 0)
            decoder.start()
            decoderStarted = true
            // extract video data chunks and decode frames
            val info = MediaCodec.BufferInfo()
            var inputDone = false
            var outputDone = false
            while (!(inputDone && outputDone)) {
                if (!inputDone) {
                    inputDone = queueNextSample(extractor, decoder)
                }
                if (!outputDone) {
                    outputDone = drainDecodedFrame(decoder, info, outputSurface, frames)
                }
            }
        } finally {
            releaseAll(extractor, decoder, decoderStarted, outputSurface)
        }
        return frames
    }

    /**
     * Feeds the next compressed sample from [extractor] to [decoder], if an input buffer
     * becomes available within [DEQUEUE_TIMEOUT_US].
     *
     * @return `true` once the end-of-stream marker has been queued, `false` otherwise.
     */
    private fun queueNextSample(extractor: MediaExtractor, decoder: MediaCodec): Boolean {
        val bufferIndex = decoder.dequeueInputBuffer(DEQUEUE_TIMEOUT_US)
        if (bufferIndex < 0) {
            // No input buffer available right now; try again on the next loop iteration.
            return false
        }
        val inputBuffer = decoder.getInputBuffer(bufferIndex)
            ?: throw RuntimeException("Could not retrieve input buffer")
        val chunkSize = extractor.readSampleData(inputBuffer, 0)
        if (chunkSize < 0) {
            // End of stream - send empty frame with EOS flag set.
            decoder.queueInputBuffer(bufferIndex, 0, 0, 0, MediaCodec.BUFFER_FLAG_END_OF_STREAM)
            return true
        }
        decoder.queueInputBuffer(bufferIndex, 0, chunkSize, extractor.sampleTime, 0)
        extractor.advance()
        return false
    }

    /**
     * Takes one output buffer from [decoder], if one becomes available within
     * [DEQUEUE_TIMEOUT_US], renders it onto [outputSurface] and appends the resulting bitmap
     * to [frames].
     *
     * @return `true` when the dequeued buffer carried the end-of-stream flag.
     */
    private fun drainDecodedFrame(
        decoder: MediaCodec,
        info: MediaCodec.BufferInfo,
        outputSurface: DecoderOutputSurface,
        frames: MutableList<Bitmap>,
    ): Boolean {
        val decoderStatus = decoder.dequeueOutputBuffer(info, DEQUEUE_TIMEOUT_US)
        if (decoderStatus < 0) {
            // Negative values are informational codes (nothing ready yet, format changed, ...).
            return false
        }
        val endOfStream = (info.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0
        // Only buffers that actually carry data are rendered to the surface.
        val doRender = info.size > 0
        decoder.releaseOutputBuffer(decoderStatus, doRender)
        if (doRender) {
            // Rendering happens asynchronously; wait until the frame reached the texture.
            outputSurface.awaitNewImage()
            outputSurface.drawImage()
            frames.add(outputSurface.getImage())
        }
        return endOfStream
    }

    /**
     * Releases every resource that was created. The nested try/finally blocks guarantee that
     * a failure in one cleanup step does not skip the remaining ones.
     */
    private fun releaseAll(
        extractor: MediaExtractor?,
        decoder: MediaCodec?,
        decoderStarted: Boolean,
        outputSurface: DecoderOutputSurface?,
    ) {
        try {
            extractor?.release()
        } finally {
            try {
                // Only stop a codec that was actually started.
                if (decoderStarted) {
                    decoder?.stop()
                }
            } finally {
                try {
                    decoder?.release()
                } finally {
                    outputSurface?.release()
                }
            }
        }
    }

    /**
     * Returns the index of the first track whose mime type starts with "video".
     *
     * @throws RuntimeException if [extractor] exposes no such track.
     */
    private fun findVideoTrackIndex(extractor: MediaExtractor): Int =
        (0 until extractor.trackCount).firstOrNull { trackIndex ->
            val mime = extractor.getTrackFormat(trackIndex).getString(MediaFormat.KEY_MIME)
            mime?.startsWith("video") == true
        } ?: throw RuntimeException("No video track found")

    companion object {
        /**
         * How long to wait for a codec buffer, in microseconds. A small non-zero timeout keeps
         * the decode loop from busy-spinning while the codec has nothing ready.
         */
        private const val DEQUEUE_TIMEOUT_US = 10_000L
    }
}
/**
 * Offscreen render target for a video decoder, backed by an EGL pbuffer with the specified
 * dimensions.
 *
 * Constructing an instance creates an EGL context and pbuffer surface, makes them current on
 * the calling thread, and creates a [Surface] that can be passed to MediaCodec.configure().
 * If any part of that setup fails, everything created so far is released before the
 * exception is rethrown.
 *
 * @param width width of the pbuffer and of the bitmaps returned by [getImage]; must be > 0.
 * @param height height of the pbuffer and of the bitmaps returned by [getImage]; must be > 0.
 * @throws IllegalArgumentException if [width] or [height] is not positive.
 * @throws RuntimeException if EGL setup fails.
 */
class DecoderOutputSurface(
    private val width: Int,
    private val height: Int,
) {
    companion object {
        /** Maximum time [awaitNewImage] waits for a frame, in milliseconds. */
        private const val TIMEOUT_MS = 2500L
        private const val NANOS_PER_MILLI = 1_000_000L
        private const val RELEASED_MESSAGE = "DecoderOutputSurface is not initialized or was released"
    }
    private var textureRender: FrameTextureRenderer? = null
    private var surfaceTexture: SurfaceTexture? = null
    /** Surface the decoder renders into; `null` after [release]. */
    var surface: Surface? = null
        private set
    private var eglDisplay: EGLDisplay? = EGL14.EGL_NO_DISPLAY
    private var eglContext: EGLContext? = EGL14.EGL_NO_CONTEXT
    private var eglSurface: EGLSurface? = EGL14.EGL_NO_SURFACE
    private var pixelBuf: ByteBuffer? = null // used by getImage()
    private val frameSyncObject = Object() // guards frameAvailable
    private var frameAvailable = false
    // SurfaceTexture callback: flags the new frame and wakes up awaitNewImage().
    private val onFrameAvailable = SurfaceTexture.OnFrameAvailableListener {
        synchronized(frameSyncObject) {
            if (frameAvailable) {
                throw RuntimeException("mFrameAvailable already set, frame could be dropped")
            }
            frameAvailable = true
            frameSyncObject.notifyAll()
        }
    }
    init {
        require(width > 0 && height > 0) {
            "Width and height must be greater then zero"
        }
        try {
            eglSetup()
            makeCurrent()
            setup()
        } catch (t: Throwable) {
            // The caller never receives this instance, so nobody else could call release().
            release()
            throw t
        }
    }
    /**
     * Waits for the next frame and latches it into the texture. Must be called from the thread
     * that created the DecoderOutputSurface object. (More specifically, it must be called on
     * the thread with the EGLContext that contains the GL texture object used by
     * SurfaceTexture.)
     *
     * @throws RuntimeException if no frame arrives within [TIMEOUT_MS] or the thread is
     * interrupted while waiting (the interrupt flag is restored in that case).
     */
    fun awaitNewImage() {
        synchronized(frameSyncObject) {
            val deadlineNanos = System.nanoTime() + TIMEOUT_MS * NANOS_PER_MILLI
            // Loop on the condition so that a spurious wakeup simply waits for the remainder
            // of the timeout instead of being reported as a timeout.
            while (!frameAvailable) {
                val remainingMs = (deadlineNanos - System.nanoTime()) / NANOS_PER_MILLI
                if (remainingMs <= 0) {
                    // wait(0) would block forever, so a non-positive remainder must stop here.
                    throw RuntimeException("frame wait timed out")
                }
                try {
                    frameSyncObject.wait(remainingMs)
                } catch (ie: InterruptedException) {
                    // Preserve the interrupt for callers further up the stack.
                    Thread.currentThread().interrupt()
                    throw RuntimeException(ie)
                }
            }
            frameAvailable = false
        }
        // Latch the data.
        checkGlError("before updateTexImage")
        surfaceTexture?.updateTexImage()
    }
    /**
     * Draws the data from SurfaceTexture onto the current EGL surface.
     *
     * @throws IllegalStateException if called after [release].
     */
    fun drawImage() {
        checkNotNull(textureRender) { RELEASED_MESSAGE }.drawFrame()
    }
    /**
     * Reads the current EGL surface back into a new [width] x [height] ARGB_8888 bitmap.
     *
     * @throws IllegalStateException if called after [release].
     */
    fun getImage(): Bitmap {
        val buffer = checkNotNull(pixelBuf) { RELEASED_MESSAGE }
        // Per the original author's note: the texture is drawn without a vertical flip in the
        // shader and glReadPixels reads starting from the lower left corner, so the two
        // effects cancel out and the bitmap ends up the right way round.
        buffer.rewind()
        GLES20.glReadPixels(
            0,
            0,
            width,
            height,
            GLES20.GL_RGBA,
            GLES20.GL_UNSIGNED_BYTE,
            buffer
        )
        buffer.rewind()
        val bitmap = Bitmap.createBitmap(width, height, Bitmap.Config.ARGB_8888)
        bitmap.copyPixelsFromBuffer(buffer)
        return bitmap
    }
    /**
     * Prepares EGL.  We want a GLES 2.0 context and a surface that supports pbuffer.
     *
     * @throws RuntimeException if any EGL step fails.
     */
    private fun eglSetup() {
        // Obtains the EGL display connection to the native display
        val display = EGL14.eglGetDisplay(EGL14.EGL_DEFAULT_DISPLAY)
        if (display == null || display == EGL14.EGL_NO_DISPLAY) {
            throw RuntimeException("Unable to get EGL14 display")
        }
        // Initialize an EGL display connection
        val version = IntArray(2)
        if (!EGL14.eglInitialize(display, version, 0, version, 1)) {
            // Leave eglDisplay at EGL_NO_DISPLAY so release() does not touch an
            // uninitialized display.
            throw RuntimeException("Unable to initialize EGL14")
        }
        eglDisplay = display
        // Configure EGL for pbuffer and OpenGL ES 2.0, 32-bit RGBA.
        val attribList = intArrayOf(
            EGL14.EGL_SURFACE_TYPE, EGL14.EGL_PBUFFER_BIT,
            EGL14.EGL_RENDERABLE_TYPE, EGL14.EGL_OPENGL_ES2_BIT,
            EGL14.EGL_ALPHA_SIZE, 8,
            EGL14.EGL_BLUE_SIZE, 8,
            EGL14.EGL_GREEN_SIZE, 8,
            EGL14.EGL_RED_SIZE, 8,
            EGL14.EGL_NONE
        )
        val configs = arrayOfNulls<EGLConfig>(1)
        val numConfigs = IntArray(1)
        // Return a list of frame buffer configurations that match specified attributes
        val chosen = EGL14.eglChooseConfig(
            display,
            attribList,
            0,
            configs,
            0,
            configs.size,
            numConfigs,
            0
        )
        // The call can succeed while matching zero configs, so check the count as well.
        val config = configs[0]
        if (!chosen || numConfigs[0] <= 0 || config == null) {
            throw RuntimeException("Unable to find RGB888+recordable ES2 EGL config")
        }
        // Configure context for OpenGL ES 2.0.
        eglContext = EGL14.eglCreateContext(
            display,
            config,
            EGL14.EGL_NO_CONTEXT,
            intArrayOf(
                EGL14.EGL_CONTEXT_CLIENT_VERSION, 2,
                EGL14.EGL_NONE
            ),
            0
        )
        checkEglError("eglCreateContext")
        if (eglContext == null) {
            throw RuntimeException("null context")
        }
        // Create a pbuffer surface.
        val surfaceAttribs = intArrayOf(
            EGL14.EGL_WIDTH, width,
            EGL14.EGL_HEIGHT, height,
            EGL14.EGL_NONE
        )
        eglSurface = EGL14.eglCreatePbufferSurface(
            display,
            config,
            surfaceAttribs,
            0
        )
        checkEglError("eglCreatePbufferSurface")
        if (eglSurface == null) {
            throw RuntimeException("surface was null")
        }
    }
    /**
     * Creates interconnected instances of FrameTextureRenderer, SurfaceTexture, and Surface,
     * and allocates the buffer used for pixel read-back (4 bytes per pixel).
     */
    private fun setup() {
        val renderer = FrameTextureRenderer()
        val texture = SurfaceTexture(renderer.textureId)
        texture.setOnFrameAvailableListener(onFrameAvailable)
        textureRender = renderer
        surfaceTexture = texture
        surface = Surface(texture)
        pixelBuf = ByteBuffer
            .allocateDirect(width * height * 4)
            .order(ByteOrder.nativeOrder())
    }
    /**
     * Discard all resources held by this class, notably the EGL context. Safe to call more
     * than once and on a partially initialized instance.
     */
    fun release() {
        val display = eglDisplay
        if (display != null && display != EGL14.EGL_NO_DISPLAY) {
            val surfaceToDestroy = eglSurface
            if (surfaceToDestroy != null && surfaceToDestroy != EGL14.EGL_NO_SURFACE) {
                EGL14.eglDestroySurface(display, surfaceToDestroy)
            }
            val contextToDestroy = eglContext
            if (contextToDestroy != null && contextToDestroy != EGL14.EGL_NO_CONTEXT) {
                EGL14.eglDestroyContext(display, contextToDestroy)
            }
            EGL14.eglReleaseThread()
            EGL14.eglTerminate(display)
        }
        eglDisplay = EGL14.EGL_NO_DISPLAY
        eglContext = EGL14.EGL_NO_CONTEXT
        eglSurface = EGL14.EGL_NO_SURFACE
        surface?.release()
        textureRender = null
        surface = null
        surfaceTexture = null
        pixelBuf = null
    }
    /**
     * Makes our EGL context and surface current.
     *
     * @throws RuntimeException if eglMakeCurrent reports failure.
     */
    private fun makeCurrent() {
        if (!EGL14.eglMakeCurrent(eglDisplay, eglSurface, eglSurface, eglContext)) {
            throw RuntimeException("eglMakeCurrent failed")
        }
    }
}
/**
 * Draws an external (GL_TEXTURE_EXTERNAL_OES) texture as a full-viewport quad.
 *
 * The constructor compiles and links the shader program and generates the texture whose id
 * is exposed through [textureId], so it must run on a thread with a current GL context.
 */
class FrameTextureRenderer {
    companion object {
        private const val FLOAT_SIZE_BYTES = 4
        // Each vertex is 5 floats: X, Y, Z followed by U, V.
        private const val TRIANGLE_VERTICES_DATA_STRIDE_BYTES = 5 * FLOAT_SIZE_BYTES
        private const val TRIANGLE_VERTICES_DATA_POS_OFFSET = 0
        private const val TRIANGLE_VERTICES_DATA_UV_OFFSET = 3
        private val triangleVerticesData = floatArrayOf(
            // X, Y, Z, U, V
            -1f, -1f, 0f, 0f, 0f,
            1f, -1f, 0f, 1f, 0f,
            -1f, 1f, 0f, 0f, 1f,
            1f, 1f, 0f, 1f, 1f
        )
        private const val VERTEX_SHADER = """
                attribute vec4 aPosition;
                attribute vec2 aTextureCoord;
                varying vec2 vTextureCoord;
                
                void main() {
                    gl_Position = aPosition;
                    vTextureCoord = aTextureCoord;
                }
                """
        private const val FRAGMENT_SHADER = """
                #extension GL_OES_EGL_image_external : require
                precision mediump float;
                varying vec2 vTextureCoord;
                uniform samplerExternalOES sTexture;
                
                void main() {
                    gl_FragColor = texture2D(sTexture, vTextureCoord);
                }
                """
    }
    // Interleaved vertex data; drawFrame() repositions the buffer before every use.
    private val triangleVertices = ByteBuffer
        .allocateDirect(triangleVerticesData.size * FLOAT_SIZE_BYTES)
        .order(ByteOrder.nativeOrder())
        .asFloatBuffer()
        .put(triangleVerticesData)
    private var program = 0
    private var positionHandle = -1
    private var textureCoordHandle = -1
    /** Name of the external texture that frames are sampled from. */
    var textureId = -1
        private set
    init {
        initProgram()
    }
    /**
     * Clears the color buffer to opaque black and draws the external texture as a
     * four-vertex triangle strip.
     */
    fun drawFrame() {
        checkGlError("onDrawFrame start")
        // Specify clear values for the color buffers
        GLES20.glClearColor(0f, 0f, 0f, 1f)
        // Clear buffers to preset values
        // GL_COLOR_BUFFER_BIT - Indicates the buffers currently enabled for color writing
        GLES20.glClear(GLES20.GL_COLOR_BUFFER_BIT)
        // Install a program object as part of current rendering state
        GLES20.glUseProgram(program)
        checkGlError("glUseProgram")
        GLES20.glActiveTexture(GLES20.GL_TEXTURE0)
        GLES20.glBindTexture(GLES11Ext.GL_TEXTURE_EXTERNAL_OES, textureId)
        // Position attribute: 3 floats starting at the beginning of each vertex.
        triangleVertices.position(TRIANGLE_VERTICES_DATA_POS_OFFSET)
        GLES20.glVertexAttribPointer(
            positionHandle,
            3,
            GLES20.GL_FLOAT,
            false,
            TRIANGLE_VERTICES_DATA_STRIDE_BYTES,
            triangleVertices
        )
        checkGlError("glVertexAttribPointer aPosition")
        GLES20.glEnableVertexAttribArray(positionHandle)
        checkGlError("glEnableVertexAttribArray positionHandle")
        // Texture coordinate attribute: 2 floats following the position.
        triangleVertices.position(TRIANGLE_VERTICES_DATA_UV_OFFSET)
        GLES20.glVertexAttribPointer(
            textureCoordHandle,
            2,
            GLES20.GL_FLOAT,
            false,
            TRIANGLE_VERTICES_DATA_STRIDE_BYTES,
            triangleVertices
        )
        checkGlError("glVertexAttribPointer maTextureHandle")
        GLES20.glEnableVertexAttribArray(textureCoordHandle)
        checkGlError("glEnableVertexAttribArray maTextureHandle")
        GLES20.glDrawArrays(GLES20.GL_TRIANGLE_STRIP, 0, 4)
        checkGlError("glDrawArrays")
        GLES20.glBindTexture(GLES11Ext.GL_TEXTURE_EXTERNAL_OES, 0)
    }
    /**
     * Builds the shader program, looks up its attribute locations and creates the external
     * texture with nearest-neighbour filtering and clamp-to-edge wrapping.
     *
     * @throws RuntimeException if the program cannot be created.
     */
    private fun initProgram() {
        program = createProgram()
        if (program == 0) {
            throw RuntimeException("failed creating program")
        }
        positionHandle = GLES20.glGetAttribLocation(program, "aPosition")
        checkLocation(positionHandle, "aPosition")
        textureCoordHandle = GLES20.glGetAttribLocation(program, "aTextureCoord")
        checkLocation(textureCoordHandle, "aTextureCoord")
        val textures = IntArray(1)
        GLES20.glGenTextures(1, textures, 0)
        textureId = textures[0]
        // glTexParameteri applies to the texture currently bound to the target, so the new
        // texture has to be bound before its parameters are set.
        GLES20.glBindTexture(GLES11Ext.GL_TEXTURE_EXTERNAL_OES, textureId)
        checkGlError("glBindTexture textureId")
        GLES20.glTexParameteri(
            GLES11Ext.GL_TEXTURE_EXTERNAL_OES,
            GLES20.GL_TEXTURE_MIN_FILTER,
            GLES20.GL_NEAREST
        )
        GLES20.glTexParameteri(
            GLES11Ext.GL_TEXTURE_EXTERNAL_OES,
            GLES20.GL_TEXTURE_MAG_FILTER,
            GLES20.GL_NEAREST
        )
        GLES20.glTexParameteri(
            GLES11Ext.GL_TEXTURE_EXTERNAL_OES,
            GLES20.GL_TEXTURE_WRAP_S,
            GLES20.GL_CLAMP_TO_EDGE
        )
        GLES20.glTexParameteri(
            GLES11Ext.GL_TEXTURE_EXTERNAL_OES,
            GLES20.GL_TEXTURE_WRAP_T,
            GLES20.GL_CLAMP_TO_EDGE
        )
        checkGlError("glTexParameter")
    }
    /**
     * Compiles both shaders and links them into a program.
     *
     * @return the program name, or 0 if a shader object could not be obtained.
     * @throws RuntimeException if compiling, creating or linking fails.
     */
    private fun createProgram(): Int {
        val vertexShader = loadShader(GLES20.GL_VERTEX_SHADER, VERTEX_SHADER)
        if (vertexShader == 0) {
            return 0
        }
        var pixelShader = 0
        try {
            pixelShader = loadShader(GLES20.GL_FRAGMENT_SHADER, FRAGMENT_SHADER)
            if (pixelShader == 0) {
                return 0
            }
            val linkedProgram = GLES20.glCreateProgram()
            if (linkedProgram == 0) {
                throw RuntimeException("Could not create the program")
            }
            GLES20.glAttachShader(linkedProgram, vertexShader)
            checkGlError("glAttachShader")
            GLES20.glAttachShader(linkedProgram, pixelShader)
            checkGlError("glAttachShader")
            GLES20.glLinkProgram(linkedProgram)
            val linkStatus = IntArray(1)
            GLES20.glGetProgramiv(linkedProgram, GLES20.GL_LINK_STATUS, linkStatus, 0)
            if (linkStatus[0] != GLES20.GL_TRUE) {
                GLES20.glDeleteProgram(linkedProgram)
                throw RuntimeException("Could not link the program")
            }
            return linkedProgram
        } finally {
            // Shader objects are no longer needed once linking was attempted. Deleting a
            // shader that is still attached only flags it; it is freed with the program.
            GLES20.glDeleteShader(vertexShader)
            if (pixelShader != 0) {
                GLES20.glDeleteShader(pixelShader)
            }
        }
    }
    /**
     * Creates and compiles a shader of [shaderType] from [source].
     *
     * @return the shader name.
     * @throws RuntimeException if compilation fails; the shader object is deleted first.
     */
    private fun loadShader(shaderType: Int, source: String): Int {
        val shader = GLES20.glCreateShader(shaderType)
        checkGlError("glCreateShader type=$shaderType")
        GLES20.glShaderSource(shader, source)
        GLES20.glCompileShader(shader)
        val compiled = IntArray(1)
        GLES20.glGetShaderiv(shader, GLES20.GL_COMPILE_STATUS, compiled, 0)
        if (compiled[0] == GLES20.GL_FALSE) {
            GLES20.glDeleteShader(shader)
            throw RuntimeException("Failed to compile shader")
        }
        return shader
    }
}
In this scenario, the EGL surface has the same size as the video frame. To change the output frame size, create the EGLSurface with a different width and height and introduce a transformation matrix in the vertex shader to map the texture onto the output surface. Additionally, by changing GL_TEXTURE_MAG_FILTER to GL_LINEAR, you can achieve smoother texture drawing.
References