The text data is organized as a vector with 20,000 elements, like [2, 1, 0, 0, 5, ..., 0]. The i-th element indicates the frequency of the i-th word in a text.
The ground-truth label data is also represented as a vector, with 4,000 elements, like [0, 0, 1, 0, 1, ..., 0]. The i-th element indicates whether the i-th label is a positive label for a text. The number of positive labels varies from text to text.
I have code for single-label text classification.
How can I edit the following code for multilabel text classification?
In particular, I would like to know the following points:
- How to compute accuracy using TensorFlow.
- How to set a threshold that decides whether a label is positive or negative. For instance, if the output is [0.80, 0.43, 0.21, 0.01, 0.32] and the ground truth is [1, 1, 0, 0, 1], the labels with scores over 0.25 should be judged as positive.
Thank you.
import tensorflow as tf
# hidden Layer
class HiddenLayer(object):
    """Fully connected layer followed by a ReLU activation.

    Attributes set by ``__init__``:
        input:  the tensor handed in by the caller.
        w:      weight variable of shape ``[n_in, n_out]``, initialised from
                a normal distribution (mean 0.0, stddev 0.05).
        b:      bias variable of shape ``[n_out]``, initialised to zeros.
        params: ``[w, b]``.
    """

    def __init__(self, input, n_in, n_out):
        # `input` shadows the builtin, but callers pass it by keyword, so the
        # parameter name is part of the interface and is kept.
        self.input = input
        self.w = tf.Variable(
            tf.random_normal([n_in, n_out], mean=0.0, stddev=0.05))
        self.b = tf.Variable(tf.zeros([n_out]))
        self.params = [self.w, self.b]
        # Filled in lazily by output(); None means "not built yet".
        self._output = None

    def output(self):
        """Return ``relu(input . w + b)``.

        The tensor is built on the first call and cached in ``_output``.
        Storing it under the name ``output`` would replace this method on the
        instance and make any second call fail with ``TypeError``.
        """
        if self._output is None:
            linarg = tf.matmul(self.input, self.w) + self.b
            self._output = tf.nn.relu(linarg)
        return self._output
# output Layer
class OutputLayer(object):
    """Final fully connected layer; ``output()`` applies a ReLU to it.

    Attributes set by ``__init__``:
        input:  the tensor handed in by the caller.
        w:      weight variable of shape ``[n_in, n_out]``, initialised from
                a normal distribution (mean 0.0, stddev 0.05).
        b:      bias variable of shape ``[n_out]``, initialised to zeros.
        params: ``[w, b]``.
    """

    def __init__(self, input, n_in, n_out):
        # `input` shadows the builtin, but callers pass it by keyword, so the
        # parameter name is part of the interface and is kept.
        self.input = input
        self.w = tf.Variable(
            tf.random_normal([n_in, n_out], mean=0.0, stddev=0.05))
        self.b = tf.Variable(tf.zeros([n_out]))
        self.params = [self.w, self.b]
        # Filled in lazily by output(); None means "not built yet".
        self._output = None

    def output(self):
        """Return ``relu(input . w + b)``.

        The tensor is built on the first call and cached in ``_output``.
        Storing it under the name ``output`` would replace this method on the
        instance and make any second call fail with ``TypeError``.
        """
        if self._output is None:
            linarg = tf.matmul(self.input, self.w) + self.b
            # NOTE(review): a ReLU output is non-negative but not normalised,
            # and model() feeds it into a log-based cross-entropy. Confirm
            # that ReLU (rather than softmax/sigmoid) is intended here.
            self._output = tf.nn.relu(linarg)
        return self._output
# model
def model(multilabel=False, threshold=0.25):
    """Build the two-layer network and return ``(loss, accuracy)`` tensors.

    The function reads two names from the enclosing scope that are not
    defined in this block: ``x`` (the network input) and ``y_`` (the ground
    truth labels). ``y_`` must be a float tensor, since it is multiplied
    with the log of the network output.

    Args:
        multilabel: when False (default) the single-label graph is built:
            cross-entropy on the output layer's ``output()`` and argmax
            accuracy. When True, each label is treated as an independent
            binary decision: a sigmoid is applied to the output layer's
            linear activation, the loss is the summed binary cross-entropy,
            and accuracy is the fraction of individual label decisions that
            match ``y_``.
        threshold: only used when ``multilabel`` is True. A label is
            predicted positive when its sigmoid score is greater than this
            value.

    Returns:
        A ``(loss, accuracy)`` tuple of tensors. ``loss`` is the
        cross-entropy plus ``0.01`` times the L2 penalty on both weight
        matrices.
    """
    h_layer = HiddenLayer(input=x, n_in=20000, n_out=1000)
    o_layer = OutputLayer(input=h_layer.output(), n_in=1000, n_out=4000)

    # Small constant that keeps log() away from log(0).
    eps = 1e-9

    if multilabel:
        # Use the linear activation directly; the layer's own output() applies
        # a ReLU, which is not what a per-label sigmoid needs.
        logits = tf.matmul(o_layer.input, o_layer.w) + o_layer.b
        prob = tf.sigmoid(logits)
        # Binary cross-entropy summed over all labels and examples.
        cross_entropy = -tf.reduce_sum(
            y_ * tf.log(prob + eps)
            + (1.0 - y_) * tf.log(1.0 - prob + eps),
            name='xentropy')
        # Threshold the scores into 0/1 predictions and compare them with the
        # labels element by element.
        predicted = tf.cast(tf.greater(prob, threshold), "float")
        correct_pred = tf.equal(predicted, y_)
    else:
        out = o_layer.output()
        cross_entropy = -tf.reduce_sum(y_ * tf.log(out + eps), name='xentropy')
        # The labels are read from `y_`, the same tensor the loss uses; the
        # previous code read an otherwise unused name `y` here.
        correct_pred = tf.equal(tf.argmax(out, 1), tf.argmax(y_, 1))

    # L2 regularisation on both weight matrices.
    l2 = tf.nn.l2_loss(h_layer.w) + tf.nn.l2_loss(o_layer.w)
    lambda_2 = 0.01
    loss = cross_entropy + lambda_2 * l2

    accuracy = tf.reduce_mean(tf.cast(correct_pred, "float"))
    return loss, accuracy
 
     
    