评论情感分类数据处理

# 评论情感分类数据处理
# labels: 0负面、1中性、2正面
class CommentProcessor(DataProcessor):
    """Data processor for the comment sentiment-classification dataset.

    Labels are the strings "0" (negative), "1" (neutral) and "2" (positive).
    Each data row is indexed as ``row[0]`` = label and ``row[1]`` = comment
    text. The first row of every file is skipped as a header.
    """

    def get_train_examples(self, data_dir):
        """Return the examples built from ``train.tsv`` inside *data_dir*."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "train.tsv")), "train"
        )

    def get_dev_examples(self, data_dir):
        """Return the examples built from ``dev.tsv`` inside *data_dir*."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev"
        )

    def get_test_examples(self, data_dir):
        """Return the examples built from ``test.tsv`` inside *data_dir*."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "test.tsv")), "test"
        )

    def get_labels(self):
        """Return the list of class labels used by this dataset."""
        return ["0", "1", "2"]

    def _create_examples(self, lines, set_type):
        """Turn parsed rows into a list of ``InputExample`` objects.

        :param lines: iterable of rows; each row is indexed as
            ``row[0]`` (label) and ``row[1]`` (text). Row 0 is a header.
        :param set_type: "train", "dev" or "test"; used as the guid prefix.
            For "test" the label column is ignored and "0" is used as a
            placeholder label.
        :return: list of ``InputExample``; malformed rows are skipped.
        """
        examples = []
        for (i, line) in enumerate(lines):
            if i == 0:
                # Header row.
                continue
            guid = "%s-%s" % (set_type, i)
            try:
                text_a = tokenization.convert_to_unicode(line[1])
            except (IndexError, ValueError):
                # Best-effort: skip rows without a text column (IndexError)
                # or whose text cannot be converted (presumably ValueError /
                # UnicodeDecodeError from convert_to_unicode -- verify
                # against the tokenization module). Unlike a bare ``except``
                # this no longer swallows KeyboardInterrupt / SystemExit.
                continue
            if set_type == "test":
                label = "0"
            else:
                label = tokenization.convert_to_unicode(line[0])
            examples.append(
                InputExample(guid=guid, text_a=text_a, label=label)
            )
        return examples

将 BERT 模型的 ckpt 文件转为 TF Serving 部署所需的 pb 文件

#!/usr/bin/python3.6
'''
BERT模型ckpt文件转为部署tfserving所需的文件
'''
import json
import os
from enum import Enum
import sys
import modeling
from termcolor import colored
import logging
import tensorflow as tf
import argparse
import pickle

# Command-line flags for the export location. main() joins them as
# <export_model_dir>/<model_version> to form the SavedModel output directory.
tf.app.flags.DEFINE_string('export_model_dir', "./output/comment_0/versions", 'Directory where the model exported files should be placed.')
tf.app.flags.DEFINE_integer('model_version', 10001, 'Models version number.')
# Shared handle used to read the flag values defined above.
FLAGS = tf.app.flags.FLAGS

def create_classification_model(bert_config, is_training, input_ids, input_mask, segment_ids, labels, num_labels):
    """Build a BERT encoder with a softmax classification head on top.

    :param bert_config: configuration object passed to ``modeling.BertModel``.
    :param is_training: when true, dropout (keep_prob=0.9) is applied to the
        pooled output before the classification layer.
    :param input_ids: passed to ``BertModel`` as ``input_ids``.
    :param input_mask: passed to ``BertModel`` as ``input_mask``.
    :param segment_ids: passed to ``BertModel`` as ``token_type_ids``.
    :param labels: class indices used to compute the loss, or ``None`` to
        skip the loss computation.
    :param num_labels: number of output classes.
    :return: tuple ``(loss, per_example_loss, logits, probabilities)``;
        ``loss`` and ``per_example_loss`` are ``None`` when *labels* is
        ``None``.
    """
    # Encode the inputs with BERT and take the pooled representation.
    encoder = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
    )
    pooled = encoder.get_pooled_output()
    hidden_size = pooled.shape[-1].value

    # Classification-layer parameters (created outside the "loss" scope).
    output_weights = tf.get_variable(
        "output_weights",
        [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02),
    )
    output_bias = tf.get_variable(
        "output_bias",
        [num_labels],
        initializer=tf.zeros_initializer(),
    )

    # Defaults for the label-free case.
    loss = None
    per_example_loss = None

    with tf.variable_scope("loss"):
        if is_training:
            # keep_prob=0.9, i.e. 0.1 dropout.
            pooled = tf.nn.dropout(pooled, keep_prob=0.9)

        projected = tf.matmul(pooled, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(projected, output_bias)
        probabilities = tf.nn.softmax(logits, axis=-1)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        if labels is not None:
            # Cross-entropy against one-hot targets, averaged over examples.
            one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
            per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
            loss = tf.reduce_mean(per_example_loss)

    return (loss, per_example_loss, logits, probabilities)

def main(max_seq_len, model_dir, num_labels,
         bert_config_file='./uncased_L-2_H-128_A-2/bert_config.json'):
    """Export the latest checkpoint in *model_dir* as a SavedModel.

    The export is written to ``<FLAGS.export_model_dir>/<FLAGS.model_version>``
    with the SERVING tag and a single Predict signature named ``'result'``:
    inputs ``input_ids`` / ``input_mask``, outputs ``pred_label`` (argmax of
    the logits) and ``score`` (softmax probabilities).

    :param max_seq_len: second dimension of the input placeholders.
    :param model_dir: directory searched for the latest checkpoint.
    :param num_labels: number of output classes of the classifier.
    :param bert_config_file: path to the BERT config JSON; defaults to the
        path that was previously hard-coded.
    :raises ValueError: if no checkpoint is found in *model_dir*.
    """
    # Fail fast with a clear message instead of letting saver.restore()
    # receive None after the whole graph has been built.
    latest_checkpoint = tf.train.latest_checkpoint(model_dir)
    if latest_checkpoint is None:
        raise ValueError('No checkpoint found in model_dir: %s' % model_dir)

    # Input placeholders (built in the default graph; no session needed yet).
    input_ids = tf.placeholder(tf.int32, (None, max_seq_len), 'input_ids')
    input_mask = tf.placeholder(tf.int32, (None, max_seq_len), 'input_mask')

    # Forward pass in inference mode; no labels, so no loss is built.
    bert_config = modeling.BertConfig.from_json_file(bert_config_file)
    _, _, logits, probabilities = create_classification_model(
        bert_config=bert_config, is_training=False,
        input_ids=input_ids, input_mask=input_mask,
        segment_ids=None, labels=None, num_labels=num_labels)

    # Output tensors: predicted class index and class probabilities.
    pred_label = tf.argmax(logits, 1)
    probabilities = tf.identity(probabilities, 'pred_prob')

    # Saver over the variables created above.
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, latest_checkpoint)

        # Destination directory: <export_model_dir>/<model_version>.
        export_path_base = FLAGS.export_model_dir
        export_path = os.path.join(
            tf.compat.as_bytes(export_path_base),
            tf.compat.as_bytes(str(FLAGS.model_version)))
        print('Exporting trained model to', export_path)
        builder = tf.saved_model.builder.SavedModelBuilder(export_path)

        # TensorInfo protos describing the signature's inputs.
        input_ids_tensor = tf.saved_model.utils.build_tensor_info(input_ids)
        input_mask_tensor = tf.saved_model.utils.build_tensor_info(input_mask)

        # TensorInfo protos describing the signature's outputs.
        logits_output = tf.saved_model.utils.build_tensor_info(pred_label)
        print("logits_output")
        print(logits_output)
        probabilities_output = tf.saved_model.utils.build_tensor_info(probabilities)
        print("probabilities_output")
        print(probabilities_output)

        # Predict-API signature: token ids and mask in, label and scores out.
        prediction_signature = (
            tf.saved_model.signature_def_utils.build_signature_def(
                inputs={'input_ids': input_ids_tensor, 'input_mask': input_mask_tensor},
                outputs={'pred_label': logits_output, 'score': probabilities_output},
                method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME))

        # The signature is registered under the key 'result'.
        builder.add_meta_graph_and_variables(
            sess, [tf.saved_model.tag_constants.SERVING],
            signature_def_map={
                'result':
                    prediction_signature,
            })

        # Write the SavedModel to disk (as_text=True is kept from the original).
        builder.save(as_text=True)
        print('Done exporting!')

if __name__ == '__main__':
    # Script entry point: export the model trained under ./output/comment_0
    # with 128-token inputs and 3 output classes.
    main(max_seq_len=128, model_dir='./output/comment_0', num_labels=3)



Artificial Intelligence   Machine Learning   Algorithm   NLP   Bert      Machine Learning Algorithm NLP Bert Tensorflow

本博客所有文章除特别声明外,均采用 CC BY-SA 3.0 协议。转载请注明出处!