# Source code for nlpir.text_similarity

#! coding=utf-8
"""
high-level toolbox for text similarity
"""
from nlpir import get_instance as __get_instance__
from nlpir import native
from nlpir.native.text_similarity import SIM_MODEL_WORD, SIM_MODEL_CHAR, SIM_MODEL_KEY
import typing

# Native wrapper class for this module and its singleton instance.
# The instance is None at import time; presumably it is created by the
# ``__get_instance__`` decorator on first use — confirm in ``nlpir``.
__cls__ = native.text_similarity.TextSimilarity
__instance__: typing.Optional[native.TextSimilarity] = None
# Location of the DLL (None here; how None is interpreted is decided outside this file)
__lib__ = None
# Data directory (None here; how None is interpreted is decided outside this file)
__data__ = None
# License code (None here; how None is interpreted is decided outside this file)
__license_code__ = None
# Text encoding passed to NLPIR
__nlpir_encode__ = native.UTF8_CODE

# Public API of this module: the two helper functions plus the three
# similarity-model constants re-exported from nlpir.native.text_similarity.
__all__ = [
    "get_native_instance",
    "similarity",
    "SIM_MODEL_WORD",
    "SIM_MODEL_KEY",
    "SIM_MODEL_CHAR"
]


@__get_instance__
def get_native_instance() -> native.TextSimilarity:
    """
    Return the native NLPIR interface, giving access to more functions
    than this high-level module wraps.

    NOTE(review): the body simply returns the module-level ``__instance__``;
    the ``__get_instance__`` decorator (defined in ``nlpir``, not visible
    here) is presumably what initialises it before the call — confirm.

    :return: The singleton instance
    """
    return __instance__
@__get_instance__
def similarity(text_1: str, text_2: str, model=SIM_MODEL_WORD) -> float:
    """
    Compute the similarity of two texts. There are three models:

    - :data:`SIM_MODEL_WORD` word model; moderate speed, generally suited
      to normal, well-formed long documents
    - :data:`SIM_MODEL_CHAR` character model; the fastest, suited to
      relatively well-formed short texts
    - :data:`SIM_MODEL_KEY` topic-word model; the slowest, takes the most
      semantics into account, suited to complex texts

    NOTE(review): the ``__get_instance__`` decorator (defined in ``nlpir``,
    not visible here) is presumably what initialises the module-level
    ``__instance__`` used below — confirm.

    :param text_1: first text to compare
    :param text_2: second text to compare
    :param model: one of the model constants listed above,
        defaults to :data:`SIM_MODEL_WORD`
    :return: the value returned by the native instance's ``compute_sim``
    """
    # Delegate straight to the native wrapper; arguments are passed by
    # keyword so they cannot be mixed up.
    return __instance__.compute_sim(
        text_1=text_1,
        text_2=text_2,
        model=model,
    )