#! coding=utf-8
"""
High-level toolbox for Chinese keyword extraction
"""
# pylint: disable=duplicate-code
from nlpir import get_instance as __get_instance__
from nlpir import native
import typing
import nlpir
import json
# Native class wrapped by this module, and the module-level slot holding its
# instance (``None`` until one is created).
# NOTE(review): presumably the ``__get_instance__`` decorator fills
# ``__instance__`` on first use from the settings below -- confirm in ``nlpir``.
__cls__ = native.key_extract.KeyExtract
__instance__: typing.Optional[native.key_extract.KeyExtract] = None
# Location of the DLL (native library); None here
__lib__ = None
# Data directory; None here
__data__ = None
# License code; None here
__license_code__ = None
# Encoding constant for NLPIR: UTF-8
__nlpir_encode__ = native.UTF8_CODE
@__get_instance__
def get_native_instance() -> native.key_extract.KeyExtract:
    """
    Return the native NLPIR interface, which gives access to more functions

    :return: The singleton instance
    """
    native_instance = __instance__
    return native_instance
@__get_instance__
def import_dict(word_list: list) -> list:
    """
    Add words to NLPIR through this module's instance,
    see :func:`nlpir.import_dict`

    :param word_list: list of words to add to NLPIR
    :return: the words that failed to be added to NLPIR
    """
    failed_words = nlpir.import_dict(instance=__instance__, word_list=word_list)
    return failed_words
@__get_instance__
def clean_user_dict() -> bool:
    """
    Delegate to :func:`nlpir.clean_user_dict` with this module's instance

    :return: success or not
    """
    succeeded = nlpir.clean_user_dict(instance=__instance__)
    return succeeded
@__get_instance__
def clean_temp_user_dict() -> bool:
    """
    Delegate to :func:`nlpir.clean_temp_user_dict` with this module's instance

    :return: success or not
    """
    succeeded = nlpir.clean_temp_user_dict(instance=__instance__)
    return succeeded
@__get_instance__
def delete_user_word(word_list: list):
    """
    Delegate to :func:`nlpir.delete_user_word` with this module's instance

    :param word_list: list of words to delete
    :return: whatever :func:`nlpir.delete_user_word` returns
    """
    outcome = nlpir.delete_user_word(instance=__instance__, word_list=word_list)
    return outcome
@__get_instance__
def save_user_dict() -> bool:
    """
    Delegate to :func:`nlpir.save_user_dict` with this module's instance

    :return: Success or not
    """
    saved = nlpir.save_user_dict(instance=__instance__)
    return saved
@__get_instance__
def clean_saved_user_dict():
    """
    Delegate to :func:`nlpir.clean_saved_user_dict`

    No instance is passed to the underlying call.

    :return: Delete success or not
    """
    deleted = nlpir.clean_saved_user_dict()
    return deleted
@__get_instance__
def import_blacklist(filename: str, pos_blacklist: typing.Optional[typing.List[str]] = None) -> bool:
    """
    Import Blacklist to system, see :func:`nlpir.import_blacklist`

    :param filename: blacklist file, forwarded to :func:`nlpir.import_blacklist`
    :param pos_blacklist: list of strings forwarded unchanged (presumably
        part-of-speech tags to block -- confirm against
        :func:`nlpir.import_blacklist`); ``None`` is forwarded when omitted
    :return: the result of :func:`nlpir.import_blacklist`
    """
    # The parameter used to be declared as ``pos_blacklist=typing.List[str]``,
    # which made the typing alias itself the default *value*; it is now an
    # annotation with a ``None`` default so no typing object leaks downstream.
    return nlpir.import_blacklist(__instance__, filename, pos_blacklist)
@__get_instance__
def clean_blacklist() -> bool:
    """
    Clear the blacklist word table, see :func:`nlpir.clean_blacklist`

    :return: clean success or not
    """
    cleaned = nlpir.clean_blacklist()
    return cleaned
@__get_instance__
def recover_blacklist() -> bool:
    """
    Restore the blacklist word table; this only takes effect when the
    renamed word table exists, see :func:`nlpir.recover_blacklist`

    :return: the result of :func:`nlpir.recover_blacklist`
    """
    recovered = nlpir.recover_blacklist()
    return recovered
@__get_instance__
def get_key_words(text: str, max_key: int = 50) -> typing.List[dict]:
    """
    Get keywords from text together with their weight, part of speech and
    frequency

    The native extractor is asked for JSON output, which is decoded here.

    :param text: the text to extract keywords from
    :param max_key: max number of keywords wanted
    :return: a list of keywords with weight; an empty list when the native
        output is not a string/bytes value, is not valid JSON, or decodes
        to ``null``. Example:

    ::

        [
            {
                'freq': 2,
                'pos': 'n_new',
                'weight': 7.771335980376418,
                'word': '国家权力'
            },{
                'freq': 7,
                'pos': 'n',
                'weight': 7.438759706600493,
                'word': '权力'
            },{
                'freq': 1,
                'pos': 'nrf',
                'weight': 5.280000338096665,
                'word': '孟德斯鸠'
            },{ ...
            }, ...
        ]
    """
    raw = __instance__.get_keywords(line=text, max_key_limit=max_key, format_opt=native.OUTPUT_FORMAT_JSON)
    try:
        keywords = json.loads(raw)
    except (TypeError, json.JSONDecodeError):
        # TypeError: the native call returned something json.loads cannot
        # accept (e.g. None); JSONDecodeError: empty or malformed JSON text.
        return []
    # A JSON ``null`` decodes to None; normalise it to an empty list.
    return keywords if keywords is not None else []