Source code for nlpir

# coding : utf-8
import os
import typing
import re
import logging
import sys
import functools
from .exception import NLPIRException

__version__ = "0.9.3"
PACKAGE_DIR = os.path.abspath(os.path.dirname(__file__))
logger = logging.getLogger("nlpir")


[docs]def clean_logs(data_path: typing.Optional[str] = None, include_current: bool = False): """ Clean logs :param data_path: the cus :param include_current: include current directory or not """ if data_path is None: data_path = os.path.join(PACKAGE_DIR, "Data") delete_list = [data_path] if include_current: delete_list.append(os.path.abspath("./")) delete_file_list = [] for path in delete_list: for filename in os.listdir(path): if re.match(r'\d{8}\.log|err', filename): delete_file_list.append(os.path.abspath(os.path.join(path, filename))) logger.info("The following file will be deleted: \n\t{}".format("\n\t".join(delete_file_list))) for f in delete_file_list: try: os.remove(f) except OSError as e: logger.error(e)
[docs]def get_instance(func: callable) -> callable: """ A wrapper to init instance when call the function 直接使用单层装饰器时,此装饰器会在import module的时候直接被调用, 生成对应的函数,导致对应的类实例过早初始化. 为了让类实例真正在函数调用时才初始化,使用下面的 :func:`functions.warps`, 此方法在直接import的时候不会被调用(因为初始化时仅仅为函数没有函数参数),故使用 这种方式. 让函数在真正执行时才进行类实例初始化的原因是为了使 :func:`init_setting` 可以被使用, 类似于 :func:logging.basicConfig 方法,可以在import对应module后可以有一次修改初始化 参数的可能. """ @functools.wraps(func) def wraps(*args, **kwargs): """ """ module = sys.modules[func.__module__] module = init_setting(module) if module.__instance__ is None else module module.__instance__ = module.__cls__( encode=module.__nlpir_encode__, lib_path=module.__lib__, data_path=module.__data__, license_code=module.__license_code__ ) if module.__instance__ is None else module.__instance__ return func(*args, **kwargs) return wraps
[docs]def init_setting( init_module, encode: typing.Optional[int] = None, lib_path: typing.Optional[int] = None, data_path: typing.Optional[str] = None, license_code: str = '' ): """ Init the NLPIR module for custom usage. **Only can init it , before call any process function in that module** :param ModuleType init_module: The high-level module want to use :param int encode: same as in :class:`nlpir.native.nlpir_base.NLPIRBase()` :param str lib_path: same as in :class:`nlpir.native.nlpir_base.NLPIRBase()` :param str data_path: same as in :class:`nlpir.native.nlpir_base.NLPIRBase()` :param str license_code: same as in :class:`nlpir.native.nlpir_base.NLPIRBase()` :raise: NLPIRException :return: init module """ if init_module.__instance__ is not None: raise NLPIRException("Already have a instance can not change the setting") init_module.__nlpir_encode__ = encode if encode is not None else init_module.__nlpir_encode__ init_module.__lib__ = lib_path if lib_path is not None else init_module.__lib__ init_module.__data__ = data_path if data_path is not None else init_module.__data__ init_module.__license_code__ = license_code if license_code is not None else init_module.__license_code__ init_module.__cls__( encode=init_module.__nlpir_encode__, lib_path=init_module.__lib__, data_path=init_module.__data__, license_code=init_module.__license_code__ ) return init_module
[docs]def import_dict(word_list: list, instance) -> list: """ Temporary add word as dictionary, will loss it when restart the Program. Can use :func:`save_user_dict` to make persistence, :func:`clean_user_dict` to delete all temporary words or :func:`delete_user_word` to delete part of them. The persistent dict cannot be clean by using method above. :func:`clean_saved_user_dict` will be used in this situation. But it will delete all user dict include saved dict in the past. Every word in `word_list` can be a single word and the POS will be `n`. The custom POS can be added as `word pos` in `word_list`. :param instance: instance to execute the function :param word_list: list of words want to add to NLPIR :return: the word fail to add to the NLPIR """ if not hasattr(instance, "add_user_word"): raise NLPIRException("This instance not support this method") fail_list = list() for word in word_list: if 0 != instance.add_user_word(word): fail_list.append(word_list) return fail_list
[docs]def clean_user_dict(instance) -> bool: """ Clean all temporary dictionary, more information shows in :func:`import_dict` :param instance: instance to execute the function :return: success or not """ if not hasattr(instance, "clean_user_word"): raise NLPIRException("This instance not support this method") return instance.clean_user_word() == 0
[docs]def delete_user_word(word_list: list, instance): """ Delete words in temporary dictionary, more information shows in :func:`import_dict` :param instance: instance to execute the function :param word_list: list of words want to delete """ if not hasattr(instance, "del_usr_word"): raise NLPIRException("This instance not support this method") for word in word_list: instance.del_usr_word(word)
[docs]def save_user_dict(instance) -> bool: """ Save temporary dictionary to Data, more information shows in :func:`import_dict` :param instance: instance to execute the function :return: Success or not """ if not hasattr(instance, "save_the_usr_dic"): raise NLPIRException("This instance not support this method") return 1 == instance.save_the_usr_dic()
[docs]def clean_saved_user_dict(): """ Delete user dict from disk, which is : 1. ``Data/FieldDict.pdat`` 2. ``Data/FieldDict.pos`` 3. ``Data/FieldDict.wordlist`` 4. ``Data/UserDefinedDict.lst`` :return: Delete success or not """ try: # for ictclas with open(os.path.join(PACKAGE_DIR, "Data/FieldDict.pdat"), 'w') as f: f.write("") with open(os.path.join(PACKAGE_DIR, "Data/FieldDict.pos"), 'w') as f: f.write("") with open(os.path.join(PACKAGE_DIR, "Data/FieldDict.wordlist"), 'w') as f: f.write("") with open(os.path.join(PACKAGE_DIR, "Data/UserDefinedDict.lst"), 'w') as f: f.write("") # for key_extract with open(os.path.join(PACKAGE_DIR, "Data/UserDict.pdat"), 'w') as f: f.write("") return True except OSError: return False
# noinspection PyTypeChecker
[docs]def import_blacklist(instance, filename: str, pos_blacklist=typing.List[str]) -> bool: """ Import Blacklist to system This function will permanently import blacklist words to system not to the memory . If you want to delete the blacklist words, you should run :func:`clean_blacklist` to delete blacklist form system . 此函数将会把词永久性保存在NLPIR中,和保存用户词典类似.这里删除使用的是 :func:`clean_blacklist` . 停用词表,Format of stop word:: word1 n1 word2 n2 word3 n3 若 `pos_blacklist` 为: ``['n1', 'n2']`` 则 `word1`, `word2` 将会进入屏蔽列表 If `pos_blacklist` : ``['n1', 'n2']`` Then `word1`, `word2` will be in the blacklist :param instance: instance to execute the function :param filename: A word list that the words want to import to the blacklist (stop word list), 一个停用词词表,里面为想进行屏蔽的词,也可以包括别的词,是否不进行抽取是按照词表中的词性来确定的. :param pos_blacklist: A list of pos that want to block in the system, 想要屏蔽的词的词性 :return: 是否成功导入 """ if not hasattr(instance, "import_key_blacklist"): raise NLPIRException("This instance not support this method") try: os.rename( os.path.join(PACKAGE_DIR, "Data/KeyBlackList.pdat"), os.path.join(PACKAGE_DIR, "Data/KeyBlackList.pdat.bak") ) except OSError: pass return_result = instance.import_key_blacklist( filename=filename, pos_blacklist="#".join(pos_blacklist) ) if return_result > 0: return True else: clean_blacklist() return False
def __rename__(src, dst): if os.path.isfile(dst): os.remove(dst) os.rename(src, dst)
[docs]def clean_blacklist() -> bool: """ 清除黑名单词表, 会将对应的文件进行重命名, 之后可以通过 :func:`recover_blacklist` 进行恢复,但仅可以进行一次,若重复调用本函数则恢复函数不起作用 :return: clean success or not """ black_dir = os.path.join(PACKAGE_DIR, "Data/KeyBlackList.pdat") black_dir_bak = os.path.join(PACKAGE_DIR, "Data/KeyBlackList.pdat.bak") try: __rename__(black_dir, black_dir_bak) return True except OSError: return False
[docs]def recover_blacklist() -> bool: """ 恢复黑名单词表,仅在被重命名的词表存在时才起作用 :return: """ black_dir = os.path.join(PACKAGE_DIR, "Data/KeyBlackList.pdat") black_dir_bak = os.path.join(PACKAGE_DIR, "Data/KeyBlackList.pdat.bak") try: __rename__(black_dir_bak, black_dir) return True except OSError: return False