(資料圖片僅供參考)
Neo4j的數據庫構建完成後,接下來要實現醫療知識的解答功能。因為是初版,這裡的問題解答不涉及深度學習,目前只是一個條件查詢的過程。這個過程分為三步:先對問題做關鍵詞拆解和分類,然後提取詞語和類型去圖數據庫查詢,最後根據查詢結果和問題類型組裝語言完成回答。以下就是完成這個過程的全部代碼流程。
這裡所需的環境除了前面提到的之外,還需要ahocorasick庫(PyPI上的包名為pyahocorasick),用於從問題中提取關鍵詞;另一個是colorama,用於美化輸出面板的文字。
from colorama import init, Fore, Style, Back
from classifier import Classifier
from parse import Parse
from answer import Answer


class ChatRobot:
    """Console front end that wires the classifier, query builder and answerer."""

    def __init__(self):
        """Initialise colour output, print the banner and build the pipeline."""
        init(autoreset=True)
        print("====================================")
        print(Back.BLUE + "歡迎進入智慧醫療問答面板!")
        print("====================================")
        # The pipeline stages are created once here instead of once per
        # question: Classifier() reads every dictionary file and builds its
        # automaton, and Answer() opens a graph connection.
        self.classifier = Classifier()
        self.parser = Parse()
        self.answerer = Answer()

    def main(self, question):
        """Answer one question.

        Args:
            question: The raw text typed by the user.

        Returns:
            The answers joined by blank lines, or a fixed apology string when
            the pipeline produced no answer.
        """
        print("")
        default_answer = "您好,小北知識有限,暫時回答不上來,正在努力迭代中!"
        final_classify = self.classifier.classify(question)
        parse_sql = self.parser.main(final_classify)
        final_answer = self.answerer.main(parse_sql)
        if not final_answer:
            return default_answer
        return "\n\n".join(final_answer)


if __name__ == "__main__":
    robot = ChatRobot()
    while True:
        print(" ")
        question = input("您問:")
        # Any input containing the close keyword ends the session.
        if "關閉" in question:
            print("")
            print("小北說:", "好的,已經關閉了哦,歡迎您下次提問~")
            break
        answer = robot.main(question)
        print(Fore.LIGHTRED_EX + "小北答:", Fore.GREEN + answer)
import ahocorasick


class Classifier:
    """Detect known entities in a question and decide which question types it asks."""

    def __init__(self):
        """Load the word dictionaries, build the matcher and define keyword lists."""
        # One word per line; blank lines are ignored.
        self.checks_wds = self._load_words("dict/checks.txt")
        self.departments_wds = self._load_words("dict/departments.txt")
        self.diseases_wds = self._load_words("dict/diseases.txt")
        self.drugs_wds = self._load_words("dict/drugs.txt")
        self.foods_wds = self._load_words("dict/foods.txt")
        self.producers_wds = self._load_words("dict/producers.txt")
        self.symptoms_wds = self._load_words("dict/symptoms.txt")
        self.features_wds = set(
            self.checks_wds
            + self.departments_wds
            + self.diseases_wds
            + self.drugs_wds
            + self.foods_wds
            + self.producers_wds
            + self.symptoms_wds
        )
        self.deny_words = self._load_words("dict/deny.txt")
        # Automaton used to pull every known feature word out of the input text.
        self.actree = self.build_actree(list(self.features_wds))
        # Map each feature word to the entity types it belongs to.
        self.wds_dict = self.build_words_dict()
        # Keyword lists that signal what the question is asking about.
        self.symptom_qwds = ["癥狀", "表征", "現象", "癥候", "表現"]
        self.cause_qwds = ["原因", "成因", "為什么", "怎么會", "怎樣才", "咋樣才", "怎樣會", "如何會", "為啥", "為何", "如何才會", "怎么才會", "會導致", "會造成"]
        self.acompany_qwds = ["并發癥", "并發", "一起發生", "一并發生", "一起出現", "一并出現", "一同發生", "一同出現", "伴隨發生", "伴隨", "共現"]
        self.food_qwds = ["飲食", "飲用", "吃", "食", "伙食", "膳食", "喝", "菜", "忌口", "補品", "保健品", "食譜", "菜譜", "食用", "食物", "補品"]
        self.drug_qwds = ["藥", "藥品", "用藥", "膠囊", "口服液", "炎片"]
        self.prevent_qwds = ["預防", "防范", "抵制", "抵御", "防止", "躲避", "逃避", "避開", "免得", "逃開", "避開", "避掉", "躲開", "躲掉", "繞開", "怎樣才能不", "怎么才能不", "咋樣才能不", "咋才能不", "如何才能不", "怎樣才不", "怎么才不", "咋樣才不", "咋才不", "如何才不",
                             "怎樣才可以不", "怎么才可以不", "咋樣才可以不", "咋才可以不", "如何可以不", "怎樣才可不", "怎么才可不", "咋樣才可不", "咋才可不", "如何可不"]
        self.lasttime_qwds = ["周期", "多久", "多長時間", "多少時間", "幾天", "幾年", "多少天", "多少小時", "幾個小時", "多少年"]
        self.cureway_qwds = ["怎么治療", "如何醫治", "怎么醫治", "怎么治", "怎么醫", "如何治", "醫治方式", "療法", "咋治", "怎么辦", "咋辦", "咋治"]
        self.cureprob_qwds = ["多大概率能治好", "多大幾率能治好", "治好希望大么", "幾率", "幾成", "比例", "可能性", "能治", "可治", "可以治", "可以醫"]
        self.easyget_qwds = ["易感人群", "容易感染", "易發人群", "什么人", "哪些人", "感染", "染上", "得上"]
        self.check_qwds = ["檢查", "檢查項目", "查出", "檢查", "測出", "試出"]
        self.belong_qwds = ["屬于什么科", "屬于", "什么科", "科室"]
        self.cure_qwds = ["治療什么", "治啥", "治療啥", "醫治啥", "治愈啥", "主治啥", "主治什么", "有什么用", "有何用", "用處", "用途", "有什么好處", "有什么益處", "有何益處", "用來", "用來做啥", "用來作甚", "需要", "要"]

    @staticmethod
    def _load_words(path):
        """Return the non-blank, stripped lines of a UTF-8 word list file."""
        # The context manager closes the handle; the original left it open.
        with open(path, encoding="utf-8", mode="r") as handle:
            return [line.strip() for line in handle if line.strip()]

    def build_actree(self, wordlist):
        """Build an Aho-Corasick automaton over ``wordlist``.

        Each word is stored with the payload ``(index, word)`` so that a match
        yields the matched word itself.
        """
        actree = ahocorasick.Automaton()
        for index, word in enumerate(wordlist):
            actree.add_word(word, (index, word))
        actree.make_automaton()
        return actree

    def build_words_dict(self):
        """Return a dict mapping every feature word to its list of entity types.

        A word that appears in several dictionaries gets several types, listed
        in the fixed order check, department, disease, drug, food, producer,
        symptom.
        """
        categories = (
            ("check", set(self.checks_wds)),
            ("department", set(self.departments_wds)),
            ("disease", set(self.diseases_wds)),
            ("drug", set(self.drugs_wds)),
            ("food", set(self.foods_wds)),
            ("producer", set(self.producers_wds)),
            ("symptom", set(self.symptoms_wds)),
        )
        return {
            word: [label for label, members in categories if word in members]
            for word in self.features_wds
        }

    def classify(self, sent):
        """Classify a question.

        Args:
            sent: The question text.

        Returns:
            A dict with ``"args"`` (matched word -> list of entity types) and
            ``"question_types"`` (list of question type names, possibly empty).
        """
        matched_words = [hit[1][1] for hit in self.actree.iter(sent)]
        # e.g. {"wordA": ["disease"], "wordB": ["disease", "symptom"]}
        final_dict = {word: self.wds_dict.get(word) for word in matched_words}
        data = {"args": final_dict}

        # Every entity type that occurs among the matched words.
        entity_types = set()
        for word_types in final_dict.values():
            entity_types.update(word_types or ())

        def asks(keywords):
            return self.check_word_exist(keywords, sent)

        has_disease = "disease" in entity_types
        has_symptom = "symptom" in entity_types
        # A deny word flips food questions from "may eat" to "must not eat".
        denied = self.check_word_exist(self.deny_words, sent)
        questions_type = []

        # Disease -> symptoms.
        if has_disease and asks(self.symptom_qwds):
            questions_type.append("disease_symptom")
        # Symptom -> diseases.
        if has_symptom and asks(self.symptom_qwds):
            questions_type.append("symptom_disease")
        # Disease -> cause.
        if has_disease and asks(self.cause_qwds):
            questions_type.append("disease_cause")
        # Disease -> accompanying conditions.
        if has_disease and asks(self.acompany_qwds):
            questions_type.append("disease_acompany")
        # Disease -> food to eat / to avoid.
        if has_disease and asks(self.food_qwds):
            questions_type.append("disease_not_food" if denied else "disease_do_food")
        # Food -> diseases it suits / does not suit.
        if "food" in entity_types and asks(self.food_qwds + self.cure_qwds):
            questions_type.append("food_not_disease" if denied else "food_do_disease")
        # Disease -> drugs.
        if has_disease and asks(self.drug_qwds):
            questions_type.append("disease_drug")
        # Drug -> diseases.
        if "drug" in entity_types and asks(self.cure_qwds):
            questions_type.append("drug_disease")
        # Disease -> checks.
        if has_disease and asks(self.check_qwds):
            questions_type.append("disease_check")
        # Check -> diseases.
        if "check" in entity_types and asks(self.check_qwds + self.cure_qwds):
            questions_type.append("check_disease")
        # Disease -> prevention.
        if has_disease and asks(self.prevent_qwds):
            questions_type.append("disease_prevent")
        # Disease -> treatment duration.
        if has_disease and asks(self.lasttime_qwds):
            questions_type.append("disease_lasttime")
        # Disease -> treatment method.
        if has_disease and asks(self.cureway_qwds):
            questions_type.append("disease_cureway")
        # Disease -> cure probability.
        if has_disease and asks(self.cureprob_qwds):
            questions_type.append("disease_cureprob")
        # Disease -> susceptible population.
        if has_disease and asks(self.easyget_qwds):
            questions_type.append("disease_easyget")

        # Nothing specific was asked: fall back to the disease description,
        # or to the diseases related to a mentioned symptom.
        if not questions_type and has_disease:
            questions_type = ["disease_desc"]
        if not questions_type and has_symptom:
            questions_type = ["symptom_disease"]

        data["question_types"] = questions_type
        return data

    def check_word_exist(self, word_list, words):
        """Return True if any item of ``word_list`` is a substring of ``words``."""
        return any(item in words for item in word_list)
class Parse:
    """Turn the classifier result into Cypher query strings, one group per question type."""

    # Entity type whose matched words are used as query arguments.
    _ENTITY_TYPE = {
        "disease_symptom": "disease",
        "symptom_disease": "symptom",
        "disease_cause": "disease",
        "disease_acompany": "disease",
        "disease_not_food": "disease",
        "disease_do_food": "disease",
        "food_not_disease": "food",
        "food_do_disease": "food",
        "disease_drug": "disease",
        "drug_disease": "drug",
        "disease_check": "disease",
        "check_disease": "check",
        "disease_prevent": "disease",
        "disease_lasttime": "disease",
        "disease_cureway": "disease",
        "disease_cureprob": "disease",
        "disease_easyget": "disease",
        "disease_desc": "disease",
    }

    # Question types answered from a single property of the disease node.
    _DISEASE_PROPERTY = {
        "disease_cause": "cause",
        "disease_prevent": "prevent",
        "disease_lasttime": "cure_lasttime",
        "disease_cureprob": "cured_prob",
        "disease_cureway": "cure_way",
        "disease_easyget": "easy_get",
        "disease_desc": "desc",
    }

    # Question types answered from relationships. Each entry is a sequence of
    # (relationship type, target label, variable whose name is filtered);
    # "m" is the disease side and "n" the target side.
    _RELATION_QUERIES = {
        "disease_symptom": (("has_symptoms", "Symptoms", "m"),),
        "symptom_disease": (("has_symptoms", "Symptoms", "n"),),
        "disease_acompany": (
            ("acompany_with", "Symptoms", "m"),
            ("acompany_with", "Symptoms", "n"),
        ),
        "disease_not_food": (("not_eat", "Foods", "m"),),
        "disease_do_food": (
            ("do_eat", "Foods", "m"),
            ("recomment_eat", "Foods", "m"),
        ),
        "food_not_disease": (("not_eat", "Foods", "n"),),
        "food_do_disease": (
            ("do_eat", "Foods", "n"),
            ("recomment_eat", "Foods", "n"),
        ),
        "disease_drug": (
            ("common_drug", "Drugs", "m"),
            ("recommand_drug", "Drugs", "m"),
        ),
        "drug_disease": (
            ("common_drug", "Drugs", "n"),
            ("recommand_drug", "Drugs", "n"),
        ),
        "disease_check": (("need_check", "Checks", "m"),),
        "check_disease": (("need_check", "Checks", "n"),),
    }

    _NODE_TEMPLATE = "MATCH (m:Diseases) where m.name = '{0}' return m.name, m.{1}"
    _RELATION_TEMPLATE = (
        "MATCH (m:Diseases)-[r:{0}]->(n:{1}) where {2}.name = '{3}' "
        "return m.name, r.name, n.name"
    )

    def main(self, classify):
        """Build the query groups for a classifier result.

        Args:
            classify: Dict with ``"args"`` (word -> list of entity types) and
                ``"question_types"`` (list of question type names).

        Returns:
            A list of dicts ``{"qustion_type": ..., "sql": [...]}``, one per
            question type that produced at least one query. The misspelled
            key is kept because the answer stage reads it under that name.
        """
        entity_dict = self.entity_transform(classify["args"])
        sqls = []
        for question in classify["question_types"]:
            entity_type = self._ENTITY_TYPE.get(question)
            if entity_type is None:
                # Unknown question type: nothing to query.
                continue
            sql = self.sql_transfer(question, entity_dict.get(entity_type))
            if sql:
                sqls.append({"qustion_type": question, "sql": sql})
        return sqls

    def sql_transfer(self, question_type, entities):
        """Return the Cypher queries for one question type.

        Args:
            question_type: Name of the question type.
            entities: Entity names to query for; ``None`` or empty yields ``[]``.

        Returns:
            A list of query strings. For question types backed by several
            relationships, all queries of the first relationship come first.
            Unknown question types yield ``[]``.
        """
        if not entities:
            return []
        names = [self._escape(name) for name in entities]

        prop = self._DISEASE_PROPERTY.get(question_type)
        if prop is not None:
            return [self._NODE_TEMPLATE.format(name, prop) for name in names]

        return [
            self._RELATION_TEMPLATE.format(relation, label, variable, name)
            for relation, label, variable in self._RELATION_QUERIES.get(question_type, ())
            for name in names
        ]

    def entity_transform(self, entity):
        """Invert ``{word: [types]}`` into ``{type: [words]}``.

        Every word is kept: a type shared by several words lists all of them
        in the iteration order of ``entity``.
        """
        entity_dict = {}
        for args, types in entity.items():
            for entity_type in types or ():
                entity_dict.setdefault(entity_type, []).append(args)
        return entity_dict

    @staticmethod
    def _escape(name):
        """Escape backslashes and single quotes for a single-quoted Cypher literal."""
        return str(name).replace("\\", "\\\\").replace("'", "\\'")
from py2neo import Graph, Node


class Answer:
    """Run the generated Cypher queries and render the rows as answer sentences."""

    # question_type -> (row key listing the values, row key naming the subject,
    # template where {0} is the subject and {1} the joined values).
    _TEMPLATES = {
        "disease_symptom": ("n.name", "m.name", "{0}的癥狀包括:{1}"),
        "symptom_disease": ("m.name", "n.name", "癥狀{0}可能染上的疾病有:{1}"),
        "disease_cause": ("m.cause", "m.name", "{0}可能的成因有:{1}"),
        "disease_prevent": ("m.prevent", "m.name", "{0}的預防措施包括:{1}"),
        "disease_lasttime": ("m.cure_lasttime", "m.name", "{0}治療可能持續的周期為:{1}"),
        "disease_cureway": ("m.cure_way", "m.name", "{0}可以嘗試如下治療:{1}"),
        "disease_cureprob": ("m.cured_prob", "m.name", "{0}治愈的概率為(僅供參考):{1}"),
        "disease_easyget": ("m.easy_get", "m.name", "{0}的易感人群包括:{1}"),
        "disease_desc": ("m.desc", "m.name", "{0},熟悉一下:{1}"),
        "disease_not_food": ("n.name", "m.name", "{0}忌食的食物包括有:{1}"),
        "food_not_disease": ("m.name", "n.name", "患有{1}的人最好不要吃{0}"),
        "food_do_disease": ("m.name", "n.name", "患有{1}的人建議多試試{0}"),
        "disease_drug": ("n.name", "m.name", "{0}通常的使用的藥品包括:{1}"),
        "drug_disease": ("m.name", "n.name", "{0}主治的疾病有{1},可以試試"),
        "disease_check": ("n.name", "m.name", "{0}通常可以通過以下方式檢查出來:{1}"),
        # The original literal here was mis-encoded; restored to match the
        # wording of the disease_check template above.
        "check_disease": ("m.name", "n.name", "通常可以通過{0}檢查出來的疾病有{1}"),
    }

    def __init__(self, uri="bolt://localhost:7687", user="neo4j", password="beiqiaosu123456"):
        """Connect to the graph database.

        Args:
            uri: Bolt URI of the Neo4j server.
            user: Login name.
            password: Login password.

        NOTE(review): the defaults reproduce the previously hard-coded
        credentials; real deployments should pass them in from configuration.
        """
        self.neo4j = Graph(uri, auth=(user, password))
        # Maximum number of values listed in one answer.
        self.num_limit = 20

    def main(self, question_parse):
        """Execute every query group and return the non-empty answers.

        Args:
            question_parse: List of ``{"qustion_type": ..., "sql": [...]}``
                dicts.

        Returns:
            A list of answer strings, in the order of ``question_parse``.
        """
        answers_final = []
        for item in question_parse:
            question_type = item["qustion_type"]
            rows = []
            for sql in item["sql"]:
                rows += self.neo4j.run(sql).data()
            final_answer = self.answer_prettify(question_type, rows)
            if final_answer:
                answers_final.append(final_answer)
        return answers_final

    def answer_prettify(self, question_type, answers):
        """Render the rows of one question type with its reply template.

        Args:
            question_type: Name of the question type.
            answers: List of row dicts keyed by the returned column names.

        Returns:
            The answer sentence, or ``""`` when there are no rows, the
            question type is unknown, or no usable value was found.
        """
        if not answers:
            return ""

        if question_type == "disease_do_food":
            subject = answers[0]["m.name"]
            do_items = self._join_unique(
                row["n.name"] for row in answers if row["r.name"] == "可以吃"
            )
            recommend_items = self._join_unique(
                row["n.name"] for row in answers if row["r.name"] == "推薦吃"
            )
            if not do_items and not recommend_items:
                return ""
            return "{0}宜食的食物包括有:{1}\n推薦食譜包括有:{2}".format(
                subject, do_items, recommend_items
            )

        if question_type == "disease_acompany":
            # Rows come from both directions of the relationship, so both
            # ends are collected and the subject itself is dropped.
            # NOTE(review): the sentence says "symptoms" although the question
            # is about accompanying conditions — confirm the intended wording.
            subject = answers[0]["m.name"]
            related = [row["n.name"] for row in answers] + [row["m.name"] for row in answers]
            items = self._join_unique(name for name in related if name != subject)
            return "{0}的癥狀包括:{1}".format(subject, items) if items else ""

        spec = self._TEMPLATES.get(question_type)
        if spec is None:
            return ""
        items_key, subject_key, template = spec
        subject = answers[0][subject_key]
        if question_type == "disease_cureway":
            # cure_way is joined per row, as in the original code.
            # NOTE(review): presumably a list of strings — confirm against the graph schema.
            values = [
                ";".join(row[items_key]) for row in answers if row[items_key] is not None
            ]
        else:
            values = [row[items_key] for row in answers]
        items = self._join_unique(values)
        return template.format(subject, items) if items else ""

    def _join_unique(self, values):
        """Join distinct, non-empty values with ';', capped at ``num_limit``.

        First-seen order is kept so the output and the truncation are
        deterministic.
        """
        unique = list(dict.fromkeys(str(v) for v in values if v not in (None, "")))
        return ";".join(unique[: self.num_limit])
以上就是這個醫療知識問答機器人的全部代碼了。從上面的問答裡也能看出,回答得還是很生硬。因為這只是一個程序化的思維導圖,所以修改完善的空間還很大,後期需要用深度學習的方式對分類解析部分進行改動。
關鍵詞: