{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import spacy\n",
    "import medspacy\n",
    "import quickumls.spacy_component\n",
    "from quickumls import constants\n",
    "from medspacy.visualization import visualize_ent, visualize_dep"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       ""
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#### Create a new spacy pipeline\n",
    "nlp = spacy.blank(\"nl\")\n",
    "nlp.add_pipe('sentencizer')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['sentencizer', 'medspacy_quickumls']\n"
     ]
    }
   ],
   "source": [
    "#### Add the concept extraction module\n",
    "\n",
    "# set the location of the concept database\n",
    "quickumls_file_path = \"./QuickUMLS_resources/name_of_database_folder/\"\n",
    "\n",
    "# set the semtypes you want to find (T000 to T999)\n",
    "# Build a standalone list rather than calling constants.ACCEPTED_SEMTYPES.add(...):\n",
    "# set.add() returns None (so the variable would never hold the semtypes) and it\n",
    "# would insert the whole tuple as one element into the library's shared set.\n",
    "semtypes = [\"T\" + str(x).zfill(3) for x in range(1000)]\n",
    "\n",
    "# setup and set configuration options\n",
    "quickumls_config = {\n",
    "    \"threshold\": 0.7,\n",
    "    \"result_type\": \"ents\",\n",
    "    \"accepted_semtypes\": semtypes,\n",
    "    \"best_match\": True,\n",
    "    \"window\": 10,\n",
    "    \"similarity_name\": \"jaccard\",\n",
    "    \"min_match_length\": 3,\n",
    "    \"overlapping_criteria\": \"score\",\n",
    "    \"quickumls_fp\": quickumls_file_path,\n",
    "}\n",
    "nlp.add_pipe(\"medspacy_quickumls\", config=quickumls_config)\n",
    "\n",
    "print(nlp.pipe_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['sentencizer', 'medspacy_quickumls', 'medspacy_context']\n"
     ]
    }
   ],
   "source": [
    "#### Add the context extraction module\n",
    "\n",
    "# set the location of the context rules\n",
    "rules_path=\"./Context_resources/context_rules_Dutch.json\"\n",
    "\n",
    "nlp.add_pipe(\"medspacy_context\", config={\"rules\":rules_path})\n",
    "\n",
"print(nlp.pipe_names)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[2024-03-12T12:36:07.337257] 4 extracted from 22 tokens\n" ] } ], "source": [ "#### test the pipeline\n", "concept_text = 'Patient heeft een jaar geleden zijn been gebroken maar heeft nu nog steeds veel pijn, maar geen moeite met lopen.'\n", "doc = nlp(concept_text)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " Patient\n", " C0030705\n", "\n", " heeft \n", "\n", " een jaar geleden\n", " HISTORICAL\n", "\n", " zijn \n", "\n", " been\n", " C0005931\n", "\n", " gebroken maar heeft nu nog steeds veel \n", "\n", " pijn\n", " C0030193\n", "\n", ", maar \n", "\n", " geen\n", " NEGATED_EXISTENCE\n", "\n", " \n", "\n", " moeite met lopen\n", " C0311394\n", "\n", ".
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "visualize_ent(doc)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " Patient heeft\n", " FAMILY\n", "\n", "\n", "\n", " een jaar geleden\n", " HISTORICAL\n", "\n", "\n", "\n", " zijn\n", " \n", "\n", "\n", "\n", " been\n", " C0005931\n", "\n", "\n", "\n", " gebroken\n", " \n", "\n", "\n", "\n", " maar\n", " NEGATED_EXISTENCE\n", "\n", "\n", "\n", " heeft\n", " \n", "\n", "\n", "\n", " nu\n", " HISTORICAL\n", "\n", "\n", "\n", " nog\n", " \n", "\n", "\n", "\n", " steeds\n", " \n", "\n", "\n", "\n", " veel\n", " \n", "\n", "\n", "\n", " pijn\n", " C0030193\n", "\n", "\n", "\n", " ,\n", " \n", "\n", "\n", "\n", " maar\n", " NEGATED_EXISTENCE\n", "\n", "\n", "\n", " geen\n", " NEGATED_EXISTENCE\n", "\n", "\n", "\n", " moeite met lopen\n", " C0311394\n", "\n", "\n", "\n", " .\n", " \n", "\n", "\n", "\n", " \n", " \n", " HISTORICAL\n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " NEGATED_EXISTENCE\n", " \n", " \n", "\n", "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "visualize_dep(doc)" ] } ], "metadata": { "kernelspec": { "display_name": "medspacy20230927", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.17" } }, "nbformat": 4, "nbformat_minor": 2 }