{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import spacy\n",
    "import medspacy\n",
    "import quickumls.spacy_component\n",
    "from quickumls import constants\n",
    "from medspacy.visualization import visualize_ent, visualize_dep"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<spacy.pipeline.sentencizer.Sentencizer at 0x1e8cf7054c0>"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#### Create a new spacy pipeline\n",
    "nlp = spacy.blank(\"nl\")\n",
    "nlp.add_pipe('sentencizer')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['sentencizer', 'medspacy_quickumls']\n"
     ]
    }
   ],
   "source": [
    "#### Add the concept extraction module\n",
    "\n",
    "# set the location of the concept database\n",
    "quickumls_file_path = \"./QuickUMLS_resources/name_of_database_folder/\"\n",
    "\n",
    "# set the semtypes you want to find (T000 to T999)\n",
    "semtypes=constants.ACCEPTED_SEMTYPES.add(tuple([\"T\"+str(x).zfill(3) for x in range(0,1000)]))\n",
    "\n",
    "# setup and set configuration options\n",
    "nlp.add_pipe(\"medspacy_quickumls\", config={\"threshold\": 0.7,\n",
    "                                            \"result_type\": \"ents\",\n",
    "                                            \"accepted_semtypes\":semtypes,\n",
    "                                            \"best_match\": True, \n",
    "                                            \"window\": 10,\n",
    "                                            \"similarity_name\":\"jaccard\",\n",
    "                                            \"min_match_length\": 3,\n",
    "                                            \"overlapping_criteria\":\"score\",\n",
    "                                            \"quickumls_fp\": quickumls_file_path})\n",
    "\n",
    "print(nlp.pipe_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['sentencizer', 'medspacy_quickumls', 'medspacy_context']\n"
     ]
    }
   ],
   "source": [
    "#### Add the context extraction module\n",
    "\n",
    "# set the location of the rules\"\n",
    "rules_path=\"./Context_resources/context_rules_Dutch.json\"\n",
    "\n",
    "nlp.add_pipe(\"medspacy_context\", config={\"rules\":rules_path})\n",
    "\n",
    "print(nlp.pipe_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[2024-03-12T12:36:07.337257] 4 extracted from 22 tokens\n"
     ]
    }
   ],
   "source": [
    "#### test the pipeline\n",
    "concept_text = 'Patient heeft een jaar geleden zijn been gebroken maar heeft nu nog steeds veel pijn, maar geen moeite met lopen.'\n",
    "doc = nlp(concept_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">\n",
       "<mark class=\"entity\" style=\"background: #d62728; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
       "    Patient\n",
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">C0030705</span>\n",
       "</mark>\n",
       " heeft \n",
       "<mark class=\"entity\" style=\"background: #9467bd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
       "    een jaar geleden\n",
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">HISTORICAL</span>\n",
       "</mark>\n",
       " zijn \n",
       "<mark class=\"entity\" style=\"background: #ff7f0e; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
       "    been\n",
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">C0005931</span>\n",
       "</mark>\n",
       " gebroken maar heeft nu nog steeds veel \n",
       "<mark class=\"entity\" style=\"background: #2ca02c; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
       "    pijn\n",
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">C0030193</span>\n",
       "</mark>\n",
       ", maar \n",
       "<mark class=\"entity\" style=\"background: #1f77b4; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
       "    geen\n",
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">NEGATED_EXISTENCE</span>\n",
       "</mark>\n",
       " \n",
       "<mark class=\"entity\" style=\"background: #8c564b; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n",
       "    moeite met lopen\n",
       "    <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">C0311394</span>\n",
       "</mark>\n",
       ".</div></span>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "visualize_ent(doc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<span class=\"tex2jax_ignore\"><svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"0a94e2db8c6e4c52b7ca3a68e00062f4-0\" class=\"displacy\" width=\"3025\" height=\"224.5\" direction=\"ltr\" style=\"max-width: none; height: 224.5px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"134.5\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">Patient heeft</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">FAMILY</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"134.5\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"225\">een jaar geleden</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"225\">HISTORICAL</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"134.5\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"400\">zijn</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"400\"></tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"134.5\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"575\">been</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"575\">C0005931</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"134.5\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"750\">gebroken</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"750\"></tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"134.5\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"925\">maar</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"925\">NEGATED_EXISTENCE</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"134.5\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1100\">heeft</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1100\"></tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"134.5\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1275\">nu</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1275\">HISTORICAL</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"134.5\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1450\">nog</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1450\"></tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"134.5\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1625\">steeds</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1625\"></tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"134.5\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1800\">veel</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1800\"></tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"134.5\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"1975\">pijn</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"1975\">C0030193</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"134.5\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2150\">,</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2150\"></tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"134.5\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2325\">maar</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2325\">NEGATED_EXISTENCE</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"134.5\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2500\">geen</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2500\">NEGATED_EXISTENCE</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"134.5\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2675\">moeite met lopen</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2675\">C0311394</tspan>\n",
       "</text>\n",
       "\n",
       "<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"134.5\">\n",
       "    <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"2850\">.</tspan>\n",
       "    <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"2850\"></tspan>\n",
       "</text>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-0a94e2db8c6e4c52b7ca3a68e00062f4-0-0\" stroke-width=\"2px\" d=\"M245,89.5 C245,2.0 575.0,2.0 575.0,89.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-0a94e2db8c6e4c52b7ca3a68e00062f4-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">HISTORICAL</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M575.0,91.5 L583.0,79.5 567.0,79.5\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "\n",
       "<g class=\"displacy-arrow\">\n",
       "    <path class=\"displacy-arc\" id=\"arrow-0a94e2db8c6e4c52b7ca3a68e00062f4-0-1\" stroke-width=\"2px\" d=\"M2520,89.5 C2520,2.0 2675.0,2.0 2675.0,89.5\" fill=\"none\" stroke=\"currentColor\"/>\n",
       "    <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
       "        <textPath xlink:href=\"#arrow-0a94e2db8c6e4c52b7ca3a68e00062f4-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">NEGATED_EXISTENCE</textPath>\n",
       "    </text>\n",
       "    <path class=\"displacy-arrowhead\" d=\"M2675.0,91.5 L2683.0,79.5 2667.0,79.5\" fill=\"currentColor\"/>\n",
       "</g>\n",
       "</svg></span>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "visualize_dep(doc)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "medspacy20230927",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.17"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}