Getting Started

UMLSmapper finds concepts from the Unified Medical Language System (UMLS) Metathesaurus in Spanish or English clinical texts.

UMLSmapper API's base URL is:

https://um-public.nlp.vicomtech.org
Access Key

Your API Access Key is your unique authentication key. You will need it to make requests. Append the access_key parameter to the API's base URL and set it to your access key value. Here is an example:

https://um-public.nlp.vicomtech.org?access_key=YOUR-ACCESS-KEY
Register to get an access key
Methods
The UMLSmapper API works in three steps:
  • Post a document: First, you must provide the text you want to process. The API will return the document's identifier, which you must use in the next steps to get the mapping or delete the document. Texts are stored in UMLSmapper's database so you can process them more than once.
  • Get mappings: This method processes a document and returns the result in JSON format. UMLSmapper can be configured in each call (read more). Note that the usage of this method is limited to 300 requests/month.
  • Delete a document: Optionally, you can delete any of the documents in your collection.
Response

The UMLSmapper API delivers the mapper's response in JSON format. Here is an example:

{
  "analysis": {
    "concepts": [
      {
        "ci": "C0332288",
        "id": "bc1",
        "references": [
          [
            "t1"
          ]
        ],
        "source": "SCTSPA",
        "type": "qualitative_concept"
      },
      {
        "ci": "C0311392",
        "id": "bc2",
        "references": [
          [
            "t2"
          ]
        ],
        "source": "SCTSPA",
        "type": "finding"
      },
      {
        "ci": "C0205699",
        "id": "bc3",
        "references": [
          [
            "t4"
          ]
        ],
        "source": "SCTSPA",
        "type": "neoplastic_process"
      }
    ],
    "terms": [
      {
        "id": "t1",
        "lemma": "sin",
        "length": 3,
        "morphofeat": "SPS00",
        "offset": 0,
        "para": 1,
        "pos": "P",
        "sent": 1,
        "wf": "Sin"
      },
      {
        "id": "t2",
        "lemma": "signo",
        "length": 6,
        "morphofeat": "NCMP000",
        "offset": 4,
        "para": 1,
        "pos": "N",
        "sent": 1,
        "wf": "signos"
      },
      {
        "id": "t3",
        "lemma": "de",
        "length": 2,
        "morphofeat": "SPS00",
        "offset": 11,
        "para": 1,
        "pos": "P",
        "sent": 1,
        "wf": "de"
      },
      {
        "id": "t4",
        "lemma": "carcinomatosis",
        "length": 14,
        "morphofeat": "NCFC000",
        "offset": 14,
        "para": 1,
        "pos": "N",
        "sent": 1,
        "wf": "carcinomatosis"
      }
    ]
  },
  "datetime": "21/05/2019 11:40",
  "document": {
    "id": "24",
    "language": "es",
    "text": "Sin signos de carcinomatosis"
  },
  "settings": {
    "abbr": "ml",
    "chunker": "ngram",
    "disamb": "ukb",
    "excludeConcepts": null,
    "excludeLanguages": null,
    "excludeSources": null,
    "excludeTerms": null,
    "excludeTermsTypes": null,
    "includeLanguages": null,
    "includeObsolete": false,
    "includeSources": null,
    "includeSuppressible": false,
    "includeTermsTypes": null,
    "scorer": "castro",
    "size": 5,
    "spreadExcludeTermsTypes": false,
    "spreadIncludeTermsTypes": false,
    "threshold": 0.7
  }
}
{
    "type": "object",
    "properties": {
        "analysis": {
            "type": "object",
            "properties": {
                "concepts": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "ci": {
                                "type": "string"
                            },
                            "id": {
                                "type": "string"
                            },
                            "references": {
                                "type": "array",
                                "items": {
                                    "type": "array",
                                    "items": {
                                        "type": "string"
                                    }
                                }
                            },
                            "source": {
                                "type": "string"
                            },
                            "type": {
                                "type": "string"
                            }
                        }
                    }
                },
                "terms": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "id": {
                                "type": "string"
                            },
                            "lemma": {
                                "type": "string"
                            },
                            "length": {
                                "type": "number"
                            },
                            "morphofeat": {
                                "type": "string"
                            },
                            "offset": {
                                "type": "number"
                            },
                            "para": {
                                "type": "number"
                            },
                            "pos": {
                                "type": "string"
                            },
                            "sent": {
                                "type": "number"
                            },
                            "wf": {
                                "type": "string"
                            }
                        }
                    }
                }
            }
        },
        "datetime": {
            "type": "string"
        },
        "document": {
            "type": "object",
            "properties": {
                "id": {
                    "type": "string"
                },
                "language": {
                    "type": "string"
                },
                "text": {
                    "type": "string"
                }
            }
        },
        "settings": {
            "type": "object",
            "properties": {
                "abbr": {
                    "type": "string"
                },
                "chunker": {
                    "type": "string"
                },
                "disamb": {
                    "type": "string"
                },
                "excludeConcepts": {
                    "type": ["string", "null"]
                },
                "excludeLanguages": {
                    "type": ["string", "null"]
                },
                "excludeSources": {
                    "type": ["string", "null"]
                },
                "excludeTerms": {
                    "type": ["string", "null"]
                },
                "excludeTermsTypes": {
                    "type": ["string", "null"]
                },
                "includeLanguages": {
                    "type": ["string", "null"]
                },
                "includeObsolete": {
                    "type": "boolean"
                },
                "includeSources": {
                    "type": ["string", "null"]
                },
                "includeSuppressible": {
                    "type": "boolean"
                },
                "includeTermsTypes": {
                    "type": ["string", "null"]
                },
                "scorer": {
                    "type": "string"
                },
                "size": {
                    "type": "integer"
                },
                "spreadExcludeTermsTypes": {
                    "type": "boolean"
                },
                "spreadIncludeTermsTypes": {
                    "type": "boolean"
                },
                "threshold": {
                    "type": "number"
                }
            }
        }
    }
}
API responses consist of 4 main fields:
  • datetime: the date and time when the analysis was carried out.
  • settings: UMLSmapper's settings, with which the results reported were produced.
  • document: metadata and text of the analyzed document.
  • analysis: the analysis itself.
The analysis has two layers:
  • terms: this layer has one JSON object per token in the document and provides information about the tokens' position in the text and their morphology.
  • concepts: the layer of concepts consists of one JSON object per concept identified. The field ci indicates the concept's identifier in the UMLS (i.e., the CUI). The field source is the UMLS terminology that triggered the mapping. type is the semantic type of the concept. Finally, references points to the text spans mapped with this concept.
Error Codes
When the UMLSmapper API encounters an error, it returns a JSON file with the error's code and a brief explanation. These are the error codes that the API uses:
Code Name Description
500 Internal Server Error UMLSmapper encountered an unexpected condition that prevented it from fulfilling the request.
400 Bad Request UMLSmapper cannot or will not process the request due to an apparent client error (e.g., missing parameters).
403 Forbidden The API key provided is not valid or has not been confirmed, or the monthly call limit has been reached.
404 Not Found The document identifier does not exist.
410 Gone The document identifier corresponds to a deleted document.

Methods

POST

The UMLSmapper API usage flow starts with POSTing the text you want to analyze. Just make a POST call to the base URL with the access_key set to your access key value, like so:

https://um-public.nlp.vicomtech.org?access_key=YOUR-ACCESS-KEY

The body of the call should be a JSON file that contains your text and the language in which the text is written: Spanish (es) or English (en). For instance:

{
    "text": "Sin signos de carcinomatosis",
    "language": "es"
}
{
    "type": "object",
    "properties": {
        "text": {
            "type": "string",
            "maxLength": 6000
        },
        "language": {
            "type": "string",
            "enum": ["es", "en"]
        }
    },
    "required": ["text", "language"],
    "additionalProperties": false
}
The text can't be longer than 6,000 characters.
Response

The response of this method is the registered document's identifier as plain text:

42
In the example above, the document identifier is 42. This identifier is necessary to process the document with UMLSmapper or delete the document from your collection.
GET

This method serves to get the mapping to the UMLS Metathesaurus of a previously posted document. In the example below, we retrieve the mappings of the document 42:

https://um-public.nlp.vicomtech.org/42?access_key=YOUR-ACCESS-KEY

You can make a maximum of 300 GET requests/month. Only requests that are successfully terminated are taken into account.

Parametrization

UMLSmapper is highly configurable. On the one hand, you can configure the mapping strategy, that is, the settings of each of the modules that make up UMLSmapper. On the other, the knowledge base may be configured in order to map different subsets of the UMLS Metathesaurus in each call.

You configure UMLSmapper by appending parameters to the base URL. Please consult the parameters available below. For a detailed explanation on how UMLSmapper works, you can consult the following bibliography:

  • N. Perez, P. Accuosto, À. Bravo, M. Cuadros, E. Martínez-Garcia, H. Saggion, G. Rigau, Cross-lingual semantic annotation of biomedical literature: experiments in Spanish and English, Bioinformatics, 2019. [link]
  • N. Perez, M. Cuadros, G. Rigau, Biomedical term normalization of EHRs with UMLS, in: Proc. of LREC 2018, 2018, pag. 2045-2051. [pdf]
  • M. Cuadros, N. Perez, I. Montoya, A. García Pablos, Vicomtech at BARR2: Detecting Biomedical Abbreviations with ML Methods and Dictionary-based Heuristics, in: Proc. of IberEval 2018, 2018, pag. 322-328. [pdf]
  • N. Perez, Mapping of Electronic Health Records in Spanish to the Unified Medical Language System Metathesaurus, Master's thesis, University of the Basque Country (UPV/EHU), 2017. [pdf]
Knowledge base configuration
excludeConcepts
Blacklist of concepts; these concepts are excluded from the knowledge base and are never mapped to the input text.
Accepted values: (string) comma-separated UMLS CUIs Default: n/a
excludeTerms
Blacklist of terms. These terms are masked (case-insensitive) from the input text in order to avoid their mapping. Multi-word terms are accepted.
Accepted values: (string) comma-separated terms Default: n/a
includeSources
List of UMLS Metathesaurus sources; the mapping will be limited to these sources.
Accepted values: (string) comma-separated abbreviated forms of terminologies Default: n/a
excludeSources
List of UMLS Metathesaurus sources; these sources will be excluded from the mapping. If includeSources is given, this parameter is ignored.
Accepted values: (string) comma-separated abbreviated forms of terminologies Default: n/a
includeTermTypes
List of UMLS Semantic Network term types; the mapping will be limited to these term types.
Accepted values: (string) comma-separated abbreviated forms of semantic types Default: n/a
spreadIncludeTermTypes
Extend includeTermTypes with all the hyponyms of the term types given.
Accepted values: (boolean) true or false Default: false
excludeTermTypes
List of UMLS Semantic Network term types; these term types will be excluded from the mapping. If includeTermTypes is given, this parameter is ignored.
Accepted values: (string) comma-separated abbreviated forms of semantic types Default: n/a
spreadExcludeTermTypes
Extend excludeTermTypes with all the hyponyms of the term types given.
Accepted values: (boolean) true or false Default: false
includeObsolete
Accept mappings of concepts marked as obsolete in the UMLS Metathesaurus.
Accepted values: (boolean) true or false Default: false
includeSuppressible
Accept mappings of concepts marked as suppressible in the UMLS Metathesaurus.
Accepted values: (boolean) true or false Default: false
Mapping strategy configuration
abbr
Mechanism to detect abbreviated forms:
  • ml: automatic classifier
  • rule: rule-based heuristics
Accepted values: (string) ml or rule Default: ml
chunker
Mechanism to generate phrases from the input text:
  • ngram: compute all the n-grams up to the size determined in size
  • phrase: perform a syntactic analysis to retrieve noun phrases with max size size
Accepted values: (string) ngram or phrase Default: ngram
size
Maximum length in tokens of the phrases generated
Accepted values: (integer) > 0 Default: 5
scorer
Function to score mapping candidates:
  • castro: Castro E., et Al. (2010) "Automatic Identification of Biomedical Concepts in Spanish-Language Unstructured Clinical texts"
  • weighted: a variant of castro
  • chars: another variant of castro
  • lucene: keep the scores given by Apache Lucene™
Accepted values: (string) castro, weighted, chars, or lucene Default: weighted
threshold
Threshold to filter mapping candidates
Accepted values: (float) ≥ 0.0 Default: 0.7
disamb
Strategy to handle ambiguities:
  • ukb: use UKB to resolve ambiguities
  • first: resolve ambiguities by choosing the first candidate
  • skip: do not attempt to map ambiguous phrases
  • none: do not resolve ambiguities, i.e., return all the mapping candidates
Accepted values: (string) ukb, first, skip or none Default: ukb
Response

The response is delivered in JSON format, as described above (Response).

DELETE

Use this method to remove from your collection a previously posted document simply by indicating the document's identifier in the URL. In the following example, we delete the document 42:

https://um-public.nlp.vicomtech.org/42?access_key=YOUR-ACCESS-KEY
Response

If the document is successfully deleted, the API returns the document's identifier as plain text.

Sample Code

curl
Post a document
# Send the text and its language as a JSON body; the access key travels in the query string.
curl --request POST \
  --header "Content-type: application/json" \
  --header "Accept: text/plain" \
  --data '{"text": "Sin signos de carcinomatosis", "language": "es"}' \
  "https://um-public.nlp.vicomtech.org?access_key=YOUR-ACCESS-KEY"
Get mappings
# Request the mappings of document 42 as JSON, limited to the sources listed in includeSources.
curl --request GET \
  --header "Accept: application/json" \
  "https://um-public.nlp.vicomtech.org/42?access_key=YOUR-ACCESS-KEY&includeSources=SCTSPA,MDRSPA,MSHSPA"
Delete a document
# Delete document 42, addressed by its identifier in the URL path.
curl --request DELETE \
  --header "Accept: text/plain" \
  "https://um-public.nlp.vicomtech.org/42?access_key=YOUR-ACCESS-KEY"
Python3
Post a document
import requests

base_url = "https://um-public.nlp.vicomtech.org"

# Register the text: POST it as JSON to the base URL, authenticating through
# the access_key query parameter.
post_url = base_url
post_headers = {"Content-Type": "application/json", "Accept": "text/plain"}
post_params = {"access_key": "YOUR-ACCESS-KEY"}
post_body = {"text": "Sin signos de carcinomatosis", "language": "es"}

# requests waits forever unless a timeout is given; bound the wait (in seconds)
# so an unresponsive server cannot hang the script.
post_r = requests.post(
    post_url,
    headers=post_headers,
    params=post_params,
    json=post_body,
    timeout=30,
)
# Turn HTTP error statuses into an exception instead of reading an error body as an ID.
post_r.raise_for_status()
# The identifier is returned as plain text; strip surrounding whitespace so it
# can be safely appended to the URL in later calls.
document_id = post_r.text.strip()

print("New document ID:", document_id)
Get mappings
# Fetch the mappings of the posted document: the identifier goes in the URL
# path and the configuration parameters go in the query string.
get_url = base_url + "/" + document_id
get_headers = {"Accept": "application/json"}
get_params = {
    "access_key": "YOUR-ACCESS-KEY",
    "includeSources": "SCTSPA,MDRSPA,MSHSPA"
}

# requests waits forever unless a timeout is given; bound the wait (in seconds).
get_r = requests.get(get_url, headers=get_headers, params=get_params, timeout=30)
get_r.raise_for_status()
# Collect the identifier ("ci" field) of every concept in the analysis.
cuis = [concept_obj['ci'] for concept_obj in get_r.json()['analysis']['concepts']]

print(len(cuis), "concepts found:", ", ".join(cuis))
Delete a document
# Remove the document from the collection, addressed by its identifier in the URL path.
delete_url = base_url + "/" + document_id
delete_headers = {"Accept": "text/plain"}
delete_params = {"access_key": "YOUR-ACCESS-KEY"}

# requests waits forever unless a timeout is given; bound the wait (in seconds).
delete_r = requests.delete(delete_url, headers=delete_headers, params=delete_params, timeout=30)
delete_r.raise_for_status()
# The response body should echo the deleted document's identifier. Check it
# explicitly: an assert statement would be skipped under "python -O".
if document_id.strip() != delete_r.text.strip():
    raise RuntimeError(
        "Unexpected response when deleting document " + document_id + ": " + delete_r.text
    )

print("Document", document_id, "successfully deleted")