CodelistGenerator search options: codes for osteoarthritis

Creating a codelist for osteoarthritis

For this example we are going to generate a candidate codelist for osteoarthritis, looking at the impact of alternative search strategies.

library(DBI)
library(RPostgres)
# Connection settings are read from environment variables so that no
# credentials are hard-coded in this script.
host <- Sys.getenv("host")
port <- Sys.getenv("port")
serverDbi <- Sys.getenv("server")
user <- Sys.getenv("user")
password <- Sys.getenv("password")

# Schema that holds the vocabulary tables.
vocabularyDatabaseSchema <- "vocabulary"

# Open the Postgres connection.
db <- dbConnect(
  RPostgres::Postgres(),
  host = host,
  port = port,
  dbname = serverDbi,
  user = user,
  password = password
)

# Build the cdm reference, restricted to the vocabulary tables listed here.
cdm <- CDMConnector::cdm_from_con(
  con = db,
  cdm_schema = vocabularyDatabaseSchema,
  cdm_tables = tidyselect::all_of(
    c(
      "concept", "concept_relationship", "concept_ancestor",
      "concept_synonym", "drug_strength", "vocabulary"
    )
  )
)

Search strategies

Condition domain, without searching via or in synonyms, without fuzzy match, with exclusions, without including descendants or ancestor

To start we will search for “osteoarthritis”, while excluding “post-infection” and “post-traumatic”, but without searching synonyms, without searching via non-standard codes, without fuzzy matching, and without including descendants or the direct ancestor of the included concepts.

# Starting search: Condition domain only, with the synonym, non-standard,
# fuzzy-match, descendant and ancestor options all switched off.
oaCodes1 <- getCandidateCodes(
  cdm = cdm,
  keywords = "osteoarthritis",
  exclude = c("post-infection", "post-traumatic"),
  domains = "Condition",
  searchInSynonyms = FALSE,
  searchViaSynonyms = FALSE,
  searchNonStandard = FALSE,
  fuzzyMatch = FALSE,
  includeDescendants = FALSE,
  includeAncestor = FALSE
)

What is the candidate codelist?

# Show the candidate codelist as an interactive table, ten rows per page.
# The page-length menu uses the same choices (10, 20, 50) as every other
# table in this document.
datatable(oaCodes1,
  rownames = FALSE,
  options = list(
    pageLength = 10,
    lengthMenu = c(10, 20, 50)
  )
)

Including descendants

Now we will also include the descendants of included concepts.

# Same settings as oaCodes1 except that includeDescendants is now TRUE.
oaCodes2 <- getCandidateCodes(
  cdm = cdm,
  keywords = "osteoarthritis",
  exclude = c("post-infection", "post-traumatic"),
  domains = "Condition",
  searchInSynonyms = FALSE,
  searchViaSynonyms = FALSE,
  searchNonStandard = FALSE,
  fuzzyMatch = FALSE,
  includeDescendants = TRUE,
  includeAncestor = FALSE
)

What new codes do we pick up?

# Compare the two codelists and keep the rows labelled "Only codelist 2";
# the label column is dropped once it has been used for filtering.
newCodes1To2 <- oaCodes1 %>%
  compareCodelists(oaCodes2) %>%
  filter(codelist == "Only codelist 2") %>%
  select(-"codelist")

# Interactive table of the retained rows, ten per page.
datatable(
  newCodes1To2,
  options = list(pageLength = 10, lengthMenu = c(10, 20, 50)),
  rownames = FALSE
)

Including observation domain

Now we will search the observation domain as well as the condition domain.

# Same settings as oaCodes1 except that the Observation domain is searched
# in addition to the Condition domain.
oaCodes3 <- getCandidateCodes(
  cdm = cdm,
  keywords = "osteoarthritis",
  exclude = c("post-infection", "post-traumatic"),
  domains = c("Condition", "Observation"),
  searchInSynonyms = FALSE,
  searchViaSynonyms = FALSE,
  searchNonStandard = FALSE,
  fuzzyMatch = FALSE,
  includeDescendants = FALSE,
  includeAncestor = FALSE
)

What new codes do we pick up?

# Compare the two codelists and keep the rows labelled "Only codelist 2";
# the label column is dropped once it has been used for filtering.
newCodes1To3 <- oaCodes1 %>%
  compareCodelists(oaCodes3) %>%
  filter(codelist == "Only codelist 2") %>%
  select(-"codelist")

# Interactive table of the retained rows, ten per page.
datatable(
  newCodes1To3,
  options = list(pageLength = 10, lengthMenu = c(10, 20, 50)),
  rownames = FALSE
)

Search synonyms (both in and via)

Now we will search the concept synonym table to identify concepts to include.

# Same settings as oaCodes1 except that both synonym options
# (searchInSynonyms and searchViaSynonyms) are now TRUE.
oaCodes4 <- getCandidateCodes(
  cdm = cdm,
  keywords = "osteoarthritis",
  exclude = c("post-infection", "post-traumatic"),
  domains = "Condition",
  searchInSynonyms = TRUE,
  searchViaSynonyms = TRUE,
  searchNonStandard = FALSE,
  fuzzyMatch = FALSE,
  includeDescendants = FALSE,
  includeAncestor = FALSE
)

What new codes do we pick up?

# Compare the two codelists and keep the rows labelled "Only codelist 2";
# the label column is dropped once it has been used for filtering.
newCodes1To4 <- oaCodes1 %>%
  compareCodelists(oaCodes4) %>%
  filter(codelist == "Only codelist 2") %>%
  select(-"codelist")

# Interactive table of the retained rows, ten per page.
datatable(
  newCodes1To4,
  options = list(pageLength = 10, lengthMenu = c(10, 20, 50)),
  rownames = FALSE
)

Search via non-standard

Now we will also search non-standard concepts, using their mappings to standard concepts to identify further concepts to include.

# Same settings as oaCodes1 except that searchNonStandard is now TRUE.
oaCodes5 <- getCandidateCodes(
  cdm = cdm,
  keywords = "osteoarthritis",
  exclude = c("post-infection", "post-traumatic"),
  domains = "Condition",
  searchInSynonyms = FALSE,
  searchViaSynonyms = FALSE,
  searchNonStandard = TRUE,
  fuzzyMatch = FALSE,
  includeDescendants = FALSE,
  includeAncestor = FALSE
)

What new codes do we pick up?

# Compare the two codelists and keep the rows labelled "Only codelist 2";
# the label column is dropped once it has been used for filtering.
newCodes1To5 <- oaCodes1 %>%
  compareCodelists(oaCodes5) %>%
  filter(codelist == "Only codelist 2") %>%
  select(-"codelist")

# Interactive table of the retained rows, ten per page.
datatable(
  newCodes1To5,
  options = list(pageLength = 10, lengthMenu = c(10, 20, 50)),
  rownames = FALSE
)

Include ancestor

Now we include the direct ancestor of included terms.

# Same settings as oaCodes1 except that includeAncestor is now TRUE.
# NOTE(review): this is the only call in the file that passes
# maxDistanceCost; with fuzzyMatch = FALSE it presumably has no effect --
# confirm against the getCandidateCodes documentation before removing it.
oaCodes8 <- getCandidateCodes(
  cdm = cdm,
  keywords = "osteoarthritis",
  exclude = c("post-infection", "post-traumatic"),
  domains = "Condition",
  searchInSynonyms = FALSE,
  searchViaSynonyms = FALSE,
  searchNonStandard = FALSE,
  fuzzyMatch = FALSE,
  maxDistanceCost = 0.1,
  includeDescendants = FALSE,
  includeAncestor = TRUE
)

What new codes do we pick up?

# Compare the two codelists and keep the rows labelled "Only codelist 2";
# the label column is dropped once it has been used for filtering.
newCodes1To8 <- oaCodes1 %>%
  compareCodelists(oaCodes8) %>%
  filter(codelist == "Only codelist 2") %>%
  select(-"codelist")

# Interactive table of the retained rows, ten per page.
datatable(
  newCodes1To8,
  options = list(pageLength = 10, lengthMenu = c(10, 20, 50)),
  rownames = FALSE
)