Skip to content

Commit

Permalink
updated script to use the new KGs (robokop and rtx-kg2); added a filt…
Browse files Browse the repository at this point in the history
…er to mvp1 answers which removes chemicals that cause the input disease
  • Loading branch information
kaiwenho committed May 20, 2024
1 parent 1c1f5aa commit a0b818c
Show file tree
Hide file tree
Showing 8 changed files with 12,263 additions and 86 deletions.
12,133 changes: 12,133 additions & 0 deletions medikanren2/neo/neo-biolink/biolink_4_2_1/biolink-model.yaml

Large diffs are not rendered by default.

39 changes: 33 additions & 6 deletions medikanren2/neo/neo-low-level/make-query-low-level.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -372,11 +372,14 @@
(error 'query:X->Y->Known "obsolete"))

(define (query:Known->Known curie*.S predicate*.S->O curie*.O)
(query:dict.Known->dict.Known
(string*->id=>1 curie*.S) predicate*.S->O (string*->id=>1 curie*.O)))

(define (query:dict.Known->dict.Known curie=>1.S predicate*.S->O curie=>1.O)
(define (query yield)
(if (> (length curie*.S) (length curie*.O))
(query:dict.Known->dict.Known
(string*->id=>1 curie*.S) predicate*.S->O (string*->id=>1 curie*.O) 'obj->sub)
(query:dict.Known->dict.Known
(string*->id=>1 curie*.S) predicate*.S->O (string*->id=>1 curie*.O) 'sub->obj)))

(define (query:dict.Known->dict.Known curie=>1.S predicate*.S->O curie=>1.O direction-tag)
(define (query.sub->obj yield)
(let* ((ekey.predicate (string->id str.predicate))
(ckey.name (string->id "name"))
(predicate=>1 (string*->id=>1 predicate*.S->O))
Expand All @@ -397,7 +400,31 @@
(O (id->string id.O)))
(yield (list* S name.S predicate.S->O O name.O
(edge-id->properties eid))))))))))))))))
(maybe-time (enumerator->rlist query)))
;; Object-driven variant of the Known->Known edge enumeration: the outermost
;; join walks the known object ids (curie=>1.O) and only at the innermost
;; level intersects with the known subject ids (curie=>1.S).
;; Calls `yield` once per matching edge with
;;   (list* S name.S predicate O name.O edge-properties)
;; i.e. the subject comes first even though the traversal starts at the object.
;; NOTE(review): relies on str.predicate, ekey=>evalue=>eid=>1,
;; object=>eid=>subject=>1 and curie=>ckey=>cvalue=>1 being bound in the
;; enclosing scope (not visible in this hunk) -- confirm their shapes there.
(define (query.obj->sub yield)
(let* ((ekey.predicate (string->id str.predicate))
(ckey.name (string->id "name"))
(predicate=>1 (string*->id=>1 predicate*.S->O))
(predicate=>eid=>1 (dict-get ekey=>evalue=>eid=>1 ekey.predicate)))
;; Join 1: known object ids against the object-keyed edge index.
((merge-join fx< curie=>1.O object=>eid=>subject=>1)
(lambda (id.O __ eid=>S=>1)
;; Object name and curie string are computed once per object id.
(let* ((name.O (get-name-from-dict-safe (dict-get curie=>ckey=>cvalue=>1 id.O) ckey.name))
(O (id->string id.O)))
;; Join 2: requested predicates against the predicate->edge-id index.
((merge-join fx< predicate=>1 predicate=>eid=>1)
(lambda (id.predicate.O->S __ eid=>1)
(let ((predicate.O->S (id->string id.predicate.O->S)))
;; Join 3: edge ids carrying this predicate against edge ids reaching this object.
((merge-join fx< eid=>1 eid=>S=>1)
(lambda (eid __ S=>1)
;; Join 4: known subject ids against the subjects of this edge.
((merge-join fx< curie=>1.S S=>1)
(lambda (id.S __ ___)
(let* ((name.S (get-name-from-dict-safe (dict-get curie=>ckey=>cvalue=>1 id.S)
ckey.name))
(S (id->string id.S)))
;; Emit in subject-first order, followed by the edge's properties.
(yield (list* S name.S predicate.O->S O name.O
(edge-id->properties eid))))))))))))))))
;; Run the enumerator that matches direction-tag and collect its results.
;; Both branches must wrap the enumeration in (maybe-time ...); previously the
;; 'sub->obj branch listed `maybe-time` and the enumeration as two separate
;; body expressions, so maybe-time was never applied to the query.
;; Any other tag is a caller error and is reported with the offending value.
(cond
[(eq? direction-tag 'obj->sub) (maybe-time (enumerator->rlist query.obj->sub))]
[(eq? direction-tag 'sub->obj) (maybe-time (enumerator->rlist query.sub->obj))]
[else (error 'query:dict.Known->dict.Known "unknown direction tag: ~s" direction-tag)]))

(define (query:Concept curie*)
(define (query yield)
Expand Down
53 changes: 20 additions & 33 deletions medikanren2/neo/neo-low-level/query-low-level-multi-db.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -111,19 +111,6 @@

(define (query:Known->Known curie*.S predicate*.S->O curie*.O)
(append
(query:Known->Known-robokop
(filter curie-in-db?-robokop curie*.S)
(filter curie-in-db?-robokop predicate*.S->O)
(filter curie-in-db?-robokop curie*.O))
(query:Known->Known-text-mining
(filter curie-in-db?-text-mining curie*.S)
(filter curie-in-db?-text-mining predicate*.S->O)
(filter curie-in-db?-text-mining curie*.O))
(query:Known->Known-rtx-kg2
(filter curie-in-db?-rtx-kg2 curie*.S)
(filter curie-in-db?-rtx-kg2 predicate*.S->O)
(filter curie-in-db?-rtx-kg2 curie*.O)))
#;(append
(query:Known->Known-robokop
(curies-in-db-robokop curie*.S)
(curies-in-db-robokop predicate*.S->O)
Expand All @@ -140,23 +127,23 @@
(define (query:Known->X curie*.K predicate*.K->X category*.X)
(append
(query:Known->X-robokop
(filter curie-in-db?-robokop curie*.K)
(curies-in-db-robokop curie*.K)
(and predicate*.K->X
(filter curie-in-db?-robokop predicate*.K->X))
(curies-in-db-robokop predicate*.K->X))
(and category*.X
(filter curie-in-db?-robokop category*.X)))
(curies-in-db-robokop category*.X)))
(query:Known->X-text-mining
(filter curie-in-db?-text-mining curie*.K)
(and predicate*.K->X
(filter curie-in-db?-text-mining predicate*.K->X))
(curies-in-db-text-mining curie*.K)
(and predicate*.K->X
(curies-in-db-text-mining predicate*.K->X))
(and category*.X
(filter curie-in-db?-text-mining category*.X)))
(curies-in-db-text-mining category*.X)))
(query:Known->X-rtx-kg2
(filter curie-in-db?-rtx-kg2 curie*.K)
(and predicate*.K->X
(filter curie-in-db?-rtx-kg2 predicate*.K->X))
(curies-in-db-rtx-kg2 curie*.K)
(and predicate*.K->X
(curies-in-db-rtx-kg2 predicate*.K->X))
(and category*.X
(filter curie-in-db?-rtx-kg2 category*.X)))))
(curies-in-db-rtx-kg2 category*.X)))))

(define (query:Known->X-scored curie*.K predicate*.K->X category*.X score*)
(append
Expand All @@ -183,22 +170,22 @@
(append
(query:X->Known-robokop
(and category*.X
(filter curie-in-db?-robokop category*.X))
(curies-in-db-robokop category*.X))
(and predicate*.X->K
(filter curie-in-db?-robokop predicate*.X->K))
(filter curie-in-db?-robokop curie*.K))
(curies-in-db-robokop predicate*.X->K))
(curies-in-db-robokop curie*.K))
(query:X->Known-text-mining
(and category*.X
(filter curie-in-db?-text-mining category*.X))
(curies-in-db-text-mining category*.X))
(and predicate*.X->K
(filter curie-in-db?-text-mining predicate*.X->K))
(filter curie-in-db?-text-mining curie*.K))
(curies-in-db-text-mining predicate*.X->K))
(curies-in-db-text-mining curie*.K))
(query:X->Known-rtx-kg2
(and category*.X
(filter curie-in-db?-rtx-kg2 category*.X))
(curies-in-db-rtx-kg2 category*.X))
(and predicate*.X->K
(filter curie-in-db?-rtx-kg2 predicate*.X->K))
(filter curie-in-db?-rtx-kg2 curie*.K))))
(curies-in-db-rtx-kg2 predicate*.X->K))
(curies-in-db-rtx-kg2 curie*.K))))

(define (query:X->Known-scored category*.X predicate*.X->K curie*.K score*)
(append
Expand Down
2 changes: 1 addition & 1 deletion medikanren2/neo/neo-low-level/query-low-level-robokop.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"make-query-low-level.rkt"
racket/match)

(define db-path-under-parent "robokop-dec-6-2023/full_oct_2023/robokop.db")
(define db-path-under-parent "robokop-may-9-2024/march-7-2024/robokop.db")

(match-define
(list
Expand Down
2 changes: 1 addition & 1 deletion medikanren2/neo/neo-low-level/query-low-level-rtx-kg2.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"make-query-low-level.rkt"
racket/match)

(define db-path-under-parent "rtx-kg2-july-17-2023/may_2023/rtx-kg2pre_may_2023.db")
(define db-path-under-parent "rtx-kg2-may-9-2024/rtx-kg2-2.9.0pre/rtx-kg2.db")

(match-define
(list
Expand Down
33 changes: 10 additions & 23 deletions medikanren2/neo/neo-server/neo-server-utils.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,8 @@

(define mvp2-filter
(lambda (target-eprop direction)
(let* ((aspect (or (get-assoc "object_aspect_qualifier" target-eprop)
(get-assoc "qualified_object_aspect" target-eprop)
))
(direction^ (or (get-assoc "object_direction_qualifier" target-eprop)
(get-assoc "qualified_object_direction" target-eprop)
)))
(let* ((aspect (get-assoc "object_aspect_qualifier" target-eprop))
(direction^ (get-assoc "object_direction_qualifier" target-eprop)))
(and
aspect
direction^
Expand Down Expand Up @@ -103,6 +99,7 @@
(printf "return ~a answers\n" (length r))
r]
[else
#;(printf "number of answers: ~a, take next round\n" (length r))
(loop (append r (hop-proc sl))
(list (minus-one-before-zero (list-ref sl 0))
(minus-one-before-zero (list-ref sl 1))
Expand All @@ -119,21 +116,14 @@
(loop (cdr n*) greatest)))))))

(define (get-source props)
(let ((source (or (get-assoc "biolink:primary_knowledge_source" props)
(get-assoc "primary_knowledge_source" props) ;rkx-kg2
(let ((source (or (get-assoc "primary_knowledge_source" props)
(and (get-assoc "json_attributes" props)
"infores:text-mining-provider-targeted")))) ;text-mining
(hash
'resource_id source
'resource_role "primary_knowledge_source")))

(define (num-pubs props)
(let ((pubs (or (get-assoc "publications" props)
(get-assoc "supporting_publications" props)
(get-assoc "publications:string[]" props))))
(if (and pubs (not (equal? "()" pubs)))
(max (length (string-split pubs "|")) (length (string-split pubs "; ")) (length (string-split pubs)))
0)))
;; Returns the numeric value of the "mediKanren-score" edge property.
;; props: association list of edge properties, looked up with get-assoc.
;; Returns 0 when the property is absent or does not parse as a number, so
;; callers that compare the result with `=` never receive #f.
;; NOTE(review): despite the name, this reads "mediKanren-score" rather than
;; counting publications -- confirm that is what callers expect.
(define (num-pubs props)
  (let ((raw (get-assoc "mediKanren-score" props)))
    (cond
      [(number? raw) raw]
      ;; string->number yields #f on unparsable text; fall back to 0.
      [(string? raw) (or (string->number raw) 0)]
      ;; get-assoc returned #f (property missing) or an unexpected value.
      [else 0])))

(define (get-score-from-result result)
(let ((analyses (hash-ref result 'analyses #f)))
Expand All @@ -160,8 +150,7 @@

(define edge-has-source?
(lambda (props)
(or (get-assoc "biolink:primary_knowledge_source" props)
(get-assoc "primary_knowledge_source" props)
(or (get-assoc "primary_knowledge_source" props)
(and (get-assoc "json_attributes" props)
(let ((attr-hl (string->jsexpr (get-assoc "json_attributes" props))))
(let loop ((hl attr-hl))
Expand All @@ -171,8 +160,7 @@
(hash-ref (car hl) 'attribute_type_id #f)
"biolink:primary_knowledge_source")
#t)
(else (loop (cdr hl)))))))
(get-assoc "knowledge_source" props))))
(else (loop (cdr hl))))))))))

(define (data-attributes props)
(list (get-publications props)))
Expand All @@ -184,14 +172,13 @@
[(null? props) pubs]
[else
(let ((publication (or (get-assoc "publications" (car props))
(get-assoc "supporting_publications" (car props))
(get-assoc "publications:string[]" (car props)))))
(get-assoc "supporting_publications" (car props)))))
(helper (cdr props)
(append
(cond
[(string-prefix? publication "(")
(string-split (string-trim (string-trim publication "(") ")"))]
[(string-contains? publication "|") (string-split publication "|")]
(string-split (string-trim (string-trim publication "(") ")"))] ;rtx-kg2 & robokop
[(string-contains? publication "|") (string-split publication "|")] ;text-mining
[(string-contains? publication ";") (string-split publication "; ")]
[else (string-split publication)])
pubs)))]))
Expand Down
64 changes: 43 additions & 21 deletions medikanren2/neo/neo-server/neo-server.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

(define DEFAULT_PORT 8384)

(define NEO_SERVER_VERSION "1.38")
(define NEO_SERVER_VERSION "1.39")

;; Maximum number of results to be returned from *each individual* KP,
;; or from mediKanren itself.
Expand All @@ -38,6 +38,8 @@
;; Maximum number of results to score and then sort.
(define MAX_RESULTS_TO_SCORE_AND_SORT 100000)

;; Limit passed as the second argument to get-n-descendent-curies*-in-db
;; when expanding input curies to their descendants.
;; NOTE(review): presumably the maximum number of descendant curies kept
;; per expansion -- confirm against get-n-descendent-curies*-in-db.
(define MAX_DESCENDENT 100)

;; Number of seconds before a connection times out, collecting all
;; resources from the connection (was 10 seconds in the original
;; tutorial).
Expand Down Expand Up @@ -962,8 +964,9 @@
(hash-ref (hash-ref qg_nodes qg_object-node-id) 'ids))
(define disease-ids+
(set->list
(get-descendent-curies*-in-db
(curies->synonyms-in-db disease-ids))))
(get-n-descendent-curies*-in-db
(curies->synonyms-in-db disease-ids)
MAX_DESCENDENT)))
(define chemical-catogory+
(set->list
(get-non-deprecated/mixin/abstract-ins-and-descendent-classes*-in-db
Expand Down Expand Up @@ -1006,8 +1009,9 @@
(hash-ref (hash-ref qg_nodes qg_subject-node-id) 'ids))
(define chemical-ids+
(time (set->list
(get-descendent-curies*-in-db
(curies->synonyms-in-db chemical-ids)))))
(get-n-descendent-curies*-in-db
(curies->synonyms-in-db chemical-ids)
MAX_DESCENDENT))))
(define direction
(let ((qualifer-set
(hash-ref (car (hash-ref qg_edge-hash 'qualifier_constraints)) 'qualifier_set)))
Expand Down Expand Up @@ -1062,11 +1066,12 @@
'("biolink:gene_product_of")
gene-ids-syns
;; TODO: give names to #f and (list 0) - easy to read
(list (list 1112) #f (list 0))))))
(list (list 1112) #f (list 1112))))))
(define gene-ids+
(set->list
(get-descendent-curies*-in-db
(append gene-ids-syns (curies->synonyms-in-db protein-ids)))))
(get-n-descendent-curies*-in-db
(append gene-ids-syns (curies->synonyms-in-db protein-ids))
MAX_DESCENDENT)))
(define chemical-catogory+
(set->list
(get-non-deprecated/mixin/abstract-ins-and-descendent-classes*-in-db
Expand Down Expand Up @@ -1171,11 +1176,28 @@

(define old-scored/q-sorted-short (take-at-most scored/q-sorted-long MAX_RESULTS_FROM_COMPONENT))

(printf "Toke the best ~s edges for MVP mode creative query\n"
(length old-scored/q-sorted-short))

(define subjs-from-results (remove-duplicates (map cadr old-scored/q-sorted-short)))
(define objs-from-results (remove-duplicates (map (lambda (e) (get-object e)) old-scored/q-sorted-short)))

;; mvp1 only: drop result edges whose chemical is also linked to a result
;; disease by a "causes"-style predicate, then overwrite
;; old-scored/q-sorted-short with the filtered list.
(when (eq? which-mvp 'mvp1)
;; Expand the result subjects and objects to their synonyms so the lookup
;; below is done over the synonym-expanded curies.
(let* ((chemicals (remove-duplicates (curies->synonyms-in-db subjs-from-results)))
(disease-id+ (remove-duplicates (curies->synonyms-in-db objs-from-results)))
;; Subjects (car of each returned edge) of chemical->disease edges using any
;; of the four predicates below, expanded to synonyms again.
;; `time` prints timing for the query on every mvp1 request.
(chem-worsen-disease (remove-duplicates
(curies->synonyms-in-db
(map car
(time (query:Known->Known
chemicals
'("biolink:causes"
"biolink:exacerbates"
"biolink:has_adverse_event"
"biolink:contributes_to")
disease-id+))))))
;; Keep an edge only if its (cadr e) is not one of the flagged chemicals.
;; NOTE(review): the check ignores which disease the edge points at, so a
;; chemical flagged for any result disease is removed from all results.
(not-cause-old-scored/q-sorted-short
(filter (lambda (e) (not (member (cadr e) chem-worsen-disease))) old-scored/q-sorted-short)))
(set! old-scored/q-sorted-short not-cause-old-scored/q-sorted-short)))

(printf "Toke the best ~s edges for MVP mode creative query\n"
(length old-scored/q-sorted-short))

(define curie-representative-table (add-curies-representative-to-hash
(build-curies-representative-hash subjs-from-results)
Expand Down Expand Up @@ -1296,17 +1318,15 @@
(object obj)
(subject subj)
(predicate (get-assoc "predicate" props))
(aspect-qualifier (or (get-assoc "object_aspect_qualifier" props)
(get-assoc "qualified_object_aspect" props)))
(direction-qualifier (or (get-assoc "object_direction_qualifier" props)
(get-assoc "qualified_object_direction" props)))
(aspect-qualifier (get-assoc "object_aspect_qualifier" props))
(direction-qualifier (get-assoc "object_direction_qualifier" props))
(qualifed-predicate (get-assoc "qualified_predicate" props)))
(add-node! object)
(add-node! subject)
(unless (hash-has-key? edges id-sym)
(if (= (num-pubs props) 0)
(if (and
(or (eq? which-mvp 'mvp2-chem) (eq? which-mvp 'mvp2-gene))
#;(or (eq? which-mvp 'mvp2-chem) (eq? which-mvp 'mvp2-gene))
aspect-qualifier direction-qualifier qualifed-predicate)
(hash-set! edges id-sym
(hash 'object object
Expand All @@ -1326,7 +1346,7 @@
'subject subject
'sources (list (get-source props) UNSECRET-SOURCE))))
(if (and
(or (eq? which-mvp 'mvp2-chem) (eq? which-mvp 'mvp2-gene))
#;(or (eq? which-mvp 'mvp2-chem) (eq? which-mvp 'mvp2-gene))
aspect-qualifier direction-qualifier qualifed-predicate)
(hash-set! edges id-sym
(hash 'attributes
Expand Down Expand Up @@ -1940,8 +1960,9 @@
(get-non-deprecated/mixin/abstract-ins-and-descendent-predicates*-in-db
'("biolink:treats")))
(set->list
(get-descendent-curies*-in-db
(curie->synonyms-in-db "DOID:9351"))))))))))
(get-n-descendent-curies*-in-db
(curie->synonyms-in-db "DOID:9351")
MAX_DESCENDENT)))))))))
#f))

(module+ main
Expand Down Expand Up @@ -1975,8 +1996,9 @@
(get-non-deprecated/mixin/abstract-ins-and-descendent-predicates*-in-db
'("biolink:treats")))
(set->list
(get-descendent-curies*-in-db
(curie->synonyms-in-db "DOID:9351")))
(get-n-descendent-curies*-in-db
(curie->synonyms-in-db "MONDO:0007827")
MAX_DESCENDENT))
TOP_BUCKET_NUMBERS))

(length q3)
Expand Down
Loading

0 comments on commit a0b818c

Please sign in to comment.