Skip to contents

Query PubMed for literature evidence supporting predicted gene–gene interactions.

Usage

edge_mining(
  predicted_list,
  ground_truth,
  delay = 1,
  query_field = "Title/Abstract",
  query_edge_types = c("TP", "FP", "FN"),
  max_retries = 10,
  BPPARAM = BiocParallel::bpparam()
)

Arguments

predicted_list

A list of predicted adjacency matrices (row and column names are gene symbols).

ground_truth

A 0/1 adjacency matrix with row and column names.

delay

Numeric. Seconds to wait between consecutive queries (default = 1).

query_field

Character. PubMed search field. Options: "Title/Abstract" (default), "Title", "Abstract".

query_edge_types

Character vector. Edge types to query; any subset of c("TP", "FP", "FN"), i.e. true positives, false positives, and false negatives (default: all three).

max_retries

Integer. Max retries for PubMed queries (default = 10).

BPPARAM

A BiocParallel parameter object. Default = BiocParallel::bpparam().

Value

A named list of data.frames. Each data.frame has columns:

gene1

First gene in interaction

gene2

Second gene in interaction

edge_type

One of "TP", "FP", or "FN"

pubmed_hits

Number of PubMed hits

PMIDs

Comma-separated PubMed IDs or NA

query_status

One of "hits_found", "no_hits", or "error"

Details

This function compares each predicted adjacency matrix against the ground truth matrix, classifies edges by type (TP, FP, FN), and queries PubMed for each gene pair of the requested edge types. For every queried pair, it returns the number of PubMed hits, the matching PMIDs, and the query status.

Examples

data(count_matrices)
data(adj_truth)

networks <- infer_networks(
    count_matrices_list = count_matrices,
    method = "GENIE3",
    nCores = 1
)
head(networks[[1]])
#>   regulatoryGene targetGene    weight
#> 1          ARPC2      ARPC3 0.2034202
#> 2          HLA-A       CD74 0.1781413
#> 3          ARPC3      ARPC2 0.1594665
#> 4           CD3E       CD3D 0.1547138
#> 5           CD3E        JUN 0.1499728
#> 6          HLA-E        FOS 0.1461567

wadj_list <- generate_adjacency(networks)
swadj_list <- symmetrize(wadj_list, weight_function = "mean")

binary_listj <- cutoff_adjacency(
    count_matrices = count_matrices,
    weighted_adjm_list = swadj_list,
    n = 2,
    method = "GENIE3",
    quantile_threshold = 0.99,
    nCores = 1,
    debug = TRUE
)
#> [Method: GENIE3] Matrix 1 → Cutoff = 0.10085
#> [Method: GENIE3] Matrix 2 → Cutoff = 0.09541
#> [Method: GENIE3] Matrix 3 → Cutoff = 0.09843
head(binary_listj[[1]])
#>       ACTG1 ARPC2 ARPC3 BTF3 CD3D CD3E CD74 CFL1 COX4I1 COX7C CXCR4 EEF1A1
#> ACTG1     0     0     0    0    0    0    0    0      0     0     0      0
#> ARPC2     0     0     1    0    0    0    0    0      0     0     0      0
#> ARPC3     0     1     0    0    0    0    0    0      0     0     0      0
#> BTF3      0     0     0    0    0    0    0    0      0     0     0      0
#> CD3D      0     0     0    0    0    1    0    0      0     0     0      0
#> CD3E      0     0     0    0    1    0    0    0      0     0     0      0
#>       EEF1D EEF2 EIF1 EIF3K EIF4A2 FOS FTH1 FTL GNB2L1 HLA-A HLA-B HLA-C HLA-E
#> ACTG1     0    0    0     0      0   0    0   0      0     0     0     0     0
#> ARPC2     0    0    0     0      0   0    0   0      0     0     0     0     1
#> ARPC3     0    0    0     0      0   0    0   0      0     0     0     0     0
#> BTF3      0    0    0     0      0   0    0   0      0     0     0     0     0
#> CD3D      0    0    0     0      0   0    0   0      0     0     0     0     0
#> CD3E      0    0    0     0      0   0    0   0      0     0     0     0     0
#>       JUN JUNB MYL12B MYL6 NACA PABPC1 PFN1 TMSB4X UBA52 UBC
#> ACTG1   0    0      0    0    0      0    0      0     0   0
#> ARPC2   0    0      0    0    0      0    0      0     0   0
#> ARPC3   0    0      0    0    0      0    0      0     0   0
#> BTF3    0    0      0    0    0      0    0      0     0   0
#> CD3D    0    0      0    0    0      0    0      0     0   0
#> CD3E    0    0      0    0    0      0    0      0     0   0

consensus <- create_consensus(binary_listj, method = "vote")
head(consensus)
#>       ACTG1 ARPC2 ARPC3 BTF3 CD3D CD3E CD74 CFL1 COX4I1 COX7C CXCR4 EEF1A1
#> ACTG1     0     0     0    0    0    0    0    0      0     0     0      0
#> ARPC2     0     0     1    0    0    0    0    0      0     0     0      0
#> ARPC3     0     1     0    0    0    0    0    0      0     0     0      0
#> BTF3      0     0     0    0    0    0    0    0      0     0     0      0
#> CD3D      0     0     0    0    0    0    0    0      0     0     0      0
#> CD3E      0     0     0    0    0    0    0    0      0     0     0      0
#>       EEF1D EEF2 EIF1 EIF3K EIF4A2 FOS FTH1 FTL GNB2L1 HLA-A HLA-B HLA-C HLA-E
#> ACTG1     0    0    0     0      0   0    0   0      0     0     0     0     0
#> ARPC2     0    0    0     0      0   0    0   0      0     0     0     0     0
#> ARPC3     0    0    0     0      0   0    0   0      0     0     0     0     0
#> BTF3      0    0    0     0      0   0    0   0      0     0     0     0     0
#> CD3D      0    0    0     0      0   0    0   0      0     0     0     0     0
#> CD3E      0    0    0     0      0   0    0   0      0     0     0     0     0
#>       JUN JUNB MYL12B MYL6 NACA PABPC1 PFN1 TMSB4X UBA52 UBC
#> ACTG1   0    0      0    0    0      0    0      0     0   0
#> ARPC2   0    0      0    0    0      0    0      0     0   0
#> ARPC3   0    0      0    0    0      0    0      0     0   0
#> BTF3    0    0      0    0    0      0    0      0     0   0
#> CD3D    0    0      0    0    0      0    0      0     0   0
#> CD3E    0    0      0    0    0      0    0      0     0   0
em <- edge_mining(list(consensus), adj_truth, query_edge_types = "TP")