Edge Mining of Gene Interactions Using PubMed — edge

Query PubMed for literature evidence supporting predicted gene–gene interactions.

Usage

edge_mining(
  predicted_list,
  ground_truth,
  delay = 1,
  query_field = "Title/Abstract",
  query_edge_types = c("TP", "FP", "FN"),
  max_retries = 10,
  BPPARAM = BiocParallel::bpparam()
)

Arguments

predicted_list: A list of predicted adjacency matrices (row and column names are gene symbols).
ground_truth: A binary (0/1) adjacency matrix with gene symbols as row and column names, used as the reference against which each predicted matrix is compared.
delay: Numeric. Seconds to wait between consecutive queries (default = 1).
query_field: Character. PubMed search field. Options: "Title/Abstract" (default), "Title", "Abstract".
query_edge_types: Character vector. Edge types to query; any subset of c("TP", "FP", "FN"), i.e. true positives, false positives, and false negatives (default: all three).
max_retries: Integer. Maximum number of retry attempts for each PubMed query (default = 10).
BPPARAM: A BiocParallel parameter object. Default = BiocParallel::bpparam().

Value

A named list of data.frames. Each data.frame has columns:

gene1: First gene in interaction
gene2: Second gene
edge_type: One of "TP", "FP", or "FN"
pubmed_hits: Number of PubMed hits
PMIDs: Comma-separated PubMed IDs, or NA when no IDs are available
query_status: One of "hits_found", "no_hits", or "error"

Details

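This function compares each predicted adjacency matrix against the ground truth matrix and classifies every edge as a true positive (TP), false positive (FP), or false negative (FN). It then queries PubMed for each gene pair whose edge type is listed in query_edge_types. For every queried pair, the result reports the number of PubMed hits, the corresponding PMIDs, and the query status.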

Examples

data(count_matrices)
data(adj_truth)

networks <- infer_networks(
    count_matrices_list = count_matrices,
    method = "GENIE3",
    nCores = 1
)
head(networks[[1]])
#>   regulatoryGene targetGene    weight
#> 1          ARPC2      ARPC3 0.1951320
#> 2          HLA-A       CD74 0.1775219
#> 3          ARPC3      ARPC2 0.1604873
#> 4           CD3E       CD3D 0.1570717
#> 5          HLA-E        FOS 0.1520265
#> 6          UBA52     GNB2L1 0.1494235

wadj_list <- generate_adjacency(networks)
swadj_list <- symmetrize(wadj_list, weight_function = "mean")

binary_listj <- cutoff_adjacency(
    count_matrices = count_matrices,
    weighted_adjm_list = swadj_list,
    n = 2,
    method = "GENIE3",
    quantile_threshold = 0.99,
    nCores = 1,
    debug = TRUE
)
#> [Method: GENIE3] Matrix 1 → Cutoff = 0.10502
#> [Method: GENIE3] Matrix 2 → Cutoff = 0.10020
#> [Method: GENIE3] Matrix 3 → Cutoff = 0.10234
head(binary_listj[[1]])
#>       ACTG1 ARPC2 ARPC3 BTF3 CD3D CD3E CD74 CFL1 COX4I1 COX7C CXCR4 EEF1A1
#> ACTG1     0     0     0    0    0    0    0    0      0     0     0      0
#> ARPC2     0     0     1    0    0    0    0    0      0     0     0      0
#> ARPC3     0     1     0    0    0    0    0    0      0     0     0      0
#> BTF3      0     0     0    0    0    0    0    0      0     0     0      0
#> CD3D      0     0     0    0    0    1    0    0      0     0     0      0
#> CD3E      0     0     0    0    1    0    0    0      0     0     0      0
#>       EEF1D EEF2 EIF1 EIF3K EIF4A2 FOS FTH1 FTL GNB2L1 HLA-A HLA-B HLA-C HLA-E
#> ACTG1     0    0    0     0      0   0    0   0      0     0     0     0     0
#> ARPC2     0    0    0     0      0   0    0   0      0     0     0     0     1
#> ARPC3     0    0    0     0      0   0    0   0      0     0     0     0     0
#> BTF3      0    0    0     0      0   0    0   0      0     0     0     0     0
#> CD3D      0    0    0     0      0   0    0   0      0     0     0     0     0
#> CD3E      0    0    0     0      0   0    0   0      0     0     0     0     0
#>       JUN JUNB MYL12B MYL6 NACA PABPC1 PFN1 TMSB4X UBA52 UBC
#> ACTG1   0    0      0    0    0      0    0      0     0   0
#> ARPC2   0    0      0    0    0      0    0      0     0   0
#> ARPC3   0    0      0    0    0      0    0      0     0   0
#> BTF3    0    0      0    0    0      0    0      0     0   0
#> CD3D    0    0      0    0    0      0    0      0     0   0
#> CD3E    0    0      0    0    0      0    0      0     0   0

consensus <- create_consensus(binary_listj, method = "vote")
head(consensus)
#>       ACTG1 ARPC2 ARPC3 BTF3 CD3D CD3E CD74 CFL1 COX4I1 COX7C CXCR4 EEF1A1
#> ACTG1     0     0     0    0    0    0    0    0      0     0     0      0
#> ARPC2     0     0     1    0    0    0    0    0      0     0     0      0
#> ARPC3     0     1     0    0    0    0    0    0      0     0     0      0
#> BTF3      0     0     0    0    0    0    0    0      0     0     0      0
#> CD3D      0     0     0    0    0    0    0    0      0     0     0      0
#> CD3E      0     0     0    0    0    0    0    0      0     0     0      0
#>       EEF1D EEF2 EIF1 EIF3K EIF4A2 FOS FTH1 FTL GNB2L1 HLA-A HLA-B HLA-C HLA-E
#> ACTG1     0    0    0     0      0   0    0   0      0     0     0     0     0
#> ARPC2     0    0    0     0      0   0    0   0      0     0     0     0     0
#> ARPC3     0    0    0     0      0   0    0   0      0     0     0     0     0
#> BTF3      0    0    0     0      0   0    0   0      0     0     0     0     0
#> CD3D      0    0    0     0      0   0    0   0      0     0     0     0     0
#> CD3E      0    0    0     0      0   0    0   0      0     0     0     0     0
#>       JUN JUNB MYL12B MYL6 NACA PABPC1 PFN1 TMSB4X UBA52 UBC
#> ACTG1   0    0      0    0    0      0    0      0     0   0
#> ARPC2   0    0      0    0    0      0    0      0     0   0
#> ARPC3   0    0      0    0    0      0    0      0     0   0
#> BTF3    0    0      0    0    0      0    0      0     0   0
#> CD3D    0    0      0    0    0      0    0      0     0   0
#> CD3E    0    0      0    0    0      0    0      0     0   0
em <- edge_mining(list(consensus), adj_truth, query_edge_types = "TP")