Skip to content

Commit 7d43865

Browse files
authored
feat: annotating with Orphanet diseases (#58)
1 parent 6c19f59 commit 7d43865

File tree

12 files changed

+109
-5
lines changed

12 files changed

+109
-5
lines changed

.github/workflows/main.yml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,12 +67,21 @@ jobs:
6767
cache-environment: true
6868
post-cleanup: none # breaks otherwise
6969

70+
- name: Install qsv (not in conda-forge yet)
71+
run: |
72+
cd /tmp
73+
wget https://github.com/jqnatividad/qsv/releases/download/0.112.0/qsv-0.112.0-x86_64-unknown-linux-musl.zip
74+
unzip qsv-0.112.0-x86_64-unknown-linux-musl.zip
75+
cp qsv_musl-1.2.3 /usr/local/bin/qsv
76+
7077
- name: Install python package
7178
run: |
7279
pip install -e .
7380
shell: bash -el {0}
7481

7582
- name: Run in test mode
7683
run: |
77-
CI=true snakemake --cores=1 -p
84+
CI=true snakemake --cores=1 -p all
7885
shell: bash -el {0}
86+
env:
87+
QSV_NO_UPDATE: "1"

.github/workflows/release-please.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ jobs:
1515
id: release
1616
with:
1717
release-type: simple
18+
token: ${{ secrets.BOT_TOKEN }}
1819

1920
- name: Checkout repository
2021
uses: actions/checkout@v3

Snakefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,13 +87,15 @@ rule all:
8787
# genes
8888
f"work/download/genes/clingen/{DV.clingen_gene}/clingen.csv",
8989
f"work/download/genes/rcnv/2022/Collins_rCNV_2022.dosage_sensitivity_scores.tsv.gz",
90+
f"work/download/genes/orphapacket/{DV.orphapacket}/orphapacket.tar.gz",
9091
f"work/genes/dbnsfp/{DV.dbnsfp}/genes.tsv.gz",
9192
f"work/genes/ensembl/{DV.ensembl}/ensembl_xlink.tsv",
9293
f"work/genes/enst_ensg/grch37/{DV.ensembl_37}/enst_ensg.tsv",
9394
f"work/genes/entrez/{DV.today}/gene_info.jsonl",
9495
f"work/genes/gnomad/{DV.gnomad_constraints}/gnomad_constraints.tsv",
9596
f"work/genes/hgnc/{DV.today}/hgnc_info.jsonl",
9697
f"work/genes/omim/{DV.hpo}+{DV.today}/omim_diseases.tsv",
98+
f"work/genes/orphapacket/{DV.orphapacket}+{DV.today}/orpha_diseases.tsv",
9799
"work/genes/rcnv/2022/rcnv_collins_2022.tsv",
98100
"work/genes/shet/2019/shet_weghorn_2019.tsv",
99101
# reference-specific annotations
@@ -160,7 +162,7 @@ rule all:
160162
f"output/full/annonars/cons-grch37-{DV.ucsc_cons_37}+{PV.annonars}/rocksdb/IDENTITY",
161163
f"output/full/annonars/cons-grch38-{DV.ucsc_cons_38}+{PV.annonars}/rocksdb/IDENTITY",
162164
# ----- genes
163-
f"output/full/annonars/genes-{DV.acmg_sf}+{DV.gnomad_constraints}+{DV.dbnsfp}+{DV.hpo}+{DV.today}+{PV.annonars}/rocksdb/IDENTITY",
165+
f"output/full/annonars/genes-{DV.acmg_sf}+{DV.gnomad_constraints}+{DV.dbnsfp}+{DV.hpo}+{DV.orphapacket}+{DV.today}+{PV.annonars}/rocksdb/IDENTITY",
164166
# -- worker data
165167
f"output/full/worker/genes-regions-grch37-{DV.refseq_37}+{PV.worker}/refseq_genes.bin",
166168
f"output/full/worker/genes-regions-grch37-{DV.ensembl_37}+{PV.worker}/ensembl_genes.bin",
@@ -329,6 +331,7 @@ include: "rules/work/genes/hgnc.smk"
329331
include: "rules/work/genes/mehari_data_tx.smk"
330332
include: "rules/work/genes/ncbi.smk"
331333
include: "rules/work/genes/omim.smk"
334+
include: "rules/work/genes/orphapacket.smk"
332335
include: "rules/work/genes/rcnv.smk"
333336
include: "rules/work/genes/shet.smk"
334337
# Reference sequence--related rules.

download_urls.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
- url: https://github.com/Orphanet/orphapacket/archive/refs/tags/v10.1.tar.gz
2+
excerpt_strategy:
3+
strategy: no-excerpt
4+
count: null
5+
16
- url: https://zenodo.org/record/6347673/files/Collins_rCNV_2022.dosage_sensitivity_scores.tsv.gz
27

38
- comment: The curation activity summary report is built in real-time.

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ dependencies:
4040
# Parallel (de)compression.
4141
- pigz
4242
# Varfish related
43-
- annonars =0.14.1
43+
- annonars =0.15.0
4444
- viguno =0.1.6
4545
- mehari =0.6.2
4646
- varfish-server-worker =0.10.1
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:58b5f890e962de336ac264c788ae9136ad1eb69f52c1f536379c2984b85beef1
3+
size 71
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:38de2709b2f9a9c4e0b9ec18c5ebae337a47e97897808069dcee7ba5c39d3224
3+
size 1503498

rules/output/annonars/genes.smk

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,16 @@ rule output_annonars_genes: # -- build annonars genes RocksDB file
1010
hgnc="work/genes/hgnc/{date}/hgnc_info.jsonl",
1111
ncbi="work/genes/entrez/{date}/gene_info.jsonl",
1212
omim="work/genes/omim/{v_hpo}+{date}/omim_diseases.tsv",
13+
orpha="work/genes/orphapacket/{v_orpha}+{date}/orpha_diseases.tsv",
1314
rcnv="work/genes/rcnv/2022/rcnv_collins_2022.tsv",
1415
shet="work/genes/shet/2019/shet_weghorn_2019.tsv",
1516
output:
1617
rocksdb_identity=(
17-
"output/full/annonars/genes-{v_acmg_sf}+{v_gnomad_constraints}+{v_dbnsfp}+{v_hpo}+{date}+{v_annonars}/"
18+
"output/full/annonars/genes-{v_acmg_sf}+{v_gnomad_constraints}+{v_dbnsfp}+{v_hpo}+{v_orpha}+{date}+{v_annonars}/"
1819
"rocksdb/IDENTITY"
1920
),
2021
spec_yaml=(
21-
"output/full/annonars/genes-{v_acmg_sf}+{v_gnomad_constraints}+{v_dbnsfp}+{v_hpo}+{date}+{v_annonars}/"
22+
"output/full/annonars/genes-{v_acmg_sf}+{v_gnomad_constraints}+{v_dbnsfp}+{v_hpo}+{v_orpha}+{date}+{v_annonars}/"
2223
"spec.yaml"
2324
),
2425
wildcard_constraints:
@@ -42,6 +43,7 @@ rule output_annonars_genes: # -- build annonars genes RocksDB file
4243
--path-in-dbnsfp {input.dbnsfp} \
4344
--path-in-hgnc {input.hgnc} \
4445
--path-in-omim {input.omim} \
46+
--path-in-orpha {input.orpha} \
4547
--path-in-ncbi {input.ncbi} \
4648
--path-in-rcnv {input.rcnv} \
4749
--path-in-shet {input.shet}
@@ -58,5 +60,6 @@ rule output_annonars_genes: # -- build annonars genes RocksDB file
5860
\
5961
--value v_annonars={wildcards.v_annonars} \
6062
--value v_downloader={PV.downloader} \
63+
--value v_orphapacket={wildcards.v_orpha} \
6164
> {output.spec_yaml}
6265
"""

rules/output/annonars/genes.spec.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ x-created-from:
3434
version: {{ today }}
3535
- name: OMIM
3636
version: {{ today }}
37+
- name: ORDO
38+
version: {{ v_orphapacket }}
3739
- name: rCNV pHaplo/pTriplo scores
3840
version: 2022-Collins-et-al
3941
- name: sHet scores

rules/work/genes/orphapacket.smk

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
## Rules related to annotating genes with ORDO terms
2+
3+
4+
# Download the orphapacket release tarball from GitHub into the versioned
# download directory.
#
# The release tag is built from the {version} wildcard ("v" + version) instead
# of a fixed tag, so that the archive that is fetched always corresponds to the
# version encoded in the output path.
# NOTE(review): assumes the version wildcard carries the bare release number
# (e.g. "10.1", without a leading "v") -- confirm against DV.orphapacket.
rule genes_orphapacket_download: # -- download orphapacket file
5+
output:
6+
tar="work/download/genes/orphapacket/{version}/orphapacket.tar.gz",
7+
shell:
8+
r"""
9+
wget -O {output.tar} \
10+
https://github.com/Orphanet/orphapacket/archive/refs/tags/v{wildcards.version}.tar.gz
11+
"""
12+
13+
14+
# Build the per-gene Orphanet disease table from a downloaded orphapacket
# tarball.
#
# Inputs:
#   tar   -- the orphapacket tarball for the given {version}.
#   xlink -- the mehari genes cross-link TSV for the given {date}.
# Outputs:
#   tsv     -- the sorted, tab-separated result table.
#   tsv_md5 -- MD5 checksum file of the result table.
#
# The shell command unpacks the tarball into a fresh temporary directory
# (removed again by the trap on error or exit), passes the xlink file and the
# extracted "orphapacket-*/json" directory to scripts/genes-orpha-diseases.py,
# and pipes the script output through "qsv sort" and "qsv fmt" before writing
# it to the tsv output.
rule genes_orphapacket_diseases: # -- postprocess file for HGNC gene IDs
15+
input:
16+
tar="work/download/genes/orphapacket/{version}/orphapacket.tar.gz",
17+
xlink="output/full/mehari/genes-xlink-{date}/genes-xlink.tsv",
18+
output:
19+
tsv="work/genes/orphapacket/{version}+{date}/orpha_diseases.tsv",
20+
tsv_md5="work/genes/orphapacket/{version}+{date}/orpha_diseases.tsv.md5",
# NOTE: the checksum is written to "{output.tsv}.md5", which is the same path
# as the declared tsv_md5 output above.
# NOTE: this is a plain (non-raw) string, so Python turns the '\t' sequences
# into literal tab characters before the command is handed to the shell.
shell:
22+
"""
23+
export TMPDIR=$(mktemp -d)
24+
trap "rm -rf $TMPDIR" ERR EXIT
25+
26+
tar -C $TMPDIR -xf $(readlink -f {input.tar})
27+
28+
python ./scripts/genes-orpha-diseases.py {input.xlink} $TMPDIR/orphapacket-*/json \
29+
| qsv sort -d '\t' \
30+
| qsv fmt -t '\t' \
31+
> {output.tsv}
32+
33+
md5sum {output.tsv} > {output.tsv}.md5
"""

0 commit comments

Comments
 (0)