From 89ac981263d607d1aa089fc5d0ba7fee1f357bb5 Mon Sep 17 00:00:00 2001 From: linlin1 Date: Thu, 21 May 2026 10:54:33 +0800 Subject: [PATCH 1/2] Fix alphagenome bug --- examples/biosciences/alphagenome/inference.sh | 6 +++--- examples/biosciences/alphagenome/run_track.sh | 4 ++-- examples/biosciences/alphagenome/run_variant.sh | 6 +++--- install.sh | 13 ++++++++++++- setup.py | 1 + 5 files changed, 21 insertions(+), 9 deletions(-) mode change 100644 => 100755 examples/biosciences/alphagenome/inference.sh mode change 100644 => 100755 examples/biosciences/alphagenome/run_track.sh mode change 100644 => 100755 examples/biosciences/alphagenome/run_variant.sh diff --git a/examples/biosciences/alphagenome/inference.sh b/examples/biosciences/alphagenome/inference.sh old mode 100644 new mode 100755 index 64c62b21..89342b65 --- a/examples/biosciences/alphagenome/inference.sh +++ b/examples/biosciences/alphagenome/inference.sh @@ -2,8 +2,8 @@ source ../../../env.sh export PYTHONPATH=$(pwd):$PYTHONPATH -export DATA_ROOT_DIR=${ONESCIENCE_DATASETS_DIR}/AlphaGeonme -export MODEL_ROOT_DIR=${ONESCIENCE_MODELS_DIR}/AlphaGeonme +export DATA_ROOT_DIR=${ONESCIENCE_DATASETS_DIR}/AlphaGenome +export MODEL_ROOT_DIR=${ONESCIENCE_MODELS_DIR}/AlphaGenome python run_inference.py \ --fasta_path ${DATA_ROOT_DIR}/reference/HOMO_SAPIENS/GRCh38.p13.genome.fa \ @@ -11,4 +11,4 @@ python run_inference.py \ --chromosome chr19 \ --start 10587331 \ --end 11635907 \ - --output_dir ./outputs \ No newline at end of file + --output_dir ./outputs diff --git a/examples/biosciences/alphagenome/run_track.sh b/examples/biosciences/alphagenome/run_track.sh old mode 100644 new mode 100755 index 17b69144..89ddbfb5 --- a/examples/biosciences/alphagenome/run_track.sh +++ b/examples/biosciences/alphagenome/run_track.sh @@ -2,8 +2,8 @@ source ../../../env.sh export PYTHONPATH=$(pwd):$PYTHONPATH -export DATA_ROOT_DIR=${ONESCIENCE_DATASETS_DIR}/AlphaGeonme -export MODEL_ROOT_DIR=${ONESCIENCE_MODELS_DIR}/AlphaGeonme 
+export DATA_ROOT_DIR=${ONESCIENCE_DATASETS_DIR}/AlphaGenome +export MODEL_ROOT_DIR=${ONESCIENCE_MODELS_DIR}/AlphaGenome python run_track_prediction_eval.py \ --model_dir ${MODEL_ROOT_DIR}/alphagenome-all-folds \ diff --git a/examples/biosciences/alphagenome/run_variant.sh b/examples/biosciences/alphagenome/run_variant.sh old mode 100644 new mode 100755 index 6e14478b..a494a6b5 --- a/examples/biosciences/alphagenome/run_variant.sh +++ b/examples/biosciences/alphagenome/run_variant.sh @@ -2,10 +2,10 @@ source ../../../env.sh export PYTHONPATH=$(pwd):$PYTHONPATH -export DATA_ROOT_DIR=${ONESCIENCE_DATASETS_DIR}/AlphaGeonme -export MODEL_ROOT_DIR=${ONESCIENCE_MODELS_DIR}/AlphaGeonme +export DATA_ROOT_DIR=${ONESCIENCE_DATASETS_DIR}/AlphaGenome +export MODEL_ROOT_DIR=${ONESCIENCE_MODELS_DIR}/AlphaGenome python run_variant_scoring.py \ --fasta_path ${DATA_ROOT_DIR}/reference/HOMO_SAPIENS/GRCh38.p13.genome.fa \ --model_dir ${MODEL_ROOT_DIR}/alphagenome-all-folds \ - --output_dir ./outputs_variant \ No newline at end of file + --output_dir ./outputs_variant diff --git a/install.sh b/install.sh index 0ec8c1fb..3a51b9f6 100644 --- a/install.sh +++ b/install.sh @@ -67,7 +67,7 @@ BIO_WHEELS=( "https://download.sourcefind.cn:65024/file/4/jax/DAS1.7/jax_rocm60_pjrt-0.4.34+das.opt1.dtk25042-py3-none-manylinux2014_x86_64.whl" "https://download.sourcefind.cn:65024/file/4/jax/DAS1.7/jax_rocm60_plugin-0.4.34+das.opt1.dtk25042-cp311-cp311-manylinux2014_x86_64.whl" "https://download.sourcefind.cn:65024/file/4/jax/DAS1.7/jaxlib-0.4.34+das.opt1.dtk25042-cp311-cp311-manylinux2014_x86_64.whl" - "https://download.sourcefind.cn:65024/directlink/4/transformer_engine/DAS1.7/transformer_engine-2.7.0+das.opt1.dtk25042-cp311-cp311-manylinux_2_28_x86_64.whl" + "https://download.sourcefind.cn:65024/directlink/4/transformer_engine/DAS1.7/transformer_engine-2.5.0+das.opt1.dtk25042-cp311-cp311-manylinux_2_28_x86_64.whl" 
"https://download.sourcefind.cn:65024/directlink/4/flash_attn/DAS1.7/flash_attn-2.6.1+das.opt1.dtk25042-cp311-cp311-manylinux_2_28_x86_64.whl" "https://download.sourcefind.cn:65024/directlink/9/onesicence/dtk-25.04.2/bionemo_noodles-0.1.2-cp311-cp311-manylinux_2_28_x86_64.whl" "https://download.sourcefind.cn:65024/directlink/9/onesicence/dtk-25.04.2/nemo_toolkit-2.5.0rc0-py3-none-any.whl" @@ -202,6 +202,17 @@ if [[ "$DOMAIN" == "bio" || "$DOMAIN" == "all" ]]; then find "$AF3_SRC" -name "cpp*.so" -exec cp {} "$AF3_DEST/" \; cp -r "$AF3_SRC/_tools" "$AF3_DEST/" 2>/dev/null || true + # AlphaGenome metadata files need to be copied too + # Find the site-packages location for AlphaGenome + ALPHAGENOME_SRC="$SCRIPT_DIR/src/onescience/flax_models/alphagenome" + ALPHAGENOME_DEST="$(python -c 'import site; print(site.getsitepackages()[0])')/onescience/flax_models/alphagenome" + echo ">>> Copying AlphaGenome metadata files to $ALPHAGENOME_DEST" + + # Create destination directory if it doesn't exist + mkdir -p "$ALPHAGENOME_DEST/model/metadata/" + # Copy all .textproto files from the source metadata directory to the destination + find "$ALPHAGENOME_SRC/model/metadata" -maxdepth 1 -name "*.textproto" -exec cp {} "$ALPHAGENOME_DEST/model/metadata/" \; + # cpp.so (libcifpp) looks for components.cif in site-packages/share/libcifpp/ # at import time. Copy it there before building data files. 
if [[ -n "${ALPHAFOLD3_CIFPP_COMPONENTS:-}" && -f "$ALPHAFOLD3_CIFPP_COMPONENTS" ]]; then diff --git a/setup.py b/setup.py index 8d32dfe9..bf1f75e3 100644 --- a/setup.py +++ b/setup.py @@ -266,6 +266,7 @@ bio_requires = [ "pytest", "pdbfixer", "e3nn", + "pyranges", ] matchem_requires = [ -- Gitee From 8f070760e4dde386c61cbbc7d47192d6c7dcf53c Mon Sep 17 00:00:00 2001 From: linlin1 Date: Fri, 29 May 2026 11:41:23 +0800 Subject: [PATCH 2/2] alphagenome update --- examples/biosciences/alphagenome/run_track.sh | 1 + install.sh | 11 -------- requirements.txt | 2 +- .../flax_models/alphagenome/package_config.py | 25 +++++++++++++++++++ 4 files changed, 27 insertions(+), 12 deletions(-) create mode 100644 src/onescience/flax_models/alphagenome/package_config.py diff --git a/examples/biosciences/alphagenome/run_track.sh b/examples/biosciences/alphagenome/run_track.sh index 89ddbfb5..a032a4a7 100755 --- a/examples/biosciences/alphagenome/run_track.sh +++ b/examples/biosciences/alphagenome/run_track.sh @@ -8,5 +8,6 @@ export MODEL_ROOT_DIR=${ONESCIENCE_MODELS_DIR}/AlphaGenome python run_track_prediction_eval.py \ --model_dir ${MODEL_ROOT_DIR}/alphagenome-all-folds \ --model_version ALL_FOLDS \ + --data_dir ${DATA_ROOT_DIR}/v1/train \ --output_path ./outputs_track/eval_results.csv diff --git a/install.sh b/install.sh index 3a51b9f6..6cc6c172 100644 --- a/install.sh +++ b/install.sh @@ -202,17 +202,6 @@ if [[ "$DOMAIN" == "bio" || "$DOMAIN" == "all" ]]; then find "$AF3_SRC" -name "cpp*.so" -exec cp {} "$AF3_DEST/" \; cp -r "$AF3_SRC/_tools" "$AF3_DEST/" 2>/dev/null || true - # AlphaGenome metadata files need to be copied too - # Find the site-packages location for AlphaGenome - ALPHAGENOME_SRC="$SCRIPT_DIR/src/onescience/flax_models/alphagenome" - ALPHAGENOME_DEST="$(python -c 'import site; print(site.getsitepackages()[0])')/onescience/flax_models/alphagenome" - echo ">>> Copying AlphaGenome metadata files to $ALPHAGENOME_DEST" - - # Create destination directory if it doesn't 
exist - mkdir -p "$ALPHAGENOME_DEST/model/metadata/" - # Copy all .textproto files from the source metadata directory to the destination - find "$ALPHAGENOME_SRC/model/metadata" -maxdepth 1 -name "*.textproto" -exec cp {} "$ALPHAGENOME_DEST/model/metadata/" \; - # cpp.so (libcifpp) looks for components.cif in site-packages/share/libcifpp/ # at import time. Copy it there before building data files. if [[ -n "${ALPHAFOLD3_CIFPP_COMPONENTS:-}" && -f "$ALPHAFOLD3_CIFPP_COMPONENTS" ]]; then diff --git a/requirements.txt b/requirements.txt index e9caa436..621cb06b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -112,7 +112,7 @@ pwact #alphagenome alphagenome>=0.1.0 -kagglehub>=0.3.0 +kagglehub>=0.3.0,<=0.3.3 orbax-checkpoint>=0.6.0 pyfaidx>=0.7.0 jaxtyping>=0.2.0 diff --git a/src/onescience/flax_models/alphagenome/package_config.py b/src/onescience/flax_models/alphagenome/package_config.py new file mode 100644 index 00000000..42784530 --- /dev/null +++ b/src/onescience/flax_models/alphagenome/package_config.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 +""" +AlphaGenome package config. + +Expose runtime metadata files through the normal packaging pipeline so +install scripts do not need to copy files into site-packages manually. +""" + +ALPHAGENOME_PACKAGE_DATA = { + "onescience.flax_models.alphagenome.model.metadata": [ + "*.textproto", + ], +} + +ALPHAGENOME_MANIFEST_RULES = [ + "recursive-include src/onescience/flax_models/alphagenome/model/metadata *.textproto", +] + + +def get_package_data(): + return ALPHAGENOME_PACKAGE_DATA + + +def get_manifest_rules(): + return ALPHAGENOME_MANIFEST_RULES -- Gitee