-
Notifications
You must be signed in to change notification settings - Fork 2
Bug Fix: DPMON CPU issues for tuning #99
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -13,18 +13,18 @@ class HybridLouvain: | |||||||||
|
|
||||||||||
| Attributes: | ||||||||||
|
|
||||||||||
| G (nx.Graph): NetworkX graph object. | ||||||||||
| G (Union[nx.Graph, pd.DataFrame]): Input graph as a NetworkX Graph or adjacency DataFrame. | ||||||||||
| B (pd.DataFrame): Omics data. | ||||||||||
| Y (pd.DataFrame): Phenotype data. | ||||||||||
| k3 (float): Weight for Correlated Louvain. | ||||||||||
| k4 (float): Weight for Correlated Louvain. | ||||||||||
| max_iter (int): Maximum number of iterations. | ||||||||||
| weight (str): Edge weight parameter name. | ||||||||||
| tune (bool): Flag to enable tuning of parameters | ||||||||||
| tune (bool): Flag to enable tuning of parameters | ||||||||||
| """ | ||||||||||
| def __init__( | ||||||||||
| self, | ||||||||||
| G: nx.Graph, | ||||||||||
| G: Union[nx.Graph, pd.DataFrame], | ||||||||||
| B: pd.DataFrame, | ||||||||||
| Y: pd.DataFrame, | ||||||||||
| k3: float = 0.2, | ||||||||||
|
|
@@ -43,6 +43,13 @@ def __init__( | |||||||||
| set_seed(seed) | ||||||||||
| self.logger.info("Initializing HybridLouvain...") | ||||||||||
|
|
||||||||||
| if isinstance(G, pd.DataFrame): | ||||||||||
| self.logger.info("Input G is a DataFrame; converting adjacency matrix to NetworkX graph.") | ||||||||||
| G = nx.from_pandas_adjacency(G) | ||||||||||
|
|
||||||||||
| if not isinstance(G, nx.Graph): | ||||||||||
| raise TypeError("G must be a networkx.Graph or a pandas DataFrame adjacency matrix.") | ||||||||||
|
|
||||||||||
| self.G = G | ||||||||||
| graph_nodes = set(map(str, G.nodes())) | ||||||||||
|
|
||||||||||
|
|
@@ -233,7 +240,15 @@ def run(self, as_dfs: bool = False) -> Union[dict, list]: | |||||||||
| refined_nodes = pagerank_results.get("cluster_nodes", []) | ||||||||||
| new_size = len(refined_nodes) | ||||||||||
| all_clusters[iteration] = refined_nodes | ||||||||||
| self.logger.info(f"Refined subgraph size: {new_size}") | ||||||||||
|
|
||||||||||
| cond = pagerank_results.get("conductance", None) | ||||||||||
| corr = pagerank_results.get("correlation", None) | ||||||||||
| score = pagerank_results.get("composite_score", None) | ||||||||||
|
|
||||||||||
| self.logger.info( | ||||||||||
| f"Iteration {iteration+1}: cluster size={new_size}, " | ||||||||||
| f"Conductance={cond:.3f} Correlation={corr:.3f} score={score:.3f}" | ||||||||||
|
||||||||||
| f"Conductance={cond:.3f} Correlation={corr:.3f} score={score:.3f}" | |
| f"Conductance={cond:.3f}" if cond is not None else "Conductance=N/A" + " " | |
| f"Correlation={corr:.3f}" if corr is not None else "Correlation=N/A" + " " | |
| f"score={score:.3f}" if score is not None else "score=N/A" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,4 @@ | ||
| patient,vital_status | ||
| patient,target | ||
|
||
| TCGA-CS-4938,0 | ||
| TCGA-CS-4941,1 | ||
| TCGA-CS-4942,1 | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -25,8 +25,10 @@ | |||||||||||||||
| from ray import tune | ||||||||||||||||
| from ray.tune import Checkpoint | ||||||||||||||||
| from ray.tune import CLIReporter | ||||||||||||||||
| from ray.tune.error import TuneError | ||||||||||||||||
| from ray.tune.stopper import TrialPlateauStopper | ||||||||||||||||
| from ray.tune.schedulers import ASHAScheduler | ||||||||||||||||
| from ray.tune.search.basic_variant import BasicVariantGenerator | ||||||||||||||||
| from sklearn.model_selection import train_test_split,StratifiedKFold,RepeatedStratifiedKFold | ||||||||||||||||
| from sklearn.preprocessing import label_binarize | ||||||||||||||||
| from scipy.stats import pointbiserialr | ||||||||||||||||
|
|
@@ -69,6 +71,7 @@ class DPMON: | |||||||||||||||
| cv (bool): If True, use K-fold cross-validation; otherwise use repeated train/test splits. | ||||||||||||||||
| cuda (int): CUDA device index to use when gpu=True. | ||||||||||||||||
| seed (int): Random seed for reproducibility. | ||||||||||||||||
| seed_trials (bool): If True, use a fixed seed for hyperparameter sampling to ensure reproducibility across trials. | ||||||||||||||||
| output_dir (Path): Directory where logs, checkpoints, and results are written. | ||||||||||||||||
| """ | ||||||||||||||||
| def __init__( | ||||||||||||||||
|
|
@@ -97,6 +100,7 @@ def __init__( | |||||||||||||||
| cv: bool = False, | ||||||||||||||||
| cuda: int = 0, | ||||||||||||||||
| seed: int = 1804, | ||||||||||||||||
| seed_trials: bool = False, | ||||||||||||||||
| output_dir: Optional[str] = None, | ||||||||||||||||
| ): | ||||||||||||||||
| if adjacency_matrix.empty: | ||||||||||||||||
|
|
@@ -153,6 +157,7 @@ def __init__( | |||||||||||||||
| self.gpu = gpu | ||||||||||||||||
| self.cuda = cuda | ||||||||||||||||
| self.seed = seed | ||||||||||||||||
| self.seed_trials = seed_trials | ||||||||||||||||
| self.cv = cv | ||||||||||||||||
|
|
||||||||||||||||
| if output_dir is None: | ||||||||||||||||
|
|
@@ -199,6 +204,7 @@ def run(self) -> Tuple[pd.DataFrame, object, torch.Tensor | None]: | |||||||||||||||
| "cuda": self.cuda, | ||||||||||||||||
| "tune": self.tune, | ||||||||||||||||
| "seed": self.seed, | ||||||||||||||||
| "seed_trials": self.seed_trials, | ||||||||||||||||
| "cv": self.cv, | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
|
|
@@ -305,10 +311,8 @@ def prepare_node_features(adjacency_matrix: pd.DataFrame, omics_datasets: List[p | |||||||||||||||
| omics_data = omics_datasets[0] | ||||||||||||||||
|
|
||||||||||||||||
| if phenotype_col in omics_data.columns: | ||||||||||||||||
| pheno = omics_data[phenotype_col] | ||||||||||||||||
| omics_feature_df = omics_data.drop(columns=[phenotype_col]) | ||||||||||||||||
| else: | ||||||||||||||||
| pheno = None | ||||||||||||||||
| omics_feature_df = omics_data | ||||||||||||||||
|
|
||||||||||||||||
| nodes = sorted(network_features.intersection(omics_feature_df.columns)) | ||||||||||||||||
|
|
@@ -529,7 +533,6 @@ def run_standard_training(dpmon_params, adjacency_matrix, combined_omics, clinic | |||||||||||||||
| best_global_model_state = None | ||||||||||||||||
| best_global_embeddings = None | ||||||||||||||||
|
|
||||||||||||||||
| cv_predictions_list = [] | ||||||||||||||||
| fold_accuracies = [] | ||||||||||||||||
| fold_f1_macros = [] | ||||||||||||||||
| fold_f1_weighteds = [] | ||||||||||||||||
|
|
@@ -642,7 +645,6 @@ def run_standard_training(dpmon_params, adjacency_matrix, combined_omics, clinic | |||||||||||||||
|
|
||||||||||||||||
| try: | ||||||||||||||||
| n_classes = probs_np.shape[1] | ||||||||||||||||
| unique_classes = np.unique(y_test_np) | ||||||||||||||||
|
|
||||||||||||||||
| # binary | ||||||||||||||||
| if n_classes == 2: | ||||||||||||||||
|
|
@@ -778,7 +780,7 @@ def run_hyperparameter_tuning(X_train, y_train, adjacency_matrix, clinical_data, | |||||||||||||||
| "nn_hidden_dim2": tune.choice([32, 64, 128]), | ||||||||||||||||
| "ae_encoding_dim": tune.choice([4, 8, 16]), | ||||||||||||||||
| "num_epochs": tune.choice([512, 1024, 2048]), | ||||||||||||||||
|
||||||||||||||||
| "num_epochs": tune.choice([512, 1024, 2048]), | |
| "num_epochs": tune.choice([512, 1024, 2048]), | |
| # Dropout range updated: removed lower values (0.0, 0.1) and added higher (0.6) to encourage more aggressive regularization. | |
| # This change may impact model performance; see commit message for rationale. |
Copilot
AI
Nov 28, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The condition if new_num_samples == num_samples will only be true when num_samples == 1 (since max(1, 1 // 2) == 1). However, this check happens after the assignment, so if the initial num_samples was already 1, the code will raise an exception. Consider checking if num_samples <= 1 before attempting to halve it, to provide a clearer error path.
| new_num_samples = max(1, num_samples // 2) | |
| if new_num_samples == num_samples: | |
| raise | |
| if num_samples <= 1: | |
| raise | |
| new_num_samples = max(1, num_samples // 2) |
Copilot
AI
Nov 28, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The else clause after the for loop is used here, which executes only if the loop completes without breaking. This is correct Python syntax but can be confusing. Consider adding a comment to clarify that this else clause triggers only if all retry attempts are exhausted without a successful break.
| # The else clause below executes only if the for loop completes without a break, | |
| # i.e., if all retry attempts are exhausted without a successful run. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Trailing whitespace should be removed for consistency.