mpc-bioinformatics · Luxxii · Feb 14, 2026 · Jan 15, 2026 · Feb 14, 2025 · Feb 14, 2025
diff --git a/.github/workflows/functional_tests.yaml b/.github/workflows/functional_tests.yaml
@@ -7,12 +7,12 @@ jobs:
     strategy:
       matrix:
         python-version:
-        - '3.7' 
-        - '3.8' 
         - '3.9' 
         - '3.10'
         - '3.11'
         - '3.12'
+        - '3.13'
+        - '3.14'
 
     steps:
       - name: Check out Repository
@@ -33,7 +33,6 @@ jobs:
           pip install psycopg
           pip install gremlinpython
           pip install mysql-connector-python
-          pip install cassandra-driver
           pip install apsw
 
       - name: Run Complete tests

diff --git a/.github/workflows/pypi.yaml b/.github/workflows/pypi.yaml
@@ -9,10 +9,10 @@ jobs:
       - name: Check out Repository
         uses: actions/checkout@v2
 
-      - name: Setup Python 3.9
+      - name: Setup Python 3.14
         uses: actions/setup-python@v4
         with:
-          python-version: 3.9
+          python-version: 3.14
 
       - name: Install Python build
         run: |

diff --git a/README.md b/README.md
@@ -8,6 +8,9 @@
 
 [![Static Badge](https://img.shields.io/badge/publication-10.1093%2Fbib%2Fbbae671-blue?link=https%3A%2F%2Facademic.oup.com%2Fbib%2Farticle%2F26%2F1%2Fbbae671%2F7942791)](https://doi.org/10.1093/bib/bbae671)
 
+![de.NBI Logo](https://raw.githubusercontent.com/mpc-bioinformatics/ProtGraph/master/resources/denbi-logo-color.svg)
+
+If you used ProtGraph, please take the time to answer [our short survey](https://de.surveymonkey.com/r/denbi-service?sc=bioinfra-prot&tool=ProtGraph) to help us improve de.NBI services.
 ## Summary
 
 ProtGraph in short is a python-package, which allows to convert protein-entries from the [UniProtKB](https://www.uniprot.org/) to so-called protein-graphs. We use the [SP-EMBL-Entries](https://web.expasy.org/docs/userman.html), provided by UniProtKB via `*.txt` or `*.dat`-files, and parse the available feature-information. In contrast to a FASTA-file-entry of a protein, a SP-EMBL-file-entry is
@@ -18,7 +21,7 @@ So, what can we do with ProtGraph? First, the protein-graphs can be saved by Pro
 generated, containing various on-the-fly retrievable information about the protein-graph. We can calculate the number of nodes/edges/features within a protein-graph, the number of protein-/peptide-sequences contained and even binned by specific attributes. These can give a quick overview e.g. of the tryptic search space while considering all feature-information of the provided species-proteome.
 Lastly, the protein-graphs per se are not useful, especially in identification. Therefore, we extended ProtGraph to optionally convert protein-graph into FASTA-entries, to be used for identification and to enable searches of feature-induced-peptides.
 
-Curious what we do with ProtGraph and its output? Check out `materials_and_posters` for an explanation of the protein-graph-generation and further materials. Below in the README.md, additional examples are provided.
+Curious what we do with ProtGraph and its output? Check out `materials_and_posters` for an explanation of the protein-graph-generation and further materials. Below in the README.md, additional examples are provided. For a full list of CLI commands, check out [this code part](https://github.com/mpc-bioinformatics/ProtGraph/blob/master/protgraph/cli.py).
 
 ### Setting up ProtGraph
 

diff --git a/protgraph/cli.py b/protgraph/cli.py
@@ -272,22 +272,22 @@ def add_statistics(group):
     group.add_argument(
         "--calc_num_possibilities_variant", "-cnpvar", default=False, action="store_true",
         help="If this is set, the number of all possible (non repeating) paths from the start to the end node will"
-        " be calculated. This returns a list, sorted by the number of variants (beginning at 0). "
-        "Similar to misclavages"
+        " be calculated for the feature VARIANT. This returns a list, sorted by the number of variants (beginning at 0)"
+        ". Similar to miscleavages"
     )
 
     group.add_argument(
         "--calc_num_possibilities_mutagen", "-cnpmut", default=False, action="store_true",
         help="If this is set, the number of all possible (non repeating) paths from the start to the end node will"
-        " be calculated. This returns a list, sorted by the number of mutagens (beginning at 0). "
-        "Similar to misclavages"
+        " be calculated for the feature MUTAGEN. This returns a list, sorted by the number of mutagens (beginning at 0)"
+        ". Similar to miscleavages"
     )
 
     group.add_argument(
         "--calc_num_possibilities_conflict", "-cnpcon", default=False, action="store_true",
         help="If this is set, the number of all possible (non repeating) paths from the start to the end node will"
-        " be calculated. This returns a list, sorted by the number of conflicts (beginning at 0). "
-        "Similar to misclavages"
+        " be calculated for the feature CONFLICT. This returns a list, sorted by the number of conflicts (beginning at "
+        "0). Similar to miscleavages"
     )
 
     # Add replace funcitonality to CLI
@@ -302,16 +302,14 @@ def _list_to_func_map(input: str):
         else:
             return None
     group.add_argument(
-        "--calc_num_possibilites_or_count", "-cnp_or_count", choices=[min, max],
+        "--calc_num_possibilities_or_count", "-cnp_or_count", choices=[min, max],
         type=_list_to_func_map, action="store", default=min,
-        help="Substitute amino acids in graphs by other amino acids. This could be useful to replace"
-        " e.g. 'J' with 'I' and 'L'. This parameter can be then provided as: 'J->I,L'. Multiple replacements"
-        " are allowed and are executed one after another. NOTE: only from ONE amino acid multiple amino acids can"
-        " be substituted. So only the format: 'A->B[,C]*' is allowed!"
+        help="This only influences the counting of the number of all possible (non repeating) paths from the start to "
+        "the end node for the VARIANT, MUTAGEN and CONFLICT features. As some edges may collapse during optimization "
+        "of the graph one edge can describe more than one way how to point to the next amino acid / node. Choose "
+        "between 'min' or 'max' counting strategy (over or under counting). "
     )
 
-    # TODO one parameter is missing for cnp con/mut/var
-
 
 def add_graph_exports(group):
     group.add_argument(

diff --git a/protgraph/export/pcsr.py b/protgraph/export/pcsr.py
@@ -32,7 +32,7 @@ def write_pcsr(self, pg, path):
             out.write(out_str)
 
     def _build_csr_entry(self, graph):
-        # The CSR is reordered in Top Order in favor on going through the RAM sequentially. To be tested...
+        # The CSR is reordered in Top Order in favor on going through the RAM sequentially.
 
         # get the topological order
         TO = self.__get_protein_graph_specific_top_order(graph)  # The Topological Order
@@ -41,9 +41,11 @@ def _build_csr_entry(self, graph):
         # get Accessions (including isoforms)
         AC = graph.vs[0]["accession"]
         IA = \
-            list(set(graph.vs["isoform_accession"]).difference(set([None]))) \
-            if "isoform_accession" in graph.vs[0].attributes() \
-            else []  # Get List of Isos (Accessions)
+            sorted(
+                list(set(graph.vs["isoform_accession"]).difference(set([None]))) \
+                if "isoform_accession" in graph.vs[0].attributes() \
+                else []   # noqa E501 # Get List of Isos (Accessions) 
+            )
 
         NO = []  # Get List of Nodes
         ED = []  # Get List of Edges

diff --git a/protgraph/export/peptides/pep_mysql.py b/protgraph/export/peptides/pep_mysql.py
@@ -243,7 +243,7 @@ def _export_peptide_no_duplicate(self, l_peptides_tup, l_path_nodes, l_miscleava
                 # followed by 5 bits for the n and c terminus (ascii_code - 65)
                 # The weight is disregarded, since it is composed by the aa counts
                 [format(i, 'b').zfill(17) for i in x[1:-2]] \
-                + [format(ord(i) - 65, 'b').zfill(5) for i in x[-2:]]
+                + [format(ord(i) - 65, 'b').zfill(5) for i in x[-2:]] # noqa E501
             )
             for x in l_peptides_tup
         ]

diff --git a/protgraph/export/peptides/pep_postgres.py b/protgraph/export/peptides/pep_postgres.py
@@ -246,7 +246,7 @@ def _export_peptide_no_duplicate(self, l_peptides_tup, l_path_nodes, l_miscleava
                 # followed by 5 bits for the n and c terminus (ascii_code - 65)
                 # The weight is disregarded, since it is composed by the aa counts
                 [format(i, 'b').zfill(17) for i in x[1:-2]] \
-                + [format(ord(i) - 65, 'b').zfill(5) for i in x[-2:]]
+                + [format(ord(i) - 65, 'b').zfill(5) for i in x[-2:]] # noqa E501
             )
             for x in l_peptides_tup
         ]

diff --git a/protgraph/ft_execution/generic.py b/protgraph/ft_execution/generic.py
@@ -62,12 +62,12 @@ def _get_vertices_before_after(graph, generic_feature):
     # Get all vertices which are at beginning (before)
     # and all vertices which are at the end (after)
     vertices_before, vertices_after = _get_all_vertices_before_after(
-        graph, aa_before, aa_after, generic_feature.ref
+        graph, aa_before, aa_after, generic_feature.location.ref
     )
     if len(vertices_before) == 0 or len(vertices_after) == 0:
         # Check if we have vertices, if not simply skip
         print("No Vertices retrieved for protein {}, using {}: {} (referencing: {}). Skipping...".format(
-            graph.vs[0]["accession"], generic_feature.type, generic_feature.id, generic_feature.ref))
+            graph.vs[0]["accession"], generic_feature.type, generic_feature.id, generic_feature.location.ref))
         return None, None
 
     return vertices_before, vertices_after

diff --git a/protgraph/ft_execution/generic_cleaved_peptide.py b/protgraph/ft_execution/generic_cleaved_peptide.py
@@ -21,7 +21,6 @@ def execute_peptide(graph, peptide_feature):
 
 def execute_chain(graph, chain_feature):
     """ Wrapper function to execute chain_features"""
-    # TODO
     execute_generic_cleaved_peptide(graph, chain_feature)
 
 
@@ -55,7 +54,7 @@ def execute_generic_cleaved_peptide(graph, generic_cleaved_feature):
     # Get the corrsponding start and end nodes of the referenced peptide
     # (including isoforms, if not specified or only isoforms)
     start_nodes, end_nodes = _get_all_vertices_before_after(
-        graph, start_position, end_position, generic_cleaved_feature.ref
+        graph, start_position, end_position, generic_cleaved_feature.location.ref
     )
 
     # Create the edge-list (cleaving the referenced peptide)

diff --git a/protgraph/ft_execution/init_met.py b/protgraph/ft_execution/init_met.py
@@ -61,7 +61,7 @@ def _get_methionines(graph, pos_first_aas, init_met_feature):
         "isoform_position" in graph.vs[0].attributes() else False
 
     _aas_set = True  # Bool to check if we were able to retrieve aminoacids from the graph
-    if init_met_feature.ref is None:
+    if init_met_feature.location.ref is None:
         # Canonical M
         if not has_isoforms:
             # for no isoforms
@@ -84,13 +84,13 @@ def _get_methionines(graph, pos_first_aas, init_met_feature):
             x
             for x in pos_first_aas
             if graph.vs[x]["aminoacid"] == "M" and graph.vs[x]["isoform_position"] == 1 and
-            graph.vs[x]["isoform_accession"] == init_met_feature.ref
+            graph.vs[x]["isoform_accession"] == init_met_feature.location.ref
         ]
     else:
-        if init_met_feature.ref is not None and not has_isoforms:
+        if init_met_feature.location.ref is not None and not has_isoforms:
             print(
                 "Warning, INIT_MET could not applied on isoform {} (isoform is missing in graph)"
-                .format(init_met_feature.ref)
+                .format(init_met_feature.location.ref)
             )
             met_aas = []
             _aas_set = False

diff --git a/protgraph/graph_statistics.py b/protgraph/graph_statistics.py
@@ -15,7 +15,7 @@
 
 def get_statistics(graph, entry_dict, **kwargs):
     """
-    TODO can we retrieve even more information!?
+    Calculates Statistics based on the provided statistics method list and fills final result dictionary.
     returns #Node, #Edges, #Num_Of_Paths
     """
 
@@ -25,9 +25,7 @@ def get_statistics(graph, entry_dict, **kwargs):
 
     for calculate_bool, method, entry_dict_key in STATISTICS_METHOD_LIST:
         if kwargs[calculate_bool]:
-            entry_dict[entry_dict_key] = method(graph, kwargs["calc_num_possibilites_or_count"])
-
-    # TODO can we calculate more statistics?
+            entry_dict[entry_dict_key] = method(graph, kwargs["calc_num_possibilities_or_count"])
 
 
 def _get_edge_count(graph_entry):