Source code for metquest.guided_bfs

# -*- coding: utf-8 -*-

from __future__ import absolute_import

from collections import deque, defaultdict


[docs]def forward_pass(graph_object, seedmets):
    """
    This function carries out the Guided Breadth First Search on a directed
    bipartite graph starting from the entries in seed metabolite set.

    Parameters
    ----------
    graph_object : NetworkX DiGraph Object
        Bipartite graph of the metabolic network

    seedmets : set
        Set of seed metabolites including the source

    Returns
    -------
    lower_bound_metabolite : defaultdict
        Minimum number of steps required to reach a metabolite
    status_dict : defaultdict
        Dictionary pertaining to the status of every reaction - whether it
        has been visited or not
    scope : set
        Set of metabolites that can be produced from the given set of
        seed metabolites

    Notes
    -----
    Starting with the set of seed metabolites S, the algorithm first finds
    all the reactions from the set R, whose precursor metabolites are in S.
    Such reactions are marked visited and added to the visited reaction set.
    Metabolites produced by these reactions are checked. The reactions where
    these metabolites participate are then checked for the presence of all its
    predecessors and are added to the queue. This traversal continues in a
    breadth-first manner and stops when there are no further reactions to
    be visited.
    """
    pred = graph_object.predecessors
    succ = graph_object.successors
    seed_metabolite_set = seedmets.copy()
    lower_bound_metabolite = defaultdict(list)
    lower_bound_reaction = defaultdict(list)
    # Defaultdict is used simply because to avoid initialisations
    status_dict = defaultdict(str)
    # Using a deque since deques have O(1) speed for appendleft() and popleft()
    # while lists have O(n) performance for inserting and popping.
    queue = deque([])
    # All seed metabolites are always present, hence require 0 steps
    for seedmetabs in seed_metabolite_set:
        lower_bound_metabolite[seedmetabs].append(0)
    stage = 1
    scope = seed_metabolite_set.copy()
    starting_rxn_node = []
    # First stage where starting_rxn_node list contains all the reactions
    # which require only the seed metabolites as input
    for starting_met_nodes in seed_metabolite_set:
        # Essential when analysing mutiple networks with same seed metabolite
        # set, although would be redundant in case of single network
        if starting_met_nodes in graph_object:
            for startingrxns in succ(starting_met_nodes):
                if set(pred(startingrxns)).issubset(seed_metabolite_set):
                    if startingrxns not in starting_rxn_node:
                        starting_rxn_node.append(startingrxns)
                    for metsprod in succ(startingrxns):
                        scope.add(metsprod)
                        if stage not in lower_bound_metabolite[metsprod]:
                            lower_bound_metabolite[metsprod].append(stage)
                    if stage not in lower_bound_reaction[startingrxns]:
                        lower_bound_reaction[startingrxns].append(stage)
    for rxn in starting_rxn_node:
        for metabs in succ(rxn):
            for nextrxn in succ(metabs):
                if set(pred(nextrxn)).issubset(scope):
                    if nextrxn not in queue:
                        queue.append(nextrxn)
        status_dict[rxn] = 'V'
    while queue:
        stage += 1
        for parentrxn in list(queue):
            if status_dict[parentrxn] == '':
                if stage not in lower_bound_reaction[parentrxn]:
                    lower_bound_reaction[parentrxn].append(stage)
                for mets in succ(parentrxn):
                    scope.add(mets)
                    if stage not in lower_bound_metabolite[mets]:
                        lower_bound_metabolite[mets].append(stage)
                    for progeny in succ(mets):
                        if set(pred(progeny)).issubset(scope):

                            if status_dict[progeny] != 'V':
                                if progeny not in queue:
                                    queue.append(progeny)
                status_dict[parentrxn] = 'V'
            elif status_dict[parentrxn] == 'V':
                for mets in succ(parentrxn):
                    if stage not in lower_bound_metabolite[mets]:
                        lower_bound_metabolite[mets].append(stage)
            queue.popleft()
    return lower_bound_metabolite, status_dict, scope