# Source code for nx_arangodb.classes.reportviews

"""Experimental overrides of the NetworkX Views that represent the
nodes and edges of the graph.

Overriding these classes allows us to implement custom logic for
data filtering and updating in the database, instead of in Python.

These classes are a work-in-progress. The main goal is to try
to delegate data processing to ArangoDB, whenever possible.

To use these experimental views, you must set **use_arango_views=True**
when creating a new graph object:
>>> G = nxadb.Graph(name="MyGraph", use_arango_views=True)
"""

from __future__ import annotations

import networkx as nx

import nx_arangodb as nxadb



# [docs]
class ArangoNodeView(nx.classes.reportviews.NodeView):
    """Experimental, writable subclass of the NetworkX ``NodeView``.

    The stock ``NodeView`` is read-only. This subclass additionally
    exposes :meth:`update`, so that many nodes can be written to the
    graph in the DB through a single call.
    """

    # DataView method
    def __call__(self, data=False, default=None):
        # ``data is False`` means no attributes were requested, so the
        # plain node view (this object) is the answer.
        return self if data is False else ArangoNodeDataView(self._nodes, data, default)

    def data(self, data=True, default=None):
        """Return a view of node data.

        Parameters
        ----------
        data : bool or node data key, default=True
            With ``data=True`` (the default) the returned view maps each
            node to *all* of its attributes. Any other value that is not
            ``False`` is treated as an attribute key, and the view maps
            each node to the value stored under that key; nodes lacking
            the key are reported with `default`.
        default : object, default=None
            Value reported for nodes that do not have the requested
            attribute.

        Returns
        -------
        ArangoNodeDataView or ArangoNodeView
            An `ArangoNodeDataView` whose layout depends on `data`, or
            this view itself when ``data=False``.

        See Also
        --------
        ArangoNodeDataView
        """
        if data is not False:
            return ArangoNodeDataView(self._nodes, data, default)
        return self

    def update(self, data):
        """Update a set of nodes within the graph.

        The benefit of this method is that it allows for bulk API updates,
        as opposed to `G.add_nodes_from`, which currently makes
        one API request per node.

        The argument is forwarded unchanged to the ``update`` method of
        the underlying node mapping, and that call's result is returned.

        Example
        -------
        >>> G = nxadb.Graph(name="MyGraph")
        >>> G.nodes.update(
        ...     {
        ...         "node/1": {"foo": "bar"},
        ...         "node/2": {"foo": "baz"},
        ...     }
        ... )

        NOTE(review): the exact per-node payload accepted here is defined
        by the underlying node mapping; confirm whether each value must
        also carry the node id.
        """
        node_store = self._nodes
        return node_store.update(data)





# [docs]
class ArangoNodeDataView(nx.classes.reportviews.NodeDataView):
    """Experimental subclass of the NetworkX ``NodeDataView``.

    Iterating this view yields nodes, optionally paired with data. The
    data is either the whole attribute dictionary of each node or one
    specific attribute (with a default for nodes that lack it).

    When a specific attribute is requested, the lookup is handed to the
    node mapping's ``items(data=..., default=...)`` so that the data is
    filtered server-side, using the ArangoDB Query Language (AQL),
    instead of in Python.
    """

    def __iter__(self):
        requested = self._data

        if requested is False:
            # No data requested: iterate over the nodes alone.
            source = self._nodes
        elif requested is True:
            # Every attribute requested: (node, attribute-dict) pairs.
            source = self._nodes.items()
        else:
            ######################
            # NOTE: Monkey Patch #
            ######################
            # Reason: We can utilize AQL to filter the data we
            # want to return, instead of filtering it in Python.
            #
            # The upstream NetworkX implementation was:
            #     (n, dd[data] if data in dd else self._default)
            #     for n, dd in self._nodes.items()
            source = self._nodes.items(data=requested, default=self._default)

        return iter(source)



# [docs]
class ArangoEdgeDataView(nx.classes.reportviews.EdgeDataView):
    """Experimental subclass of the NetworkX ``EdgeDataView``.

    The view iterates over the edges, reporting them as node-tuples with
    edge data optionally included.

    When a specific attribute is requested for the whole graph (no
    ``nbunch`` restriction), the data is filtered server-side with the
    ArangoDB Query Language (AQL) instead of in Python. Every other
    combination falls back to the stock NetworkX iteration.
    """

    def __iter__(self):
        ######################
        # NOTE: Monkey Patch #
        ######################

        # The server-side path only applies when no node subset was
        # given and ``data`` names an attribute, i.e. it compares unequal
        # to each of None / True / False.
        attribute_requested = self._nbunch is None and self._data not in (
            None,
            True,
            False,
        )

        if not attribute_requested:
            yield from super().__iter__()
            return

        # Reason: We can utilize AQL to filter the data we want to
        # return, instead of filtering it in Python. This is hacky for
        # now, but it's meant to show that the data can be filtered
        # server-side. It relies on self._adjdict, the AdjListOuterDict
        # object whose custom items() method can filter data with AQL.
        yield from self._adjdict.items(data=self._data, default=self._default)




# [docs]
class ArangoEdgeView(nx.classes.reportviews.EdgeView):
    """Experimental subclass of the NetworkX ``EdgeView``.

    ``__len__`` is overridden so that the number of edges is obtained
    from the database rather than by walking the adjacency structure
    in Python.
    """

    # Data views created from this edge view use the Arango-aware class.
    dataview = ArangoEdgeDataView

    def __len__(self):
        """Return the total document count of the graph's edge collections."""
        ######################
        # NOTE: Monkey Patch #
        ######################

        # Reason: ask the database for the size of every edge collection
        # instead of making individual requests per node, i.e. avoid
        # having to do `n in nbrs` for each node.
        #
        # The upstream NetworkX implementation was:
        #     num_nbrs = (len(nbrs) + (n in nbrs) for n, nbrs in self._nodes_nbrs())
        #     return sum(num_nbrs) // 2
        graph: nxadb.Graph = self._graph

        edge_total = 0
        for definition in graph.adb_graph.edge_definitions():
            collection_name = definition["edge_collection"]
            edge_total = edge_total + graph.db.collection(collection_name).count()
        return edge_total