
# -*- coding: utf-8 -*-
"""Outlier Aware Network Embedding for Attributed Networks (ONE)
"""
# Author: Xiyang Hu <xiyanghu@cmu.edu>, Kay Liu <zliu234@uic.edu>
# License: BSD 2 clause

import time
import warnings

import torch
from torch_geometric.utils import to_dense_adj

from . import Detector
from ..utils import logger, validate_device


class ONE(Detector):
    """
    Outlier Aware Network Embedding for Attributed Networks

    .. note::
        This detector is transductive only. Using ``predict`` with
        unseen data will train the detector from scratch.

    See :cite:`bandyopadhyay2019outlier` for details. A minimal usage
    sketch follows the class definition below.

    Parameters
    ----------
    hid_a : int, optional
        Hidden dimension for the attribute embedding. Default: ``36``.
    hid_s : int, optional
        Hidden dimension for the structure embedding. Default: ``36``.
    alpha : float, optional
        Weight for the attribute loss. Default: ``1.``.
    beta : float, optional
        Weight for the structural loss. Default: ``1.``.
    gamma : float, optional
        Weight for the combined loss. Default: ``1.``.
    weight_decay : float, optional
        Weight decay (L2 penalty). Default: ``0.``.
    contamination : float, optional
        The proportion of outliers in the data set, valid in (0., 0.5).
        Used when fitting to define the threshold on the decision
        function. Default: ``0.1``.
    lr : float, optional
        Learning rate. Default: ``0.004``.
    epoch : int, optional
        Maximum number of training epochs. Default: ``5``.
    gpu : int
        GPU index, -1 for using CPU. Default: ``-1``.
    verbose : int, optional
        Verbosity mode. Range in [0, 3]. Larger value for printing out
        more log information. Default: ``0``.
    """

    def __init__(self,
                 hid_a=36,
                 hid_s=36,
                 alpha=1.,
                 beta=1.,
                 gamma=1.,
                 weight_decay=0.,
                 contamination=0.1,
                 lr=0.004,
                 epoch=5,
                 gpu=-1,
                 verbose=0):
        super(ONE, self).__init__(contamination=contamination)

        self.hid_a = hid_a
        self.hid_s = hid_s
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.weight_decay = weight_decay
        self.lr = lr
        self.epoch = epoch
        self.device = validate_device(gpu)
        self.verbose = verbose

        self.attribute_score_ = None
        self.structural_score_ = None
        self.combined_score_ = None
        self.model = None
    def fit(self, data, label=None):
        self.process_graph(data)
        num_nodes, in_dim = data.x.shape
        x = data.x
        s = data.s

        # Randomly initialize the embedding matrices optimized by ONE.
        w = torch.randn(self.hid_a, self.hid_s)
        u = torch.randn(num_nodes, self.hid_a)
        v = torch.randn(self.hid_a, in_dim)
        g = torch.randn(num_nodes, self.hid_s)
        h = torch.randn(self.hid_s, num_nodes)

        self.model = ONEBase(g, h, u, v, w,
                             self.alpha,
                             self.beta,
                             self.gamma)

        optimizer = torch.optim.Adam(self.model.parameters(),
                                     lr=self.lr,
                                     weight_decay=self.weight_decay)

        for epoch in range(self.epoch):
            start_time = time.time()
            x_, s_, diff = self.model()
            loss, o1, o2, o3 = self.model.loss_func(x, x_, s, s_, diff)

            # Per-node outlier scores from the three loss components.
            self.attribute_score_ = o1.detach().cpu()
            self.structural_score_ = o2.detach().cpu()
            self.combined_score_ = o3.detach().cpu()
            self.decision_score_ = ((o1 + o2 + o3) / 3).detach().cpu()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            logger(epoch=epoch,
                   loss=loss.item(),
                   score=self.decision_score_,
                   target=label,
                   time=time.time() - start_time,
                   verbose=self.verbose,
                   train=True)

        self._process_decision_score()
        return self
    def decision_function(self, data, label=None):
        if data is not None:
            warnings.warn("This detector is transductive only. "
                          "Training from scratch with the input data.")
            self.fit(data, label)
        return self.decision_score_

    def process_graph(self, data):
        data.s = to_dense_adj(data.edge_index)[0]
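
# A minimal usage sketch (illustrative only): ``data`` is assumed to be a
# ``torch_geometric.data.Data`` object with node features ``data.x`` and an
# ``edge_index``; the variable names below are not part of this module.
#
#     detector = ONE(hid_a=36, hid_s=36, epoch=5)
#     detector.fit(data)
#     scores = detector.decision_score_        # averaged per-node outlier score
#     attr_scores = detector.attribute_score_  # attribute-only outlier score
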
class ONEBase(torch.nn.Module):
    def __init__(self, g, h, u, v, w, alpha=1., beta=1., gamma=1.):
        super(ONEBase, self).__init__()

        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.g = torch.nn.Parameter(g)
        self.h = torch.nn.Parameter(h)
        self.u = torch.nn.Parameter(u)
        self.v = torch.nn.Parameter(v)
        self.w = torch.nn.Parameter(w)

    def forward(self):
        x_ = self.u @ self.v              # attribute reconstruction
        s_ = self.g @ self.h              # structure reconstruction
        diff = self.g - self.u @ self.w   # disagreement between embeddings
        return x_, s_, diff

    def loss_func(self, x, x_, s, s_, diff):
        # Attribute outlier score: per-node attribute reconstruction error,
        # normalized to sum to one over all nodes.
        dx = torch.sum(torch.pow(x - x_, 2), 1)
        o1 = dx / torch.sum(dx)
        loss_a = torch.mean(torch.log(torch.pow(o1, -1)) * dx)

        # Structural outlier score: per-node adjacency reconstruction error.
        ds = torch.sum(torch.pow(s - s_, 2), 1)
        o2 = ds / torch.sum(ds)
        loss_s = torch.mean(torch.log(torch.pow(o2, -1)) * ds)

        # Combined outlier score: disagreement between the structure and
        # attribute embeddings. In each term, the log(1 / o) factor
        # down-weights the reconstruction error of likely outliers.
        dc = torch.sum(torch.pow(diff, 2), 1)
        o3 = dc / torch.sum(dc)
        loss_c = torch.mean(torch.log(torch.pow(o3, -1)) * dc)

        loss = self.alpha * loss_a + self.beta * loss_s + self.gamma * loss_c

        return loss, o1, o2, o3
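

if __name__ == "__main__":
    # Minimal smoke-test sketch, not part of the ONE algorithm itself: the
    # synthetic graph below (node count, feature dimension, edge layout) is an
    # illustrative assumption. Run as ``python -m pygod.detector.one`` so the
    # relative imports resolve.
    from torch_geometric.data import Data

    num_nodes, in_dim = 50, 16
    x = torch.randn(num_nodes, in_dim)

    # A ring plus random edges, so every node appears in edge_index and the
    # dense adjacency built by process_graph has shape (num_nodes, num_nodes).
    ring = torch.stack([torch.arange(num_nodes),
                        (torch.arange(num_nodes) + 1) % num_nodes])
    edge_index = torch.cat([ring, torch.randint(0, num_nodes, (2, 150))], dim=1)
    data = Data(x=x, edge_index=edge_index)

    detector = ONE(epoch=5, verbose=1)
    detector.fit(data)

    # Per-node outlier scores produced during fitting.
    print(detector.decision_score_[:10])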