Source code for pygod.detector.adone

# -*- coding: utf-8 -*-
"""Adversarial Outlier Aware Attributed Network Embedding (AdONE)"""
# Author: Kay Liu <zliu234@uic.edu>
# License: BSD 2 clause

import torch
import warnings

from . import DeepDetector
from ..nn import AdONEBase


[docs] class AdONE(DeepDetector): """ Adversarial Outlier Aware Attributed Network Embedding AdONE consists of an attribute autoencoder and a structure autoencoder. It estimates five loss to optimize the model, including an attribute proximity loss, an attribute homophily loss, a structure proximity loss, a structure homophily loss, and an alignment loss. It calculates three outlier scores, and averages them as an overall score. .. note:: This detector is transductive only. Using ``predict`` with unseen data will train the detector from scratch. See :cite:`bandyopadhyay2020outlier` for details. Parameters ---------- hid_dim : int, optional Hidden dimension of model. Default: ``64``. num_layers : int, optional Total number of layers in model. A half (floor) of the layers are for the encoder, the other half (ceil) of the layers are for decoders. Default: ``4``. dropout : float, optional Dropout rate. Default: ``0.``. weight_decay : float, optional Weight decay (L2 penalty). Default: ``0.``. act : callable activation function or None, optional Activation function if not None. Default: ``torch.nn.functional.relu``. backbone : torch.nn.Module The backbone of AdONE is fixed to be MLP. Changing of this parameter will not affect the model. Default: ``None``. w1 : float, optional Weight of structure proximity loss. Default: ``0.2``. w2 : float, optional Weight of structure homophily loss. Default: ``0.2``. w3 : float, optional Weight of attribute proximity loss. Default: ``0.2``. w4 : float, optional Weight of attribute homophily loss. Default: ``0.2``. w5 : float, optional Weight of alignment loss. Default: ``0.2``. contamination : float, optional The amount of contamination of the dataset in (0., 0.5], i.e., the proportion of outliers in the dataset. Used when fitting to define the threshold on the decision function. Default: ``0.1``. lr : float, optional Learning rate. Default: ``0.004``. epoch : int, optional Maximum number of training epoch. Default: ``100``. gpu : int GPU Index, -1 for using CPU. Default: ``-1``. batch_size : int, optional Minibatch size, 0 for full batch training. Default: ``0``. num_neigh : int, optional Number of neighbors in sampling, -1 for all neighbors. Default: ``-1``. verbose : int, optional Verbosity mode. Range in [0, 3]. Larger value for printing out more log information. Default: ``0``. save_emb : bool, optional Whether to save the embedding. Default: ``False``. compile_model : bool, optional Whether to compile the model with ``torch_geometric.compile``. Default: ``False``. **kwargs Other parameters for the backbone model. Attributes ---------- decision_score_ : torch.Tensor The outlier scores of the training data. Outliers tend to have higher scores. This value is available once the detector is fitted. threshold_ : float The threshold is based on ``contamination``. It is the :math:`N \\times` ``contamination`` most abnormal samples in ``decision_score_``. The threshold is calculated for generating binary outlier labels. label_ : torch.Tensor The binary labels of the training data. 0 stands for inliers and 1 for outliers. It is generated by applying ``threshold_`` on ``decision_score_``. emb : torch.Tensor or tuple of torch.Tensor or None The learned node hidden embeddings of shape :math:`N \\times` ``hid_dim``. Only available when ``save_emb`` is ``True``. When the detector has not been fitted, ``emb`` is ``None``. When the detector has multiple embeddings, ``emb`` is a tuple of torch.Tensor. attribute_score_ : torch.Tensor Attribute outlier score. structural_score_ : torch.Tensor Structural outlier score. combined_score_ : torch.Tensor Combined outlier score. """ def __init__(self, hid_dim=64, num_layers=4, dropout=0., weight_decay=0., act=torch.nn.functional.relu, backbone=None, w1=0.2, w2=0.2, w3=0.2, w4=0.2, w5=0.2, contamination=0.1, lr=4e-3, epoch=100, gpu=-1, batch_size=0, num_neigh=-1, save_emb=False, compile_model=False, verbose=0, **kwargs): if backbone is not None: warnings.warn("Backbone is not used in AdONE.") super(AdONE, self).__init__(hid_dim=hid_dim, num_layers=1, dropout=dropout, weight_decay=weight_decay, act=act, contamination=contamination, lr=lr, epoch=epoch, gpu=gpu, batch_size=batch_size, num_neigh=num_neigh, verbose=verbose, gan=True, save_emb=save_emb, compile_model=compile_model, **kwargs) self.w1 = w1 self.w2 = w2 self.w3 = w3 self.w4 = w4 self.w5 = w5 self.num_layers = num_layers self.attribute_score_ = None self.structural_score_ = None self.combined_score_ = None def process_graph(self, data): AdONEBase.process_graph(data) def init_model(self, **kwargs): self.attribute_score_ = torch.zeros(self.num_nodes) self.structural_score_ = torch.zeros(self.num_nodes) self.combined_score_ = torch.zeros(self.num_nodes) if self.save_emb: self.emb = (torch.zeros(self.num_nodes, self.hid_dim), torch.zeros(self.num_nodes, self.hid_dim)) return AdONEBase(x_dim=self.in_dim, s_dim=self.num_nodes, hid_dim=self.hid_dim, num_layers=self.num_layers, dropout=self.dropout, act=self.act, w1=self.w1, w2=self.w2, w3=self.w3, w4=self.w4, w5=self.w5, **kwargs).to(self.device) def forward_model(self, data): batch_size = data.batch_size node_idx = data.n_id x = data.x.to(self.device) s = data.s.to(self.device) edge_index = data.edge_index.to(self.device) x_, s_, h_a, h_s, dna, dns = self.model(x, s, edge_index) loss_d = self.model.loss_func_d(h_a[:batch_size].detach(), h_s[:batch_size].detach()) self.opt_in.zero_grad() loss_d.backward() self.opt_in.step() self.epoch_loss_in += loss_d.item() * batch_size loss_g, oa, os, oc = self.model.loss_func_g(x[:batch_size], x_[:batch_size], s[:batch_size], s_[:batch_size], h_a[:batch_size], h_s[:batch_size], dna[:batch_size], dns[:batch_size]) self.attribute_score_[node_idx[:batch_size]] = oa.detach().cpu() self.structural_score_[node_idx[:batch_size]] = os.detach().cpu() self.combined_score_[node_idx[:batch_size]] = oc.detach().cpu() return loss_g, ((oa + os + oc) / 3).detach().cpu() def decision_function(self, data, label=None): if data is not None: warnings.warn("This detector is transductive only. " "Training from scratch with the input data.") self.fit(data, label) return self.decision_score_