# Source code for pygod.nn.dmgd

import math
import warnings

import torch
from torch import nn
import torch.nn.functional as F
from torch_geometric.nn import MLP
from torch_geometric.utils import to_dense_adj
from sklearn.cluster import KMeans

from .conv import NeighDiff


class DMGDBase(nn.Module):
    """
    Deep Multiclass Graph Description

    DMGD is a support vector based multiclass outlier detector. Its
    backbone is an autoencoder that reconstructs the adjacency matrix
    of the graph with MSE loss and homophily loss. It applies k-means
    to cluster the node embeddings and then uses support vectors to
    detect outliers. See :cite:`bandyopadhyay2020integrating` for
    details.

    Parameters
    ----------
    in_dim : int
        Input dimension.
    hid_dim : int, optional
        Hidden dimension of model. Default: ``64``.
    num_layers : int, optional
        Total number of layers in model. A half (floor) of the layers
        are for the encoder, the other half (ceil) of the layers are
        for the decoder. Default: ``2``.
    dropout : float, optional
        Dropout rate. Default: ``0.``.
    act : callable activation function or None, optional
        Activation function if not None.
        Default: ``torch.nn.functional.relu``.
    backbone : torch.nn.Module, optional
        The backbone of the deep detector implemented in PyG.
        Default: ``torch_geometric.nn.MLP``.
    alpha : float, optional
        Weight of the radius loss. Default: ``1``.
    beta : float, optional
        Weight of the reconstruction loss. Default: ``1``.
    gamma : float, optional
        Weight of the homophily loss. Default: ``1``.
    warmup : int, optional
        Number of leading calls to ``loss_func`` that skip clustering
        and scoring (reconstruction and homophily losses only).
        Default: ``2``.
    k : int, optional
        The number of clusters. Default: ``2``.
    **kwargs
        Other parameters for the backbone.
    """

    def __init__(self,
                 in_dim,
                 hid_dim=64,
                 num_layers=2,
                 dropout=0.,
                 act=torch.nn.functional.relu,
                 backbone=MLP,
                 alpha=1,
                 beta=1,
                 gamma=1,
                 warmup=2,
                 k=2,
                 **kwargs):
        super(DMGDBase, self).__init__()

        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.backbone = backbone
        self.warmup = warmup
        self.k = k

        # split the number of layers between the encoder and decoder
        assert num_layers >= 2, \
            "Number of layers must be greater than or equal to 2."
        encoder_layers = math.floor(num_layers / 2)
        decoder_layers = math.ceil(num_layers / 2)

        self.encoder = self.backbone(in_channels=in_dim,
                                     hidden_channels=hid_dim,
                                     out_channels=hid_dim,
                                     num_layers=encoder_layers,
                                     dropout=dropout,
                                     act=act,
                                     **kwargs)
        self.decoder = self.backbone(in_channels=hid_dim,
                                     hidden_channels=hid_dim,
                                     out_channels=in_dim,
                                     num_layers=decoder_layers,
                                     dropout=dropout,
                                     act=act,
                                     **kwargs)
        self.neigh_diff = NeighDiff()

        # clustering state, populated lazily in loss_func after warmup
        self.emb = None
        self.clustered = False
        self.cluster = None
        self.centers = None
        # learnable per-cluster radii used by the support-vector score
        self.r = torch.nn.Parameter(torch.zeros(self.k))

    def forward(self, x, edge_index):
        """
        Forward computation.

        Parameters
        ----------
        x : torch.Tensor
            Input attribute embeddings.
        edge_index : torch.Tensor
            Edge index.

        Returns
        -------
        x_ : torch.Tensor
            Reconstructed attribute embeddings.
        nd : torch.Tensor
            Neighbor distance.
        emb : torch.Tensor
            Node embeddings from the encoder.
        """
        # MLP is not message-passing, so it takes no edge_index
        if self.backbone == MLP:
            self.emb = self.encoder(x, None)
            x_ = self.decoder(self.emb, None)
        else:
            self.emb = self.encoder(x, edge_index)
            x_ = self.decoder(self.emb, edge_index)
        nd = self.neigh_diff(self.emb, edge_index).squeeze()
        return x_, nd, self.emb

    def loss_func(self, x, x_, nd, emb):
        """
        Loss function for DMGD.

        Parameters
        ----------
        x : torch.Tensor
            Input attribute embeddings.
        x_ : torch.Tensor
            Reconstructed attribute embeddings.
        nd : torch.Tensor
            Neighbor distance.
        emb : torch.Tensor
            Embeddings.

        Returns
        -------
        loss : torch.Tensor
            Loss value.
        score : torch.Tensor
            Outlier score per node (zeros during warmup).
        """
        # reconstruction loss + homophily loss
        dx = torch.sum(torch.pow(x - x_, 2))
        loss = self.beta * dx + self.gamma * nd.sum()

        if self.warmup > 0:
            # no clustering yet; every node scores zero
            self.warmup -= 1
            # keep the score on the same device as the inputs
            score = torch.zeros(x.shape[0], device=x.device)
        else:
            if not self.clustered:
                # initialize clusters once with k-means;
                # sklearn requires CPU numpy input
                self.clustered = True
                kmeans = KMeans(n_clusters=self.k, n_init='auto') \
                    .fit(emb.detach().cpu().numpy())
                self.cluster = torch.tensor(kmeans.labels_,
                                            dtype=torch.long,
                                            device=emb.device)
                self.centers = torch.tensor(kmeans.cluster_centers_,
                                            dtype=emb.dtype,
                                            device=emb.device)
            else:
                # reassign nodes to nearest center, then recompute
                # centers as per-cluster means
                distances = torch.cdist(emb, self.centers, p=2)
                self.cluster = torch.argmin(distances, dim=1)
                one_hot = F.one_hot(self.cluster,
                                    num_classes=self.k).float()
                sums = torch.matmul(one_hot.T, emb)
                counts = one_hot.sum(dim=0).view(self.k, 1)
                # avoid division by zero for empty clusters
                counts = counts + (counts == 0).type(torch.float32)
                self.centers = sums / counts

            # radius regularization
            loss += torch.pow(torch.relu(self.r), 2).sum()
            # distance outside the assigned cluster's radius
            score = torch.relu(
                torch.sum(torch.pow(emb - self.centers[self.cluster], 2), 1)
                - torch.pow(self.r[self.cluster], 2))
            loss += self.alpha * torch.sum(score)

        return loss, score

    @staticmethod
    def process_graph(data):
        """
        Obtain the dense adjacency matrix of the graph.

        Parameters
        ----------
        data : torch_geometric.data.Data
            Input graph.
        """
        if data.x is not None:
            warnings.warn('DMGD overwrites x with adjacency matrix.')
        data.x = to_dense_adj(data.edge_index)[0]