import torch
import torch.nn as nn
import torch.nn.functional as F
import math

from model.TEDDCF.ISTF import SelfAttentionLayer


class GLU(nn.Module):
    """Gated linear unit over the channel dimension, built from 1x1 convolutions."""

    def __init__(self, features, dropout=0.1):  # PEMS08: features = 192
        super(GLU, self).__init__()
        self.conv1 = nn.Conv2d(features, features, (1, 1))
        self.conv2 = nn.Conv2d(features, features, (1, 1))
        self.conv3 = nn.Conv2d(features, features, (1, 1))
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # x: [B, C, N, T]
        x1 = self.conv1(x)
        x2 = self.conv2(x)
        out = x1 * torch.sigmoid(x2)  # gating
        out = self.dropout(out)
        out = self.conv3(out)
        return out  # [64, 192, 170, 12]
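

# Usage sketch for GLU (illustrative; the 64/170/12 sizes follow the PEMS08-style
# comments in this file and are assumptions, not requirements):
#   glu = GLU(features=192)
#   y = glu(torch.randn(64, 192, 170, 12))  # -> [64, 192, 170, 12]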


class TemporalEmbedding(nn.Module):
    """Time-of-day and day-of-week embedding tables (PEMS08: time = 288, features = 96)."""

    def __init__(self, time, features):
        super(TemporalEmbedding, self).__init__()
        self.time = time
        self.time_day = nn.Parameter(torch.empty(time, features))  # [288, 96]
        nn.init.xavier_uniform_(self.time_day)

        self.time_week = nn.Parameter(torch.empty(7, features))  # [7, 96]
        nn.init.xavier_uniform_(self.time_week)

    def forward(self, x):
        # x: [64, 12, 170, 3]; channel 1 holds the normalized time-of-day,
        # channel 2 holds the day-of-week index.
        day_emb = x[..., 1]
        # .long() keeps the indices on the same device as the embedding tables.
        time_day = self.time_day[(day_emb * self.time).long()]
        time_day = time_day.transpose(1, 2).contiguous()

        week_emb = x[..., 2]
        time_week = self.time_week[week_emb.long()]  # [64, 12, 170, 96]
        time_week = time_week.transpose(1, 2).contiguous()  # [64, 170, 12, 96]

        tem_emb = time_day + time_week  # [64, 170, 12, 96]
        tem_emb = tem_emb.permute(0, 3, 1, 2)  # [64, 96, 170, 12]

        return tem_emb
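

# Usage sketch for TemporalEmbedding (illustrative; 288 steps per day and the
# [64, 12, 170, 3] input follow the comments above and are assumptions):
#   temb = TemporalEmbedding(time=288, features=96)
#   e = temb(x)  # x: [B, T, N, 3], time-of-day in channel 1, weekday in channel 2 -> [B, 96, N, T]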


class Diffusion_GCN(nn.Module):
    """Graph convolution that diffuses features over a batched or static adjacency."""

    def __init__(self, channels=128, diffusion_step=1, dropout=0.1):
        super().__init__()
        self.diffusion_step = diffusion_step  # 1
        self.conv = nn.Conv2d(diffusion_step * channels, channels, (1, 1))  # [192, 192, (1, 1)]
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, adj):
        # x: [B, C, N, T]; adj: [B, N, N] (dynamic) or [N, N] (static)
        out = []
        for _ in range(self.diffusion_step):
            if adj.dim() == 3:
                x = torch.einsum("bcnt,bnm->bcmt", x, adj).contiguous()
                out.append(x)
            elif adj.dim() == 2:
                x = torch.einsum("bcnt,nm->bcmt", x, adj).contiguous()
                out.append(x)
        x = torch.cat(out, dim=1)  # [B, diffusion_step * C, N, T]
        x = self.conv(x)
        output = self.dropout(x)
        return output
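

# Usage sketch for Diffusion_GCN (illustrative sizes, assuming 170 nodes and 192 channels):
#   gcn = Diffusion_GCN(channels=192, diffusion_step=1)
#   y = gcn(torch.randn(64, 192, 170, 12), torch.rand(64, 170, 170))  # -> [64, 192, 170, 12]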


class EventGraph_Fusion(nn.Module):
    """Builds a dynamic adjacency by fusing a memory-based affinity with a data-driven affinity, then keeps the top-k entries per row."""

    def __init__(self, channels=128, num_nodes=170, diffusion_step=1, dropout=0.1):
        super().__init__()
        self.memory = nn.Parameter(torch.randn(channels, num_nodes))
        nn.init.xavier_uniform_(self.memory)
        self.fc = nn.Linear(2, 1)

    def forward(self, x):
        # x: [B, C, N, T]
        # Affinity between node features and the learned memory.
        adj_dyn_1 = torch.softmax(
            F.relu(
                torch.einsum("bcnt, cm->bnm", x, self.memory).contiguous()
                / math.sqrt(x.shape[1])
            ),
            -1,
        )
        # Affinity between time-aggregated node features.
        adj_dyn_2 = torch.softmax(
            F.relu(
                torch.einsum("bcn, bcm->bnm", x.sum(-1), x.sum(-1)).contiguous()
                / math.sqrt(x.shape[1])
            ),
            -1,
        )
        adj_f = torch.cat([adj_dyn_1.unsqueeze(-1), adj_dyn_2.unsqueeze(-1)], dim=-1)  # [B, N, N, 2]

        # squeeze(-1) keeps the batch dimension even when B == 1.
        adj_f = torch.softmax(self.fc(adj_f).squeeze(-1), -1)

        # Keep only the 80% largest entries per row.
        topk_values, topk_indices = torch.topk(adj_f, k=int(adj_f.shape[1] * 0.8), dim=-1)
        mask = torch.zeros_like(adj_f)
        mask.scatter_(-1, topk_indices, 1)
        adj_f = adj_f * mask

        return adj_f
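

# Usage sketch for EventGraph_Fusion (illustrative; 170 nodes / 192 channels assumed):
#   gen = EventGraph_Fusion(channels=192, num_nodes=170)
#   adj = gen(torch.randn(64, 192, 170, 12))  # -> [64, 170, 170], 80% of entries kept per row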


class EventGCN(nn.Module):
    """Dynamic-graph block: 1x1 conv -> EventGraph_Fusion -> Diffusion_GCN, with the output scaled by a learned embedding and added to a skip connection."""

    def __init__(self, channels=128, num_nodes=170, diffusion_step=1, dropout=0.1, emb=None):
        super().__init__()
        self.conv = nn.Conv2d(channels, channels, (1, 1))
        self.generator = EventGraph_Fusion(channels, num_nodes, diffusion_step, dropout)
        self.gcn = Diffusion_GCN(channels, diffusion_step, dropout)
        self.emb = emb

    def forward(self, x):
        skip = x
        x = self.conv(x)
        adj_dyn = self.generator(x)
        x = self.gcn(x, adj_dyn)
        x = x * self.emb + skip
        return x


class TrendGCN(nn.Module):
    """Same block structure as EventGCN, but using TrendGraph_Fusion to build the dynamic graph."""

    def __init__(self, channels=128, num_nodes=170, diffusion_step=1, dropout=0.1, emb=None):
        super().__init__()
        self.conv = nn.Conv2d(channels, channels, (1, 1))
        self.generator = TrendGraph_Fusion(channels, num_nodes, diffusion_step, dropout)
        self.gcn = Diffusion_GCN(channels, diffusion_step, dropout)
        self.emb = emb

    def forward(self, x):
        skip = x
        x = self.conv(x)
        adj_dyn = self.generator(x)
        x = self.gcn(x, adj_dyn)
        x = x * self.emb + skip
        return x


class TrendGraph_Fusion(nn.Module):
    """Builds a dynamic adjacency by fusing a data-driven affinity with an adaptive node-embedding graph, then keeps the top-k entries per row."""

    def __init__(self, channels=128, num_nodes=170, diffusion_step=1, dropout=0.1):
        super().__init__()
        self.memory = nn.Parameter(torch.randn(channels, num_nodes))
        nn.init.xavier_uniform_(self.memory)
        self.fc = nn.Linear(2, 1)
        self.E_adaptive = nn.Parameter(torch.randn(num_nodes, 10))

    def forward(self, x):
        # x: [B, C, N, T]
        # adj_dyn_1 = torch.softmax(
        #     F.relu(
        #         torch.einsum("bcnt, cm->bnm", x, self.memory).contiguous()
        #         / math.sqrt(x.shape[1])
        #     ),
        #     -1,
        # )

        # Affinity between time-aggregated node features.
        adj_dyn_2 = torch.softmax(
            F.relu(
                torch.einsum("bcn, bcm->bnm", x.sum(-1), x.sum(-1)).contiguous()
                / math.sqrt(x.shape[1])
            ),
            -1,
        )
        # Adaptive adjacency from the learned node embedding, shared across the batch.
        adj_adp = F.softmax(F.relu(torch.mm(self.E_adaptive, self.E_adaptive.transpose(0, 1))), dim=1)
        adj_adp = adj_adp.unsqueeze(0).repeat(x.shape[0], 1, 1)

        adj_f = torch.cat([adj_dyn_2.unsqueeze(-1), adj_adp.unsqueeze(-1)], dim=-1)  # [B, N, N, 2]

        # squeeze(-1) keeps the batch dimension even when B == 1.
        adj_f = torch.softmax(self.fc(adj_f).squeeze(-1), -1)

        # Keep only the 80% largest entries per row.
        topk_values, topk_indices = torch.topk(adj_f, k=int(adj_f.shape[1] * 0.8), dim=-1)
        mask = torch.zeros_like(adj_f)
        mask.scatter_(-1, topk_indices, 1)
        adj_f = adj_f * mask

        return adj_f


class Chomp1d(nn.Module):
    """
    Remove the extra time steps introduced by causal padding.
    """

    def __init__(self, chomp_size):
        super(Chomp1d, self).__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        return x[:, :, :, :-self.chomp_size].contiguous()
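

# Example: a (1, 2) convolution with padding (0, 1) turns [B, C, N, T] into [B, C, N, T + 1];
# Chomp1d(1) trims the right side back to [B, C, N, T], keeping the convolution causal.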


class TemporalConvNet(nn.Module):
    """Stack of dilated causal convolutions over the time dimension (TCN-style)."""

    def __init__(self, features, kernel_size=2, dropout=0.2, levels=1):
        super(TemporalConvNet, self).__init__()
        layers = []
        for i in range(levels):
            dilation_size = 2 ** i
            padding = (kernel_size - 1) * dilation_size
            self.conv = nn.Conv2d(features, features, (1, kernel_size), dilation=(1, dilation_size),
                                  padding=(0, padding))
            self.chomp = Chomp1d(padding)  # drop the right-side steps added by padding
            self.relu = nn.ReLU()
            self.dropout = nn.Dropout(dropout)

            layers += [nn.Sequential(self.conv, self.chomp, self.relu, self.dropout)]
        self.tcn = nn.Sequential(*layers)

    def forward(self, xh):
        xh = self.tcn(xh)
        return xh
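

# Usage sketch for TemporalConvNet (illustrative; 192 channels as configured in TEDDCF below):
#   tcn = TemporalConvNet(features=192)
#   y = tcn(torch.randn(64, 192, 170, 12))  # causal along the last (time) axis -> [64, 192, 170, 12]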


class FeedForward(nn.Module):
    """MLP defined by a list of layer widths, with an optional residual connection and LayerNorm."""

    def __init__(self, fea, res_ln=False):
        super(FeedForward, self).__init__()
        self.res_ln = res_ln
        self.L = len(fea) - 1  # number of linear layers
        self.linear = nn.ModuleList([nn.Linear(fea[i], fea[i + 1]) for i in range(self.L)])
        self.ln = nn.LayerNorm(fea[self.L], elementwise_affine=False)

    def forward(self, inputs):
        x = inputs
        for i in range(self.L):
            x = self.linear[i](x)
            if i != self.L - 1:
                x = F.relu(x)

        if self.res_ln:
            x += inputs
            x = self.ln(x)
        return x
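

# Example: FeedForward([192, 192, 192], res_ln=True) maps [..., 192] -> [..., 192]
# through two linear layers (ReLU after the first), then adds the input and applies LayerNorm.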


class Adaptive_Fusion(nn.Module):
    """Multi-head cross-attention in which xl queries attend to keys and values from xh."""

    def __init__(self, heads, dims):
        super(Adaptive_Fusion, self).__init__()
        features = dims  # 192
        self.h = heads  # 8
        self.d = int(dims / heads)  # 192 / 8 = 24

        self.qlfc = FeedForward([features, features])
        self.khfc = FeedForward([features, features])
        self.vhfc = FeedForward([features, features])
        self.ofc = FeedForward([features, features])

        self.ln = nn.LayerNorm(features, elementwise_affine=False)
        self.ff = FeedForward([features, features, features], True)

    def forward(self, xl, xh, Mask=True):
        '''
        xl: [B,T,N,F]
        xh: [B,T,N,F]
        return: [B,T,N,F]
        '''
        query = self.qlfc(xl)  # [B,T,N,F]
        keyh = torch.relu(self.khfc(xh))  # [B,T,N,F]
        valueh = torch.relu(self.vhfc(xh))  # [B,T,N,F]

        # Split the feature dimension into h heads and fold the heads into the batch dimension.
        query = torch.cat(torch.split(query, self.d, -1), 0).permute(0, 2, 1, 3)  # [h*B,N,T,d]
        keyh = torch.cat(torch.split(keyh, self.d, -1), 0).permute(0, 2, 3, 1)  # [h*B,N,d,T]
        valueh = torch.cat(torch.split(valueh, self.d, -1), 0).permute(0, 2, 1, 3)  # [h*B,N,T,d]

        attentionh = torch.matmul(query, keyh)  # [h*B,N,T,T]

        if Mask:
            # Causal mask: each step may only attend to itself and earlier steps.
            batch_size = xl.shape[0]
            num_steps = xl.shape[1]
            num_vertexs = xl.shape[2]
            mask = torch.ones(num_steps, num_steps).to(xl.device)  # [T,T]
            mask = torch.tril(mask)  # [T,T]
            mask = torch.unsqueeze(torch.unsqueeze(mask, dim=0), dim=0)  # [1,1,T,T]
            mask = mask.repeat(self.h * batch_size, num_vertexs, 1, 1)  # [h*B,N,T,T]
            mask = mask.to(torch.bool)
            # Large negative value so masked positions vanish after the softmax.
            zero_vec = (-2 ** 15 + 1) * torch.ones_like(attentionh).to(xl.device)  # [h*B,N,T,T]
            attentionh = torch.where(mask, attentionh, zero_vec)

        attentionh /= (self.d ** 0.5)  # scaled dot-product
        attentionh = F.softmax(attentionh, -1)  # [h*B,N,T,T]

        value = torch.matmul(attentionh, valueh)  # [h*B,N,T,d]

        # Undo the head split and restore [B,T,N,F].
        value = torch.cat(torch.split(value, value.shape[0] // self.h, 0), -1).permute(0, 2, 1, 3)  # [B,T,N,F]
        value = self.ofc(value)
        value = value + xl

        value = self.ln(value)

        return self.ff(value)  # [B,T,N,F]
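

# Usage sketch for Adaptive_Fusion (illustrative; B=64, T=12, N=170, F=192 assumed):
#   fusion = Adaptive_Fusion(heads=8, dims=192)
#   out = fusion(xl, xh)  # xl, xh: [B, T, N, F] -> out: [B, T, N, F]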


class TEDDCF(nn.Module):
    def __init__(
        self, device, input_dim, num_nodes, channels, granularity, dropout=0.1
    ):
        super().__init__()

        self.device = device
        self.num_nodes = num_nodes
        self.output_len = 12
        self.input_len = 12
        self.heads = 8
        diffusion_step = 1

        self.Temb = TemporalEmbedding(granularity, channels)

        self.start_conv = nn.Conv2d(
            in_channels=input_dim, out_channels=channels, kernel_size=(1, 1)
        )

        self.glu = GLU(channels * 2, dropout)

        self.regression_layer = nn.Conv2d(
            channels * 2, self.output_len, kernel_size=(1, self.output_len)
        )

        self.temporal_conv = TemporalConvNet(channels * 2)
        self.pre_h = nn.Conv2d(in_channels=self.input_len, out_channels=self.output_len, kernel_size=(1, 1))
        self.adp_f = Adaptive_Fusion(self.heads, channels * 2)

        num_layers = 3
        self.attn_layers_t = nn.ModuleList(
            [
                SelfAttentionLayer(channels * 2, feed_forward_dim=256, num_heads=4, dropout=0.1)
                for _ in range(num_layers)
            ]
        )

        self.xh_emb = nn.Parameter(torch.randn(channels * 2, num_nodes, 12))
        self.xh_dgcn = EventGCN(channels * 2, num_nodes, diffusion_step=diffusion_step, dropout=0.1, emb=self.xh_emb)

        self.xl_emb = nn.Parameter(torch.randn(channels * 2, num_nodes, 12))
        self.xl_dgcn = TrendGCN(channels * 2, num_nodes, diffusion_step=diffusion_step, dropout=0.1, emb=self.xl_emb)

    def param_num(self):
        return sum([param.nelement() for param in self.parameters()])

    def forward(self, inputxl, inputxh):
        # inputxl / inputxh: [B, input_dim, N, T]
        xl = inputxl
        xh = inputxh

        # Encoder
        # Data embedding: 1x1 value convolution concatenated with the temporal embeddings.
        time_embl = self.Temb(inputxl.permute(0, 3, 2, 1))  # TemporalEmbedding expects [B, T, N, input_dim]
        time_embh = self.Temb(inputxh.permute(0, 3, 2, 1))
        xl = torch.cat([self.start_conv(xl), time_embl], dim=1)  # [B, 2 * channels, N, T]
        xh = torch.cat([self.start_conv(xh), time_embh], dim=1)

        # xl branch: temporal self-attention -> TrendGCN -> GLU with residual.
        xl = xl.permute(0, 3, 2, 1)
        for attn in self.attn_layers_t:
            xl = attn(xl, dim=1)
        xl = xl.permute(0, 3, 2, 1)

        xl = self.xl_dgcn(xl)
        xl = self.glu(xl) + xl

        # xh branch: causal temporal convolution -> EventGCN.
        xh = self.temporal_conv(xh)
        xh = self.xh_dgcn(xh)

        # Simple additive fusion.
        x_all = xh + xl

        # STwave fusion (alternative):
        # xl = xl.transpose(1, 3)
        # xh = self.pre_h(xh.transpose(1, 3))  # [64, 12, 170, 192]
        # x_all = self.adp_f(xl, xh)
        # x_all = x_all.transpose(1, 3)

        prediction = self.regression_layer(F.relu(x_all))

        return prediction
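

if __name__ == "__main__":
    # Minimal smoke test (a sketch: the sizes below follow the PEMS08-style comments
    # in this file and are assumptions, not requirements of the model).
    device = torch.device("cpu")
    model = TEDDCF(device, input_dim=3, num_nodes=170, channels=96, granularity=288)
    # Inputs are [B, input_dim, N, T]: channel 0 carries the signal, channel 1 the
    # normalized time-of-day in [0, 1), channel 2 the day-of-week index.
    xl = torch.rand(2, 3, 170, 12)
    xh = torch.rand(2, 3, 170, 12)
    xl[:, 2] = torch.randint(0, 7, (2, 170, 12)).float()
    xh[:, 2] = xl[:, 2]
    print(model.param_num())
    print(model(xl, xh).shape)  # expected: torch.Size([2, 12, 170, 1])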