
python - Attention masks on time series data with keras functional API - Stack Overflow


I'm currently trying to study the effects of masking attention in a transformer model trained to classify time series data. My model works so far and gives me okay-ish performance, but when I try to mask the attention of all MultiHeadAttention layers in my model, the performance stays the same, which is not what I expected.
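
As far as I understand, keras.layers.MultiHeadAttention takes a boolean attention_mask of shape (batch, target_seq_len, source_seq_len), where True means a position may be attended to and False blocks it. This is the kind of mask I mean (sizes here are made up just for illustration):

import tensorflow as tf

# Illustration only: boolean masks in the shape MultiHeadAttention expects,
# i.e. (batch, target_seq_len, source_seq_len); True = may attend, False = blocked
batch_size, sequence_size = 4, 16  # hypothetical sizes

attend_everywhere = tf.ones((batch_size, sequence_size, sequence_size), dtype=bool)
attend_nowhere = tf.zeros((batch_size, sequence_size, sequence_size), dtype=bool)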

My model (based on a Keras tutorial):

import numpy as np
import tensorflow as tf
import keras
from keras import layers
from keras_nlp.layers import SinePositionEncoding  # assumption: the positional encoding comes from keras_nlp

SEED = 42  # placeholder for the global seed constant used below

def build(params: dict, input_shape: tuple) -> keras.Model:
    
    #input_dim = 1
    sequence_size = params["sequence_size"]
    n_classes = params["n_classes"]
    encoder_blocks = params["encoder_blocks"]
    n_heads = params["encoder_heads"]
    encoder_mlp = params["mlp_dim"]
    conv_filters = params["conv_filters"]
    encoder_dropout = params["encoder_dropout"]
    mlp_dropout = params["mlp_dropout"]
    learning_rate = params["learning_rate"]
    
    inputs = keras.Input(shape=input_shape, name="sequence_input")
    mask = keras.Input(shape=(sequence_size, sequence_size), name="mask_input")
    
    x = inputs + SinePositionEncoding()(inputs)

    for _ in range(encoder_blocks):
        x = transformer_encoder(x, head_size=sequence_size, num_heads=n_heads, con_filters=conv_filters, attention_mask=mask, dropout=encoder_dropout, seed=SEED)
        #x, _ = EncoderLayer(d_model=n_heads*5, num_heads= n_heads, dff=conv_filters, rate=encoder_dropout)(x, mask=mask)
    
    x = layers.GlobalAveragePooling1D(data_format="channels_last")(x)
    x = layers.Dense(encoder_mlp, activation="relu")(x)
    x = layers.Dropout(mlp_dropout, seed=SEED)(x)

    outputs = layers.Dense(n_classes, activation="softmax")(x)

    model =  keras.Model(inputs=[inputs, mask], outputs=outputs)

    model.compile(
        loss="categorical_crossentropy",
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        metrics=["categorical_accuracy", "f1_score"],
        run_eagerly=False
    )
    
    return model

with my transformer_encoder:

def transformer_encoder(inputs:np.ndarray, head_size:int, num_heads:int, con_filters:int, attention_mask, dropout=0, seed=42):
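    # note: the mask actually passed to MultiHeadAttention below is a hardcoded
    # all-ones tensor, not the attention_mask argument of this function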

    x, att = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout, seed=seed)(
        inputs, inputs,
        attention_mask=tf.ones((sequence_size, sequence_size), dtype=bool),
        return_attention_scores=True,
        training=True)
    tf.print(att)
    #print(f"output: {x}")
    #x, _ = MultiHeadAttention(d_model=num_heads*5, num_heads=num_heads)(inputs, inputs, inputs, attention_mask)
    print(x)
    x = layers.Dropout(dropout, seed=seed)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    res = x + inputs
    
    x = layers.Conv1D(filters=con_filters, kernel_size=1, activation="relu")(res)
    x = layers.Dropout(dropout, seed=seed)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x)
        
    return x + res

So far I have tried to pass a mask with every input and to mask all of the attention with tf.zeros((sequence_size, sequence_size), dtype=bool). I also tried to change the shape of the masks, but no luck either.
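
To illustrate the kind of check I mean: comparing the attention scores returned by a single MultiHeadAttention layer with a full mask and with a restrictive (here causal) mask should show whether the mask is applied at all. A minimal standalone sketch, with all dimensions made up:

import numpy as np
import tensorflow as tf
import keras

# made-up toy dimensions, only to probe the mask behaviour in isolation
batch, seq_len, features = 2, 8, 4

x = tf.random.normal((batch, seq_len, features))
mha = keras.layers.MultiHeadAttention(num_heads=2, key_dim=4)

# full mask: every position may attend to every other position
full_mask = tf.ones((batch, seq_len, seq_len), dtype=bool)

# causal mask: lower triangular, each position only sees itself and earlier steps
causal = tf.cast(tf.linalg.band_part(tf.ones((seq_len, seq_len)), -1, 0), tf.bool)
causal_mask = tf.tile(causal[tf.newaxis, :, :], [batch, 1, 1])

_, scores_full = mha(x, x, attention_mask=full_mask, return_attention_scores=True)
_, scores_causal = mha(x, x, attention_mask=causal_mask, return_attention_scores=True)

# if the mask is applied, the score tensors (batch, heads, seq, seq) should differ
print(np.allclose(scores_full.numpy(), scores_causal.numpy()))  # expected: False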

Does anybody know an answer?
