.= 'tag.htm'; break; case 'flag': $pre .= $default_pre .= 'flag.htm'; break; case 'my': $pre .= $default_pre .= 'my.htm'; break; case 'my_password': $pre .= $default_pre .= 'my_password.htm'; break; case 'my_bind': $pre .= $default_pre .= 'my_bind.htm'; break; case 'my_avatar': $pre .= $default_pre .= 'my_avatar.htm'; break; case 'home_article': $pre .= $default_pre .= 'home_article.htm'; break; case 'home_comment': $pre .= $default_pre .= 'home_comment.htm'; break; case 'user': $pre .= $default_pre .= 'user.htm'; break; case 'user_login': $pre .= $default_pre .= 'user_login.htm'; break; case 'user_create': $pre .= $default_pre .= 'user_create.htm'; break; case 'user_resetpw': $pre .= $default_pre .= 'user_resetpw.htm'; break; case 'user_resetpw_complete': $pre .= $default_pre .= 'user_resetpw_complete.htm'; break; case 'user_comment': $pre .= $default_pre .= 'user_comment.htm'; break; case 'single_page': $pre .= $default_pre .= 'single_page.htm'; break; case 'search': $pre .= $default_pre .= 'search.htm'; break; case 'operate_sticky': $pre .= $default_pre .= 'operate_sticky.htm'; break; case 'operate_close': $pre .= $default_pre .= 'operate_close.htm'; break; case 'operate_delete': $pre .= $default_pre .= 'operate_delete.htm'; break; case 'operate_move': $pre .= $default_pre .= 'operate_move.htm'; break; case '404': $pre .= $default_pre .= '404.htm'; break; case 'read_404': $pre .= $default_pre .= 'read_404.htm'; break; case 'list_404': $pre .= $default_pre .= 'list_404.htm'; break; default: $pre .= $default_pre .= theme_mode_pre(); break; } if ($config['theme']) { $conffile = APP_PATH . 'view/template/' . $config['theme'] . '/conf.json'; $json = is_file($conffile) ? xn_json_decode(file_get_contents($conffile)) : array(); } !empty($json['installed']) and $path_file = APP_PATH . 'view/template/' . $config['theme'] . '/htm/' . ($id ? $id . '_' : '') . $pre; (empty($path_file) || !is_file($path_file)) and $path_file = APP_PATH . 'view/template/' . $config['theme'] . 
'/htm/' . $pre; if (!empty($config['theme_child']) && is_array($config['theme_child'])) { foreach ($config['theme_child'] as $theme) { if (empty($theme) || is_array($theme)) continue; $path_file = APP_PATH . 'view/template/' . $theme . '/htm/' . ($id ? $id . '_' : '') . $pre; !is_file($path_file) and $path_file = APP_PATH . 'view/template/' . $theme . '/htm/' . $pre; } } !is_file($path_file) and $path_file = APP_PATH . ($dir ? 'plugin/' . $dir . '/view/htm/' : 'view/htm/') . $default_pre; return $path_file; } function theme_mode_pre($type = 0) { global $config; $mode = $config['setting']['website_mode']; $pre = ''; if (1 == $mode) { $pre .= 2 == $type ? 'portal_category.htm' : 'portal.htm'; } elseif (2 == $mode) { $pre .= 2 == $type ? 'flat_category.htm' : 'flat.htm'; } else { $pre .= 2 == $type ? 'index_category.htm' : 'index.htm'; } return $pre; } ?>Converting csv (";" based) string value of a field to new key:value items in same document in Mongodb aggregat
最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

Converting a CSV-like (";"-delimited) string value of a field to new key:value items in the same document in a MongoDB aggregation

programmeradmin1浏览0评论

Sample documents of a collection

DB: MYDB
Collection: MYCOLL
{
    "_id": {
      "$oid": "678a78375cb6955814197272"
    },
    "ID": 1019397,
    "INFO": "A=5242;AF=987;C=82622"
}
{
    "_id": {
      "$oid": "678a78375cb6955814197272"
    },
    "ID": 1019397,
    "INFO": "A=h242;AF=9y87;C=8w622"
}

I would like to have expected output as following:

{
    "_id": {
        "$oid": "678a78375cb6955814197272"
      },
      "ID": 1019397,
      "INFO": "A=5242;AF=987;C=82622",
      "A":"5242",
      "AF":"987",
      "C":"82622"

}

I have tried to do it myself, but so far I have only written the following aggregation, which extracts one specific value (AF) from the string — that is not what I ultimately want. Can you please tell me how I can improve its performance and update the existing documents in the collection so that they take the form shown above?

db["MYCOLL"].aggregate([
    
    {
        $project: {
            INFO_ARR: {
                $split:["$INFO",";"]
            }
        }
    },
    { 
        $project: { 
            AF_ARR: { 
                $arrayElemAt: [ "$INFO_ARR" ,  1 ] }
            }
    },
    { 
        $project: { 
            AF_FREQ_ARR: { 
                $split: [ "$AF_ARR" ,  "=" ]}
            }
            
    },
    {
        $project: { 
            AF: { 
                $arrayElemAt: [ "$AF_FREQ_ARR" ,  1 ] }
            }
    },
    { $merge : { into: { db: "MYDB", coll: "MYCOLL" }, on: "_id",  whenMatched: "merge", whenNotMatched: "insert" } }
          
])

Note: I cannot use JavaScript because the collection is 400 GB in size and has millions of records, so JavaScript may be too slow to update all of them. Am I wrong about this?

Sample documents of a collection

DB: MYDB
Collection: MYCOLL
{
    "_id": {
      "$oid": "678a78375cb6955814197272"
    },
    "ID": 1019397,
    "INFO": "A=5242;AF=987;C=82622"
}
{
    "_id": {
      "$oid": "678a78375cb6955814197272"
    },
    "ID": 1019397,
    "INFO": "A=h242;AF=9y87;C=8w622"
}

I would like to have expected output as following:

{
    "_id": {
        "$oid": "678a78375cb6955814197272"
      },
      "ID": 1019397,
      "INFO": "A=5242;AF=987;C=82622",
      "A":"5242",
      "AF":"987",
      "C":"82622"

}

I have tried to do it myself, but so far I have only written the following aggregation, which extracts one specific value (AF) from the string — that is not what I ultimately want. Can you please tell me how I can improve its performance and update the existing documents in the collection so that they take the form shown above?

db["MYCOLL"].aggregate([
    
    {
        $project: {
            INFO_ARR: {
                $split:["$INFO",";"]
            }
        }
    },
    { 
        $project: { 
            AF_ARR: { 
                $arrayElemAt: [ "$INFO_ARR" ,  1 ] }
            }
    },
    { 
        $project: { 
            AF_FREQ_ARR: { 
                $split: [ "$AF_ARR" ,  "=" ]}
            }
            
    },
    {
        $project: { 
            AF: { 
                $arrayElemAt: [ "$AF_FREQ_ARR" ,  1 ] }
            }
    },
    { $merge : { into: { db: "MYDB", coll: "MYCOLL" }, on: "_id",  whenMatched: "merge", whenNotMatched: "insert" } }
          
])

Note: I cannot use JavaScript because the collection is 400 GB in size and has millions of records, so JavaScript may be too slow to update all of them. Am I wrong about this?

Share Improve this question asked Jan 30 at 12:57 ShahanShahan 491 silver badge7 bronze badges 0
Add a comment  | 

2 Answers 2

Reset to default 3

After splitting the string on semicolons (;), split each item of that result on =. That gives an array of pairs like [["A", "5242"], ["AF", "987"], ["C", "82622"]].

Then use $arrayToObject, which treats the 1st item of each pair as the key and the 2nd item as the value. After that, merge the resulting object into $$ROOT and remove the temporary array field.

I've done the transformations as separate steps but you can combine them into one:

db.mycoll.aggregate([
  {
    $set: {
      INFO_ARR: { $split: ["$INFO", ";"] }
    }
  },
  {
    $set: {
      INFO_ARR: {
        $map: {
          input: "$INFO_ARR",
          in: { $split: ["$$this", "="] }
        }
      }
    }
  },
  {
    $set: {
      INFO_ARR: { $arrayToObject: "$INFO_ARR" }
    }
  },
  {
    $replaceWith: {
      $mergeObjects: ["$$ROOT", "$INFO_ARR"]
    }
  },
  {
    $unset: "INFO_ARR"
  },
  {
    $merge: {
      into: {
        db: "MYDB",
        coll: "MYCOLL"
      },
      on: "_id",
      whenMatched: "merge",
      whenNotMatched: "insert"
    }
  }
])

Mongo Playground

Also, this can be done as an updateMany query since every doc needs to be updated anyway: Mongo Playground with update & aggregation syntax

You can use the following aggregation pipeline to split the contents of the INFO property and add the values as properties:

[
  {
    $project: {
      INFO: 1
    }
  },
  {
    $set: {
      info_arr: {
        "$split": [
          "$INFO",
          ";"
        ]
      }
    }
  },
  {
    $set: {
      info_arr2: {
        "$map": {
          "input": "$info_arr",
          "in": {
            k: {
              $first: {
                $split: [
                  "$$this",
                  "="
                ]
              }
            },
            v: {
              $last: {
                $split: [
                  "$$this",
                  "="
                ]
              }
            }
          }
        }
      }
    }
  },
  {
    $set: {
      info_obj: {
        "$arrayToObject": "$info_arr2"
      }
    }
  },
  {
    "$replaceRoot": {
      "newRoot": {
        "$mergeObjects": [
          "$$ROOT",
          "$info_obj"
        ]
      }
    }
  },
  {
    $unset: [
      "info_arr",
      "info_arr2",
      "info_obj"
    ]
  }
]
  • The pipeline first splits the field content at the semicolons, then splits each resulting entry at the equals sign, assigning the key to a k field and the value to a v field.
  • At this point, the $arrayToObject operator is used to create an object from the array contents.
  • Afterwards, the properties of the newly created subdocument are merged with the $$ROOT document.
  • At the end, the temporary properties are removed using an $unset stage.

At the end, the documents look like this:

{
    "A": "5242",
    "AF": "987",
    "C": "82622",
    "INFO": "A=5242;AF=987;C=82622",
    "_id": ObjectId("5a934e000102030405000001")
 }

If you add a $merge stage, you can add the new properties to existing documents in the collection.

See this playground to test.

与本文相关的文章

发布评论

评论列表(0)

  1. 暂无评论