最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

scala - Adding new columns to existing bigquery table via API - Stack Overflow

programmeradmin6浏览0评论

I have a list of columns which needs to be added to the existing bigquery table. All the new columns which needs to be added should have float datatype.

 private def updateTableSchema(newColumnsList:Seq[String],finalSourceTableId:TableId)
  : Unit = {
    logInfo(s"adding new column to the existing pm_ingest bigquery table")
    import com.google.cloud.bigquery.LegacySQLTypeName

    lazy val bigQuery: BigQuery = Util.initializeBigQueryService()
    val table = bigQuery.getTable(finalSourceTableId)
    val tableDefinition = table.getDefinition[StandardTableDefinition]
    import scala.collection.JavaConverters._
    var oldSchema = tableDefinition.getSchema

    val newSchema = for (col <- newColumnsList) yield  {
      Schema.of(
        Field.newBuilder(col, LegacySQLTypeName.STRING)
          .setMode(Field.Mode.NULLABLE)
          .build())
    }
//    val newSchema =
//      Schema.of(
//        Field.newBuilder("TEST_COL_1", LegacySQLTypeName.STRING)
//          .setMode(Field.Mode.NULLABLE)
//          .build(),
//        // Adding below additional column during the load job
//        Field.newBuilder("TEST_COL_2", LegacySQLTypeName.STRING)
//          .setMode(Field.Mode.NULLABLE)
//          .build());

}

below is the print statements

newColumnsList: List(TEST_COL_1, TEST_COL_2)

I can hardcode the elements of newColumnsList and create a new schema but I was looking something more dynamic. As this list may contains one or more elements.

When am using for loop to create a schema its returning me a list whereas I just need schema.

val newSchema: Seq[Schema]

newSchema:List(Schema{fields=[Field{name=TEST_COL_1, type=STRING, mode=NULLABLE, description=null}]}, Schema{fields=[Field{name=TEST_COL_2, type=STRING, mode=NULLABLE, description=null}]})


oldSchema:Schema{fields=[Field{name=COLLECTTIME, type=TIMESTAMP, mode=null, description=null}, Field{name=GNBCUCPFUNCTION, type=STRING, mode=null, description=null}, Field{name=NRCELLCU, type=STRING, mode=null, description=null}, Field{name=CELLID, type=STRING, mode=null, description=null}, Field{name=SITE, type=STRING, mode=null, description=null}, Field{name=MANAGEDELEMENT, type=STRING, mode=null, description=null}, Field{name=SOURCEIDENTITY, type=STRING, mode=null, description=null}, Field{name=NODE, type=STRING, mode=null, description=null}, Field{name=SITEID, type=STRING, mode=null, description=null}, Field{name=NODE_temp, type=STRING, mode=null, description=null}, Field{name=NETWORK, type=STRING, mode=null, description=null}, Field{name=SITETYPE, type=STRING, mode=null, description=null}, Field{name=STATEORPROVINCE, type=STRING, mode=null, description=null}, Field{name=DISTRICT, type=STRING, mode=null, description=null}, Field{name=CLUSTER, type=STRING, mode=null, description=null}, Field{name=REGION, type=STRING, mode=null, description=null}, Field{name=PERIOD, type=INTEGER, mode=null, description=null}, Field{name=DATE, type=DATE, mode=null, description=null}, Field{name=TIME, type=STRING, mode=null, description=null}, Field{name=HOUR, type=STRING, mode=null, description=null}, Field{name=DAY, type=STRING, mode=null, description=null}, Field{name=WEEK, type=STRING, mode=null, description=null}, Field{name=MONTH, type=STRING, mode=null, description=null}, Field{name=NW_BAND, type=STRING, mode=null, description=null}, Field{name=PDFPMEBSSESSIONTIMEDRB5QI_MCC502MNC153_0, type=FLOAT, mode=null, description=null}, Field{name=PDFPMEBSSESSIONTIMEDRB5QI_MCC502MNC153_1, type=FLOAT, mode=null, description=null}, Field{name=PDFPMEBSSESSIONTIMEDRB5QI_MCC502MNC153_2, type=FLOAT, mode=null, description=null}, Field{name=PDFPMEBSSESSIONTIMEDRB5QI_MCC502MNC153_3, type=FLOAT, mode=null, description=null}, Field{name=COLLINT, type=FLOAT, mode=null, description=null}, Field{name=FILE_REVISION, type=INTEGER, mode=null, description=null}, Field{name=JOBTIME, type=TIMESTAMP, mode=null, description=null}]}

I was referring to similar logic being explained here but I need to add multiple fields and using Scala.

Update BigQuery Schema / Add New Column in Java

I have a list of columns which needs to be added to the existing bigquery table. All the new columns which needs to be added should have float datatype.

 private def updateTableSchema(newColumnsList:Seq[String],finalSourceTableId:TableId)
  : Unit = {
    logInfo(s"adding new column to the existing pm_ingest bigquery table")
    import com.google.cloud.bigquery.LegacySQLTypeName

    lazy val bigQuery: BigQuery = Util.initializeBigQueryService()
    val table = bigQuery.getTable(finalSourceTableId)
    val tableDefinition = table.getDefinition[StandardTableDefinition]
    import scala.collection.JavaConverters._
    var oldSchema = tableDefinition.getSchema

    val newSchema = for (col <- newColumnsList) yield  {
      Schema.of(
        Field.newBuilder(col, LegacySQLTypeName.STRING)
          .setMode(Field.Mode.NULLABLE)
          .build())
    }
//    val newSchema =
//      Schema.of(
//        Field.newBuilder("TEST_COL_1", LegacySQLTypeName.STRING)
//          .setMode(Field.Mode.NULLABLE)
//          .build(),
//        // Adding below additional column during the load job
//        Field.newBuilder("TEST_COL_2", LegacySQLTypeName.STRING)
//          .setMode(Field.Mode.NULLABLE)
//          .build());

}

below is the print statements

newColumnsList: List(TEST_COL_1, TEST_COL_2)

I can hardcode the elements of newColumnsList and create a new schema but I was looking something more dynamic. As this list may contains one or more elements.

When am using for loop to create a schema its returning me a list whereas I just need schema.

val newSchema: Seq[Schema]

newSchema:List(Schema{fields=[Field{name=TEST_COL_1, type=STRING, mode=NULLABLE, description=null}]}, Schema{fields=[Field{name=TEST_COL_2, type=STRING, mode=NULLABLE, description=null}]})


oldSchema:Schema{fields=[Field{name=COLLECTTIME, type=TIMESTAMP, mode=null, description=null}, Field{name=GNBCUCPFUNCTION, type=STRING, mode=null, description=null}, Field{name=NRCELLCU, type=STRING, mode=null, description=null}, Field{name=CELLID, type=STRING, mode=null, description=null}, Field{name=SITE, type=STRING, mode=null, description=null}, Field{name=MANAGEDELEMENT, type=STRING, mode=null, description=null}, Field{name=SOURCEIDENTITY, type=STRING, mode=null, description=null}, Field{name=NODE, type=STRING, mode=null, description=null}, Field{name=SITEID, type=STRING, mode=null, description=null}, Field{name=NODE_temp, type=STRING, mode=null, description=null}, Field{name=NETWORK, type=STRING, mode=null, description=null}, Field{name=SITETYPE, type=STRING, mode=null, description=null}, Field{name=STATEORPROVINCE, type=STRING, mode=null, description=null}, Field{name=DISTRICT, type=STRING, mode=null, description=null}, Field{name=CLUSTER, type=STRING, mode=null, description=null}, Field{name=REGION, type=STRING, mode=null, description=null}, Field{name=PERIOD, type=INTEGER, mode=null, description=null}, Field{name=DATE, type=DATE, mode=null, description=null}, Field{name=TIME, type=STRING, mode=null, description=null}, Field{name=HOUR, type=STRING, mode=null, description=null}, Field{name=DAY, type=STRING, mode=null, description=null}, Field{name=WEEK, type=STRING, mode=null, description=null}, Field{name=MONTH, type=STRING, mode=null, description=null}, Field{name=NW_BAND, type=STRING, mode=null, description=null}, Field{name=PDFPMEBSSESSIONTIMEDRB5QI_MCC502MNC153_0, type=FLOAT, mode=null, description=null}, Field{name=PDFPMEBSSESSIONTIMEDRB5QI_MCC502MNC153_1, type=FLOAT, mode=null, description=null}, Field{name=PDFPMEBSSESSIONTIMEDRB5QI_MCC502MNC153_2, type=FLOAT, mode=null, description=null}, Field{name=PDFPMEBSSESSIONTIMEDRB5QI_MCC502MNC153_3, type=FLOAT, mode=null, description=null}, Field{name=COLLINT, type=FLOAT, mode=null, description=null}, Field{name=FILE_REVISION, type=INTEGER, mode=null, description=null}, Field{name=JOBTIME, type=TIMESTAMP, mode=null, description=null}]}

I was referring to similar logic being explained here but I need to add multiple fields and using Scala.

Update BigQuery Schema / Add New Column in Java

Share Improve this question edited Mar 21 at 9:27 vikrant rana asked Mar 21 at 2:27 vikrant ranavikrant rana 4,6817 gold badges35 silver badges81 bronze badges
Add a comment  | 

1 Answer 1

Reset to default 1

Can you try this, use fields as a map and then add it to the new schema so this will not in sequence format, separate by comma

val fields = newColumnsList.map { col =>
  Field.newBuilder(col, LegacySQLTypeName.STRING)
    .setMode(Field.Mode.NULLABLE)
    .build()
}

val newSchema = Schema.of(fields: _*)
发布评论

评论列表(0)

  1. 暂无评论