最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

python - MySQL update query for rows NOT matching any records in a dataframe? - Stack Overflow

programmeradmin2浏览0评论

I have a MySQL table

+----+------+------+------+
| ID | x    | y    | z    |
+----+------+------+------+
|  1 |    1 | a    |    2 |
|  2 |    1 | b    | NULL |
|  3 |    1 | c    | NULL |
|  4 |    2 | a    | NULL |
|  5 |    2 | b    |    1 |
|  6 |    2 | c    |    2 |
|  7 |    3 | a    |    1 |
|  8 |    3 | b    | NULL |
|  9 |    3 | c    | NULL |
+----+------+------+------+

in which I want to update values in the column z based on the pandas dataframe

lst = [{'ID': 3, 'x': 1, 'y': 'c'},
       {'ID': 4, 'x': 2, 'y': 'a'},
       {'ID': 8, 'x': 3, 'y': 'b'}]

df = pd.DataFrame(lst)

I want to set the value of z to 3 for those rows satisfying

  1. the present value for z is NULL, and
  2. the combination of the value for x and the value for y is NOT present in the dataframe.

This corresponds to the rows 2 and 9 in the table. Can I do this with a single query?

Being fairly new to MySQL, I tried the following:

import pandas as pd
import mysql.connector
from mysql.connector import errorcode

import configparser
config = configparser.ConfigParser()
config.read('config.ini')
mysql_host     = config['mysql']['host']
mysql_database = config['mysql']['database']
mysql_user     = config['mysql']['user']
mysql_password = config['mysql']['password']

connection = mysql.connector.connect(
    database = mysql_database,
    host     = mysql_host,
    user     = mysql_user,
    password = mysql_password
)
cursor = connection.cursor()

values = df[['x', 'y']].values.tolist()

query = "UPDATE my_table SET z = 3 WHERE z IS NULL AND NOT (x = %s AND y = %s)"

cursor.executemany(query, values)
connectionmit()

This does not work: it changes all NULL values to 3. Apparently cursor.executemany iterates over the items in the values list, so that makes sense. Is there a way where the items are not iterated over, but considered simultaneously, when the WHERE ... NOT part in the query is considered? And effectively only updating rows 2 and 9 in the table?

Of course I could also read part of the table into a dataframe, manipulate the dataframe according to my wishes, and based on that update the table. But it is less direct. Is there no direct way where the manipulation is done inside MySQL using a query and the dataframe?

I am looking for a way that applies to large tables and dataframes, not just the small ones in the example here. Thanks!

I have a MySQL table

+----+------+------+------+
| ID | x    | y    | z    |
+----+------+------+------+
|  1 |    1 | a    |    2 |
|  2 |    1 | b    | NULL |
|  3 |    1 | c    | NULL |
|  4 |    2 | a    | NULL |
|  5 |    2 | b    |    1 |
|  6 |    2 | c    |    2 |
|  7 |    3 | a    |    1 |
|  8 |    3 | b    | NULL |
|  9 |    3 | c    | NULL |
+----+------+------+------+

in which I want to update values in the column z based on the pandas dataframe

lst = [{'ID': 3, 'x': 1, 'y': 'c'},
       {'ID': 4, 'x': 2, 'y': 'a'},
       {'ID': 8, 'x': 3, 'y': 'b'}]

df = pd.DataFrame(lst)

I want to set the value of z to 3 for those rows satisfying

  1. the present value for z is NULL, and
  2. the combination of the value for x and the value for y is NOT present in the dataframe.

This corresponds to the rows 2 and 9 in the table. Can I do this with a single query?

Being fairly new to MySQL, I tried the following:

import pandas as pd
import mysql.connector
from mysql.connector import errorcode

import configparser
config = configparser.ConfigParser()
config.read('config.ini')
mysql_host     = config['mysql']['host']
mysql_database = config['mysql']['database']
mysql_user     = config['mysql']['user']
mysql_password = config['mysql']['password']

connection = mysql.connector.connect(
    database = mysql_database,
    host     = mysql_host,
    user     = mysql_user,
    password = mysql_password
)
cursor = connection.cursor()

values = df[['x', 'y']].values.tolist()

query = "UPDATE my_table SET z = 3 WHERE z IS NULL AND NOT (x = %s AND y = %s)"

cursor.executemany(query, values)
connectionmit()

This does not work: it changes all NULL values to 3. Apparently cursor.executemany iterates over the items in the values list, so that makes sense. Is there a way where the items are not iterated over, but considered simultaneously, when the WHERE ... NOT part in the query is considered? And effectively only updating rows 2 and 9 in the table?

Of course I could also read part of the table into a dataframe, manipulate the dataframe according to my wishes, and based on that update the table. But it is less direct. Is there no direct way where the manipulation is done inside MySQL using a query and the dataframe?

I am looking for a way that applies to large tables and dataframes, not just the small ones in the example here. Thanks!

Share Improve this question asked Mar 14 at 17:48 BartBart 1334 bronze badges 2
  • you could load df to another (temporary) table, then execute the UPDATE from MySQL with a join; pretty sure MySQL should support UPDATE with a join. – mechanical_meat Commented Mar 14 at 18:24
  • or you would have to build a derived table (subquery in the from clause) either using union or json_table function. The latter may be simpler and may not need the dataframe at all. The point is: you have to do this fully in sql or fully in python, you can't do this mix and match approach in your question. – Shadow Commented Mar 14 at 19:02
Add a comment  | 

2 Answers 2

Reset to default 1

a possible solution is to

  • make a temporary table

  • insert your dataframe

  • and then run the update on that table

    After the connection is closed the temporary table disappears.

    In case you want to run multiple times in the same connction, you should truncate the table


import pandas as pd
import mysql.connector
from mysql.connector import errorcode

import configparser
config = configparser.ConfigParser()
config.read('config.ini')
mysql_host     = config['mysql']['host']
mysql_database = config['mysql']['database']
mysql_user     = config['mysql']['user']
mysql_password = config['mysql']['password']

connection = mysql.connector.connect(
    database = mysql_database,
    host     = mysql_host,
    user     = mysql_user,
    password = mysql_password
)
cursor = connection.cursor()


temp_creator = '''CREATE  TEMPORARY TABLE IF NOT EXISTS val (ID int, x int, y varchar(1));'''
cursor.execute(temp_creator)
 #load dataframe
lst = [{'ID': 3, 'x': 1, 'y': 'c'},
       {'ID': 4, 'x': 2, 'y': 'a'},
       {'ID': 8, 'x': 3, 'y': 'b'}]
df = pd.DataFrame(lst)
values = df[['ID','x', 'y']].values.tolist()
#import dataframe into teporary
cursor.executemany("""
    INSERT INTO val
        (ID,x,y)
    VALUES (%s,%s,%s)""", values)

connectionmit()


query = """UPDATE my_table m JOIN val v ON m.z IS 
NULL AND (m.x = v.x AND m.y <> v.y) SET m.z = 3 
WHERE  m.ID > 0;"""

cursor.execute(query)

connectionmit()

Convert df to json and run query with this single parameter.

param = df.to_json(orient='records')

Query is

update test t
left join JSON_TABLE(
        -- json parameter  - your dataframe as json
    '[{"ID":3,"x":1,"y":"c"},{"ID":4,"x":2,"y":"a"},{"ID":8,"x":3,"y":"b"}]'
    ,"$[*]"
     COLUMNS(
         rowid FOR ORDINALITY,
         Id int PATH "$.ID" ,
         x  varchar(5) PATH "$.x",
         y  varchar(5) PATH "$.y"
      )
     ) AS p 
  on p.x=t.x and p.y=t.y
set z=3
where p.id is null and z is null;

OR create query dynamically

update test t
left join (values 
      row(3,1,'c'),row(4,2,'a'),row(8,3,'b')
    -- ,row(...) 
  )p(id,x,y)
  on p.x=t.x and p.y=t.y
set z=4
where p.id is null and z is null;

For join used condition on p.x=t.x and p.y=t.y. Id not used.

ID x y z
1 1 a 2
2 1 b 3
3 1 c null
4 2 a null
5 2 b 1
6 2 c 2
7 3 a 1
8 3 b null
9 3 c 3

fiddle

JOIN with JSON_TABLE output is

ID x y z rowid Id x y
1 1 a 2 null null null null
2 1 b null null null null null
3 1 c null 1 3 1 c
4 2 a null 2 4 2 a
5 2 b 1 null null null null
6 2 c 2 null null null null
7 3 a 1 null null null null
8 3 b null 3 8 3 b
9 3 c null null null null null
发布评论

评论列表(0)

  1. 暂无评论