I want to concatenate with results like concat_columns
here:
In the example code below the results have quotation marks: Houston-Alice-"salary"
I do not want this, I want the results like this: Houston-Alice-salary
I've tried variations on the "'{}'".format(data_col)
to no avail. data_col
, "{}".format(data_col)
, and '{}'.format(data_col)
all return Houston-Alice-77321
.
The column names are user supplied so I need to use methods that prevent SQL injection attacks.
What should I try next?
Here is my example code:
import psycopg
import pandas as pd
def so_question(columns, group_by_columns, table):
"""
example for SO
"""
table = psycopg.sql.Composable.as_string(psycopg.sql.Identifier(table))
columns = [psycopg.sql.Composable.as_string(psycopg.sql.Identifier(col)) for col in columns]
group_by_columns = [psycopg.sql.Composable.as_string(psycopg.sql.Identifier(col)) for col in group_by_columns]
for data_col in columns:
group_by = ''
# check if there are grouping columns
if len(group_by_columns) > 0:
concat_columns = " ,'-',".join(group_by_columns) + " ,'-'," + "'{}'".format(data_col)
group_by_clause_group_by = " ,".join(group_by_columns) + " ," + data_col
# CTE generation:
sql_statement = f"""
WITH sql_cte as (
SELECT
CONCAT({concat_columns}) as concat_columns
,{data_col} as value
from {table}, unnest(array[{data_col}]) AS my_col
group by
{group_by_clause_group_by}
)
SELECT * FROM sql_cte
"""
return sql_statement
def execute_sql_get_dataframe(sql):
"""
Creates (and closes) db connection, gets requested sql data , returns as Pandas DataFrame.
Args:
sql (string): sql query to execute.
Returns:
Pandas DataFrame of sql query results.
"""
try:
# print(sql_statement_to_execute)
# create db connection, get connection and cursor object
db_connection_cursor = db_connection_cursor_object()
# execute query
db_connection_cursor['cursor'].execute(sql)
# get results into tuples
tuples_list = db_connection_cursor['cursor'].fetchall()
# get column names: /
column_names = [desc[0] for desc in db_connection_cursor['cursor'].description]
db_connection_cursor['connection'].close()
# create df from results
df_from_sql_query = pd.DataFrame(tuples_list, columns=column_names)
return df_from_sql_query
except Exception as exc:
log.exception(f'sql_statement_to_execute:\n {sql}', exc_info=True)
log.exception(msg=f'Exception: {exc}', exc_info=True)
data_columns = ['salary']
group_by_columns_in_order_of_grouping = ['city', 'name']
_sql = so_question(columns=data_columns,
group_by_columns=group_by_columns_in_order_of_grouping,
table='random_data')
dataframe = execute_sql_get_dataframe(sql=_sql)
print(dataframe)
Here's the code for data generation:
import psycopg
# Connect to the database
conn = psycopg.connect("dbname=mydatabase user=myuser password=mypassword")
cur = conn.cursor()
# Create the sample table
cur.execute("""
CREATE TABLE sample_table (
city VARCHAR(50),
name VARCHAR(50),
salary INTEGER
)
""")
# Insert sample data into the table
cur.execute("""
INSERT INTO sample_table (city, name, salary) VALUES
('New York', 'Alice', 70000),
('Los Angeles', 'Bob', 80000),
('Chicago', 'Charlie', 75000),
('Houston', 'Eve', 71758),
('Phoenix', 'Dave', 68000)
""")
# Commit the transaction
connmit()
# Close the cursor and connection
cur.close()
conn.close()
print("Sample table created and data inserted successfully.")
I want to concatenate with results like concat_columns
here:
In the example code below the results have quotation marks: Houston-Alice-"salary"
I do not want this, I want the results like this: Houston-Alice-salary
I've tried variations on the "'{}'".format(data_col)
to no avail. data_col
, "{}".format(data_col)
, and '{}'.format(data_col)
all return Houston-Alice-77321
.
The column names are user supplied so I need to use methods that prevent SQL injection attacks.
What should I try next?
Here is my example code:
import psycopg
import pandas as pd
def so_question(columns, group_by_columns, table):
"""
example for SO
"""
table = psycopg.sql.Composable.as_string(psycopg.sql.Identifier(table))
columns = [psycopg.sql.Composable.as_string(psycopg.sql.Identifier(col)) for col in columns]
group_by_columns = [psycopg.sql.Composable.as_string(psycopg.sql.Identifier(col)) for col in group_by_columns]
for data_col in columns:
group_by = ''
# check if there are grouping columns
if len(group_by_columns) > 0:
concat_columns = " ,'-',".join(group_by_columns) + " ,'-'," + "'{}'".format(data_col)
group_by_clause_group_by = " ,".join(group_by_columns) + " ," + data_col
# CTE generation:
sql_statement = f"""
WITH sql_cte as (
SELECT
CONCAT({concat_columns}) as concat_columns
,{data_col} as value
from {table}, unnest(array[{data_col}]) AS my_col
group by
{group_by_clause_group_by}
)
SELECT * FROM sql_cte
"""
return sql_statement
def execute_sql_get_dataframe(sql):
"""
Creates (and closes) db connection, gets requested sql data , returns as Pandas DataFrame.
Args:
sql (string): sql query to execute.
Returns:
Pandas DataFrame of sql query results.
"""
try:
# print(sql_statement_to_execute)
# create db connection, get connection and cursor object
db_connection_cursor = db_connection_cursor_object()
# execute query
db_connection_cursor['cursor'].execute(sql)
# get results into tuples
tuples_list = db_connection_cursor['cursor'].fetchall()
# get column names: https://www.geeksfeeks./get-column-names-from-postgresql-table-using-psycopg2/
column_names = [desc[0] for desc in db_connection_cursor['cursor'].description]
db_connection_cursor['connection'].close()
# create df from results
df_from_sql_query = pd.DataFrame(tuples_list, columns=column_names)
return df_from_sql_query
except Exception as exc:
log.exception(f'sql_statement_to_execute:\n {sql}', exc_info=True)
log.exception(msg=f'Exception: {exc}', exc_info=True)
data_columns = ['salary']
group_by_columns_in_order_of_grouping = ['city', 'name']
_sql = so_question(columns=data_columns,
group_by_columns=group_by_columns_in_order_of_grouping,
table='random_data')
dataframe = execute_sql_get_dataframe(sql=_sql)
print(dataframe)
Here's the code for data generation:
import psycopg
# Connect to the database
conn = psycopg.connect("dbname=mydatabase user=myuser password=mypassword")
cur = conn.cursor()
# Create the sample table
cur.execute("""
CREATE TABLE sample_table (
city VARCHAR(50),
name VARCHAR(50),
salary INTEGER
)
""")
# Insert sample data into the table
cur.execute("""
INSERT INTO sample_table (city, name, salary) VALUES
('New York', 'Alice', 70000),
('Los Angeles', 'Bob', 80000),
('Chicago', 'Charlie', 75000),
('Houston', 'Eve', 71758),
('Phoenix', 'Dave', 68000)
""")
# Commit the transaction
connmit()
# Close the cursor and connection
cur.close()
conn.close()
print("Sample table created and data inserted successfully.")
Share
Improve this question
edited Mar 27 at 11:40
snakecharmerb
56.1k13 gold badges134 silver badges187 bronze badges
asked Mar 27 at 10:52
Python_LearnerPython_Learner
1,6774 gold badges25 silver badges55 bronze badges
2
- Offtopic: In this example, there is no need for a CTE or a GROUP BY. – Frank Heikens Commented Mar 27 at 16:33
- @FrankHeikens fair observation, I stripped down my existing function to this example code. Didn't occur to me to start from scratch, could have been made simpler. – Python_Learner Commented Mar 29 at 9:39
2 Answers
Reset to default 1Since Postgres column identifiers are quoted in double quotes, psycopg.sql.Identifier() does return the column name enclosed in them.
What you can do is theoretically:
columns = [psycopg.sql.Composable.as_string(psycopg.sql.Identifier(col).strip('"')) for col in columns]
However, as I am not sure how Identifier() ensures quotability it may be that you are losing the SQL injection protection.
So you may actually want to remove the quotes from the SQL result in dataframe.
For future readers, Postgres
has a built in solution, REPLACE()
.
Here's it added to my CONCAT()
line to accomplish the desired result:
REPLACE(CONCAT({concat_columns}), '"', '')