Thursday, June 19, 2025

Databricks: Python

 

Creating a table-like DataFrame and inserting data into it.

from pyspark.sql.types import (
    StructType,
    StructField,
    StringType,
    IntegerType,
    FloatType,
    DoubleType,
    LongType,
    TimestampType,
)

# Build a small in-memory DataFrame with an explicit schema, then filter it.
# `spark` and `display` are the globals provided by the Databricks notebook.
from pyspark.sql.functions import col

# Sample rows, in schema order: (id, name, age, DateExecute).
dta12 = [
    (1, "Ganesha", 30, "2021-04-02"),
    (2, "Krishna", 34, "2023-06-01"),
    (3, "Pooja", 53, "2021-01-31"),
    (4, "Archana", 56, "2021-01-28"),
]

# The declared types must match the Python values above: by default
# createDataFrame verifies every value against the schema, so the integer
# ages need IntegerType (StringType would reject them).
sch1 = StructType(
    [
        StructField("id", IntegerType(), True),
        StructField("name", StringType(), True),
        StructField("age", IntegerType(), True),
        # Dates are kept as plain ISO-formatted strings in this example.
        StructField("DateExecute", StringType(), True),
    ]
)

df1 = spark.createDataFrame(dta12, sch1)
display(df1)

# Keep only the row with id 4. Compare against an int, since `id` is an
# integer column, rather than relying on an implicit string-to-int cast.
df2 = df1.filter(col("id") == 4)
# Alternative: filter and keep only the `name` column.
# df2 = df1.filter(col("id") == 4).select("name")
display(df2)



Table creation in Databricks: adding new rows 1) with a SQL query and 2) with a DataFrame
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

# Add rows to an existing Databricks table: read the table, union the new
# rows onto it as a DataFrame, and write the combined result back.
# `spark` and `display` are the globals provided by the Databricks notebook.

# Single definition of the table that is both read and rewritten below.
TABLE_NAME = "dev.databricks_training.ashwini_test"

frm1 = spark.table(TABLE_NAME)
display(frm1)

### Schema for the rows to add
sch2 = StructType(
    [
        StructField("empid1", IntegerType()),
        StructField("empname", StringType()),
        StructField("region", StringType()),
    ]
)

### New rows to add to the existing DataFrame
frm2 = spark.createDataFrame(
    [
        (123, "Speaker", "AMIND"),
        (124, "Mouse", "AMIND"),
        (125, "Screen", "AMIND"),
    ],
    sch2,
)
display(frm2)

### Adding the new rows with a union
# union() matches columns by position, not by name.
# NOTE(review): assumes the table's columns are in the same order as sch2 —
# confirm against the table definition.
frm1 = frm1.union(frm2)
display(frm1)

### Saving to the Databricks table
# NOTE(review): this overwrites the same table frm1 was read from, and
# re-running the cell adds the three rows again — confirm this is intended.
frm1.write.mode("overwrite").saveAsTable(TABLE_NAME)

### Filtering rows by a column value
# display(frm1.where(frm1["region"] == "amna"))

### Inserting a record into the table by executing a query
# spark.sql("insert into dev.databricks_training.ashwini_test values (19037,'Sri Kumar','HKT')")