# 1.select操作:select 列名 from 表A limit 5 df[["","",""]].head(5)
# 2.where条件:select 列名 from 表名 where 条件='' # 使用括号的方式进行 连接多个条件 condition=(df["列名"]=="")&(df["列名"]=="")&(df["列名"]=="") condition.value_count() df[condition].head(5)
# 3.in和not in:select * from 表A where 列名 in('1','2') df["列名"].unique() # 去重 # in df[df["列名"].isin((1,2))].head() # not in:df取反符号为~ df[~df["列名"].isin((1,2))].head()
# 4.1group by:select sum(),maen() from person group by sex df.groupby("sex").agg({列名:np.sum,"列名":np.mean,"列名":np.mean})
# 4.2.多个列的聚合:select sum(),mean() from person group by sex,department df.groupby(["sex","department"]).agg({列名:np.sum,"列名":np.mean,"列名":np.mean})
# 5.join数据关联:select * from table1 a1 join table2 a2 on a1.列名=a2.列名 limit 5 df2=pd.read_csv() df2.head(5) # 查看前5行数据 df_merged=pd.merge(left=df1,right=df2,on="列名") df_merged.head(5)
# 6.Union数据合并,当两个表的数据列一致时使用union。 # SQL:selet city,rank from 表1 union all select city,,rank from 表2 # 同样的df也需要有相同的列名,假设df1和df2的列名是一致的 pd.concat([df1,df2])
# 7.order limnit先排序后分页 # sql:select * from 表1 order by 列名 limit 5 df.sort_values("列名",ascending=False).head(5)