import numpy as np
import pandas as pd
import os
df=pd.read_csv('train.csv')#拿出train.csv中的训练集,使用相对路径
df.shape#维数
(891, 12)
df.T#行与列交换
0 1 2 3 4 5 6 7 8 9 881 882 883 884 885 886 887 888 889 890
PassengerId 1 2 3 4 5 6 7 8 9 10 882 883 884 885 886 887 888 889 890 891
Survived 0 1 1 1 0 0 0 0 1 1 0 0 0 0 0 0 1 0 1 0
Pclass 3 1 3 1 3 3 1 3 3 2 3 3 2 3 3 2 1 3 1 3
Name Braund, Mr. Owen Harris Cumings, Mrs. John Bradley (Florence Briggs Th… Heikkinen, Miss. Laina Futrelle, Mrs. Jacques Heath (Lily May Peel) Allen, Mr. William Henry Moran, Mr. James McCarthy, Mr. Timothy J Palsson, Master. Gosta Leonard Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) Nasser, Mrs. Nicholas (Adele Achem) Markun, Mr. Johann Dahlberg, Miss. Gerda Ulrika Banfield, Mr. Frederick James Sutehall, Mr. Henry Jr Rice, Mrs. William (Margaret Norton) Montvila, Rev. Juozas Graham, Miss. Margaret Edith Johnston, Miss. Catherine Helen “Carrie” Behr, Mr. Karl Howell Dooley, Mr. Patrick
Sex male female female female male male male male female female male female male male female male female female male male
Age 22.0 38.0 26.0 35.0 35.0 NaN 54.0 2.0 27.0 14.0 33.0 22.0 28.0 25.0 39.0 27.0 19.0 NaN 26.0 32.0
SibSp 1 1 0 1 0 0 0 3 0 1 0 0 0 0 0 0 0 1 0 0
Parch 0 0 0 0 0 0 0 1 2 0 0 0 0 0 5 0 0 2 0 0
Ticket A/5 21171 PC 17599 STON/O2. 3101282 113803 373450 330877 17463 349909 347742 237736 349257 7552 C.A./SOTON 34068 SOTON/OQ 392076 382652 211536 112053 W./C. 6607 111369 370376
Fare 7.25 71.2833 7.925 53.1 8.05 8.4583 51.8625 21.075 11.1333 30.0708 7.8958 10.5167 10.5 7.05 29.125 13.0 30.0 23.45 30.0 7.75
Cabin NaN C85 NaN C123 NaN NaN E46 NaN NaN NaN NaN NaN NaN NaN NaN NaN B42 NaN C148 NaN
Embarked S C S S S Q S S S C S S S S Q S S S C Q

12 rows × 891 columns

path=os.path.abspath('train.csv')#查找绝对路径
pd.read_csv(path)#使用绝对路径
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th… female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S
887 888 1 1 Graham, Miss. Margaret Edith female 19.0 0 0 112053 30.0000 B42 S
888 889 0 3 Johnston, Miss. Catherine Helen “Carrie” female NaN 1 2 W./C. 6607 23.4500 NaN S
889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C
890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q

891 rows × 12 columns

pd.read_table(path)  # read_table splits on tabs ('\t') by default, not commas, so this CSV is parsed into a single column
PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0 1,0,3,”Braund, Mr. Owen Harris”,male,22,1,0,A/…
1 2,1,1,”Cumings, Mrs. John Bradley (Florence Br…
2 3,1,3,”Heikkinen, Miss. Laina”,female,26,0,0,S…
3 4,1,1,”Futrelle, Mrs. Jacques Heath (Lily May …
4 5,0,3,”Allen, Mr. William Henry”,male,35,0,0,3…
886 887,0,2,”Montvila, Rev. Juozas”,male,27,0,0,21…
887 888,1,1,”Graham, Miss. Margaret Edith”,female,…
888 889,0,3,”Johnston, Miss. Catherine Helen “”Car…
889 890,1,1,”Behr, Mr. Karl Howell”,male,26,0,0,11…
890 891,0,3,”Dooley, Mr. Patrick”,male,32,0,0,3703…

891 rows × 1 columns

pd.read_table(path,sep=',')#将默认改为逗号为分隔符
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th… female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S
887 888 1 1 Graham, Miss. Margaret Edith female 19.0 0 0 112053 30.0000 B42 S
888 889 0 3 Johnston, Miss. Catherine Helen “Carrie” female NaN 1 2 W./C. 6607 23.4500 NaN S
889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C
890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q

891 rows × 12 columns

# chunksize=1000 makes read_csv return an iterator that yields DataFrames of
# at most 1000 rows each instead of loading the whole file at once.
df = pd.read_csv('train.csv', chunksize=1000)
for i in df:  # first way to read chunk by chunk: each iteration yields the next chunk
    print(i)
     PassengerId  Survived  Pclass  \
0              1         0       3   
1              2         1       1   
2              3         1       3   
3              4         1       1   
4              5         0       3   
..           ...       ...     ...   
886          887         0       2   
887          888         1       1   
888          889         0       3   
889          890         1       1   
890          891         0       3   

                                                  Name     Sex   Age  SibSp  \
0                              Braund, Mr. Owen Harris    male  22.0      1   
1    Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                               Heikkinen, Miss. Laina  female  26.0      0   
3         Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                             Allen, Mr. William Henry    male  35.0      0   
..                                                 ...     ...   ...    ...   
886                              Montvila, Rev. Juozas    male  27.0      0   
887                       Graham, Miss. Margaret Edith  female  19.0      0   
888           Johnston, Miss. Catherine Helen "Carrie"  female   NaN      1   
889                              Behr, Mr. Karl Howell    male  26.0      0   
890                                Dooley, Mr. Patrick    male  32.0      0   

     Parch            Ticket     Fare Cabin Embarked  
0        0         A/5 21171   7.2500   NaN        S  
1        0          PC 17599  71.2833   C85        C  
2        0  STON/O2. 3101282   7.9250   NaN        S  
3        0            113803  53.1000  C123        S  
4        0            373450   8.0500   NaN        S  
..     ...               ...      ...   ...      ...  
886      0            211536  13.0000   NaN        S  
887      0            112053  30.0000   B42        S  
888      2        W./C. 6607  23.4500   NaN        S  
889      0            111369  30.0000  C148        C  
890      0            370376   7.7500   NaN        Q  

[891 rows x 12 columns]
# The loop above consumed every chunk, so re-create the reader first; calling
# get_chunk() on an exhausted reader raises StopIteration.
df = pd.read_csv('train.csv', chunksize=1000)
df.get_chunk()  # second way to read chunk by chunk: pull the next chunk on demand
PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th… female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S
887 888 1 1 Graham, Miss. Margaret Edith female 19.0 0 0 112053 30.0000 B42 S
888 889 0 3 Johnston, Miss. Catherine Helen “Carrie” female NaN 1 2 W./C. 6607 23.4500 NaN S
889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C
890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q

891 rows × 12 columns

df=pd.read_csv('train.csv')
# Replace the English header with Chinese column names by assigning directly to
# df.columns. This only relabels the columns: the row index stays the default
# 0..890 range, it is not switched to the passenger ID.
# NOTE(review): '堂兄弟/妹个数' reads as "number of cousins"; SibSp presumably
# counts siblings/spouses aboard - confirm against the dataset description.
df.columns=['乘客ID','是否幸存','乘客等级(1/2/3等舱位)','乘客姓名','性别','年龄','堂兄弟/妹个数','父母与小孩个数','船票信息','票价','客舱','登船港口']
df
乘客ID 是否幸存 乘客等级(1/2/3等舱位) 乘客姓名 性别 年龄 堂兄弟/妹个数 父母与小孩个数 船票信息 票价 客舱 登船港口
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th… female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S
887 888 1 1 Graham, Miss. Margaret Edith female 19.0 0 0 112053 30.0000 B42 S
888 889 0 3 Johnston, Miss. Catherine Helen “Carrie” female NaN 1 2 W./C. 6607 23.4500 NaN S
889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C
890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q

891 rows × 12 columns

# Passing names= without header=0 does not replace the header: the file's own
# header line is kept as the first data row (892 rows, and every column is read
# as object dtype). Pass header=0 together with names= to actually replace it.
df=pd.read_csv('train.csv',names=['乘客ID','是否幸存','乘客等级(1/2/3等舱位)','乘客姓名','性别','年龄','堂兄弟/妹个数','父母与小孩个数','船票信息','票价','客舱','登船港口'])
df
乘客ID 是否幸存 乘客等级(1/2/3等舱位) 乘客姓名 性别 年龄 堂兄弟/妹个数 父母与小孩个数 船票信息 票价 客舱 登船港口
0 PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
1 1 0 3 Braund, Mr. Owen Harris male 22 1 0 A/5 21171 7.25 NaN S
2 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th… female 38 1 0 PC 17599 71.2833 C85 C
3 3 1 3 Heikkinen, Miss. Laina female 26 0 0 STON/O2. 3101282 7.925 NaN S
4 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35 1 0 113803 53.1 C123 S
887 887 0 2 Montvila, Rev. Juozas male 27 0 0 211536 13 NaN S
888 888 1 1 Graham, Miss. Margaret Edith female 19 0 0 112053 30 B42 S
889 889 0 3 Johnston, Miss. Catherine Helen “Carrie” female NaN 1 2 W./C. 6607 23.45 NaN S
890 890 1 1 Behr, Mr. Karl Howell male 26 0 0 111369 30 C148 C
891 891 0 3 Dooley, Mr. Patrick male 32 0 0 370376 7.75 NaN Q

892 rows × 12 columns

df.info()#查看数据的基本信息
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 892 entries, 0 to 891
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   乘客ID            892 non-null    object
 1   是否幸存            892 non-null    object
 2   乘客等级(1/2/3等舱位)  892 non-null    object
 3   乘客姓名            892 non-null    object
 4   性别              892 non-null    object
 5   年龄              715 non-null    object
 6   堂兄弟/妹个数         892 non-null    object
 7   父母与小孩个数         892 non-null    object
 8   船票信息            892 non-null    object
 9   票价              892 non-null    object
 10  客舱              205 non-null    object
 11  登船港口            890 non-null    object
dtypes: object(12)
memory usage: 83.8+ KB
df.describe()#查看数据的基本信息
乘客ID 是否幸存 乘客等级(1/2/3等舱位) 乘客姓名 性别 年龄 堂兄弟/妹个数 父母与小孩个数 船票信息 票价 客舱 登船港口
count 892 892 892 892 892 715 892 892 892 892 205 890
unique 892 3 4 892 3 89 8 8 682 249 148 4
top PassengerId 0 3 Name male 24 0 0 1601 8.05 C23 C25 C27 S
freq 1 549 491 1 577 30 608 678 7 43 4 644
df.head(15)#前十五个数据
乘客ID 是否幸存 乘客等级(1/2/3等舱位) 乘客姓名 性别 年龄 堂兄弟/妹个数 父母与小孩个数 船票信息 票价 客舱 登船港口
0 PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
1 1 0 3 Braund, Mr. Owen Harris male 22 1 0 A/5 21171 7.25 NaN S
2 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th… female 38 1 0 PC 17599 71.2833 C85 C
3 3 1 3 Heikkinen, Miss. Laina female 26 0 0 STON/O2. 3101282 7.925 NaN S
4 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35 1 0 113803 53.1 C123 S
5 5 0 3 Allen, Mr. William Henry male 35 0 0 373450 8.05 NaN S
6 6 0 3 Moran, Mr. James male NaN 0 0 330877 8.4583 NaN Q
7 7 0 1 McCarthy, Mr. Timothy J male 54 0 0 17463 51.8625 E46 S
8 8 0 3 Palsson, Master. Gosta Leonard male 2 3 1 349909 21.075 NaN S
9 9 1 3 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27 0 2 347742 11.1333 NaN S
10 10 1 2 Nasser, Mrs. Nicholas (Adele Achem) female 14 1 0 237736 30.0708 NaN C
11 11 1 3 Sandstrom, Miss. Marguerite Rut female 4 1 1 PP 9549 16.7 G6 S
12 12 1 1 Bonnell, Miss. Elizabeth female 58 0 0 113783 26.55 C103 S
13 13 0 3 Saundercock, Mr. William Henry male 20 0 0 A/5. 2151 8.05 NaN S
14 14 0 3 Andersson, Mr. Anders Johan male 39 1 5 347082 31.275 NaN S
df.tail(15)#后十五个数据
乘客ID 是否幸存 乘客等级(1/2/3等舱位) 乘客姓名 性别 年龄 堂兄弟/妹个数 父母与小孩个数 船票信息 票价 客舱 登船港口
877 877 0 3 Gustafsson, Mr. Alfred Ossian male 20 0 0 7534 9.8458 NaN S
878 878 0 3 Petroff, Mr. Nedelio male 19 0 0 349212 7.8958 NaN S
879 879 0 3 Laleff, Mr. Kristo male NaN 0 0 349217 7.8958 NaN S
880 880 1 1 Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) female 56 0 1 11767 83.1583 C50 C
881 881 1 2 Shelley, Mrs. William (Imanita Parrish Hall) female 25 0 1 230433 26 NaN S
882 882 0 3 Markun, Mr. Johann male 33 0 0 349257 7.8958 NaN S
883 883 0 3 Dahlberg, Miss. Gerda Ulrika female 22 0 0 7552 10.5167 NaN S
884 884 0 2 Banfield, Mr. Frederick James male 28 0 0 C.A./SOTON 34068 10.5 NaN S
885 885 0 3 Sutehall, Mr. Henry Jr male 25 0 0 SOTON/OQ 392076 7.05 NaN S
886 886 0 3 Rice, Mrs. William (Margaret Norton) female 39 0 5 382652 29.125 NaN Q
887 887 0 2 Montvila, Rev. Juozas male 27 0 0 211536 13 NaN S
888 888 1 1 Graham, Miss. Margaret Edith female 19 0 0 112053 30 B42 S
889 889 0 3 Johnston, Miss. Catherine Helen “Carrie” female NaN 1 2 W./C. 6607 23.45 NaN S
890 890 1 1 Behr, Mr. Karl Howell male 26 0 0 111369 30 C148 C
891 891 0 3 Dooley, Mr. Patrick male 32 0 0 370376 7.75 NaN Q
df.isnull()#true表示数据为空
乘客ID 是否幸存 乘客等级(1/2/3等舱位) 乘客姓名 性别 年龄 堂兄弟/妹个数 父母与小孩个数 船票信息 票价 客舱 登船港口
0 False False False False False False False False False False True False
1 False False False False False False False False False False False False
2 False False False False False False False False False False True False
3 False False False False False False False False False False False False
4 False False False False False False False False False False True False
886 False False False False False False False False False False True False
887 False False False False False False False False False False False False
888 False False False False False True False False False False True False
889 False False False False False False False False False False False False
890 False False False False False False False False False False True False

891 rows × 12 columns

df.to_csv('train_chinese.csv')
s=pd.Series(np.random.randn(5),index=['a','b','c','d','e'])#随机生成1维数据
s
a   -0.557197
b    1.348717
c    0.228413
d    0.356737
e   -0.123567
dtype: float64
# Build a 1-D Series from a list, labelling the values with an explicit index.
s1 = pd.Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
type(s1)  # display the object's type (a pandas Series), matching the output shown below
pandas.core.series.Series
# Instantiate a Series from a dict: the keys become the index labels and the
# values become the data.
s = pd.Series(dict(zip('abcde', range(1, 6))))
s
a    1
b    2
c    3
d    4
e    5
dtype: int64
# Two equally-labelled Series collected in a dict; this is the raw material for
# a two-column (2-D) table.
d = {
    key: pd.Series(values, index=['a', 'b', 'c', 'd', 'e'])
    for key, values in (('one', [1, 2, 3, 4, 5]), ('two', [6, 7, 8, 9, 10]))
}
d
{'one': a    1
 b    2
 c    3
 d    4
 e    5
 dtype: int64,
 'two': a     6
 b     7
 c     8
 d     9
 e    10
 dtype: int64}
pd.DataFrame(d)  # build a DataFrame from the dict of Series: each key becomes a column, rows follow the shared Series index
one two
a 1 6
b 2 7
c 3 8
d 4 9
e 5 10
df=pd.read_csv('train.csv')
df.columns#查看df每一列的项
Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')
df.Cabin#查看Cabin列的所有项(方法1)
0       NaN
1       C85
2       NaN
3      C123
4       NaN
       ... 
886     NaN
887     B42
888     NaN
889    C148
890     NaN
Name: Cabin, Length: 891, dtype: object
df['Cabin']#查看Cabin列的所有项(方法2),返回类型为Series类型
0       NaN
1       C85
2       NaN
3      C123
4       NaN
       ... 
886     NaN
887     B42
888     NaN
889    C148
890     NaN
Name: Cabin, Length: 891, dtype: object
df[['Cabin']]#返回类型为DataFrame类型
Cabin
0 NaN
1 C85
2 NaN
3 C123
4 NaN
886 NaN
887 B42
888 NaN
889 C148
890 NaN

891 rows × 1 columns

test_1=pd.read_csv('test_1.csv')
test_1
Unnamed: 0 PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked a
0 0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S 100
1 1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th… female 38.0 1 0 PC 17599 71.2833 C85 C 100
2 2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S 100
3 3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S 100
4 4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S 100
886 886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S 100
887 887 888 1 1 Graham, Miss. Margaret Edith female 19.0 0 0 112053 30.0000 B42 S 100
888 888 889 0 3 Johnston, Miss. Catherine Helen “Carrie” female NaN 1 2 W./C. 6607 23.4500 NaN S 100
889 889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C 100
890 890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q 100

891 rows × 14 columns

del test_1['a']#删除a列数据(方法1)
test_1
Unnamed: 0 PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th… female 38.0 1 0 PC 17599 71.2833 C85 C
2 2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
886 886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S
887 887 888 1 1 Graham, Miss. Margaret Edith female 19.0 0 0 112053 30.0000 B42 S
888 888 889 0 3 Johnston, Miss. Catherine Helen “Carrie” female NaN 1 2 W./C. 6607 23.4500 NaN S
889 889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C
890 890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q

891 rows × 13 columns

# Reload first: column 'a' was already removed by the `del` above, so popping it
# again would raise KeyError.
test_1 = pd.read_csv('test_1.csv')
a = test_1.pop('a')  # method 2: pop removes column 'a' from test_1 in place and returns it as a Series
a  # display the removed column (this is the Series shown in the output below)
0      100
1      100
2      100
3      100
4      100
      ... 
886    100
887    100
888    100
889    100
890    100
Name: a, Length: 891, dtype: int64
# Reload so that column 'a' exists again; the earlier del/pop cells removed it
# from test_1, and dropping a missing label raises KeyError.
test_1 = pd.read_csv('test_1.csv')
test_1.drop(['a'], axis=1)  # method 3: axis=1 targets columns, axis=0 rows; returns a new frame and leaves test_1 unchanged
Unnamed: 0 PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th… female 38.0 1 0 PC 17599 71.2833 C85 C
2 2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
886 886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S
887 887 888 1 1 Graham, Miss. Margaret Edith female 19.0 0 0 112053 30.0000 B42 S
888 888 889 0 3 Johnston, Miss. Catherine Helen “Carrie” female NaN 1 2 W./C. 6607 23.4500 NaN S
889 889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C
890 890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q

891 rows × 13 columns

# Reload so that column 'a' is present regardless of what the earlier cells
# removed; dropping a missing label raises KeyError.
test_1 = pd.read_csv('test_1.csv')
test_1.drop(['a'], axis=1, inplace=True)  # inplace=True modifies test_1 itself instead of returning a copy
test_1
Unnamed: 0 PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th… female 38.0 1 0 PC 17599 71.2833 C85 C
2 2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
886 886 887 0 2 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S
887 887 888 1 1 Graham, Miss. Margaret Edith female 19.0 0 0 112053 30.0000 B42 S
888 888 889 0 3 Johnston, Miss. Catherine Helen “Carrie” female NaN 1 2 W./C. 6607 23.4500 NaN S
889 889 890 1 1 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C
890 890 891 0 3 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q

891 rows × 13 columns

test_1=pd.read_csv('test_1.csv')
test_1
test_1.drop(['a','PassengerId','Survived','Pclass'],axis=1)#返回的是没有以上元素的副本
Unnamed: 0 Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 0 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 1 Cumings, Mrs. John Bradley (Florence Briggs Th… female 38.0 1 0 PC 17599 71.2833 C85 C
2 2 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 4 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
886 886 Montvila, Rev. Juozas male 27.0 0 0 211536 13.0000 NaN S
887 887 Graham, Miss. Margaret Edith female 19.0 0 0 112053 30.0000 B42 S
888 888 Johnston, Miss. Catherine Helen “Carrie” female NaN 1 2 W./C. 6607 23.4500 NaN S
889 889 Behr, Mr. Karl Howell male 26.0 0 0 111369 30.0000 C148 C
890 890 Dooley, Mr. Patrick male 32.0 0 0 370376 7.7500 NaN Q

891 rows × 10 columns

test_1=pd.read_csv('test_1.csv')
test_1['Age'] < 10  # element-wise comparison: True where Age is below 10, False otherwise (a missing Age, e.g. row 888, yields False)
0      False
1      False
2      False
3      False
4      False
       ...  
886    False
887    False
888    False
889    False
890    False
Name: Age, Length: 891, dtype: bool
test_1[test_1['Age'] < 10]#将结果为True的拿出来
Unnamed: 0 PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked a
7 7 8 0 3 Palsson, Master. Gosta Leonard male 2.00 3 1 349909 21.0750 NaN S 100
10 10 11 1 3 Sandstrom, Miss. Marguerite Rut female 4.00 1 1 PP 9549 16.7000 G6 S 100
16 16 17 0 3 Rice, Master. Eugene male 2.00 4 1 382652 29.1250 NaN Q 100
24 24 25 0 3 Palsson, Miss. Torborg Danira female 8.00 3 1 349909 21.0750 NaN S 100
43 43 44 1 2 Laroche, Miss. Simonne Marie Anne Andree female 3.00 1 2 SC/Paris 2123 41.5792 NaN C 100
827 827 828 1 2 Mallet, Master. Andre male 1.00 0 2 S.C./PARIS 2079 37.0042 NaN C 100
831 831 832 1 2 Richards, Master. George Sibley male 0.83 1 1 29106 18.7500 NaN S 100
850 850 851 0 3 Andersson, Master. Sigvard Harald Elias male 4.00 4 2 347082 31.2750 NaN S 100
852 852 853 0 3 Boulos, Miss. Nourelain female 9.00 1 1 2678 15.2458 NaN C 100
869 869 870 1 3 Johnson, Master. Harold Theodor male 4.00 1 1 347742 11.1333 NaN S 100

62 rows × 14 columns

midage=test_1[(test_1['Age'] > 10) & (test_1['Age'] < 50)]#返回Age大于10岁,且Age小于50岁的结果
midage.head()
Unnamed: 0 PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked a
0 0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S 100
1 1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th… female 38.0 1 0 PC 17599 71.2833 C85 C 100
2 2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S 100
3 3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S 100
4 4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S 100
midage1=test_1[(test_1['Age'] > 50) | (test_1['Age'] < 10)]#返回Age小于10岁,和Age大于50岁的结果
midage1
Unnamed: 0 PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked a
6 6 7 0 1 McCarthy, Mr. Timothy J male 54.0 0 0 17463 51.8625 E46 S 100
7 7 8 0 3 Palsson, Master. Gosta Leonard male 2.0 3 1 349909 21.0750 NaN S 100
10 10 11 1 3 Sandstrom, Miss. Marguerite Rut female 4.0 1 1 PP 9549 16.7000 G6 S 100
11 11 12 1 1 Bonnell, Miss. Elizabeth female 58.0 0 0 113783 26.5500 C103 S 100
15 15 16 1 2 Hewlett, Mrs. (Mary D Kingcome) female 55.0 0 0 248706 16.0000 NaN S 100
851 851 852 0 3 Svensson, Mr. Johan male 74.0 0 0 347060 7.7750 NaN S 100
852 852 853 0 3 Boulos, Miss. Nourelain female 9.0 1 1 2678 15.2458 NaN C 100
857 857 858 1 1 Daly, Mr. Peter Denis male 51.0 0 0 113055 26.5500 E17 S 100
869 869 870 1 3 Johnson, Master. Harold Theodor male 4.0 1 1 347742 11.1333 NaN S 100
879 879 880 1 1 Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) female 56.0 0 1 11767 83.1583 C50 C 100

126 rows × 14 columns

midage.loc[[100],['Pclass','Sex']]#取出midage中索引为100的数据并拿出Pclass和Sex的结果(并不是第100个数据)
#索引还是与拿过来时的索引一样,不能代表排列顺序
Pclass Sex
100 3 female
midage.to_csv('midage.csv')#将生成的midage表格存储
midage1=midage.reset_index(drop=True)#将midage中的索引改成由0开始的连续排列索引(drop=True是为了删除原来索引,不加的话原来的索引会变成数据)
midage1.to_csv('midage1.csv')
midage1.loc[[100],['Pclass','Sex']]#此时取出的是真正的第100个数据
Pclass Sex
100 2 male
midage.index
Index([  0,   1,   2,   3,   4,   8,   9,  12,  13,  14,
       ...
       880, 881, 882, 883, 884, 885, 886, 887, 889, 890],
      dtype='int64', length=576)
midage1.loc[[100,105,108],['Pclass','Name','Sex']]#取出第100,105,108个数据
Pclass Name Sex
100 2 Byles, Rev. Thomas Roussel Davids male
105 3 Cribb, Mr. John Hatfield male
108 3 Calic, Mr. Jovo male
midage1.iloc[[100,105,108],[3,4,5]]  # iloc selects by 0-based integer position: rows at positions 100/105/108 and columns at positions 3, 4, 5 (Pclass, Name, Sex here)
Pclass Name Sex
100 2 Byles, Rev. Thomas Roussel Davids male
105 3 Cribb, Mr. John Hatfield male
108 3 Calic, Mr. Jovo male
midage1.head(3)
Unnamed: 0 PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked a
0 0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S 100
1 1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th… female 38.0 1 0 PC 17599 71.2833 C85 C 100
2 2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S 100
# A 6x4 frame of standard-normal random values. The row labels ('6','3','2',
# '4','5','1') and column labels (B, D, C, A) are deliberately out of order.
sample = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=['6', '3', '2', '4', '5', '1'],
    columns=['B', 'D', 'C', 'A'],
)
sample
B D C A
6 -1.240358 -1.264760 0.530587 -0.641027
3 -0.774196 0.063058 0.908744 -0.111125
2 -0.743644 -0.142332 1.577287 0.409604
4 3.055682 -0.207313 1.322307 0.008019
5 1.191964 0.791010 0.306310 1.400323
1 0.113995 0.514866 -0.219842 -0.149131
sample.sort_values('B')#按B从小到大排列,axis=0默认按行排序,ascending=False表示按降序排列,inplace=True表示将母本替换
B D C A
6 -1.240358 -1.264760 0.530587 -0.641027
3 -0.774196 0.063058 0.908744 -0.111125
2 -0.743644 -0.142332 1.577287 0.409604
1 0.113995 0.514866 -0.219842 -0.149131
5 1.191964 0.791010 0.306310 1.400323
4 3.055682 -0.207313 1.322307 0.008019
sample.sort_index()#按行索引顺序排列
B D C A
1 0.113995 0.514866 -0.219842 -0.149131
2 -0.743644 -0.142332 1.577287 0.409604
3 -0.774196 0.063058 0.908744 -0.111125
4 3.055682 -0.207313 1.322307 0.008019
5 1.191964 0.791010 0.306310 1.400323
6 -1.240358 -1.264760 0.530587 -0.641027
sample.sort_index(axis=1)#按列顺序排列
A B C D
6 -0.641027 -1.240358 0.530587 -1.264760
3 -0.111125 -0.774196 0.908744 0.063058
2 0.409604 -0.743644 1.577287 -0.142332
4 0.008019 3.055682 1.322307 -0.207313
5 1.400323 1.191964 0.306310 0.791010
1 -0.149131 0.113995 -0.219842 0.514866
sample.sort_index(axis=1,ascending=False)#按列降序排列
D C B A
6 -1.264760 0.530587 -1.240358 -0.641027
3 0.063058 0.908744 -0.774196 -0.111125
2 -0.142332 1.577287 -0.743644 0.409604
4 -0.207313 1.322307 3.055682 0.008019
5 0.791010 0.306310 1.191964 1.400323
1 0.514866 -0.219842 0.113995 -0.149131
sample.sort_values(['B','A'],ascending=False)#任选两列数据降序排列
B D C A
4 3.055682 -0.207313 1.322307 0.008019
5 1.191964 0.791010 0.306310 1.400323
1 0.113995 0.514866 -0.219842 -0.149131
2 -0.743644 -0.142332 1.577287 0.409604
3 -0.774196 0.063058 0.908744 -0.111125
6 -1.240358 -1.264760 0.530587 -0.641027
df=pd.read_csv('train.csv')
df.columns=['乘客ID','是否幸存','乘客等级(1/2/3等舱位)','乘客姓名','性别','年龄','堂兄弟/妹个数','父母与小孩个数','船票信息','票价','客舱','登船港口']
df.sort_values(['票价','年龄'],ascending=False)  # sort by fare in descending order first, then by age (also descending) among rows with equal fare
乘客ID 是否幸存 乘客等级(1/2/3等舱位) 乘客姓名 性别 年龄 堂兄弟/妹个数 父母与小孩个数 船票信息 票价 客舱 登船港口
679 680 1 1 Cardeza, Mr. Thomas Drake Martinez male 36.0 0 1 PC 17755 512.3292 B51 B53 B55 C
258 259 1 1 Ward, Miss. Anna female 35.0 0 0 PC 17755 512.3292 NaN C
737 738 1 1 Lesurer, Mr. Gustave J male 35.0 0 0 PC 17755 512.3292 B101 C
438 439 0 1 Fortune, Mr. Mark male 64.0 1 4 19950 263.0000 C23 C25 C27 S
341 342 1 1 Fortune, Miss. Alice Elizabeth female 24.0 3 2 19950 263.0000 C23 C25 C27 S
481 482 0 2 Frost, Mr. Anthony Wood “Archie” male NaN 0 0 239854 0.0000 NaN S
633 634 0 1 Parr, Mr. William Henry Marsh male NaN 0 0 112052 0.0000 NaN S
674 675 0 2 Watson, Mr. Ennis Hastings male NaN 0 0 239856 0.0000 NaN S
732 733 0 2 Knight, Mr. Robert J male NaN 0 0 239855 0.0000 NaN S
815 816 0 1 Fry, Mr. Richard male NaN 0 0 112058 0.0000 B102 S

891 rows × 12 columns

x = pd.DataFrame(np.random.randn(4,4),
                      index=list('3241'),
                     columns=list('BDCA'))
x
B D C A
3 0.207853 0.574396 0.086197 1.187164
2 -0.794598 -0.308106 -0.291090 -0.150375
4 0.215895 -0.189428 0.556125 -0.361963
1 -1.593097 -0.205176 1.427471 -0.339048
y = pd.DataFrame(np.random.randn(5,4),
                      index=list('53241'),
                     columns=list('BDCE'))
y
B D C E
5 0.345871 -1.007209 -0.047450 -0.867648
3 -1.674230 0.874186 1.965207 -0.291205
2 1.802405 -0.033730 1.164470 -1.460408
4 0.501799 0.024054 -2.140898 -0.611685
1 -1.486755 1.780159 -2.015993 0.259121
x+y  # addition aligns on both row and column labels; any cell whose label is missing from either frame (columns A/E, row 5) becomes NaN
A B C D E
1 NaN -3.079853 -0.588522 1.574983 NaN
2 NaN 1.007806 0.873381 -0.341837 NaN
3 NaN -1.466377 2.051404 1.448582 NaN
4 NaN 0.717694 -1.584773 -0.165374 NaN
5 NaN NaN NaN NaN NaN
# Largest combined value of the two relative-count columns: add them
# element-wise, then reduce with the vectorized Series.max() (which skips NaN)
# rather than iterating the Series with the Python builtin max().
(df['堂兄弟/妹个数'] + df['父母与小孩个数']).max()
10
df.describe()
乘客ID 是否幸存 乘客等级(1/2/3等舱位) 年龄 堂兄弟/妹个数 父母与小孩个数 票价
count 891.000000 891.000000 891.000000 714.000000 891.000000 891.000000 891.000000
mean 446.000000 0.383838 2.308642 29.699118 0.523008 0.381594 32.204208
std 257.353842 0.486592 0.836071 14.526497 1.102743 0.806057 49.693429
min 1.000000 0.000000 1.000000 0.420000 0.000000 0.000000 0.000000
25% 223.500000 0.000000 2.000000 20.125000 0.000000 0.000000 7.910400
50% 446.000000 0.000000 3.000000 28.000000 0.000000 0.000000 14.454200
75% 668.500000 1.000000 3.000000 38.000000 1.000000 0.000000 31.000000
max 891.000000 1.000000 3.000000 80.000000 8.000000 6.000000 512.329200
from matplotlib import pyplot as plt
plt.hist(df['年龄'])#画年龄的直方图
(array([ 54.,  46., 177., 169., 118.,  70.,  45.,  24.,   9.,   2.]),
 array([ 0.42 ,  8.378, 16.336, 24.294, 32.252, 40.21 , 48.168, 56.126,
        64.084, 72.042, 80.   ]),
 <BarContainer object of 10 artists>)

df['票价'].describe()#票价基本信息
count    891.000000
mean      32.204208
std       49.693429
min        0.000000
25%        7.910400
50%       14.454200
75%       31.000000
max      512.329200
Name: 票价, dtype: float64
plt.hist(df['票价'])#票价的直方图
(array([732., 106.,  31.,   2.,  11.,   6.,   0.,   0.,   0.,   3.]),
 array([  0.     ,  51.23292, 102.46584, 153.69876, 204.93168, 256.1646 ,
        307.39752, 358.63044, 409.86336, 461.09628, 512.3292 ]),
 <BarContainer object of 10 artists>)

df['父母与小孩个数'].describe()#父母与小孩个数基本信息
count    891.000000
mean       0.381594
std        0.806057
min        0.000000
25%        0.000000
50%        0.000000
75%        0.000000
max        6.000000
Name: 父母与小孩个数, dtype: float64
plt.hist(df['父母与小孩个数'])#父母与小孩个数的直方图
(array([678., 118.,   0.,  80.,   0.,   5.,   4.,   0.,   5.,   1.]),
 array([0. , 0.6, 1.2, 1.8, 2.4, 3. , 3.6, 4.2, 4.8, 5.4, 6. ]),
 <BarContainer object of 10 artists>)

声明:本站所有文章,如无特殊说明或标注,均为本站原创发布。任何个人或组织,在未征得本站同意时,禁止复制、盗用、采集、发布本站内容到任何网站、书籍等各类媒体平台。如若本站内容侵犯了原著者的合法权益,可联系我们进行处理。