Data Frame
- 2차원 형태의 데이터를 다루기 위한 자료형
- column: 데이터의 특징으로 구분함
- row: 레코드를 의미 (대상이 되는 것)
- pandas는 numpy기반으로 만들어졌으며 더 향상된 기능들을 가지고 있음.
- numpy의 경우 자료형이 통일되어야 하지만 pandas는 여러 가지 자료형을 혼용할 수 있음
import pandas as pd
# Each inner list is one row; column and row labels are supplied explicitly.
two_dimensional_list = [
    ['Way of White Corona', 15, 1, 18],
    ['Projected Heal', 55, 1, 28],
    ['Lighting Arrow', 19, 1, 35],
]
data = pd.DataFrame(
    data=two_dimensional_list,
    index=['a', 'b', 'c'],
    columns=['Miracle', 'FP_cost', 'Slots_Used', 'Faith_Required'],
)
data
|
Miracle |
FP_cost |
Slots_Used |
Faith_Required |
a |
Way of White Corona |
15 |
1 |
18 |
b |
Projected Heal |
55 |
1 |
28 |
c |
Lighting Arrow |
19 |
1 |
35 |
type(data)
pandas.core.frame.DataFrame
data.columns
Index(['Miracle', 'FP_cost', 'Slots_Used', 'Faith_Required'], dtype='object')
data.index
Index(['a', 'b', 'c'], dtype='object')
data.dtypes
Miracle object
FP_cost int64
Slots_Used int64
Faith_Required int64
dtype: object
- object: 문자열, int64: 정수형
- 컬럼마다 다른 자료형을 쓸 수 있지만 한 컬럼 안에는 같은 자료형이 들어가야 함
DataFrame 만들기
- 2차원 리스트, 2차원 numpy array, pandas series를 담고 있는 리스트로 DataFrame 만들기
import numpy as np
import pandas as pd
# Three equivalent row-wise sources for the same table: a nested list,
# a 2-D NumPy array, and a list of pandas Series (one Series per row).
two_dimensional_list = [
    ['Way of White Corona', 15, 1, 18],
    ['Projected Heal', 55, 1, 28],
    ['Lighting Arrow', 19, 1, 35],
]
# dtype=object keeps the integers as integers. Without it NumPy coerces the
# mixed str/int rows to a single string dtype, turning 15 into '15'.
two_dimensional_array = np.array(two_dimensional_list, dtype=object)
list_of_series = [
    pd.Series(['Way of White Corona', 15, 1, 18]),
    pd.Series(['Projected Heal', 55, 1, 28]),
    pd.Series(['Lighting Arrow', 19, 1, 35]),
]
data = pd.DataFrame(
    two_dimensional_list,
    columns=['Miracle', 'FP_cost', 'Slots_Used', 'Faith_Required'],
    index=['a', 'b', 'c'],
)
data
|
Miracle |
FP_cost |
Slots_Used |
Faith_Required |
a |
Way of White Corona |
15 |
1 |
18 |
b |
Projected Heal |
55 |
1 |
28 |
c |
Lighting Arrow |
19 |
1 |
35 |
# Same table as before, built from the 2-D NumPy array instead of the nested list.
data1 = pd.DataFrame(
    data=two_dimensional_array,
    index=['a', 'b', 'c'],
    columns=['Miracle', 'FP_cost', 'Slots_Used', 'Faith_Required'],
)
data1
|
Miracle |
FP_cost |
Slots_Used |
Faith_Required |
a |
Way of White Corona |
15 |
1 |
18 |
b |
Projected Heal |
55 |
1 |
28 |
c |
Lighting Arrow |
19 |
1 |
35 |
# Each Series in the list becomes one row. Column labels are assigned after
# construction, replacing the positional labels taken from the Series.
data2 = pd.DataFrame(
    data=list_of_series,
    index=['a', 'b', 'c'],
)
data2.columns = [
    'Miracle',
    'FP_cost',
    'Slots_used',
    'Faith_Required',
]
data2
|
Miracle |
FP_cost |
Slots_used |
Faith_Required |
a |
Way of White Corona |
15 |
1 |
18 |
b |
Projected Heal |
55 |
1 |
28 |
c |
Lighting Arrow |
19 |
1 |
35 |
- 딕셔너리로 DataFrame 만들기
- key에는 column 이름을, value에는 해당 column에 들어갈 값들을 입력
import numpy as np
import pandas as pd
# Column-wise data shared by the three dictionary variants below.
Miracles = ['Way of White Corona', 'Projected Heal', 'Lighting Arrow']
FP_cost = [15, 55, 19]
Slots_Used = [1, 1, 1]
Faith_Required = [18, 28, 35]

# dict1: column name -> plain Python list.
dict1 = dict(
    Miracle=Miracles,
    FP_cost=FP_cost,
    Slots_Used=Slots_Used,
    Faith_Required=Faith_Required,
)
# dict2: same columns, each wrapped in a NumPy array.
dict2 = {column: np.array(values) for column, values in dict1.items()}
# dict3: same columns, each wrapped in a pandas Series.
dict3 = {column: pd.Series(values) for column, values in dict1.items()}

# All three mappings yield the same table with a default integer index.
data1, data2, data3 = (pd.DataFrame(source) for source in (dict1, dict2, dict3))
data1
|
Miracle |
FP_cost |
Slots_Used |
Faith_Required |
0 |
Way of White Corona |
15 |
1 |
18 |
1 |
Projected Heal |
55 |
1 |
28 |
2 |
Lighting Arrow |
19 |
1 |
35 |
data2
|
Miracle |
FP_cost |
Slots_Used |
Faith_Required |
0 |
Way of White Corona |
15 |
1 |
18 |
1 |
Projected Heal |
55 |
1 |
28 |
2 |
Lighting Arrow |
19 |
1 |
35 |
data3
|
Miracle |
FP_cost |
Slots_Used |
Faith_Required |
0 |
Way of White Corona |
15 |
1 |
18 |
1 |
Projected Heal |
55 |
1 |
28 |
2 |
Lighting Arrow |
19 |
1 |
35 |
- 딕셔너리가 담긴 리스트로 DataFrame 만들기
import numpy as np
import pandas as pd
# One dict per row: the keys become column labels and the values fill that row.
# Named `miracle_records` rather than `list` so the builtin `list` is not
# shadowed for the rest of the session.
miracle_records = [
    {'Miracle': 'Way of White Corona', 'FP_cost': 15, 'Slots_Used': 1, 'Faith_Required': 18},
    {'Miracle': 'Projected Heal', 'FP_cost': 55, 'Slots_Used': 1, 'Faith_Required': 28},
    {'Miracle': 'Lighting Arrow', 'FP_cost': 19, 'Slots_Used': 1, 'Faith_Required': 35},
]
data = pd.DataFrame(miracle_records)
data
|
Miracle |
FP_cost |
Slots_Used |
Faith_Required |
0 |
Way of White Corona |
15 |
1 |
18 |
1 |
Projected Heal |
55 |
1 |
28 |
2 |
Lighting Arrow |
19 |
1 |
35 |
data.dtypes
Miracle object
FP_cost int64
Slots_Used int64
Faith_Required int64
dtype: object
- data type
- int64: 정수
- float64: 소수
- object: 텍스트
- bool: 불린
- datetime64: 날짜와 시간
- category: 카테고리
외부 파일 읽기
import pandas as pd
# Load the CSV at the relative path 'test.csv' into a DataFrame. No index_col
# is given, so the rows get default integer labels (0, 1, 2 in the output).
miracle_df = pd.read_csv('test.csv')
miracle_df
|
Miracle |
FP_cost |
Slots_used |
Faith_Required |
0 |
Way of White Corona |
15 |
1 |
18 |
1 |
Projected Heal |
55 |
1 |
28 |
2 |
Lighting Arrow |
19 |
1 |
35 |
type(miracle_df)
pandas.core.frame.DataFrame
# Re-read the same file, this time using the 'Miracle' column as the row
# index instead of the default integer labels.
miracle_df = pd.read_csv('test.csv', index_col = 'Miracle')
miracle_df
|
FP_cost |
Slots_used |
Faith_Required |
Miracle |
|
|
|
Way of White Corona |
15 |
1 |
18 |
Projected Heal |
55 |
1 |
28 |
Lighting Arrow |
19 |
1 |
35 |