Skip to main content

Reshaping

You can reshape a DataFrame object using these functions.

The examples below use neo_v2.csv, NASA's list of Nearest Earth Objects, sourced from Kaggle.

Pivot

func (df *DataFrame) Pivot(column, value string) (DataFrame, error)

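Pivot returns a reorganized DataFrame: the rows keep the existing index, each distinct entry of the given column becomes a new column, and each cell holds the matching entry from the value column (NaN where there is none).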

df, err := gambas.ReadCsv(filepath.Join(".", "neo_v2.csv"), []string{"id"})
if err != nil {
fmt.Println(err)
}
df.Head(5)
fmt.Println("")

res, err := df.Pivot("hazardous", "est_diameter_min")
if err != nil {
fmt.Println(err)
}

res.Head(5)
id         |    id         name                   est_diameter_min    est_diameter_max    relative_velocity    miss_distance            orbiting_body    sentry_object    absolute_magnitude    hazardous    
2162635 | 2162635 162635 (2000 SS164) 1.1982708007 2.6794149658 13569.2492241812 5.483974408284605e+07 Earth false 16.73 false
2277475 | 2277475 277475 (2005 WK4) 0.2658 0.5943468684 73588.7266634981 6.143812652395093e+07 Earth false 20 true
2512244 | 2512244 512244 (2015 YE18) 0.7220295577 1.6145071727 114258.6921290512 4.979872494045679e+07 Earth false 17.83 false
3596030 | 3596030 (2012 BV13) 0.096506147 0.2157943048 24764.3031380016 2.543497272075825e+07 Earth false 22.2 false
3667127 | 3667127 (2014 GE35) 0.2550086879 0.5702167609 42737.7337647264 4.627556700130072e+07 Earth false 20.09 true

id | false true
2162635 | 1.1982708007 NaN
2277475 | NaN 0.2658
2512244 | 0.7220295577 NaN
3596030 | 0.096506147 NaN
3667127 | NaN 0.2550086879

PivotTable

func (df *DataFrame) PivotTable(index, column, value string, aggFunc StatsFunc) (DataFrame, error)

PivotTable rearranges the data by a given index and column.

Values that share the same index and column are combined using the aggregation function. Pick three columns from the DataFrame to serve as the index, column, and value, respectively.

PivotTable ignores NaN values.

df, err := gambas.ReadCsv(filepath.Join(".", "neo_v2.csv"), nil)
if err != nil {
fmt.Println(err)
}
df.SortByValues("id", true)
df.Head(12)
fmt.Println("")

res, err := df.PivotTable("id", "hazardous", "miss_distance", gambas.Mean)
if err != nil {
fmt.Println(err)
}

res.Head(5)
         |    id         name                      est_diameter_min    est_diameter_max    relative_velocity    miss_distance             orbiting_body    sentry_object    absolute_magnitude    hazardous    
12709 | 2000433 433 Eros (A898 PA) 23.0438466577 51.5276075896 15884.2526231559 5.468807778293672e+07 Earth false 10.31 false
37651 | 2000433 433 Eros (A898 PA) 23.0438466577 51.5276075896 21402.705247412 2.6729521135077037e+07 Earth false 10.31 false
56533 | 2000433 433 Eros (A898 PA) 23.0438466577 51.5276075896 21761.7034264303 3.120591927495648e+07 Earth false 10.31 false
1847 | 2000719 719 Albert (A911 TB) 2.0443487103 4.5713026859 27551.5971939875 4.258288106079324e+07 Earth false 15.57 false
36418 | 2001036 1036 Ganymed (A924 UB) 37.8926498379 84.7305408852 51496.9232928228 5.3721237819369085e+07 Earth false 9.23 false
13527 | 2001566 1566 Icarus (1949 MA) 1.4274305148 3.1918316641 76768.6272477926 5.1882752851231776e+07 Earth false 16.35 true
17077 | 2001566 1566 Icarus (1949 MA) 1.4274305148 3.1918316641 136986.6291056903 4.442794284496872e+07 Earth false 16.35 true
28226 | 2001566 1566 Icarus (1949 MA) 1.4274305148 3.1918316641 120524.2906272869 6.046637755115862e+07 Earth false 16.35 true
68073 | 2001566 1566 Icarus (1949 MA) 1.4274305148 3.1918316641 108801.2963741598 8.053781761441007e+06 Earth false 16.35 true
73540 | 2001566 1566 Icarus (1949 MA) 1.4274305148 3.1918316641 78130.8042822814 6.564713381994684e+07 Earth false 16.35 true
4156 | 2001580 1580 Betulia (1950 KA) 3.0658787593 6.8555133165 105157.7758512475 3.5573935508316e+07 Earth false 14.69 false
67943 | 2001580 1580 Betulia (1950 KA) 3.0658787593 6.8555133165 109184.9019317352 5.268620819957744e+07 Earth false 14.69 false

id | false true
2000433 | 3.7541172731e+07 NaN
2000719 | 4.2582881061e+07 NaN
2001036 | 5.3721237819e+07 NaN
2001566 | NaN 4.6095597766e+07
2001580 | 4.4130071854e+07 NaN

Melt

func (df *DataFrame) Melt(colName, valueName string) (DataFrame, error)

Melt converts the table from wide to long format and returns the result.

Use Melt to revert to pre-Pivot format.

df, err := gambas.ReadCsv(filepath.Join(".", "neo_v2.csv"), nil)
if err != nil {
fmt.Println(err)
}

pivoted, err := df.PivotTable("id", "hazardous", "miss_distance", gambas.Mean)
if err != nil {
fmt.Println(err)
}

pivoted.Head(5)
fmt.Println("")

melted, err := pivoted.Melt("hazardous", "miss_distance")
if err != nil {
fmt.Println(err)
}

melted.Head(5)
id         |    false               true                
2000433 | 3.7541172731e+07 NaN
2000719 | 4.2582881061e+07 NaN
2001036 | 5.3721237819e+07 NaN
2001566 | NaN 4.6095597766e+07
2001580 | 4.4130071854e+07 NaN

id | id hazardous miss_distance
2000433 | 2000433 false 3.7541172731e+07
2000433 | 2000433 true NaN
2000719 | 2000719 false 4.2582881061e+07
2000719 | 2000719 true NaN
2001036 | 2001036 false 5.3721237819e+07

GroupBy

func (df *DataFrame) GroupBy(by ...string) (GroupBy, error)

GroupBy groups the rows of a DataFrame object by the values in the selected columns and returns a GroupBy object.

df, err := gambas.ReadCsv(filepath.Join(".", "neo_v2.csv"), nil)
if err != nil {
fmt.Println(err)
}

filtered, err := df.LocRows([]interface{}{0}, []interface{}{1}, []interface{}{2}, []interface{}{3}, []interface{}{4})
if err != nil {
fmt.Println(err)
}

filtered.Print()
fmt.Println("")

gb, err := filtered.GroupBy("hazardous")
if err != nil {
fmt.Println(err)
}

res, err := gb.Agg([]string{"relative_velocity"}, gambas.Mean)
if err != nil {
fmt.Println(err)
}

res.Print()
     |    id         name                   est_diameter_min    est_diameter_max    relative_velocity    miss_distance            orbiting_body    sentry_object    absolute_magnitude    hazardous    
0 | 2162635 162635 (2000 SS164) 1.1982708007 2.6794149658 13569.2492241812 5.483974408284605e+07 Earth false 16.73 false
1 | 2277475 277475 (2005 WK4) 0.2658 0.5943468684 73588.7266634981 6.143812652395093e+07 Earth false 20 true
2 | 2512244 512244 (2015 YE18) 0.7220295577 1.6145071727 114258.6921290512 4.979872494045679e+07 Earth false 17.83 false
3 | 3596030 (2012 BV13) 0.096506147 0.2157943048 24764.3031380016 2.543497272075825e+07 Earth false 22.2 false
4 | 3667127 (2014 GE35) 0.2550086879 0.5702167609 42737.7337647264 4.627556700130072e+07 Earth false 20.09 true

hazardous | hazardous relative_velocity
false | false 50864.081
true | true 58163.23