Skip to main content

Summary Statistics

gambas provides summary statistics functions for Series objects. Every statistics function returns a StatsResult object, defined as follows.

type StatsResult struct {
UsedFunc string
Result float64
Err error
}

UsedFunc denotes what function has been used. Result is the result of the calculation. Err holds any errors that the function has encountered.

The dataset used in the examples, neo_v2.csv, is NASA's list of Nearest Earth Objects, sourced from Kaggle.

Count

func (s *Series) Count() StatsResult

Count counts the number of non-NaN elements in a column.

df, err := gambas.ReadCsv(filepath.Join(".", "neo_v2.csv"), []string{"id"})
if err != nil {
fmt.Println(err)
}

col1, err := df.LocCol("est_diameter_min")
if err != nil {
fmt.Println(err)
}

res := col1.Count()
fmt.Println(res.UsedFunc)
fmt.Println(res.Result)
fmt.Println(res.Err)
Count
90836
<nil>

Mean

func (s *Series) Mean() StatsResult

Mean returns the mean of the elements in a column.

df, err := gambas.ReadCsv(filepath.Join(".", "neo_v2.csv"), []string{"id"})
if err != nil {
fmt.Println(err)
}

col1, err := df.LocCol("est_diameter_min")
if err != nil {
fmt.Println(err)
}

res := col1.Mean()
fmt.Println(res.UsedFunc)
fmt.Println(res.Result)
fmt.Println(res.Err)
Mean
0.127
<nil>

Median

func (s *Series) Median() StatsResult

Median returns the median of the elements in a column.

df, err := gambas.ReadCsv(filepath.Join(".", "neo_v2.csv"), []string{"id"})
if err != nil {
fmt.Println(err)
}

col1, err := df.LocCol("est_diameter_min")
if err != nil {
fmt.Println(err)
}

res := col1.Median()
fmt.Println(res.UsedFunc)
fmt.Println(res.Result)
fmt.Println(res.Err)
Median
0.048
<nil>

Std

func (s *Series) Std() StatsResult

Std returns the sample standard deviation of the elements in a column.

df, err := gambas.ReadCsv(filepath.Join(".", "neo_v2.csv"), []string{"id"})
if err != nil {
fmt.Println(err)
}

col1, err := df.LocCol("est_diameter_min")
if err != nil {
fmt.Println(err)
}

res := col1.Std()
fmt.Println(res.UsedFunc)
fmt.Println(res.Result)
fmt.Println(res.Err)
Std
0.299
<nil>

Min

func (s *Series) Min() StatsResult

Min returns the smallest element in a column.

df, err := gambas.ReadCsv(filepath.Join(".", "neo_v2.csv"), []string{"id"})
if err != nil {
fmt.Println(err)
}

col1, err := df.LocCol("est_diameter_min")
if err != nil {
fmt.Println(err)
}

res := col1.Min()
fmt.Println(res.UsedFunc)
fmt.Println(res.Result)
fmt.Println(res.Err)
Min
0.0006089126
<nil>

Max

func (s *Series) Max() StatsResult

Max returns the largest element in a column.

df, err := gambas.ReadCsv(filepath.Join(".", "neo_v2.csv"), []string{"id"})
if err != nil {
fmt.Println(err)
}

col1, err := df.LocCol("est_diameter_min")
if err != nil {
fmt.Println(err)
}

res := col1.Max()
fmt.Println(res.UsedFunc)
fmt.Println(res.Result)
fmt.Println(res.Err)
Max
37.8926498379
<nil>

Q1

func (s *Series) Q1() StatsResult

Q1 returns the lower quartile (25%) of the elements in a column. This does not include the median during calculation.

df, err := gambas.ReadCsv(filepath.Join(".", "neo_v2.csv"), []string{"id"})
if err != nil {
fmt.Println(err)
}

col1, err := df.LocCol("est_diameter_min")
if err != nil {
fmt.Println(err)
}

res := col1.Q1()
fmt.Println(res.UsedFunc)
fmt.Println(res.Result)
fmt.Println(res.Err)
Q1
0.0192555078
<nil>

Q2

func (s *Series) Q2() StatsResult

Q2 returns the middle quartile (50%) of the elements in a column. This accomplishes the same thing as Median.

df, err := gambas.ReadCsv(filepath.Join(".", "neo_v2.csv"), []string{"id"})
if err != nil {
fmt.Println(err)
}

col1, err := df.LocCol("est_diameter_min")
if err != nil {
fmt.Println(err)
}

res := col1.Q2()
fmt.Println(res.UsedFunc)
fmt.Println(res.Result)
fmt.Println(res.Err)
Q2
0.048
<nil>

Q3

func (s *Series) Q3() StatsResult

Q3 returns the upper quartile (75%) of the elements in a column. This does not include the median during calculation.

df, err := gambas.ReadCsv(filepath.Join(".", "neo_v2.csv"), []string{"id"})
if err != nil {
fmt.Println(err)
}

col1, err := df.LocCol("est_diameter_min")
if err != nil {
fmt.Println(err)
}

res := col1.Q3()
fmt.Println(res.UsedFunc)
fmt.Println(res.Result)
fmt.Println(res.Err)
Q3
0.1434019235
<nil>

Describe

func (s *Series) Describe() ([]StatsResult, error)

Describe runs through the most commonly used statistics functions, prints their output, and returns the results as a slice of StatsResult.

df, err := gambas.ReadCsv(filepath.Join(".", "neo_v2.csv"), []string{"id"})
if err != nil {
fmt.Println(err)
}

col1, err := df.LocCol("est_diameter_min")
if err != nil {
fmt.Println(err)
}

res, err := col1.Describe()
if err != nil {
fmt.Println(err)
}
fmt.Println(res)
Count: 90836
Mean: 0.127
Median: 0.048
Std: 0.299
Min: 0.0006089126
Max: 37.8926498379
Q1: 0.0192555078
Q2: 0.048
Q3: 0.1434019235
[{Count 90836 <nil>} {Mean 0.127 <nil>} {Median 0.048 <nil>} {Std 0.299 <nil>} {Min 0.0006089126 <nil>} {Max 37.8926498379 <nil>} {Q1 0.0192555078 <nil>} {Q2 0.048 <nil>} {Q3 0.1434019235 <nil>}]