Commit 1a001009 authored by Claudiu Mihali

Client library latest version

parent 7c72b14d
age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
Age,Job,Marital,Education,Default,Balance,Housing,Loan,Contact,Day,Month,Duration,Campaign,Pdays,Previous,Poutcome,Y
30,10,1,0,0,1787,0,0,0,19,10,79,1,-1,0,3,0
33,7,1,1,0,4789,1,1,0,11,8,220,1,339,4,0,0
35,4,2,2,0,1350,1,0,0,16,0,185,1,330,1,0,0
......
import pandas as pd
import matplotlib.pyplot as plt
# df = pd.read_csv("banknote.csv")
# df = df.sample(frac=1)
# df.to_csv("banknote_s.csv", index = None, header = True, float_format='%g')
# le = LabelEncoder()
# df["job"] = le.fit_transform(df["job"])
# df["marital"] = le.fit_transform(df["marital"])
@@ -53,7 +57,6 @@ def test(in_file_path, used_features):
# file_path1 = "banknote.csv"
# file_path2 = "banknote_transformed.csv"
# file_path3 = "banknote_noisy.csv"
# used_features =[
# "variance",
@@ -64,7 +67,6 @@ def test(in_file_path, used_features):
file_path1 = "diabetes.csv"
file_path2 = "diabetes_transformed.csv"
file_path3 = "diabetes_noisy.csv"
used_features = [
"Pregnancies",
......
@@ -10,6 +10,8 @@ from sklearn.ensemble._forest import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble._weight_boosting import AdaBoostClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
# le = LabelEncoder()
# df["job"] = le.fit_transform(df["job"])
@@ -28,18 +30,15 @@ from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
def test(in_file_path, used_features, output_feature):
df = pd.read_csv(in_file_path)
#df[["age", "balance", "day", "campaign", "pdays", "previous"]] = df[["age", "balance", "day", "campaign", "pdays", "previous"]].round(0)
#df[["Pregnancies", "Glucose", "BloodPressure", "SkinThickness", "Insulin", "Age"]] = df[["Pregnancies", "Glucose", "BloodPressure", "SkinThickness", "Insulin", "Age"]].round(0)
#print(df.head())
xx = df[used_features + [output_feature]].copy()  # explicit copy so the rounding below is for inspection only and cannot warn about or write back into df
xx[used_features] = xx[used_features].round(0)
print(xx.head())
print(xx.describe())
print(xx.nunique())
sc = StandardScaler()
df[used_features] = sc.fit_transform(df[used_features])
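# StandardScaler rescales each used feature to zero mean and unit variance before the models are fitted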
y = df[output_feature]
x = df.drop([output_feature], axis=1)[used_features]
@@ -58,53 +57,55 @@ def test(in_file_path, used_features, output_feature):
for model in models:
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
print("{} {}:\nNumber of mislabeled points out of a total {} points : {}, performance {:05.2f}%\n"
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
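# for binary labels, confusion_matrix returns [[tn, fp], [fn, tp]], so ravel() yields tn, fp, fn, tp in that order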
print("{} {}:\n {}\n Accuracy = {:5.4f}\n Precision = {:5.4f}\n Recall = {:5.4f}"
.format(
type(model).__name__,
in_file_path,
x_test.shape[0],
(y_test != y_pred).sum(),
100*(1-(y_test != y_pred).sum()/x_test.shape[0])
(tn, fp, fn, tp),
(tp+tn)/(tp+tn+fp+fn),
tp/(tp+fp),
tp/(tp+fn)
))
# file_path1 = "bank_labeled.csv"
# file_path2 = "bank_transformed.csv"
# file_path2 = "bank_labeled_fpga.csv"
# used_features =[
# "age",
# #"job",
# #"marital",
# #"education",
# #"default",
# "balance",
# #"housing",
# #"loan",
# #"contact",
# "day",
# #"month",
# #"duration",
# "campaign",
# "pdays",
# "previous",
# #"poutcome"
# "Age",
# "Job",
# "Marital",
# "Education",
# "Default",
# "Balance",
# "Housing",
# "Loan",
# "Contact",
# "Day",
# "Month",
# "Duration",
# "Campaign",
# "Pdays",
# "Previous",
# "Poutcome"
# ]
# output_feature = "y"
# output_feature = "Y"
# file_path1 = "banknote.csv"
# file_path2 = "banknote_transformed.csv"
# file_path1 = "banknote_s.csv"
# file_path2 = "banknote_s_fpga.csv"
# used_features =[
# "variance",
# "skewness",
# "curtosis",
# "entropy"
# "Variance",
# "Skewness",
# "Curtosis",
# "Entropy"
# ]
# output_feature = "class"
# output_feature = "Class"
file_path1 = "diabetes.csv"
file_path2 = "diabetes_transformed.csv"
file_path2 = "diabetes_fpga.csv"
used_features = [
"Pregnancies",
......
@@ -2,6 +2,7 @@
package ops
import (
"encoding/binary"
"errors"
"fmt"
"math"
@@ -15,6 +16,7 @@ const (
idxLen = 2
maxKLen = 8 - idxLen
perturbedGroupSize = 3
rotationMatrixKey = "rotmat"
)
type keyError struct {
@@ -121,26 +123,21 @@ func (c Client) GetWithCheckpoint(key []byte, tokenBucketIdx int, tokensEachTick
MaxBurstSize: [2]byte{byte(maxBurstSize & 0xFF), byte((maxBurstSize >> 8) & 0xFF)}})
rh := internal.ValueResHandler{}
err := c.conn.Send(gOp, &rh)
if err != nil {
return nil, err
}
pRes := gOp.Result
if pRes == nil {
return nil, nil
}
return pRes, nil
return gOp.Result, nil
}
func (c Client) GetPerturbed(key [perturbedGroupSize][]byte) ([][]byte, error) {
rqs := make([]*internal.Operation, perturbedGroupSize)
func (c Client) GetPerturbed(key [][]byte) ([][]byte, error) {
if len(key)%perturbedGroupSize != 0 {
return nil, fmt.Errorf("Error GetPerturbed: the number of keys should be multiple of perturbedGroupSize.")
}
for i := 0; i < perturbedGroupSize; i++ {
rqs := make([]*internal.Operation, len(key))
for i := 0; i < len(key); i++ {
if len(key[i]) > maxKLen {
return nil, newKeyError(maxKLen)
}
@@ -159,12 +156,11 @@ func (c Client) GetPerturbed(key [perturbedGroupSize][]byte) ([][]byte, error) {
return nil, err
}
results := make([][]byte, perturbedGroupSize)
results := make([][]byte, len(key))
for i, r := range rqs {
if r.Result == nil {
return nil, fmt.Errorf("Corrupted packet of key %x", r.Key)
}
results[i] = r.Result
}
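GetPerturbed now accepts a slice of keys whose length must be a multiple of perturbedGroupSize instead of a fixed-size array. A minimal call sketch, written as if inside this ops package (exampleGetPerturbed, the key values, and the already-connected Client c are assumptions for illustration):
// Sketch only: two perturbed groups of perturbedGroupSize (3) keys each.
// c is assumed to be an already-connected Client; the keys are illustrative
// and each stays within maxKLen.
func exampleGetPerturbed(c Client) ([][]byte, error) {
	keys := [][]byte{
		[]byte("k0a"), []byte("k0b"), []byte("k0c"), // group 0
		[]byte("k1a"), []byte("k1b"), []byte("k1c"), // group 1
	}
	// returns an error if len(keys) is not a multiple of perturbedGroupSize
	// or if any key exceeds maxKLen
	return c.GetPerturbed(keys)
}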
@@ -225,44 +221,18 @@ func (c Client) GetBulkN(keys [][]byte, getCondNo int, getNo int, n int) ([][]by
return results, nil
}
func (c Client) GetBulk(keys [][]byte, getCondNo int, getNo int) ([][]byte, error) {
if getCondNo%perturbedGroupSize != 0 {
return nil, fmt.Errorf("Error GetBulk: getCondNo should be multiple of perturbedGroupSize.")
}
rqs := make([]*internal.Operation, getCondNo+getNo)
for i := 0; i < getCondNo+getNo; i++ {
initKey := make([]byte, len(keys[i])+idxLen)
copy(initKey[idxLen:], keys[i])
if i < getCondNo {
value := []byte{0xFF}
rqs[i] = internal.NewGetCondOp(initKey, value)
} else {
rqs[i] = internal.NewGetOp(initKey)
}
}
rh := internal.ValueResHandler{}
err := c.conn.SendBulk(rqs, &rh)
if err != nil {
return nil, err
}
results := make([][]byte, getCondNo+getNo)
for i, r := range rqs {
if r.Result == nil {
return nil, fmt.Errorf("Corrupted packet of key %x", r.Key)
func (c Client) SetRotationMatrix(matrix [][]float64) error {
matrixBytes := make([]byte, 8*len(matrix)*len(matrix[0]))
for i := 0; i < len(matrix[0]); i++ {
for j := 0; j < len(matrix); j++ {
binary.LittleEndian.PutUint64(matrixBytes[(i*len(matrix)+j)*8:(i*len(matrix)+j)*8+8], math.Float64bits(matrix[j][i]))
}
results[i] = r.Result
}
return results, nil
return c.Set([]byte(rotationMatrixKey), matrixBytes)
}
func (c Client) GetRotationMatrix(key []byte) error {
if len(key) > maxKLen {
return newKeyError(maxKLen)
}
func (c Client) GetRotationMatrix() error {
key := []byte(rotationMatrixKey)
initKey := make([]byte, len(key)+idxLen)
copy(initKey[idxLen:], key)
@@ -271,7 +241,6 @@ func (c Client) GetRotationMatrix(key []byte) error {
op := internal.NewGetCondOp(initKey, value)
//rh := internal.NoResHandler{}
rh := internal.ValueResHandler{}
return c.conn.Send(op, &rh)
......
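SetRotationMatrix above flattens the matrix column-major into little-endian float64 words and stores it under the fixed key "rotmat". A decoding sketch for that byte layout, written as if inside this ops package (decodeRotationMatrix and its explicit rows/cols parameters are assumptions, since the dimensions are not stored alongside the value):
// Sketch only: inverse of the encoding in SetRotationMatrix. Column i occupies
// a contiguous block of rows*8 bytes, and each element is a little-endian
// float64, so matrix[j][i] sits at byte offset (i*rows+j)*8.
func decodeRotationMatrix(matrixBytes []byte, rows, cols int) [][]float64 {
	matrix := make([][]float64, rows)
	for j := 0; j < rows; j++ {
		matrix[j] = make([]float64, cols)
	}
	for i := 0; i < cols; i++ {
		for j := 0; j < rows; j++ {
			bits := binary.LittleEndian.Uint64(matrixBytes[(i*rows+j)*8:])
			matrix[j][i] = math.Float64frombits(bits)
		}
	}
	return matrix
}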
package ops
import (
//"encoding/binary"
"math"
//"os"
"multes_client_library_priv/internal"
//"gitlab.software.imdea.org/fpga/multes/sw/client-library/ops/parquet"
//"gitlab.software.imdea.org/fpga/multes/sw/client-library/ops/parquet/transformable"
)
// BatchSet performs several high-level sets in a single batch on network level
......
@@ -20,7 +20,7 @@ type Composer struct {
bytesWritten int
}
func NewComposer(pageValues [][]byte, parquetFilePath string, parquetSchema ParquetSchema) (*Composer, error) {
func NewComposer(pageValues [][]byte, parquetFilePath string, parquetSchema Schema) (*Composer, error) {
c := new(Composer)
var err error
......
@@ -7,14 +7,10 @@ import (
"github.com/apache/thrift/lib/go/thrift"
//"github.com/xitongsys/parquet-go/Compress"
"github.com/xitongsys/parquet-go/Layout"
"github.com/xitongsys/parquet-go/ParquetFile"
"github.com/xitongsys/parquet-go/ParquetReader"
"github.com/xitongsys/parquet-go/parquet"
//"os"
//"strconv"
)
// var ok = true
@@ -28,7 +24,7 @@ type Divider struct {
pageValues [][]byte
}
func NewDivider(parquetFilePath string, parquetSchema ParquetSchema, minValueSize int, threadsNo int) (*Divider, error) {
func NewDivider(parquetFilePath string, parquetSchema Schema, minValueSize int, threadsNo int) (*Divider, error) {
d := new(Divider)
var err error
......
@@ -2,8 +2,9 @@ package parquet
import "fmt"
type ParquetSchema interface {
type Schema interface {
GetSchemaObjectReference() interface{}
GetColumnNames() []string
}
type Air struct {
@@ -27,6 +28,12 @@ func (obj Air) GetSchemaObjectReference() interface{} {
return &obj
}
func (obj Air) GetColumnNames() []string {
return []string{"ActivityPeriod", "OperatingAirline", "OperatingAirlineIATACode", "PublishedAirline", "PublishedAirlineIATACode", "GEOSummary",
"GEORegion", "LandingAircraftType", "AircraftBodyType", "AircraftManufacturer", "AircraftModel", "AircraftVersion", "LandingCount",
"TotalLandedWeight"}
}
type BankClientMarketingData struct {
Age *float64 `parquet:"name=Age, type=DOUBLE, repetitiontype=OPTIONAL"`
Job *float64 `parquet:"name=Job, type=DOUBLE, repetitiontype=OPTIONAL"`
@@ -51,6 +58,11 @@ func (obj BankClientMarketingData) GetSchemaObjectReference() interface{} {
return &obj
}
func (obj BankClientMarketingData) GetColumnNames() []string {
return []string{"Age", "Job", "Marital", "Education", "Default", "Balance", "Housing", "Loan", "Contact", "Day", "Month", "Duration", "Campaign",
"Pdays", "Previous", "Poutcome", "Y"}
}
func (obj BankClientMarketingData) String() string {
return fmt.Sprintf("BankClientMarketingData{Age:%f, Job:%f, Marital:%f, Education:%f, Default:%f, Balance:%f, Housing:%f, Loan:%f, "+
"Contact:%f, Day:%f, Month:%f, Duration:%f, Campaign:%f, Pdays:%f, Previous:%f, Poutcome:%f, Y:%f}", *obj.Age, *obj.Job, *obj.Marital,
@@ -74,8 +86,33 @@ func (obj PimaIndiansDiabetesData) GetSchemaObjectReference() interface{} {
return &obj
}
func (obj PimaIndiansDiabetesData) GetColumnNames() []string {
return []string{"Pregnancies", "Glucose", "BloodPressure", "SkinThickness", "Insulin", "BMI", "DiabetesPedigreeFunction", "Age", "Outcome"}
}
func (obj PimaIndiansDiabetesData) String() string {
return fmt.Sprintf("PimaIndiansDiabetesData{Pregnancies:%f, Glucose:%f, BloodPressure:%f, SkinThickness:%f, Insulin:%f, BMI:%f, DiabetesPedigreeFunction:%f, Age:%f, "+
"Outcome:%f}", *obj.Pregnancies, *obj.Glucose, *obj.BloodPressure, *obj.SkinThickness, *obj.Insulin, *obj.BMI, *obj.DiabetesPedigreeFunction, *obj.Age,
*obj.Outcome)
return fmt.Sprintf("PimaIndiansDiabetesData{Pregnancies:%f, Glucose:%f, BloodPressure:%f, SkinThickness:%f, Insulin:%f, BMI:%f, "+
"DiabetesPedigreeFunction:%f, Age:%f, Outcome:%f}", *obj.Pregnancies, *obj.Glucose, *obj.BloodPressure, *obj.SkinThickness, *obj.Insulin,
*obj.BMI, *obj.DiabetesPedigreeFunction, *obj.Age, *obj.Outcome)
}
type BanknoteData struct {
Variance *float64 `parquet:"name=Variance, type=DOUBLE, repetitiontype=OPTIONAL"`
Skewness *float64 `parquet:"name=Skewness, type=DOUBLE, repetitiontype=OPTIONAL"`
Curtosis *float64 `parquet:"name=Curtosis, type=DOUBLE, repetitiontype=OPTIONAL"`
Entropy *float64 `parquet:"name=Entropy, type=DOUBLE, repetitiontype=OPTIONAL"`
Class *float64 `parquet:"name=Class, type=DOUBLE, repetitiontype=OPTIONAL"`
}
func (obj BanknoteData) GetSchemaObjectReference() interface{} {
return &obj
}
func (obj BanknoteData) GetColumnNames() []string {
return []string{"Variance", "Skewness", "Curtosis", "Entropy", "Class"}
}
func (obj BanknoteData) String() string {
return fmt.Sprintf("BanknoteData{Variance:%f, Skewness:%f, Curtosis:%f, Entropy:%f, Class:%f}", *obj.Variance, *obj.Skewness, *obj.Curtosis,
*obj.Entropy, *obj.Class)
}
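With ParquetSchema renamed to Schema and GetColumnNames added to every dataset type, any of the types above can be passed wherever a Schema is expected, for example to the NewDivider and NewComposer constructors shown earlier. A usage sketch, written as if inside this parquet package (the parquet path and the minValueSize/threadsNo values are illustrative, not library defaults):
// Sketch only: BanknoteData satisfies Schema via GetSchemaObjectReference and
// GetColumnNames, so it can be handed straight to the divider.
func exampleDividerForBanknote() (*Divider, error) {
	var schema Schema = BanknoteData{}
	_ = schema.GetColumnNames() // ["Variance" "Skewness" "Curtosis" "Entropy" "Class"]
	return NewDivider("banknote_s.parquet", schema, 1024, 4)
}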
package transformable
// import (
// "strconv"
// )
type Air struct {
ActivityPeriod *int64 `parquet:"name=ActivityPeriod, type=INT64, repetitiontype=OPTIONAL"`
OperatingAirline *string `parquet:"name=OperatingAirline, type=UTF8, repetitiontype=OPTIONAL"`
OperatingAirlineIATACode *string `parquet:"name=OperatingAirlineIATACode, type=UTF8, repetitiontype=OPTIONAL"`
PublishedAirline *string `parquet:"name=PublishedAirline, type=UTF8, repetitiontype=OPTIONAL"`
PublishedAirlineIATACode *string `parquet:"name=PublishedAirlineIATACode, type=UTF8, repetitiontype=OPTIONAL"`
GEOSummary *string `parquet:"name=GEOSummary, type=UTF8, repetitiontype=OPTIONAL"`
GEORegion *string `parquet:"name=GEORegion, type=UTF8, repetitiontype=OPTIONAL"`
LandingAircraftType *string `parquet:"name=LandingAircraftType, type=UTF8, repetitiontype=OPTIONAL"`
AircraftBodyType *string `parquet:"name=AircraftBodyType, type=UTF8, repetitiontype=OPTIONAL"`
AircraftManufacturer *string `parquet:"name=AircraftManufacturer, type=UTF8, repetitiontype=OPTIONAL"`
AircraftModel *string `parquet:"name=AircraftModel, type=UTF8, repetitiontype=OPTIONAL"`
AircraftVersion *string `parquet:"name=AircraftVersion, type=UTF8, repetitiontype=OPTIONAL"`
LandingCount *int64 `parquet:"name=LandingCount, type=INT64, repetitiontype=OPTIONAL"`
TotalLandedWeight *int64 `parquet:"name=TotalLandedWeight, type=INT64, repetitiontype=OPTIONAL"`
}
func (air Air) Convert(values []string) Transformable {
a := Air{}
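// note: the commented-out assignments below reference Fly's field names rather
// than Air's; as written, Convert returns an empty Air value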
// a, _ := strconv.ParseInt(values[0], 10, 64)
// w, _ := strconv.ParseInt(values[10], 10, 64)
// m, _ := strconv.ParseFloat(values[11], 64)
// a.Activity = a
// a.Airline = values[1]
// a.AirlineCode = values[2]
// a.Plubished = values[3]
// a.PlubishedCode = values[4]
// a.GEO = values[5]
// a.Region = values[6]
// a.ActivityCode = values[7]
// a.CargoCode = values[8]
// a.CargoType = values[9]
// a.CargoWeight = w
// a.CargoMetric = m
return a
}
package transformable
import (
"strconv"
)
type Fly struct {
Activity int64 `parquet:"name=Activity, type=INT64"`
Airline string `parquet:"name=Airline, type=UTF8"`
AirlineCode string `parquet:"name=AirlineCode, type=UTF8"`
Plubished string `parquet:"name=Plubished, type=UTF8"`
PlubishedCode string `parquet:"name=PlubishedCode, type=UTF8"`
GEO string `parquet:"name=GEO, type=UTF8"`
Region string `parquet:"name=Region, type=UTF8"`
ActivityCode string `parquet:"name=ActivityCode, type=UTF8"`
CargoCode string `parquet:"name=CargoCode, type=UTF8"`
CargoType string `parquet:"name=CargoType, type=UTF8"`
CargoWeight int64 `parquet:"name=CargoWeight, type=INT64"`
CargoMetric float64 `parquet:"name=CargoMetric, type=DOUBLE"`
}
func (self Fly) Convert(values []string) Transformable {
f := Fly{}
a, _ := strconv.ParseInt(values[0], 10, 64)
w, _ := strconv.ParseInt(values[10], 10, 64)
m, _ := strconv.ParseFloat(values[11], 64)
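// note: strconv errors are ignored above, so unparseable values silently become 0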
f.Activity = a
f.Airline = values[1]
f.AirlineCode = values[2]
f.Plubished = values[3]
f.PlubishedCode = values[4]
f.GEO = values[5]
f.Region = values[6]
f.ActivityCode = values[7]
f.CargoCode = values[8]
f.CargoType = values[9]
f.CargoWeight = w
f.CargoMetric = m
return f
}
package transformable
import (
"strconv"
)
type PoliceReport struct {
IncidntNum string `parquet:"name=IncidntNum, type=UTF8"`
Category string `parquet:"name=Category, type=UTF8"`
Descript string `parquet:"name=Descript, type=UTF8"`
Dayofweek string `parquet:"name=DayOfWeek, type=UTF8"`
Date string `parquet:"name=Date, type=UTF8"`
Time string `parquet:"name=Time, type=UTF8"`
Pddistrict string `parquet:"name=PdDistrict, type=UTF8"`
Resolution string `parquet:"name=Resolution, type=UTF8"`
Address string `parquet:"name=Address, type=UTF8"`
X int64 `parquet:"name=X, type=INT64"`
Y int64 `parquet:"name=Y, type=INT64"`
Location string `parquet:"name=Location, type=UTF8"`
Pdid int64 `parquet:"name=PdId, type=INT64"`
}
func (self PoliceReport) Convert(values []string) Transformable {
pol := PoliceReport{}
x, _ := strconv.ParseInt(values[9], 10, 64)
y, _ := strconv.ParseInt(values[10], 10, 64)
p, _ := strconv.ParseInt(values[12], 10, 64)
pol.IncidntNum = values[0]
pol.Category = values[1]
pol.Descript = values[2]
pol.Dayofweek = values[3]
pol.Date = values[4]
pol.Time = values[5]
pol.Pddistrict = values[6]
pol.Resolution = values[7]
pol.Address = values[8]
pol.X = x
pol.Y = y
pol.Location = values[11]
pol.Pdid = p
return pol
}