Verification of dependency constraints algorithm 1 (MapReduce)

Inter-columns corrections algorithm 2

Map1(key,value)

// key : data source name

// value : Source contents //set of tuples

Begin

For all tuple in value do

EmitIntermediate("Xi;Yij", "1") // count occurrences of Xi;Yij

End For

End Map1

Reduce1(key,values)

// key : Xi;Yij

//values : a list of counts

int αi = 0

file f

For all v in values do

αi += ParseInt(v) // sum the counts emitted for key Xi;Yij

End for

Emit(αi)

Write(f, αi)

End Reduce1

Map2(key,value)

// key : f

// value : f contents

For all word Xi in value do

EmitIntermediate(Xi, "1") // count occurrences of Xi

End For

End Map2

Reduce2(key, values )

//key : Xi

//values : a list of counts

int βi=0;

Boolean validDF=0

For all v in values do

βi += ParseInt(v) // count occurrences of Xi

End for

Emit(βi)

If βi = 1 then

validDF=1

End if

End Reduce2

Begin inter-columns corrections

Input: DD Data Dictionary, S data source, X, Y subsets of

columns from S,

SubCat Dominant subcategory of S

Output: S' the data source with automatic corrections

E2 ← CreateValidDFs(DDVS, X, Y, SubCat)

S' ← CorrectionsDFs(S, E2)

End inter-columns corrections

Function CreateValidDFs

Input: DDVS,X,Y,SubCat

Output: E2

// Get from DDVS the concerned categories X and Y

E1= {SELECT * FROM DDVS

WHERE DDVS.CATEGORY = X UNION

SELECT * FROM DDVS

WHERE DDVS.CATEGORY = Y}

// Get from DDVS all correct values (xi,yi)

E2= {SELECT A.Subcat, B.Subcat FROM E1 A, E1 B

WHERE A.PRIMARYKEY=B.FOREIGNKEY}

End Function CreateValidDFs

Function CorrectionsDFs

Input: E2, S

Output: S'

For lj from S (j=1;n) do // lj tuple from S, n (number of tuples)

While (Sx[j] ≠ DDVSx[l]) AND (l ≤ nl) do

// nl number of tuples of DDVS

If SX[j] = DDVSX[l] then

Sy[j] = DDVSy[l]

Else

l++

End If

End While

End For

End Function CorrectionsDFs