Utilitaire pour indexer des informations de la blockchain et dessiner des Plots. Focalisé sur la toile de confiance. http://datajune.coinduf.eu/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

135 lines
5.4 KiB

module StatsExporter
using ..WotHistory, ..Config
using ProgressMeter, Serialization, Dates, DataFrames, CSV
"store computed statistics and manage cache"
struct WotStats
    # Statistics about the whole history, one slot per time interval.
    # All buffers are preallocated by the constructor; `compute_wotstats` fills
    # them in place and serializes the whole value as the cache.

    T::Int # number of days (time intervals)
    N::Int # number of ids
    members::Vector{Int} # number of active members for each day
    was_member::Vector{Int} # number of ids that were members at least once before each day
    total_referent::Vector{Int} # number of referent members for each day
    # proportion of referents reached: for each day, for each member id
    # (0 for the others), for each distance 1:5
    ipr::Array{Float64,3}
    avg_pr::Matrix{Float64} # average ipr over N, for each day and distance
    # proportion of members reaching over 80% of referents, for each day
    member_prop_over80percent::Vector{Float64}
    # betweenness centrality for each day, for each member (others are 0)
    btw_centralities::Matrix{Float64}

    # Default constructor: zero-initialised buffers for `T` intervals and `N` ids.
    # An explicit `new` call is required because the struct has nine fields
    # while the constructor only receives the two sizes.
    WotStats(T, N) = new(
        T,
        N,
        zeros(Int, T), # members
        zeros(Int, T), # was_member
        zeros(Int, T), # total_referent
        zeros(Float64, T, N, 5), # ipr
        zeros(Float64, T, 5), # avg_pr
        zeros(Float64, T), # member_prop_over80percent
        zeros(Float64, T, N), # btw_centralities
    )
end
"retreive from cache and set wotstats to date"
function load_wotstats!(w::WotSnapshot)::WotStats
    # Load the statistics cache from WOTSTATS_CACHE_FILE and move `w` to the
    # interval where computation has to resume.
    #
    # - Cache shorter than the history: return a fresh `WotStats` sized for the
    #   whole history with the cached values copied in, and set `w` to the first
    #   interval that is not cached (T + 1).
    # - Cache covering the whole history: return it unchanged, `w` set to T.
    # - Cache unreadable (missing file or any error while loading/copying):
    #   return an empty `WotStats`; `w` is left at whatever interval it had.
    try
        old_ws = deserialize(WOTSTATS_CACHE_FILE)
        # Only announce the cache once it has actually been read.
        @debug "found wotstats cache, loading it"
        T = old_ws.T
        N = old_ws.N
        if T < length(w) # not at the end
            new_ws = WotStats(length(w), w.N)
            # Copy the cached prefix; the remaining slots stay at zero.
            new_ws.members[1:T] .= old_ws.members
            new_ws.was_member[1:T] .= old_ws.was_member
            new_ws.total_referent[1:T] .= old_ws.total_referent
            new_ws.ipr[1:T, 1:N, :] .= old_ws.ipr
            new_ws.avg_pr[1:T, :] .= old_ws.avg_pr
            new_ws.member_prop_over80percent[1:T] .= old_ws.member_prop_over80percent
            new_ws.btw_centralities[1:T, 1:N] .= old_ws.btw_centralities
            WotHistory.set_current_interval!(w, T + 1)
            return new_ws
        else # already at the end
            WotHistory.set_current_interval!(w, T)
            return old_ws
        end
    catch err
        # Best-effort fallback: any failure is treated as "no usable cache".
        # Keep the cause visible at debug level instead of dropping it silently.
        @debug "wotstats cache could not be loaded" exception = (err, catch_backtrace())
        @info "no wotstats cache found, computing from beginning"
        return WotStats(length(w), w.N)
    end
end
"compute statistics about wot"
function compute_wotstats()
    # Compute the statistics for every interval that is not cached yet and write
    # the result back to WOTSTATS_CACHE_FILE. Returns `nothing`.
    w = WotSnapshot() # load snapshot
    ws = load_wotstats!(w) # load wotstats, if cache found, use it

    # NOTE(review): `load_wotstats!` sets the interval to T + 1 when the cache is
    # exactly one interval short of the history; this check then also fires and
    # that last interval is not computed on this run - confirm this is intended.
    if w.interval == length(w) # already finished
        @info "nothing to be done"
        return nothing
    end

    # iterate from current interval to the end
    @showprogress "computing statistics... " for i in w.interval:length(w)
        WotHistory.set_current_interval!(w, i)
        ws.members[i] = w.members # number of active members
        ws.was_member[i] = w.was_member # number of ids that were active
        ws.total_referent[i] = sum(w.referent .* w.active) # number of referent members
        # NOTE(review): the divisions below give NaN/Inf when total_referent[i]
        # or members[i] is 0 - confirm neither can be 0 for any interval.
        ws.ipr[i, :, :] .=
            (w.referent_neighborhood .* repeat(w.active, 1, 5)) ./ ws.total_referent[i]
        # sum of quality (referent_neighborhood / total_referent) over active
        # nodes divided by number of active nodes (distance one to five)
        ws.avg_pr[i, :] .= dropdims(sum(ws.ipr[i, :, :], dims=1), dims=1) ./ ws.members[i]
        # number of members with pr5 >= 0.8 divided by number of members
        ws.member_prop_over80percent[i] = sum(ws.ipr[i, :, 5] .>= 0.8) ./ ws.members[i]
        ws.btw_centralities[i, :] = w.centrality # betweenness centrality
    end

    @info "exporting data"
    serialize(WOTSTATS_CACHE_FILE, ws)
    return nothing
end
"join path with API_PATH"
function api(path::String)
    # Prefix `path` with `API_PATH` using the platform path separator.
    return joinpath(API_PATH, path)
end
function csv_write(path::String, df, legend)
    # Write `df` to `<api(path)>.csv` and `legend` next to it in
    # `<api(path)>.csv.txt`. Returns whatever `CSV.write` returns.
    csv_file = api(path) * ".csv"
    write(csv_file * ".txt", legend)
    return CSV.write(csv_file, df)
end
"exporte au format CSV"
function export_api_files()
    # Export the cached statistics as a CSV file plus a text legend describing
    # its columns (written through `csv_write` as "members+distrule").
    # Reads the statistics from WOTSTATS_CACHE_FILE, so the cache must exist.
    w = WotSnapshot()
    ws = deserialize(WOTSTATS_CACHE_FILE)

    # One row per time interval; avg_pr columns are split by distance 1:5.
    df = DataFrame(
        Dates = Date.(unix2datetime.(w.intervals)),
        MemberCount = ws.members,
        ReferentCount = ws.total_referent,
        IdCount = ws.was_member,
        DistRuleCompliance = ws.member_prop_over80percent,
        AvgRefReached_1 = ws.avg_pr[:, 1],
        AvgRefReached_2 = ws.avg_pr[:, 2],
        AvgRefReached_3 = ws.avg_pr[:, 3],
        AvgRefReached_4 = ws.avg_pr[:, 4],
        AvgRefReached_5 = ws.avg_pr[:, 5],
    )

    # Runtime text written verbatim to the legend file (kept as authored).
    # The common indentation is stripped by the triple-quoted string rules.
    legend = """
    Dates : dates au format YYYY-MM-DD
    MemberCount : nombre de comptes membres
    ReferentCount : nombre de membres référents
    IdCount : total des membres et des anciens membres (nombre d'identités ayant été membre au moins une fois)
    DistRuleCompliance : proportion des membres respectant la règle de distance, c'est-à-dire joingnant au moins 80% des membres référents à moins de 5 certifications
    AvgRefReached_{1,5} : proportion de membres référents atteints en remontant une chaîne de {1,5} certifications
    """

    csv_write("members+distrule", df, legend)

    # Exports that are currently disabled:
    # api_write("daily/prop_referent_reached", ipr) # daily ipr from distance 1 to 5
    # api_write("daily/betweenness_centrality", btw_centralities) # betweenness centrality
    # api_write("current/proportion_members_reached", ipr[end,:,:]) # current quality of members from distance 1 to 5
    # api_write("current/betweenness_centrality", btw_centralities[end,:]) # current betweenness centrality
end