commit fdc6c1a3faedc2c70a2e367130b76af94a196e3a
Author: qwjyh
Date:   Sun Apr 13 23:28:51 2025 +0900

    init with basic distance calc

diff --git a/Manifest.toml b/Manifest.toml
new file mode 100644
index 0000000..07b5002
--- /dev/null
+++ b/Manifest.toml
@@ -0,0 +1,82 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.11.4"
+manifest_format = "2.0"
+project_hash = "0d936e24ff57e10f312c576b6073d374c23d76ee"
+
+[[deps.Artifacts]]
+uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+version = "1.11.0"
+
+[[deps.CompilerSupportLibraries_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+version = "1.1.1+0"
+
+[[deps.Distributed]]
+deps = ["Random", "Serialization", "Sockets"]
+uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+version = "1.11.0"
+
+[[deps.Libdl]]
+uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+version = "1.11.0"
+
+[[deps.LinearAlgebra]]
+deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"]
+uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+version = "1.11.0"
+
+[[deps.OpenBLAS_jll]]
+deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
+uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
+version = "0.3.27+1"
+
+[[deps.Printf]]
+deps = ["Unicode"]
+uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+version = "1.11.0"
+
+[[deps.ProgressMeter]]
+deps = ["Distributed", "Printf"]
+git-tree-sha1 = "13c5103482a8ed1536a54c08d0e742ae3dca2d42"
+uuid = "92933f4c-e287-5a05-a399-4b506db050ca"
+version = "1.10.4"
+
+[[deps.Random]]
+deps = ["SHA"]
+uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+version = "1.11.0"
+
+[[deps.SHA]]
+uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+version = "0.7.0"
+
+[[deps.Serialization]]
+uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+version = "1.11.0"
+
+[[deps.Sockets]]
+uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
+version = "1.11.0"
+
+[[deps.Statistics]]
+deps = ["LinearAlgebra"]
+git-tree-sha1 = "ae3bb1eb3bba077cd276bc5cfc337cc65c3075c0"
+uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+version = "1.11.1"
+
+    [deps.Statistics.extensions]
+    SparseArraysExt = ["SparseArrays"]
+
+    [deps.Statistics.weakdeps]
+    SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+
+[[deps.Unicode]]
+uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
+version = "1.11.0"
+
+[[deps.libblastrampoline_jll]]
+deps = ["Artifacts", "Libdl"]
+uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
+version = "5.11.0+0"
diff --git a/Project.toml b/Project.toml
new file mode 100644
index 0000000..4458119
--- /dev/null
+++ b/Project.toml
@@ -0,0 +1,21 @@
+name = "MethodSimilarity"
+uuid = "a653e358-0bbe-4a80-8f0e-568b83ef5641"
+authors = ["qwjyh "]
+version = "0.1.0"
+
+[deps]
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+
+[compat]
+LinearAlgebra = "1.11.0"
+ProgressMeter = "1.10.4"
+Statistics = "1.11.1"
+Test = "1.11.0"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/src/MethodSimilarity.jl b/src/MethodSimilarity.jl
new file mode 100644
index 0000000..9400c6c
--- /dev/null
+++ b/src/MethodSimilarity.jl
@@ -0,0 +1,17 @@
+module MethodSimilarity
+
+using Statistics
+using LinearAlgebra
+using ProgressMeter
+
+include("Utils.jl")
+using .Utils
+
+include("method_info.jl")
+include("distance.jl")
+
+include("analyze.jl")
+
+greet() = print("Hello World!")
+
+end # module MethodSimilarity
diff --git a/src/Utils.jl b/src/Utils.jl
new file mode 100644
index 0000000..6fa42a7
--- /dev/null
+++ b/src/Utils.jl
@@ -0,0 +1,7 @@
+module Utils
+
+export Choose
+
+include("utils/choose.jl")
+
+end
diff --git a/src/analyze.jl b/src/analyze.jl
new file mode 100644
index 0000000..e65eae1
--- /dev/null
+++ b/src/analyze.jl
@@ -0,0 +1,34 @@
+"""
+    main()
+
+Compute pairwise distances between the methods of every exported or public function
+in `Base`.
+
+Returns a `num_methods × num_methods` matrix of `Float64`. Only the entries
+`dists[i, j]` with `i < j` are filled; the diagonal and lower triangle stay zero.
+"""
+function main()
+    all_methods = Method[]
+    for base_prop_sym in propertynames(Base)
+        # Skip names that are listed but unbound instead of throwing on access.
+        isdefined(Base, base_prop_sym) || continue
+        base_prop = getproperty(Base, base_prop_sym)
+        base_prop isa Function || continue
+        if !Base.isexported(Base, base_prop_sym) && !Base.ispublic(Base, base_prop_sym)
+            continue
+        end
+
+        # `append!` avoids splatting a method list of unknown length into varargs.
+        append!(all_methods, methods(base_prop))
+    end
+    num_methods = length(all_methods)
+    @info "number of methods: $(num_methods)"
+    dists = zeros((num_methods, num_methods))
+    # `Choose(2, n)` swaps its arguments when `n < 2` and would then yield 1-tuples,
+    # so handle "no pairs" explicitly.
+    index_pairs = num_methods < 2 ? NTuple{2,Int}[] : collect(Choose(2, num_methods))
+    @showprogress Threads.@threads for (i, j) in index_pairs
+        dists[i, j] = distance(all_methods[i], all_methods[j])
+    end
+    return dists
+end
diff --git a/src/distance.jl b/src/distance.jl
new file mode 100644
index 0000000..5efebd3
--- /dev/null
+++ b/src/distance.jl
@@ -0,0 +1,55 @@
+"""
+    distance_seqs(dist_func, x, y)
+
+Return the smallest mean element distance between the shorter of `x` and `y` and any
+order-preserving selection of equally many elements from the longer one.
+
+`dist_func(a, b)` gives the distance between two elements. All candidate selections
+are enumerated with `Choose`, i.e. `binomial(length(longer), length(shorter))` of them.
+
+Throws an `ArgumentError` if either sequence is empty.
+"""
+function distance_seqs(dist_func, x, y)
+    if length(x) > length(y)
+        x, y = y, x
+    end
+    # An empty `x` has no alignment to score and would also divide by zero below.
+    isempty(x) && throw(ArgumentError("cannot compare an empty sequence"))
+    diffs = minimum(Choose(length(x), length(y))) do y_ids
+        sum(enumerate(y_ids); init=0) do (xid, yid)
+            dist_func(x[xid], y[yid])
+        end
+    end
+    return diffs / length(x)
+end
+
+# Distance for one signature component: `1.0` when either side is empty, otherwise the
+# fraction of mismatching elements under the best alignment (`0.0` for identical input).
+# NOTE(review): two empty components also score `1.0`; confirm that this is intended
+# (e.g. for two methods without keyword arguments).
+function _component_distance(xs, ys)
+    if isempty(xs) || isempty(ys)
+        return 1.0
+    end
+    # `!=`, not `==`: a mismatch costs 1, so identical sequences are at distance 0.
+    return float(distance_seqs(!=, xs, ys))
+end
+
+"""
+    distance(x::MethodInfo, y::MethodInfo)::Float64
+    distance(x::Method, y::Method)
+
+Return the mean of three component distances between two methods: signature types,
+argument names, and keyword-argument names. `Method` arguments are first converted
+with `get_method_info`.
+"""
+function distance(x::MethodInfo, y::MethodInfo)::Float64
+    arg_types = _component_distance(x.sig.types, y.sig.types)
+    arg_names = _component_distance(x.args, y.args)
+    kwargs = _component_distance(x.kwargs, y.kwargs)
+    return mean((arg_types, arg_names, kwargs))
+end
+
+function distance(x::Method, y::Method)
+    return distance(get_method_info(x), get_method_info(y))
+end
diff --git a/src/method_info.jl b/src/method_info.jl
new file mode 100644
index 0000000..ee958d6
--- /dev/null
+++ b/src/method_info.jl
@@ -0,0 +1,39 @@
+"""
+    MethodInfo
+
+Information extracted from a `Method` by `get_method_info`.
+"""
+struct MethodInfo
+    "type variables taken from the `UnionAll` wrappers of the method signature"
+    tv::Vector{Any}
+    "signature with the `UnionAll` wrappers removed"
+    sig::Type
+    "argument names, including the function itself"
+    args::Vector{Symbol}
+    "keyword arguments"
+    kwargs::Vector{Symbol}
+    "definition location (file)"
+    file::String
+    "definition location (line)"
+    line::Int64
+end
+
+"""
+    get_method_info(m::Method)
+
+Build a `MethodInfo` for `m`.
+
+Relies on the non-exported helpers `Base.updated_methodloc`, `Base.method_argnames`,
+and `Base.kwarg_decl`.
+"""
+function get_method_info(m::Method)
+    tv = Any[]
+    sig = m.sig
+    # Peel off `UnionAll` layers, outermost first, recording each type variable.
+    while sig isa UnionAll
+        push!(tv, sig.var)
+        sig = sig.body
+    end
+    file, line = Base.updated_methodloc(m)
+    return MethodInfo(tv, sig, Base.method_argnames(m), Base.kwarg_decl(m), file, line)
+end
diff --git a/src/utils/choose.jl b/src/utils/choose.jl
new file mode 100644
index 0000000..4e1ccb3
--- /dev/null
+++ b/src/utils/choose.jl
@@ -0,0 +1,54 @@
+"""
+    Choose(m, n)
+
+Iterator over every way to choose `M = min(m, n)` indices out of `1:N`, where
+`N = max(m, n)`.
+
+Elements are strictly increasing `NTuple{M,Int}`s, produced in lexicographic order.
+`M` and `N` are type parameters, so each size pair is its own type. For `M == 0` the
+iterator is empty. Throws an `ArgumentError` if a size is negative.
+"""
+struct Choose{M,N}
+    function Choose(m::Integer, n::Integer)
+        if m > n
+            m, n = n, m
+        end
+        m >= 0 || throw(ArgumentError("sizes must be non-negative, got $m and $n"))
+        # Normalize to `Int` so equal sizes of different integer types give one type.
+        return new{Int(m),Int(n)}()
+    end
+end
+
+function Base.iterate(::Choose{M,N}) where {M,N}
+    if M == 0
+        return nothing
+    end
+    first_choice = ntuple(identity, Val(M))
+    return first_choice, first_choice
+end
+
+function Base.iterate(::Choose{M,N}, state) where {M,N}
+    # The last combination is `(N - M + 1, ..., N)`; its first entry identifies it.
+    if state[1] == N - M + 1
+        return nothing
+    end
+    # Bump the rightmost index that is below its maximum and reset everything after it.
+    for i in M:-1:1
+        v = state[i]
+        if v != N - M + i
+            next_choice = ntuple(Val(M)) do j
+                j < i ? state[j] : v + (j - i) + 1
+            end
+            return next_choice, next_choice
+        end
+    end
+    error("must not reachable")
+end
+
+# Trait methods are defined on the type, as the iteration interface requires.
+Base.IteratorSize(::Type{<:Choose}) = Base.HasLength()
+
+# `M == 0` iterates nothing, so report 0 instead of `binomial(N, 0) == 1`.
+Base.length(::Choose{M,N}) where {M,N} = M == 0 ? 0 : binomial(N, M)
+
+Base.eltype(::Type{Choose{M,N}}) where {M,N} = NTuple{M,Int}
diff --git a/test/runtests.jl b/test/runtests.jl
new file mode 100644
index 0000000..5a6cdf6
--- /dev/null
+++ b/test/runtests.jl
@@ -0,0 +1,17 @@
+using Test
+using MethodSimilarity
+
+@testset "Choose" begin
+    itr = MethodSimilarity.Choose(2, 3)
+    @test collect(itr) == [(1, 2), (1, 3), (2, 3)]
+    @test length(itr) == 3
+    @test eltype(itr) == NTuple{2,Int}
+    @test isempty(collect(MethodSimilarity.Choose(0, 3)))
+end
+
+@testset "distance_seqs" begin
+    @test MethodSimilarity.distance_seqs(!=, [1, 2, 3], [1, 2, 3]) == 0
+    @test MethodSimilarity.distance_seqs(!=, [1, 3], [1, 2, 3]) == 0
+    @test MethodSimilarity.distance_seqs(!=, [4], [1, 2, 3]) == 1
+    @test_throws ArgumentError MethodSimilarity.distance_seqs(!=, Int[], [1, 2, 3])
+end