More information: https://docs.julialang.org/en/v1/
A = [1 2 ; 3 4] ; display(A)   # 2×2 integer matrix
B = [1.0,2.0] ; display(B)     # vector of Float64
C = A[1,2] ; C = A[:,2]        # indexing: single element, then a whole column
D = A * B                      # matrix-vector product
E = B' * B                     # inner product (scalar)
F = log.(A)                    # elementwise log via broadcasting (note the dot)
t = (1,2,3) ;                  # a tuple; avoid the name `tuple`, which shadows Base.tuple
2×2 Array{Int64,2}:
 1  2
 3  4
2-element Array{Float64,1}:
 1.0
 2.0
f(x) = x ^ 2
@show f(2)
# Equivalent:
function f(x)
y = x ^ 2
return y
end
# Keyword arguments (with defaults)
g(x; y = 1) = x + y
@show g(2)
# Generates error
f([2,3])
f(2) = 4
g(2) = 3
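Keyword arguments can also be overridden by name; a quick check (the call below is not part of the original output):
@show g(2, y = 5)   # overrides the default y = 1, returns 7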
MethodError: no method matching ^(::Array{Int64,1}, ::Int64)
Closest candidates are:
  ^(!Matched::Float16, ::Integer) at math.jl:862
  ^(!Matched::Regex, ::Integer) at regex.jl:712
  ^(!Matched::Missing, ::Integer) at missing.jl:151
  ...
Stacktrace:
 [1] macro expansion at .\none:0 [inlined]
 [2] literal_pow at .\none:0 [inlined]
 [3] f(::Array{Int64,1}) at .\In[2]:6
 [4] top-level scope at In[2]:13
f(x::Array{T,1} where T <: Real) = x .^ 2
# Now no error
@show f([2.0,3.0])
# Note: f([2 3]) would still give an error, since [2 3] is a 2d array (a 1×2 matrix)
# Alternative: Dot notation
@show f.([2.0,3.0]) ;
f([2.0, 3.0]) = [4.0, 9.0]
f.([2.0, 3.0]) = [4.0, 9.0]
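Broadcasting with f. sidesteps the 1d restriction entirely, since the scalar method is applied elementwise; a quick check (not in the original):
@show f.([2 3])   # scalar f applied to each element of the 1×2 matrix, returns [4 9]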
Question: is Array{Float64,1} a subtype of Array{Real,1}?
# Check whether subtype
@show Int32 <: Integer
@show Integer <: Real
@show Array <: Real
Int32 <: Integer = true
Integer <: Real = true
Array <: Real = false
false. Parametric types in Julia are invariant: Array{Float64,1} is not a subtype of Array{Real,1}, even though Float64 <: Real. To accept any concrete element type, use Array{T,1} where T<:Real, as in the method definition above.
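The same invariance can be checked directly (these lines are not in the original):
@show Float64[1.0, 2.0] isa Array{Real,1}    # false: type parameters must match exactly
@show Float64[1.0, 2.0] isa Array{<:Real,1}  # true: <:Real matches any Real subtype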
using CSV,DataFrames
@show example_data = CSV.read("example.csv") # newer CSV.jl versions require a sink: CSV.read("example.csv", DataFrame)
using Distributions
d = Normal()
@show cdf(d,0.0)
# Add functionality
import Base:+
function +(d1::Normal,d2::Normal)
return Normal(mean(d1)+mean(d2),sqrt(var(d1)+var(d2)))
end
@show dist = Normal() + Normal(-1,2) ;
example_data = CSV.read("example.csv") = 3×2 DataFrame
│ Row │ var1     │ var2  │
│     │ Float64⍰ │ Int64 │
├─────┼──────────┼───────┤
│ 1   │ 1.2      │ 1     │
│ 2   │ 3.2      │ 1     │
│ 3   │ missing  │ 2     │
cdf(d, 0.0) = 0.5
dist = Normal() + Normal(-1, 2) = Normal{Float64}(μ=-1.0, σ=2.23606797749979)
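As a sanity check of the new + method (not in the original): the sum of independent N(0,1) and N(-1,2) variables is normal with mean -1 and standard deviation √5.
@show mean(dist) ≈ -1.0       # means add
@show std(dist) ≈ sqrt(5.0)   # variances add: 1 + 4 = 5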
# Composite type
struct ShachamParameters
β::Array{Real,1}
c::Float64
end
pars = ShachamParameters([1,2],0.2)
@show pars.β
function logLik(p::ShachamParameters,d::DataFrame)
# DO CALCULATIONS
end ;
pars.β = Real[1, 2]
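Note that the abstractly typed field β::Array{Real,1} is convenient but slow to access, since Julia cannot infer the element type. A parametric struct keeps the field concrete; a sketch (the name ShachamParametersFast is made up here):
struct ShachamParametersFast{T<:Real}
    β::Array{T,1}   # concrete element type, fixed at construction
    c::Float64
end
pars_fast = ShachamParametersFast([1,2],0.2)   # T is inferred as Int64 here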
f_matmul(x,y) = exp.(x)' * exp.(y)
f_dot(x,y) = sum( exp.(x) .* exp.(y) )
using Strided
f_dot_strided(x,y) = @strided sum( exp.(x) .* exp.(y))
function f_loop(x,y)
out = 0.0
for ii = 1:length(x)
out += exp(x[ii])*exp(y[ii])
end
out
end
function f_loop_multithread(x,y)
out = zeros(length(x))
Threads.@threads for ii = 1:length(x)
out[ii] = exp(x[ii])*exp(y[ii])
end
return sum(out)
end ;
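For Threads.@threads to actually run in parallel, Julia must be launched with multiple threads (e.g. by setting the JULIA_NUM_THREADS environment variable before startup); the count can be checked with:
@show Threads.nthreads()   # number of threads available to Threads.@threads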
using BenchmarkTools
using Random ; Random.seed!(42)
x = rand(50_000) ; y = rand(50_000)
f_matmul(x,y) ; f_dot(x,y) ; f_loop(x,y) ; f_loop_multithread(x,y) ; f_dot_strided(x,y) # Warm-up run: trigger compilation before timing
println("Matmul")
@btime f_matmul(x,y)
println("Dot")
@btime f_dot(x,y)
println("Dot strided")
@btime f_dot_strided(x,y)
println("Loop")
@btime f_loop(x,y)
println("Loop multithread")
@btime f_loop_multithread(x,y)
CSV.write("xy.csv",DataFrame(x=x,y=y)); # save the data for the cross-language comparison below
Matmul
  892.394 μs (5 allocations: 781.42 KiB)
Dot
  774.009 μs (3 allocations: 390.72 KiB)
Dot strided
  252.094 μs (110 allocations: 399.58 KiB)
Loop
  764.695 μs (1 allocation: 16 bytes)
Loop multithread
  284.845 μs (33 allocations: 394.19 KiB)
| Time (μs) | Julia 1.3 | Matlab 2019a | R 3.6.1 | Python 3.6 |
|---|---|---|---|---|
| Matmul | 829 | 361 | 6,070 | 802 |
| Dot | 745 | 360 | 5,761 | 1,129 |
| Loop | 747 | 1,100 | 11,073 | 88,788 |
| Dot Strided | 251 | --- | --- | --- |
| Loop Multithreaded | 280 | --- | --- | --- |
Translate human code into efficient machine code
# Julia code
function f()
x = 0
x += 2
x *= x
x += 2
return x
end
f()
# Assembly code (close to machine code)
@code_native syntax = :intel f()
    .text
; ┌ @ In[9]:3 within `f'
    push    rbp
    mov     rbp, rsp
; │ @ In[9]:7 within `f'
    mov     eax, 6
    pop     rbp
    ret
    nop     dword ptr [rax + rax]
; └
f(x) = x^3
@time f(3)
@time f(4) ;
  0.003183 seconds (966 allocations: 53.732 KiB)
  0.000001 seconds (4 allocations: 160 bytes)
Static compilation (Fortran/C): compilation before running any code
Interpreted (R/Matlab/Python): no compilation; code calls precompiled subroutines
Just-in-time compilation (Julia): each method is compiled the first time it is called with a given combination of argument types, which is why the first @time f(3) above is slow and the second call is fast
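Compilation happens once per combination of concrete argument types; a small demonstration (the Float64 calls below are not in the original):
@time f(3.0)   # first Float64 call: compiles a new specialization of f
@time f(4.0)   # already compiled: fast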
p_bad(p)    = p < 0 ? 0 : p         # returns Int 0 or Float64 p: type-unstable
p_stable(p) = p < 0 ? zero(p) : p   # zero(p) has the same type as p: type-stable
function outer_fun(p_fun)
p = rand(Normal(),1000)
return sum([p_fun(p[ii]) for ii in eachindex(p)])
end
outer_fun(p_bad) ; outer_fun(p_stable) # Warm-up: compile both versions before timing
Random.seed!(42)
@btime outer_fun(p_bad)
Random.seed!(42)
@btime outer_fun(p_stable) ;
  44.169 μs (1443 allocations: 46.33 KiB)
  10.216 μs (4 allocations: 15.92 KiB)
function f_bad(N)
    x = 1          # Int: x becomes Float64 after the first division
    for ii = 1:N
        x /= 2
    end
    return x
end
function f_stable(N)
    x = 1.0        # Float64 from the start: the type of x never changes
    for ii = 1:N
        x /= 2
    end
    return x
end
f_bad(10) ; f_stable(10) # Warm-up
Random.seed!(42)
@btime f_bad(10)
Random.seed!(42)
@btime f_stable(10)
  6.610 ns (0 allocations: 0 bytes)
  1.201 ns (0 allocations: 0 bytes)
0.0009765625
# @code_warntype flags the unstable variable: x is a Union{Float64, Int64}
@code_warntype f_bad(10)
Variables
  #self#::Core.Compiler.Const(f_bad, false)
  N::Int64
  x::Union{Float64, Int64}
  @_4::Union{Nothing, Tuple{Int64,Int64}}
  ii::Int64

Body::Union{Float64, Int64}
1 ─       (x = 1)
│   %2  = (1:N)::Core.Compiler.PartialStruct(UnitRange{Int64}, Any[Core.Compiler.Const(1, false), Int64])
│         (@_4 = Base.iterate(%2))
│   %4  = (@_4 === nothing)::Bool
│   %5  = Base.not_int(%4)::Bool
└──       goto #4 if not %5
2 ┄ %7  = @_4::Tuple{Int64,Int64}::Tuple{Int64,Int64}
│         (ii = Core.getfield(%7, 1))
│   %9  = Core.getfield(%7, 2)::Int64
│         (x = x / 2)
│         (@_4 = Base.iterate(%2, %9))
│   %12 = (@_4 === nothing)::Bool
│   %13 = Base.not_int(%12)::Bool
└──       goto #4 if not %13
3 ─       goto #2
4 ┄       return x
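For comparison, the stable version infers concrete types throughout (output not reproduced here):
@code_warntype f_stable(10)   # x and the return value are now concrete Float64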
Nested fixed point algorithm: iterate over $\delta_{jm}$ until the observed market share equals the model-implied market share

$$S_{jm} = s_{jm}(\delta_m) \quad \forall \, j,m$$

where the elements of $\delta_m$ are the mean utilities given by

$$\delta_{jm} = x_{jm} \beta - \alpha p_{jm} + \xi_{jm}$$

Market share: Monte Carlo integration over the random coefficient $\alpha_i = \alpha + \nu_i \sigma_{\alpha}$

$$ s_{jm}(\delta_{jm}) = \frac{1}{n}\sum_{i=1}^n \frac{\exp\left( \delta_{jm} + \sigma_{\alpha} \nu_i\right)}{1 + \sum_{h \in J_m} \exp \left( \delta_{hm} + \sigma_{\alpha} \nu_i \right)} $$

The "tricky" part is calculating the following sum for each market share and each simulation draw:

$$ \sum_{h \in J_m} \exp \left( \delta_{hm} + \sigma_{\alpha} \nu_i \right) $$

# Looped Julia version
function fillsumshare!(sumshare,numer,d)
# jj: simulation draws
# ii: markets
# tt: products within market
@inbounds Threads.@threads for jj = 1:size(numer,2)
for ii = 1:maximum(d.IDmkt),tt = d.T[ii,1]:d.T[ii,2]
sumshare[ii,jj] += numer[tt,jj]
end
end
end
fillsumshare! (generic function with 1 method)
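The outer loop of the nested fixed point is the BLP contraction mapping, δ ← δ + log(S) - log(s(δ)). A minimal sketch of that iteration (the function and argument names here are hypothetical, not from the course code):
# δ0: starting mean utilities; S: observed shares; shares: function mapping δ to model-implied shares s(δ)
function solve_delta(δ0, S, shares; tol = 1e-12, maxit = 10_000)
    δ = copy(δ0)
    for it = 1:maxit
        δnew = δ .+ log.(S) .- log.(shares(δ))   # BLP contraction step
        maximum(abs.(δnew .- δ)) < tol && return δnew
        δ = δnew
    end
    error("Contraction did not converge")
end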
include("functions_setup.jl")
include("functions_BLP.jl")
d = prep_data()
startval = get_startval()
beta = startval[1:end-4]
sigma = startval[end-3:end]
BLPmom(beta,sigma,d)
@btime BLPmom(beta,sigma,d)
1.382 s (9518988 allocations: 264.24 MiB)
1×1 Array{Float64,2}:
 137502.32455933973
Logit model:

$$ y_i = \mathbf{1}\left\{ \mathbf{x}_i'\beta > \varepsilon_i \right\} $$

with $\varepsilon_i$ being drawn from a logistic distribution.
We estimate $\beta$ using observations $\mathbf{x}_i$ and $y_i$.
using Distributions
# Generate data
function gen_data(β,n)
X = [ones(Float64,n) rand(Chisq(10),n)]
e = rand(Logistic(),n)
y = [X[i,:]'*β > e[i] for i in eachindex(e)]
return y,X
end
β = [-0.3;-0.1]
using Random ; Random.seed!(132)
y,X = gen_data(β,1000)
(Bool[0, 1, 0, 0, 0, 0, 0, 0, 0, 1 … 1, 1, 1, 1, 0, 0, 0, 0, 0, 0], [1.0 10.679635923514951; 1.0 4.817428373056074; … ; 1.0 3.4148723728709314; 1.0 10.965797991953453])
# Likelihood function
p(β,x) = 1.0 / (1.0 + exp(-x*β)) # logit probability
# Equivalent: cdf(Logistic(),x*β), but slower
function nll(β::Array{T,1},y,X) where T<:Real
ll = zeros(T,length(y))
for ii in eachindex(y)
p_i = p(β,X[ii,:]')
ll[ii] = y[ii]*log(p_i) + (1-y[ii])*log(1.0-p_i)
end
return -mean(ll)
end
nll(β,y,X)
0.5291460618733443
# Gradient
function Δnll!(g,β::Array{T,1},y,X) where T<:Real
s = zeros(Float64,length(y),2)
for ii in eachindex(y)
p_i = p(β,X[ii,:]')
s[ii,:] = y[ii]*(1.0 - p_i)*X[ii,:] - (1-y[ii])*p_i * X[ii,:]
end
g[:] = -mean(s,dims=1)
end
# Note: `g` is already a generic function (defined earlier), so a bare
# `g = zeros(Float64,2)` errors with "invalid redefinition of constant g".
# Use a fresh name for the storage vector instead:
gvec = zeros(Float64,2)
Δnll!(gvec,β,y,X)
# Optimization
using Optim
obj_fun(β) = nll(β,y,X)
grad(g,β) = Δnll!(g,β,y,X)
x0 = rand(2)
res = Array{Any,2}(undef,4,1)
# Use finite differencing for gradient
@btime res[1] = optimize(obj_fun,x0,LBFGS())
# Use own gradient definition
@btime res[2] = optimize(obj_fun,grad,x0,LBFGS())
# Use Automatic differentiation
@btime res[3] = optimize(obj_fun,x0,LBFGS();autodiff=:forward)
# No gradient
@btime res[4] = optimize(obj_fun,x0)
for i = 1:4
println(Optim.minimizer(res[i]))
end
  17.866 ms (320926 allocations: 30.56 MiB)
  11.570 ms (288504 allocations: 27.13 MiB)
  3.901 ms (96547 allocations: 7.10 MiB)
  6.562 ms (118337 allocations: 11.27 MiB)
[-0.3240504099532686, -0.09409542033967724]
[-0.3240504127427367, -0.09409542001711972]
[-0.3240504127427371, -0.09409542001711968]
[-0.32282299576061224, -0.09424837358873464]
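Since nll was written generically (zeros(T, ...) with T<:Real), it also works under forward-mode automatic differentiation. An optional cross-check of the hand-coded gradient against AD (this block assumes the ForwardDiff package is installed; it is not part of the original):
using ForwardDiff
g_ad = ForwardDiff.gradient(b -> nll(b,y,X), β)   # AD gradient at the true β
g_hand = zeros(2) ; Δnll!(g_hand,β,y,X)           # hand-coded gradient
@show maximum(abs.(g_ad .- g_hand))               # should be ≈ 0 up to rounding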
Julia Docs: https://docs.julialang.org/en/v1/
Performance tips: https://docs.julialang.org/en/v1/manual/performance-tips/
Quant Econ: https://julia.quantecon.org/