Writing Data
`uCSV.write` supports writing generic datasets as well as writing DataFrames.
julia> using uCSV, DataFrames, CodecZlib
julia> df = DataFrame(uCSV.read(GzipDecompressorStream(open(joinpath(dirname(dirname(pathof(uCSV))), "test", "data", "iris.csv.gz"))), header=1));
julia> first(df, 6)
6×6 DataFrames.DataFrame. Omitted printing of 1 columns
│ Row │ Id │ SepalLengthCm │ SepalWidthCm │ PetalLengthCm │ PetalWidthCm │
│ │ Int64 │ Float64 │ Float64 │ Float64 │ Float64 │
├─────┼───────┼───────────────┼──────────────┼───────────────┼──────────────┤
│ 1 │ 1 │ 5.1 │ 3.5 │ 1.4 │ 0.2 │
│ 2 │ 2 │ 4.9 │ 3.0 │ 1.4 │ 0.2 │
│ 3 │ 3 │ 4.7 │ 3.2 │ 1.3 │ 0.2 │
│ 4 │ 4 │ 4.6 │ 3.1 │ 1.5 │ 0.2 │
│ 5 │ 5 │ 5.0 │ 3.6 │ 1.4 │ 0.2 │
│ 6 │ 6 │ 5.4 │ 3.9 │ 1.7 │ 0.4 │
julia> outpath = joinpath(dirname(dirname(pathof(uCSV))), "test", "temp.txt");
julia> uCSV.write(outpath, header = string.(names(df)), data = DataFrames.columns(df))
julia> for line in readlines(open(outpath))[1:5]
println(line)
end
Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
1,5.1,3.5,1.4,0.2,Iris-setosa
2,4.9,3.0,1.4,0.2,Iris-setosa
3,4.7,3.2,1.3,0.2,Iris-setosa
4,4.6,3.1,1.5,0.2,Iris-setosa
julia> uCSV.write(outpath, df)
julia> for line in readlines(open(outpath))[1:5]
println(line)
end
Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
1,5.1,3.5,1.4,0.2,Iris-setosa
2,4.9,3.0,1.4,0.2,Iris-setosa
3,4.7,3.2,1.3,0.2,Iris-setosa
4,4.6,3.1,1.5,0.2,Iris-setosa
Users can specify a delimiter other than `','` with the `delim` argument.
julia> using uCSV, DataFrames, CodecZlib
julia> df = DataFrame(uCSV.read(GzipDecompressorStream(open(joinpath(dirname(dirname(pathof(uCSV))), "test", "data", "iris.csv.gz"))), header=1));
julia> outpath = joinpath(dirname(dirname(pathof(uCSV))), "test", "temp.txt");
julia> uCSV.write(outpath, df, delim='\t')
julia> for line in readlines(open(outpath))[1:5]
println(line)
end
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species
1 5.1 3.5 1.4 0.2 Iris-setosa
2 4.9 3.0 1.4 0.2 Iris-setosa
3 4.7 3.2 1.3 0.2 Iris-setosa
4 4.6 3.1 1.5 0.2 Iris-setosa
Quotes can also be requested; by default they apply only to `String` (and `Union{String, Missing}`) columns and to the header.
julia> using uCSV, DataFrames, CodecZlib
julia> df = DataFrame(uCSV.read(GzipDecompressorStream(open(joinpath(dirname(dirname(pathof(uCSV))), "test", "data", "iris.csv.gz"))), header=1));
julia> outpath = joinpath(dirname(dirname(pathof(uCSV))), "test", "temp.txt");
julia> uCSV.write(outpath, df, quotes='"')
julia> for line in readlines(open(outpath))[1:5]
println(line)
end
"Id","SepalLengthCm","SepalWidthCm","PetalLengthCm","PetalWidthCm","Species"
1,5.1,3.5,1.4,0.2,"Iris-setosa"
2,4.9,3.0,1.4,0.2,"Iris-setosa"
3,4.7,3.2,1.3,0.2,"Iris-setosa"
4,4.6,3.1,1.5,0.2,"Iris-setosa"
julia> # columns that are Union{T, Missing} where T <: quotetypes also work
df_with_missings = deepcopy(df);
julia> df_with_missings[6] = convert(Vector{Union{String, Missing}}, df_with_missings[6]);
julia> df_with_missings[6][2:3] .= missing;
julia> uCSV.write(outpath, df_with_missings, quotes='"')
julia> for line in readlines(open(outpath))[1:5]
println(line)
end
"Id","SepalLengthCm","SepalWidthCm","PetalLengthCm","PetalWidthCm","Species"
1,5.1,3.5,1.4,0.2,"Iris-setosa"
2,4.9,3.0,1.4,0.2,"missing"
3,4.7,3.2,1.3,0.2,"missing"
4,4.6,3.1,1.5,0.2,"Iris-setosa"
julia> # but not if the column is ONLY missings
df_with_missings[6] = missings(size(df_with_missings, 1));
julia> uCSV.write(outpath, df_with_missings, quotes='"')
julia> for line in readlines(open(outpath))[1:5]
println(line)
end
"Id","SepalLengthCm","SepalWidthCm","PetalLengthCm","PetalWidthCm","Species"
1,5.1,3.5,1.4,0.2,missing
2,4.9,3.0,1.4,0.2,missing
3,4.7,3.2,1.3,0.2,missing
4,4.6,3.1,1.5,0.2,missing
To quote every field in the dataset, or to apply other custom quoting rules, use the `quotetypes` argument.
julia> using uCSV, DataFrames, CodecZlib
julia> df = DataFrame(uCSV.read(GzipDecompressorStream(open(joinpath(dirname(dirname(pathof(uCSV))), "test", "data", "iris.csv.gz"))), header=1));
julia> outpath = joinpath(dirname(dirname(pathof(uCSV))), "test", "temp.txt");
julia> uCSV.write(outpath, df, quotes='"', quotetypes=Any)
julia> for line in readlines(open(outpath))[1:5]
println(line)
end
"Id","SepalLengthCm","SepalWidthCm","PetalLengthCm","PetalWidthCm","Species"
"1","5.1","3.5","1.4","0.2","Iris-setosa"
"2","4.9","3.0","1.4","0.2","Iris-setosa"
"3","4.7","3.2","1.3","0.2","Iris-setosa"
"4","4.6","3.1","1.5","0.2","Iris-setosa"
julia> uCSV.write(outpath, df, quotes='"', quotetypes=Real)
julia> for line in readlines(open(outpath))[1:5]
println(line)
end
"Id","SepalLengthCm","SepalWidthCm","PetalLengthCm","PetalWidthCm","Species"
"1","5.1","3.5","1.4","0.2",Iris-setosa
"2","4.9","3.0","1.4","0.2",Iris-setosa
"3","4.7","3.2","1.3","0.2",Iris-setosa
"4","4.6","3.1","1.5","0.2",Iris-setosa