{"id":1406,"date":"2025-02-02T10:46:57","date_gmt":"2025-02-02T01:46:57","guid":{"rendered":"https:\/\/daba-no-heya.com\/?p=1406"},"modified":"2025-02-02T10:46:58","modified_gmt":"2025-02-02T01:46:58","slug":"post-1406","status":"publish","type":"post","link":"https:\/\/daba-no-heya.com\/?p=1406","title":{"rendered":"\u3010Rust\u3011DataFrame\u30e9\u30a4\u30d6\u30e9\u30eaPolars\u3092\u4f7f\u3063\u3066\u307f\u308b"},"content":{"rendered":"\n<div id=\"ez-toc-container\" class=\"ez-toc-v2_0_83 counter-hierarchy ez-toc-counter ez-toc-grey ez-toc-container-direction\">\n<div class=\"ez-toc-title-container\">\n<p class=\"ez-toc-title\" style=\"cursor:inherit\">Table of Contents<\/p>\n<span class=\"ez-toc-title-toggle\"><a href=\"#\" class=\"ez-toc-pull-right ez-toc-btn ez-toc-btn-xs ez-toc-btn-default ez-toc-toggle\" aria-label=\"Toggle Table of Content\"><span class=\"ez-toc-js-icon-con\"><span class=\"\"><span class=\"eztoc-hide\" style=\"display:none;\">Toggle<\/span><span class=\"ez-toc-icon-toggle-span\"><svg style=\"fill: #999;color:#999\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" class=\"list-377408\" width=\"20px\" height=\"20px\" viewBox=\"0 0 24 24\" fill=\"none\"><path d=\"M6 6H4v2h2V6zm14 0H8v2h12V6zM4 11h2v2H4v-2zm16 0H8v2h12v-2zM4 16h2v2H4v-2zm16 0H8v2h12v-2z\" fill=\"currentColor\"><\/path><\/svg><svg style=\"fill: #999;color:#999\" class=\"arrow-unsorted-368013\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\" width=\"10px\" height=\"10px\" viewBox=\"0 0 24 24\" version=\"1.2\" baseProfile=\"tiny\"><path d=\"M18.2 9.3l-6.2-6.3-6.2 6.3c-.2.2-.3.4-.3.7s.1.5.3.7c.2.2.4.3.7.3h11c.3 0 .5-.1.7-.3.2-.2.3-.5.3-.7s-.1-.5-.3-.7zM5.8 14.7l6.2 6.3 6.2-6.3c.2-.2.3-.5.3-.7s-.1-.5-.3-.7c-.2-.2-.4-.3-.7-.3h-11c-.3 0-.5.1-.7.3-.2.2-.3.5-.3.7s.1.5.3.7z\"\/><\/svg><\/span><\/span><\/span><\/a><\/span><\/div>\n<nav><ul class='ez-toc-list ez-toc-list-level-1 ' ><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-1\" 
href=\"https:\/\/daba-no-heya.com\/?p=1406\/#%E5%89%8D%E7%BD%AE%E3%81%8D\" >\u524d\u7f6e\u304d<\/a><ul class='ez-toc-list-level-3' ><li class='ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-2\" href=\"https:\/\/daba-no-heya.com\/?p=1406\/#%E7%92%B0%E5%A2%83%E6%83%85%E5%A0%B1\" >\u74b0\u5883\u60c5\u5831<\/a><\/li><\/ul><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-3\" href=\"https:\/\/daba-no-heya.com\/?p=1406\/#%E4%BA%8B%E5%89%8D%E6%BA%96%E5%82%99\" >\u4e8b\u524d\u6e96\u5099<\/a><ul class='ez-toc-list-level-3' ><li class='ez-toc-heading-level-3'><a class=\"ez-toc-link ez-toc-heading-4\" href=\"https:\/\/daba-no-heya.com\/?p=1406\/#Cargotoml\" >Cargo.toml<\/a><\/li><\/ul><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-5\" href=\"https:\/\/daba-no-heya.com\/?p=1406\/#%E3%82%B3%E3%83%BC%E3%83%89\" >\u30b3\u30fc\u30c9<\/a><\/li><li class='ez-toc-page-1 ez-toc-heading-level-2'><a class=\"ez-toc-link ez-toc-heading-6\" href=\"https:\/\/daba-no-heya.com\/?p=1406\/#%E3%83%A1%E3%83%A2\" >\u30e1\u30e2<\/a><\/li><\/ul><\/nav><\/div>\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"%E5%89%8D%E7%BD%AE%E3%81%8D\"><\/span>\u524d\u7f6e\u304d<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">Rust\u3067DataFrame\u3092\u6271\u3046\u305f\u3081\u306ePolars\u3068\u3044\u3046\u30e9\u30a4\u30d6\u30e9\u30ea\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u30b3\u30a2\u90e8\u5206\u306fRust\u3067\u66f8\u304b\u308c\u3066\u3044\u307e\u3059\u304c\u3001Python\u304b\u3089\u3082\u4f7f\u7528\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<br>\u5b9f\u969b\u306bGoogle\u691c\u7d22\u3057\u3066\u307f\u308b\u3068\u3001Rust\u3088\u308a\u3082Python\u306e\u89e3\u8aac\u8a18\u4e8b\u304c\u591a\u304f\u30d2\u30c3\u30c8\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<p 
class=\"wp-block-paragraph\">Rust\u304b\u3089Polars\u3092\u6271\u3046\u969b\u306b\u53c2\u8003\u306b\u3067\u304d\u308b\u60c5\u5831\u304c\u5c11\u306a\u304f\u3066\u8a66\u884c\u932f\u8aa4\u3057\u305f\u306e\u3067\u3001\u500b\u4eba\u7684\u306a\u5099\u5fd8\u9332\u3082\u517c\u306d\u3066\u8a18\u4e8b\u3092\u4f5c\u6210\u3057\u305f\u3044\u3068\u601d\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u516c\u5f0f\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u306f\u3053\u3061\u3089: <a href=\"https:\/\/docs.pola.rs\/\">https:\/\/docs.pola.rs\/<\/a><\/p>\n\n\n\n<h3 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"%E7%92%B0%E5%A2%83%E6%83%85%E5%A0%B1\"><\/span>\u74b0\u5883\u60c5\u5831<span class=\"ez-toc-section-end\"><\/span><\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>$ cat \/etc\/os-release \nPRETTY_NAME=\"Zorin OS 17.2\"\nNAME=\"Zorin OS\"\nVERSION_ID=\"17\"\nVERSION=\"17.2\"\nVERSION_CODENAME=jammy\nID=zorin\nID_LIKE=\"ubuntu debian\"\nHOME_URL=\"https:\/\/zorin.com\/os\/\"\nSUPPORT_URL=\"https:\/\/help.zorin.com\/\"\nBUG_REPORT_URL=\"https:\/\/zorin.com\/os\/feedback\/\"\nPRIVACY_POLICY_URL=\"https:\/\/zorin.com\/legal\/privacy\/\"\nUBUNTU_CODENAME=jammy<\/code><\/pre>\n\n\n\n<pre class=\"wp-block-code\"><code>$ cargo version\ncargo 1.83.0 (5ffbef321 2024-10-29)<\/code><\/pre>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"%E4%BA%8B%E5%89%8D%E6%BA%96%E5%82%99\"><\/span>\u4e8b\u524d\u6e96\u5099<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">\u4f4f\u6240\u3068\u90f5\u4fbf\u756a\u53f7\u306eCSV\u30c7\u30fc\u30bf\u304c\u516c\u958b\u3055\u308c\u3066\u3044\u308b\u306e\u3067\u3001\u4eca\u56de\u306f\u3053\u308c\u3092\u984c\u6750\u306b\u3057\u305f\u3044\u3068\u601d\u3044\u307e\u3059\u3002<br><a href=\"https:\/\/www.post.japanpost.jp\/zipcode\/dl\/utf-zip.html\">https:\/\/www.post.japanpost.jp\/zipcode\/dl\/utf-zip.html<\/a><\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><code>cargo 
init<\/code>\u3067\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u3092\u4f5c\u6210\u3057\u307e\u3059\u3002<br>\u305d\u306e\u5f8c\u3001<code>cargo add<\/code>\u3067polars\u3068clap\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3057\u307e\u3059\u3002<br>clap\u306f\u30b3\u30de\u30f3\u30c9\u30e9\u30a4\u30f3\u5f15\u6570\u3092\u6271\u3046\u305f\u3081\u306b\u4f7f\u7528\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>$ cargo add polars -F lazy,csv,parquet,strings\n$ cargo add clap -F derive<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u30aa\u30d7\u30b7\u30e7\u30f3\u306a\u3057\u3067add\u3059\u308b\u3068\u4eca\u56de\u4f7f\u3044\u305f\u3044\u6a5f\u80fd\u304c\u5165\u3089\u306a\u3044\u306e\u3067\u3001-F (--features)\u30aa\u30d7\u30b7\u30e7\u30f3\u3067feature\u3092\u6307\u5b9a\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"Cargotoml\"><\/span>Cargo.toml<span class=\"ez-toc-section-end\"><\/span><\/h3>\n\n\n\n<pre class=\"wp-block-code\"><code>&#91;package]\nname = \"polars-sample\"\nversion = \"0.1.0\"\nedition = \"2021\"\n\n&#91;dependencies]\nclap = { version = \"4.5.27\", features = &#91;\"derive\"] }\npolars = { version = \"0.46.0\", features = &#91;\"lazy\", \"csv\", \"parquet\", \"strings\"] }<\/code><\/pre>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"%E3%82%B3%E3%83%BC%E3%83%89\"><\/span>\u30b3\u30fc\u30c9<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<pre class=\"wp-block-luxe-blocks-syntaxhighlighter line-numbers language-rust\"><code class=\"language-rust\">use std::{\n    error::Error,\n    fs::{self, File},\n    path::Path,\n};\n\nuse clap::Parser;\nuse polars::{\n    frame::DataFrame,\n    io::{SerReader, SerWriter},\n    prelude::{\n        col, lit, CsvReadOptions, CsvWriter, DataType, Field, IntoLazy, ParquetWriter, Schema,\n        SortMultipleOptions,\n    },\n};\n\n#[derive(Debug, Parser)]\nstruct Args {\n    #[arg(short, long, default_value 
= \"utf_ken_all.csv\")]\n    input_filepath: String,\n    #[arg(short, long, default_value = \"Data\")]\n    output_dirname: String,\n}\n\nfn main() -> Result&lt;(), Box&lt;dyn Error>> {\n    let args = Args::parse();\n\n    \/\/CSV\u30d5\u30a1\u30a4\u30eb\u3092\u8aad\u307f\u8fbc\u3093\u3067DataFrame\u3092\u4f5c\u6210\u3059\u308b\n    let schema = Schema::from_iter(vec![\n        Field::new(\"\u5168\u56fd\u5730\u65b9\u516c\u5171\u56e3\u4f53\u30b3\u30fc\u30c9\".into(), DataType::String),\n        Field::new(\"\u65e7\u90f5\u4fbf\u756a\u53f7\".into(), DataType::String),\n        Field::new(\"\u90f5\u4fbf\u756a\u53f7\".into(), DataType::String),\n        Field::new(\"\u90fd\u9053\u5e9c\u770c\u540d(\u30ab\u30bf\u30ab\u30ca)\".into(), DataType::String),\n        Field::new(\"\u5e02\u533a\u753a\u6751\u540d(\u30ab\u30bf\u30ab\u30ca)\".into(), DataType::String),\n        Field::new(\"\u753a\u57df\u540d(\u30ab\u30bf\u30ab\u30ca)\".into(), DataType::String),\n        Field::new(\"\u90fd\u9053\u5e9c\u770c\u540d(\u6f22\u5b57)\".into(), DataType::String),\n        Field::new(\"\u5e02\u533a\u753a\u6751\u540d(\u6f22\u5b57)\".into(), DataType::String),\n        Field::new(\"\u753a\u57df\u540d(\u6f22\u5b57)\".into(), DataType::String),\n        Field::new(\n            \"\u4e00\u753a\u57df\u304c\u4e8c\u4ee5\u4e0a\u306e\u90f5\u4fbf\u756a\u53f7\u3067\u8868\u3055\u308c\u308b\u5834\u5408\u306e\u8868\u793a\".into(),\n            DataType::Int32,\n        ),\n        Field::new(\n            \"\u5c0f\u5b57\u6bce\u306b\u756a\u5730\u304c\u8d77\u756a\u3055\u308c\u3066\u3044\u308b\u753a\u57df\u306e\u8868\u793a\".into(),\n            DataType::Int32,\n        ),\n        Field::new(\"\u4e01\u76ee\u3092\u6709\u3059\u308b\u753a\u57df\u306e\u5834\u5408\u306e\u8868\u793a\".into(), DataType::Int32),\n        Field::new(\n            \"\u4e00\u3064\u306e\u90f5\u4fbf\u756a\u53f7\u3067\u4e8c\u4ee5\u4e0a\u306e\u753a\u57df\u3092\u8868\u3059\u5834\u5408\u306e\u8868\u793a\".into(),\n            
DataType::Int32,\n        ),\n        Field::new(\"\u66f4\u65b0\u306e\u8868\u793a\".into(), DataType::Int32),\n        Field::new(\"\u5909\u66f4\u7406\u7531\".into(), DataType::Int32),\n    ]);\n    let df = CsvReadOptions::default()\n        .with_has_header(false)\n        .with_schema(Some(schema.into()))\n        .try_into_reader_with_file_path(Some(args.input_filepath.into()))?\n        .finish()?;\n    println!(\"\u30ec\u30b3\u30fc\u30c9\u6570: {}\", df.shape().0);\n\n    \/\/\u6771\u4eac\u90fd\u306e\u30c7\u30fc\u30bf\u306b\u7d5e\u308a\u8fbc\u3080\n    let df_tokyo = df\n        .clone()\n        .lazy()\n        .filter(col(\"\u90fd\u9053\u5e9c\u770c\u540d(\u6f22\u5b57)\").eq(lit(\"\u6771\u4eac\u90fd\")))\n        .collect()?;\n    println!(\"\u6771\u4eac\u90fd\u306e\u30ec\u30b3\u30fc\u30c9\u6570: {}\", df_tokyo.shape().0);\n\n    \/\/\u90f5\u4fbf\u756a\u53f7\u3092\u524d\u5f8c\u306b\u5206\u5272\u3059\u308b\n    let df_postal_code = df\n        .clone()\n        .lazy()\n        .with_columns([\n            col(\"\u90f5\u4fbf\u756a\u53f7\")\n                .str()\n                .slice(lit(0), lit(3))\n                .alias(\"postal_code_first\"),\n            col(\"\u90f5\u4fbf\u756a\u53f7\")\n                .str()\n                .slice(lit(3), lit(4))\n                .alias(\"postal_code_second\"),\n        ])\n        .collect()?;\n\n    println!(\"\u90f5\u4fbf\u756a\u53f7\u306e\u5206\u5272:\");\n    println!(\n        \"{:?}\",\n        df_postal_code\n            .select([\"postal_code_first\", \"postal_code_second\"])?\n            .head(Some(10))\n    );\n\n    \/\/\u90f5\u4fbf\u756a\u53f7\u306e\u6700\u521d\u306e3\u6841\u3067group by\u3057\u3066\u30ab\u30a6\u30f3\u30c8\u3059\u308b\n    let df_postal_code_count = df_postal_code\n        .clone()\n        .lazy()\n        .group_by([col(\"postal_code_first\")])\n        .agg([col(\"postal_code_first\").count().alias(\"count\")])\n        .sort(\n            [\"count\"],\n            
SortMultipleOptions::default().with_order_descending(true),\n        )\n        .collect()?;\n\n    \/\/\u4f5c\u6210\u3057\u305fDataFrame\u3092\u4fdd\u5b58\u3059\u308b\u30c7\u30a3\u30ec\u30af\u30c8\u30ea\u3092\u4f5c\u6210\u3059\u308b\n    let output_dir = Path::new(&amp;args.output_dirname);\n    if !output_dir.exists() {\n        fs::create_dir_all(&amp;args.output_dirname)?;\n    }\n\n    \/\/\u30aa\u30ea\u30b8\u30ca\u30eb\u306eDataFrame\u306fParquet\u5f62\u5f0f\u3067\u4fdd\u5b58\u3059\u308b\n    let mut output_file_df = File::create(output_dir.join(\"original.parquet\"))?;\n    let mut df = df;\n    ParquetWriter::new(&amp;mut output_file_df).finish(&amp;mut df)?;\n\n    \/\/\u305d\u306e\u4ed6\u306eDataFrame\u306fCSV\u5f62\u5f0f\u3067\u4fdd\u5b58\u3059\u308b\n    let fn_save_as_csv = |filename: &amp;str, df: &amp;mut DataFrame| -> Result&lt;(), Box&lt;dyn Error>> {\n        let mut output_file = File::create(output_dir.join(filename))?;\n        CsvWriter::new(&amp;mut output_file).finish(df)?;\n\n        Ok(())\n    };\n    fn_save_as_csv(\"tokyo.csv\", &amp;mut df_tokyo.clone())?;\n    fn_save_as_csv(\"postal_code_count.csv\", &amp;mut df_postal_code_count.clone())?;\n\n    Ok(())\n}<\/code><\/pre>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<p class=\"wp-block-paragraph\">\u4eca\u56de\u306eCSV\u30d5\u30a1\u30a4\u30eb\u306f\u30d8\u30c3\u30c0\u884c\u304c\u306a\u3044\u305f\u3081\u3001\u30ab\u30e9\u30e0\u540d\u3068\u30c7\u30fc\u30bf\u578b\u306f\u3053\u3061\u3089\u3067\u6307\u5b9a\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-luxe-blocks-syntaxhighlighter line-numbers language-rust\"><code class=\"language-rust\">Field::new(\"\u5168\u56fd\u5730\u65b9\u516c\u5171\u56e3\u4f53\u30b3\u30fc\u30c9\".into(), DataType::String)<\/code><\/pre>\n\n\n\n<p 
class=\"wp-block-paragraph\">0\u304b1\u3057\u304b\u5165\u3063\u3066\u3044\u306a\u3044\u30ab\u30e9\u30e0\u306e\u30c7\u30fc\u30bf\u578b\u306fUInt8\u3068\u304bBoolean\u3067\u3044\u3044\u3068\u601d\u3044\u307e\u3057\u305f\u304c\u3001\u305d\u308c\u3060\u3068\u30a8\u30e9\u30fc\u306b\u306a\u3063\u3066\u3046\u307e\u304f\u3044\u304b\u306a\u304b\u3063\u305f\u305f\u3081\u3001\u304a\u3068\u306a\u3057\u304fInt32\u3092\u4f7f\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u4f5c\u6210\u3057\u305fSchema\u3092CSV\u30d5\u30a1\u30a4\u30eb\u8aad\u8fbc\u307f\u306e\u969b\u306b\u30d1\u30e9\u30e1\u30fc\u30bf\u3068\u3057\u3066\u6e21\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-luxe-blocks-syntaxhighlighter line-numbers language-rust\"><code class=\"language-rust\">let df = CsvReadOptions::default()\n        .with_has_header(false)\n        .with_schema(Some(schema.into()))\n        .try_into_reader_with_file_path(Some(args.input_filepath.into()))?\n        .finish()?;<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u30c7\u30fc\u30bf\u3092\u7d5e\u308a\u8fbc\u3080\u969b\u306b\u306f<code>filter()<\/code>\u3092\u4f7f\u7528\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-luxe-blocks-syntaxhighlighter line-numbers language-rust\"><code class=\"language-rust\">let df_tokyo = df\n        .clone()\n        .lazy()\n        .filter(col(\"\u90fd\u9053\u5e9c\u770c\u540d(\u6f22\u5b57)\").eq(lit(\"\u6771\u4eac\u90fd\")))\n        .collect()?;<\/code><\/pre>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u3053\u3053\u3067\u547c\u3073\u51fa\u3057\u3066\u3044\u308b<code>lazy()<\/code>\u306fLazyFrame\u3092\u4f5c\u6210\u3059\u308b\u95a2\u6570\u3067\u3059\u3002<br>LazyFrame\u306f\u9045\u5ef6\u8a55\u4fa1\u3092\u884c\u3046\u30c7\u30fc\u30bf\u30d5\u30ec\u30fc\u30e0\u3067\u3001\u6700\u7d42\u7684\u306b<code>collect()<\/code>\u304c\u5b9f\u884c\u3055\u308c\u308b\u307e\u3067\u5b9f\u969b\u306e\u5909\u63db\u51e6\u7406\u306f\u5b9f\u884c\u3055\u308c\u307e\u305b\u3093\u3002<br>\u9045\u5ef6\u8a55\u4fa1\u3092\u884c\u3046\u3053\u3068\u306b\u3088\u3063\u3066\u3001\u30af\u30a8\u30ea\u306e\u5927\u5e45\u306a\u52b9\u7387\u5316\u304c\u53ef\u80fd\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u90f5\u4fbf\u756a\u53f7\u3092\u524d\u5f8c\u306b\u5206\u5272\u3057\u307e\u3059\u3002<br>\u30ab\u30e9\u30e0\u306b\u5bfe\u3057\u3066\u4f55\u304b\u64cd\u4f5c\u3057\u305f\u3044\u3068\u304d\u306f<code>with_columns()<\/code>\u3092\u4f7f\u7528\u3057\u307e\u3059\u3002<br>\u3053\u3053\u3067\u306f\u3001\u90f5\u4fbf\u756a\u53f7\u30ab\u30e9\u30e0\u306e\u5024\u3092\u6587\u5b57\u5217\u3068\u3057\u3066\u53d6\u308a\u51fa\u3057\u3066\u3001substring\u3092\u53d6\u5f97\u3057\u3066\u3044\u307e\u3059\u3002<br>\u51e6\u7406\u5f8c\u306e\u30c7\u30fc\u30bf\u3092\u683c\u7d0d\u3059\u308b\u30ab\u30e9\u30e0\u540d\u306f<code>alias()<\/code>\u3067\u6307\u5b9a\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-luxe-blocks-syntaxhighlighter line-numbers language-rust\"><code class=\"language-rust\">let df_postal_code = df\n        .clone()\n        .lazy()\n        .with_columns([\n            col(\"\u90f5\u4fbf\u756a\u53f7\")\n                .str()\n                .slice(lit(0), lit(3))\n                .alias(\"postal_code_first\"),\n            col(\"\u90f5\u4fbf\u756a\u53f7\")\n                .str()\n                .slice(lit(3), lit(4))\n                .alias(\"postal_code_second\"),\n        ])\n        .collect()?;<\/code><\/pre>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u7279\u5b9a\u306e\u30ab\u30e9\u30e0\u3092\u53d6\u308a\u51fa\u3057\u305f\u3044\u3068\u304d\u306f<code>select()<\/code>\u3092\u4f7f\u7528\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-luxe-blocks-syntaxhighlighter line-numbers language-rust\"><code class=\"language-rust\">println!(\n        \"{:?}\",\n        df_postal_code\n            .select([\"postal_code_first\", \"postal_code_second\"])?\n            .head(Some(10))\n    );<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u90f5\u4fbf\u756a\u53f7\u306e\u6700\u521d\u306e3\u6841\u3067group by\u3057\u3066\u30ab\u30a6\u30f3\u30c8\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-luxe-blocks-syntaxhighlighter line-numbers language-rust\"><code class=\"language-rust\">let df_postal_code_count = df_postal_code\n        .clone()\n        .lazy()\n        .group_by([col(\"postal_code_first\")])\n        .agg([col(\"postal_code_first\").count().alias(\"count\")])\n        .sort(\n            [\"count\"],\n            SortMultipleOptions::default().with_order_descending(true),\n        )\n        .collect()?;<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">DataFrame\u3092\u30d5\u30a1\u30a4\u30eb\u306b\u51fa\u529b\u3057\u307e\u3059\u3002<br>Parquet\u5f62\u5f0f\u3067\u51fa\u529b\u3059\u308b\u5834\u5408\u306f<code>ParquetWriter<\/code>\u3001CSV\u5f62\u5f0f\u3067\u51fa\u529b\u3059\u308b\u5834\u5408\u306f<code>CsvWriter<\/code>\u3092\u4f7f\u7528\u3057\u307e\u3059\u3002<br>\u3069\u3061\u3089\u3082\u4f7f\u3044\u65b9\u306f\u540c\u3058\u3067\u3059\u306d\u3002<\/p>\n\n\n\n<pre class=\"wp-block-luxe-blocks-syntaxhighlighter line-numbers language-rust\"><code class=\"language-rust\">ParquetWriter::new(&amp;mut output_file_df).finish(&amp;mut df)?;<\/code><\/pre>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u3068\u3053\u308d\u3067\u3001\u4eca\u56de\u306e\u30b3\u30fc\u30c9\u3067\u306fDataFrame\u306b\u5bfe\u3057\u3066<code>clone()<\/code>\u3092\u4f55\u5ea6\u3082\u5b9f\u884c\u3057\u3066\u3044\u307e\u3059\u3002<br>Polars\u306e<code>clone()<\/code>\u306fdeep copy\u3059\u308b\u308f\u3051\u3067\u306f\u306a\u3044\u306e\u3067\u3001\u30d1\u30d5\u30a9\u30fc\u30de\u30f3\u30b9\u306b\u306f\u5f71\u97ff\u3042\u308a\u307e\u305b\u3093\u3002<br><code>lazy()<\/code>\u3092\u547c\u3073\u51fa\u3059\u3068\u6240\u6709\u6a29\u304c\u79fb\u52d5\u3057\u3066\u3057\u307e\u3046\u305f\u3081\u3001<code>clone()<\/code>\u3067\u30b3\u30d4\u30fc\u3092\u4f5c\u6210\u3057\u3066\u3044\u307e\u3059\u3002<br><a href=\"https:\/\/docs.pola.rs\/api\/python\/stable\/reference\/dataframe\/api\/polars.DataFrame.clone.html\" data-type=\"link\" data-id=\"https:\/\/docs.pola.rs\/api\/python\/stable\/reference\/dataframe\/api\/polars.DataFrame.clone.html\">Python\u306e\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8<\/a>\u3057\u304b\u898b\u3064\u304b\u308a\u307e\u305b\u3093\u3067\u3057\u305f\u304c\u3001\u4ee5\u4e0b\u306e\u3088\u3046\u306a\u8a18\u8f09\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\">\n<p class=\"wp-block-paragraph\">This is a cheap operation that does not copy data.<\/p>\n<\/blockquote>\n\n\n\n<h2 class=\"wp-block-heading\"><span class=\"ez-toc-section\" id=\"%E3%83%A1%E3%83%A2\"><\/span>\u30e1\u30e2<span class=\"ez-toc-section-end\"><\/span><\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">\u90f5\u4fbf\u756a\u53f7\u306e\u982d3\u6841\u3067group 
by\u3057\u3066\u30ab\u30a6\u30f3\u30c8\u3057\u305f\u3068\u3053\u308d\u3001\u6700\u3082\u591a\u3044\u306e\u306f939\u30671337\u4ef6\u3001\u6700\u3082\u5c11\u306a\u3044\u306e\u306f539\u30671\u4ef6\u3067\u3057\u305f\u3002<br>939\u306f\u5bcc\u5c71\u770c\u3001539\u306f\u5927\u962a\u5e02\u4e2d\u592e\u533a\u306b\u5272\u308a\u5f53\u3066\u3089\u308c\u3066\u3044\u308b\u3089\u3057\u3044\u3067\u3059\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u524d\u7f6e\u304d Rust\u3067DataFrame\u3092\u6271\u3046\u305f\u3081\u306ePolars\u3068\u3044\u3046\u30e9\u30a4\u30d6\u30e9\u30ea\u304c\u3042\u308a\u307e\u3059\u3002 \u30b3\u30a2\u90e8\u5206\u306fRust\u3067\u66f8\u304b\u308c\u3066\u3044\u307e\u3059\u304c\u3001Python\u304b\u3089\u3082\u4f7f\u7528\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u5b9f\u969b\u306bGoogle\u691c\u7d22\u3057\u3066\u307f\u308b\u3068\u3001Rust\u3088\u308a\u3082P [&hellip;]<\/p>\n","protected":false},"author":2,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[27,16],"tags":[],"class_list":["post-1406","post","type-post","status-publish","format-standard","hentry","category-rust","category-16"],"_links":{"self":[{"href":"https:\/\/daba-no-heya.com\/index.php?rest_route=\/wp\/v2\/posts\/1406","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/daba-no-heya.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/daba-no-heya.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/daba-no-heya.com\/index.php?rest_route=\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/daba-no-heya.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=1406"}],"version-history":[{"count":13,"href":"https:\/\/daba-no-heya.com\/index.php?rest_route=\/wp\/v2\/posts\/1406\/revisions"}],"predecessor-version":[{"id":1419,"href":"https:\/\/daba-no-heya.com\/index.php?rest_route=
\/wp\/v2\/posts\/1406\/revisions\/1419"}],"wp:attachment":[{"href":"https:\/\/daba-no-heya.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=1406"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/daba-no-heya.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=1406"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/daba-no-heya.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=1406"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}