惯性聚合 高效追踪和阅读你感兴趣的博客、新闻、科技资讯
阅读原文 在惯性聚合中打开

推荐订阅源

GbyAI
GbyAI
T
Tenable Blog
Webroot Blog
Webroot Blog
L
Lohrmann on Cybersecurity
S
Securelist
S
Schneier on Security
NISL@THU
NISL@THU
Know Your Adversary
Know Your Adversary
C
Cybersecurity and Infrastructure Security Agency CISA
T
The Exploit Database - CXSecurity.com
L
LINUX DO - 热门话题
C
CXSECURITY Database RSS Feed - CXSecurity.com
O
OpenAI News
I
Intezer
Threat Intelligence Blog | Flashpoint
Threat Intelligence Blog | Flashpoint
TaoSecurity Blog
TaoSecurity Blog
S
Secure Thoughts
Application and Cybersecurity Blog
Application and Cybersecurity Blog
P
Privacy International News Feed
H
Hacker News: Front Page
N
Netflix TechBlog - Medium
M
MIT News - Artificial intelligence
博客园 - Franky
PCI Perspectives
PCI Perspectives
OSCHINA 社区最新新闻
OSCHINA 社区最新新闻
Microsoft Azure Blog
Microsoft Azure Blog
MongoDB | Blog
MongoDB | Blog
L
LangChain Blog
P
Proofpoint News Feed
S
Security Affairs
WordPress大学
WordPress大学
The Last Watchdog
The Last Watchdog
S
SegmentFault 最新的问题
小众软件
小众软件
F
Full Disclosure
博客园 - 叶小钗
cs.AI updates on arXiv.org
cs.AI updates on arXiv.org
T
The Blog of Author Tim Ferriss
Simon Willison's Weblog
Simon Willison's Weblog
P
Palo Alto Networks Blog
Security Latest
Security Latest
P
Proofpoint News Feed
月光博客
月光博客
T
Tailwind CSS Blog
Scott Helme
Scott Helme
Hacker News - Newest:
Hacker News - Newest: "LLM"
Google Online Security Blog
Google Online Security Blog
T
Threat Research - Cisco Blogs
Help Net Security
Help Net Security
Project Zero
Project Zero

博客园 - yuejianjun

搜索结果点击情况 进行加权 java httpURL连接远程服务器并返回数据(httpurlconnection)(转) 实体类 topN topN 堆排序 (int 类型) 过滤词 Lucene的评分(score) 位图求交集 位运算包含功能 页面抓取匹配时,万恶的 , , 要先替换掉为空,出现匹配有问题,都是这个引起的 探索推荐引擎内部的秘密,第 1 部分: 推荐引擎初探 深度用户行为 多维度深入分析笔记 lucene 搜索学习笔记 - OK 中文自动摘要提取 常用的vs编码 快捷键 Lucene中的堆(Heap)[ScorerDocQueue,TopScoreDocCollector] lucene 大数据量 快速 排序 Task 搜索 (Lucene) 集中、分布式搜索引擎的4种设计方案 lucene fenlei ThreadPool 使用
lucene 创建 修改 删除 索引
yuejianjun · 2011-11-03 · via 博客园 - yuejianjun

2011-11-03 15:56  yuejianjun  阅读(253)  评论()    收藏  举报

using System;
using System.Collections.Generic;
using System.Text;
using Business;
using Lucene.Net.Index;
using Lucene.Net.Documents;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.PanGu;
using Lucene.Net.Search;
using Lucene.Net.Store;

namespace Index
{
    class Program
    {
        private  static string path = @"D:\Work\HotelIndex";
        static void Main(string[] args)
        {
            CreateIndex();

            Search();
            Update();
            Search();
            Delete();
            Search();

            Console.Read();
        }
        private static void Update()
        {
            IndexWriter writer = new IndexWriter(path, new PanGuAnalyzer());
            Term term = new Term("keyid""3");
            //Term term = new Term("datapart", "sd");
            string title = "datapartxiu酒店酒店";
            writer.UpdateDocument(term, CreateDataEntityIndex(title));
            writer.Commit();
            //writer.Optimize();
            writer.Close();
        }
        private static void Delete()
        { 
            IndexReader reader = IndexReader.Open(FSDirectory.Open(new System.IO.FileInfo(path)), false);
 
            //Term term = new Term("keyid", "196");
            Term term = new Term("keyid""3"); 
            reader.DeleteDocuments(term);
            reader.Commit(); 
            reader.Close();
        }
        private static void Search()
        {
            TopScoreDocCollector collector = TopScoreDocCollector.create(10false);
            IndexSearcher searcher = new IndexSearcher(path );
            BooleanQuery booleanQuery = new BooleanQuery(); 
            BooleanQuery keyQuery = new BooleanQuery();
            keyQuery.Add(new TermQuery(new Term("title""酒店")), BooleanClause.Occur.SHOULD);
            booleanQuery.Add(keyQuery, BooleanClause.Occur.MUST);
            Query query = booleanQuery; 
            searcher.Search(query, null, collector);
            ScoreDoc[] scoreDocArr = collector.TopDocs(010).scoreDocs;
            for (int i = 0; i < scoreDocArr.Length; i++)
            {
                ScoreDoc scoreDoc = scoreDocArr[i];
                int docId = scoreDoc.doc;//拿到搜到的文档ID

                Document doc = searcher.Doc(docId);//根据文档ID创建DOCUMENT 
                 string id = doc.Get("keyid").ToString();
                 string title = doc.Get("title").ToString();
                 Console.WriteLine("id: " + id + "   title: " + title);
            }
            searcher.Close();
            Console.WriteLine("------------------------------");
        }
        private static void CreateIndex()
        {
            IndexWriter writer = new IndexWriter(path, new PanGuAnalyzer());
            string title = "希尔顿酒店s";
            Document doc = CreateDataEntityIndex(title,"2");
            writer.AddDocument(doc);

            doc = CreateDataEntityIndex(title,"3");
            writer.AddDocument(doc);

            writer.Close();
        }
        private static Document CreateDataEntityIndex(string title,string keyid="1")
        {
            Field field;
            Document doc = new Document();
            field = new Field("title", title, Field.Store.YES, Field.Index.TOKENIZED);//存储, 索引 
            doc.Add(field);
            field = new Field("data","sss", Field.Store.NO, Field.Index.TOKENIZED);//不存储( 不能在搜索结果中用doc.get("data").toString() 读取 ), 索引
            
//field.SetBoost(dataEntity.Content.EntityBoost ); 
            doc.Add(field);
            field = new Field("datapart""sd", Field.Store.YES, Field.Index.UN_TOKENIZED);//存储,不索引
            
//field.SetBoost(dataEntity.Content.EntityBoost ); 
            doc.Add(field);
            field = new Field("mainurl",  "sd",  Field.Store.YES, Field.Index.UN_TOKENIZED);
            doc.Add(field);
            //field = new Field("districtid", "12112", Field.Store.YES, Field.Index.UN_TOKENIZED);
            
//doc.Add(field);
            field = new Field("keyid", keyid, Field.Store.YES, Field.Index.UN_TOKENIZED);
            doc.Add(field);
            field = new Field("ctripscore""1", Field.Store.YES, Field.Index.UN_TOKENIZED);
            doc.Add(field);
            field = new Field("districtname""1", Field.Store.YES, Field.Index.UN_TOKENIZED);
            doc.Add(field);
            field = new Field("datatype""1", Field.Store.YES, Field.Index.UN_TOKENIZED);
            doc.Add(field);
            field = new Field("districtpath""1", Field.Store.YES, Field.Index.UN_TOKENIZED);
            doc.Add(field);
            //double boost = double.Parse(dataEntity.CtripScore) + 1;
            
//boost = Math.Sqrt(boost);
            
//field = new Field("ctripscore", boost.ToString(), Field.Store.YES, Field.Index.TOKENIZED);
            
//doc.Add(field);
            
//doc.SetBoost(float.Parse(boost.ToString()));//提高其中一条数据的权重,如广告 
            return doc;
        }
    }
}