Wednesday, May 14, 2008

搜索题做多了都是一个样……广搜题心得体会

 基本就是结合了广搜+状态压缩+剪枝的思路。

比如最近2008 sysu校赛预选的gene reprogram,2008广东省赛的fengshui,以及usaco的clocks(http://ace.delos.com/usacoprob2?a=RRbITFklHzA&S=clocks)
下面用gene reprogram做个例子,备忘一下。

-------------------------------------------------------------------------------

Problem

      As a gene engineer of a gene engineering project, Enigma encountered a puzzle about gene recombination.
      It is well known that a gene can be considered as a sequence, consisting of four nucleotides, which are simply denoted by four letters, A, C, G, and T.
      Enigma has gotten a gene, such as "ATCC" for example. And he wants to reorder this gene to a new one, "CTCA" for instance. He can use two kinds of operations: (1) exchange the first two letters, or (2) move the first letter to end of the gene. For example, "ATCC" can be changed to "TCCA" by operation two, and then "TCCA" can be changed to "CTCA" by operation one. Your task is to make a program to help Enigma to find out the minimum number of operations to reorder the gene.

Input

      The input contains several test cases. The first line of a test case contains one integer N indicating the length of the gene (1<=N<=12). The second line contains a string indicating the initial gene. The third line contains another string which Enigma wants.
For each kind of letter, the two strings have the same number of it.

Output

      For each test case, output the minimum number of operations.

Sample input

4
ATCC
CTCA
4
ATCG
GCTA
4
ATCG
TAGC

Sample output

2
4
6
-------------------------------------------------------------------------------

代码:

#include <stdio.h>
#include <string.h>
int len;
int before[15];
int after[15];
char mark[1<<24];            // 用于hash的数组
const int QLEN = 200000;    // 广搜队列长度

int bval,eval;

struct node
{// 广搜树节点
    int value;
    int level;
};
node q[QLEN];
int b,e;    // queue and its head,tail

void inc(int & p)
{// 使用循环队列的方法来保存节点
    if(++p == QLEN) p=0;
}

int zip(int * gene)
{// 状态压缩
    int result = 0,base = 1,i,j;
    for(i=len-1;i>=0;i--)
        result |= gene[i]<<((len-i-1)*2);
    return result;
}

int * unzip(int zipped,int * gene)
{// 解压缩
    int i;
    for(i=0;i<len;i++)
    {
        gene[i] = (zipped>>(len-i-1)*2)&3;
    }
    return gene;
}

int op1(int gene)
{// 对应题目的第一个操作
    int first = (gene>>(len-1)*2)&3;
    int second = (gene>>(len-2)*2)&3;
    int mask = (1<<(len*2))-1;    // for len=4, 11 11 11 11
    int mask1 = ~(3<<(len-1)*2) & mask; // for len=4, 00 11 11 11
    int mask2 = ~(3<<(len-2)*2) & mask; // for len=4, 11 00 11 11
    gene &= mask1;
    gene &= mask2;
    gene |= first<<(len-2)*2;
    gene |= second<<(len-1)*2;
    return gene;
}

int op2(int gene)
{// 对应题目的第二个操作
    int first = (gene>>(len-1)*2)&3;
    int m = (1<<(len*2))-1;             // for len=4, 11 11 11 11
    int mask = ~(3<<(len-1)*2) & m;  // for len=4, 00 11 11 11
    gene = (gene&mask) <<2;
    gene |= first;
    return gene;
}

void getGene(int * gene)
{// 输入gene
    char ch;
    for(int i=0;i<len;i++)
    {
        scanf("%c",&ch);
        switch(ch)
        {
        case 'A':gene[i] = 0;break;
        case 'C':gene[i] = 1;break;
        case 'G':gene[i] = 2;break;
        case 'T':gene[i] = 3;break;
        }
    }
    scanf("\n");
}

int search()
{// 进行搜索,返回结果所在的层数,就是所需要的最短步数
    while( b!=e )
    {
        node n = q[b]; // de-queue
        inc(b);
        if( n.value == eval ) return n.level;
       
        node temp;
        temp.value = op1(n.value);
        temp.level = n.level + 1;
        if( temp.value == eval ) return temp.level;
        if( mark[temp.value] != 1)
        {// en-queue
            q[e] = temp;
            inc(e);
            mark[temp.value] = 1;
        }

        temp.value = op2(n.value);
        temp.level = n.level + 1;
        if( temp.value == eval ) return temp.level;
        if( mark[temp.value] != 1)
        {// en-queue
            q[e] = temp;
            inc(e);
            mark[temp.value] = 1;
        }
    }
    return -1;
}

int main()
{
    freopen("order.in","r",stdin);
    while( scanf("%d\n",&len) != EOF)
    {
        memset(mark,0,sizeof(mark));
        b=0;e=0;
        getGene(before);
        getGene(after);

        bval = zip(before);
        eval = zip(after);
        node temp;
        temp.value = bval;
        temp.level = 0;
        q[b] = temp;    // 把初始状态压入队列
        inc(e);
        mark[bval] = 1;

        int result = search();
        printf("%d\n",result);
    }
    return 0;
}

-------------------------------------------------------------------------------

分析:
1.广搜
一般题目都是给出一个configuration,再给出几个操作op,要求使用这些op对初始状态进行变换,
直到状态变为目标状态为止,如果无法达到则输出-1.
一般有两种问法,比较简单的是问最少要多少步能达到;也可能会要求给出最短路径。
对于前者,无需保存整棵搜索树,只需要记录当前节点的层数即可,这时可以使用循环队列的方法来记录节点。
对于后者,则有必要保存整棵搜索树(所以此时状态压缩、空间分配很重要),才能进行有效回溯。不能使用循环队列,否则会覆盖回溯路径的父节点。
此时广搜队列的长度则能大则大了,不过用内存多少是给定的,就狠狠地用尽它吧!

2.状态压缩
一般题目都是给出一个向量或矩阵表示初始状态。则此时可以把每一个元素可能出现的状态记为base,
则整个状态可以看作一个base进制的数,第i个元素可以看作权为i的系数。
比如本题给出的元素状态有4种:A,C,G,T,我分别把它们记做0,1,2,3.
对于一个具有n位的串a,状态可以表示为一个int:
a[0]*4^0 + a[1]*4^1 + ... a[n-1]*4^(n-1)
因为n最大取12,则可能出现的最大数为4^13-1=64M,可能存在的状态有4^12=16M种。
比如串ACGT=0123,可以表示为0*64+1*16+2*4+3*1=27
一般化一点,可能出现的最大数为base^(n+1)-1,存在的状态有base^n种

此时往往可能出现的问题是:
由于base或n太大,使得一个int无法保存,或者使得剪枝操作无法进行(跟后面介绍的hash方法有关)

考虑到一个字节有8位,用以上的方法我们只利用了其中的2位,因此可以进一步使用位压缩方法进行压缩。

显然,可以使用00=0,01=1,10=2,11=3来表示这四种状态,使用移位的方法可以拼凑出需要的数字。
也许其中会写很多微操作的步骤,不过不用担心,做一个位操作比做一个乘除快多了。

比如本题,使用
    int result = 0,base = 1,i,j;
    for(i=len-1;i>=0;i--)
        result |= gene[i]<<((len-i-1)*2);
从最低位开始每次左移两位来拼凑出最后的int表示。

这样则充分利用了每一个位,使得状态压缩得更精简。

3.剪枝
剪枝的主要原则是发掘操作的顺序、重复,以删除无用的树枝。
比如问题中,可以观察到进行两次操作一则得到原串,进行k=串长度次数的操作二可以得到原串,
另外经过多次的op1,op2操作,也是有可能得到相同的串的。
而USACO的clocks题目,操作之间是无序的,即op1(op2(state)) = op2(op1(state))
而且四个op
此时如果已经搜索过某个状态及其生成的树而无结果,则下次不必再搜索。

剪枝的实现一般是通过顺序搜索或者hash实现的。
对于前者,每次生成新的状态便在队列中搜索是否存在相同元素,如果存在则不必加入;
对于后者,每次新生成的状态在一个开关数组中查询,如果标记了说明已经生成过了,如果没有标记则可以进行搜索,并且标记之,
一般使用char 的数组来完成标记,其实这就是一种桶大小为1,而桶数量为状态数的hash。

对于以上介绍的压缩方法,刚好可以利用hash方法来进行标记,因此这两种技术往往是一起使用的。威力不是一般的强大。
本题16M的状态刚好可以使用一个足够大小的char数组保存,大小为16MB(一般允许使用32MB-64MB的内存)

-------------------------------------------------------------------------------

后记:刚刚开始接触这种题目时,觉得混杂了这么多的算法和技巧很复杂,其实写多了就是一个样
代码也不长,简单的100行左右就好了。因此也可以说是简单题目。
不过很多题目的数据强度都是恰好能让人用尽内存和时间后AC的,所以也不容轻视。

Tuesday, May 13, 2008

rule of thumb : 经验法则

在USACO上看到的,非常不错的一些算法竞赛tips:
http://ace.delos.com/usacotext2?a=BxBFg3CZbVJ&S=craft

以下摘自rule of thumb
  • When analyzing an algorithm to figure out how long it might run for a given dataset, the first rule of thumb is: modern (2004) computers can deal with 100M actions per second. In a five second time limit program, about 500M actions can be handled. Really well optimized programs might be able to double or even quadruple that number. Challenging algorithms might only be able to handle half that much. Current contests usually have a time limit of 1 second for large datasets.
  • 16MB maximum memory use
  • 210 ~approx~ 10 3
  • If you have k nested loops running about N iterations each, the program has O(N k) complexity.
  • If your program is recursive with b recursive calls per level and has l levels, the program O(b l) complexity.
  • Bear in mind that there are N! permutations and 2 n subsets or combinations of N elements when dealing with those kinds of algorithms.
  • The best times for sorting N elements are O(N log N).
  • DO THE MATH! Plug in the numbers.

全文:
Crafting Winning Solutions

A good way to get a competitive edge is to write down a game plan for what you're going to do in a contest round. This will help you script out your actions, in terms of what to do both when things go right and when things go wrong. This way you can spend your thinking time in the round figuring out programming problems and not trying to figure out what the heck you should do next... it's sort of like precomputing your reactions to most situations.

Mental preparation is also important.

Game Plan For A Contest Round

Read through ALL the problems FIRST; sketch notes with algorithm, complexity, the numbers, data structs, tricky details, ...

  • Brainstorm many possible algorithms - then pick the stupidest that works!
  • DO THE MATH! (space & time complexity, and plug in actual expected and worst case numbers)
  • Try to break the algorithm - use special (degenerate?) test cases
  • Order the problems: shortest job first, in terms of your effort (shortest to longest: done it before, easy, unfamiliar, hard)

Coding a problem - For each, one at a time:

  • Finalize algorithm
  • Create test data for tricky cases
  • Write data structures
  • Code the input routine and test it (write extra output routines to show data?)
  • Code the output routine and test it
  • Stepwise refinement: write comments outlining the program logic
  • Fill in code and debug one section at a time
  • Get it working & verify correctness (use trivial test cases)
  • Try to break the code - use special cases for code correctness
  • Optimize progressively - only as much as needed, and keep all versions (use hard test cases to figure out actual runtime)

Time management strategy and "damage control" scenarios

Have a plan for what to do when various (foreseeable!) things go wrong; imagine problems you might have and figure out how you want to react. The central question is: "When do you spend more time debugging a program, and when do you cut your losses and move on?". Consider these issues:

  • How long have you spent debugging it already?
  • What type of bug do you seem to have?
  • Is your algorithm wrong?
  • Do you data structures need to be changed?
  • Do you have any clue about what's going wrong?
  • A short amount (20 mins) of debugging is better than switching to anything else; but you might be able to solve another from scratch in 45 mins.
  • When do you go back to a problem you've abandoned previously?
  • When do you spend more time optimizing a program, and when do you switch?
  • Consider from here out - forget prior effort, focus on the future: how can you get the most points in the next hour with what you have?

Have a checklist to use before turning in your solutions:

    Code freeze five minutes before end of contest?
  • Turn asserts off.
  • Turn off debugging output.

Tips & Tricks

  • Brute force it when you can
  • KISS: Simple is smart!
  • Hint: focus on limits (specified in problem statement)
  • Waste memory when it makes your life easier (if you can get away with it)
  • Don't delete your extra debugging output, comment it out
  • Optimize progressively, and only as much as needed
  • Keep all working versions!
  • Code to debug:
    • whitespace is good,
    • use meaningful variable names,
    • don't reuse variables,
    • stepwise refinement,
    • COMMENT BEFORE CODE.
  • Avoid pointers if you can
  • Avoid dynamic memory like the plague: statically allocate everything.
  • Try not to use floating point; if you have to, put tolerances in everywhere (never test equality)
  • Comments on comments:
    • Not long prose, just brief notes
    • Explain high-level functionality: ++i; /* increase the value of i by */ is worse than useless
    • Explain code trickery
    • Delimit & document functional sections
    • As if to someone intelligent who knows the problem, but not the code
    • Anything you had to think about
    • Anything you looked at even once saying, "now what does that do again?"
    • Always comment order of array indices
  • Keep a log of your performance in each contest: successes, mistakes, and what you could have done better; use this to rewrite and improve your game plan!

Complexity

Basics and order notation

The fundamental basis of complexity analysis revolves around the notion of ``big oh'' notation, for instance: O(N). This means that the algorithm's execution speed or memory usage will double when the problem size doubles. An algorithm of O(N 2) will run about four times slower (or use 4x more space) when the problem size doubles. Constant-time or space algorithms are denoted O(1). This concept applies to time and space both; here we will concentrate discussion on time.

One deduces the O() run time of a program by examining its loops. The most nested (and hence slowest) loop dominates the run time and is the only one mentioned when discussing O() notation. A program with a single loop and a nested loop (presumably loops that execute N times each) is O(N 2), even though there is also a O(N) loop present.

Of course, recursion also counts as a loop and recursive programs can have orders like O(b N), O(N!), or even O(N N).

Rules of thumb

  • When analyzing an algorithm to figure out how long it might run for a given dataset, the first rule of thumb is: modern (2004) computers can deal with 100M actions per second. In a five second time limit program, about 500M actions can be handled. Really well optimized programs might be able to double or even quadruple that number. Challenging algorithms might only be able to handle half that much. Current contests usually have a time limit of 1 second for large datasets.
  • 16MB maximum memory use
  • 210 ~approx~ 10 3
  • If you have k nested loops running about N iterations each, the program has O(N k) complexity.
  • If your program is recursive with b recursive calls per level and has l levels, the program O(b l) complexity.
  • Bear in mind that there are N! permutations and 2 n subsets or combinations of N elements when dealing with those kinds of algorithms.
  • The best times for sorting N elements are O(N log N).
  • DO THE MATH! Plug in the numbers.

Examples

A single loop with N iterations is O(N):

 1  sum = 0
 2  for i = 1 to n
 3    sum = sum + i

A double nested loop is often O(N 2):

# fill array a with N elements
1 for i = 1 to n-1
2   for j = i + 1 to n
3     if (a[i] > a[j])
         swap (a[i], a[j])
Note that even though this loop executes N x (N+1) / 2 iterations of the if statement, it is O(N 2) since doubling N quadruples the execution times.

Consider this well balanced binary tree with four levels:

An algorithm that traverses a general binary tree will have complexity O(2 N).

Solution Paradigms

Generating vs. Filtering

Programs that generate lots of possible answers and then choose the ones that are correct (imagine an 8-queen solver) are filters. Those that hone in exactly on the correct answer without any false starts are generators. Generally, filters are easier (faster) to code and run slower. Do the math to see if a filter is good enough or if you need to try and create a generator.

Precomputation

Sometimes it is helpful to generate tables or other data structures that enable the fastest possible lookup of a result. This is called precomputation (in which one trades space for time). One might either compile precomputed data into a program, calculate it when the program starts, or just remember results as you compute them. A program that must translate letters from upper to lower case when they are in upper case can do a very fast table lookup that requires no conditionals, for example. Contest problems often use prime numbers - many times it is practical to generate a long list of primes for use elsewhere in a program.

Decomposition (The Hardest Thing At Programming Contests)

While there are fewer than 20 basic algorithms used in contest problems, the challenge of combination problems that require a combination of two algorithms for solution is daunting. Try to separate the cues from different parts of the problem so that you can combine one algorithm with a loop or with another algorithm to solve different parts of the problem independently. Note that sometimes you can use the same algorithm twice on different (independent!) parts of your data to significantly improve your running time.

Symmetries

Many problems have symmetries (e.g., distance between a pair of points is often the same either way you traverse the points). Symmetries can be 2-way, 4-way, 8-way, and more. Try to exploit symmetries to reduce execution time.

For instance, with 4-way symmetry, you solve only one fourth of the problem and then write down the four solutions that share symmetry with the single answer (look out for self-symmetric solutions which should only be output once or twice, of course).

Forward vs. Backward

Surprisingly, many contest problems work far better when solved backwards than when using a frontal attack. Be on the lookout for processing data in reverse order or building an attack that looks at the data in some order or fashion other than the obvious.

Simplification

Some problems can be rephrased into a somewhat different problem such that if you solve the new problem, you either already have or can easily find the solution to the original one; of course, you should solve the easier of the two only. Alternatively, like induction, for some problems one can make a small change to the solution of a slightly smaller problem to find the full answer.


Monday, May 12, 2008

[zz]边防证办理须知

http://hi.baidu.com/duohaha/blog/item/293ed0c87ec0e7127e3e6f24.html

进藏问题!!!澄清一些关于护照可以替代边防证的误传!及拉萨边防证的办理办法!

很多准备今年进藏的朋友正摩拳擦掌准备出发,最近在各个论坛及QQ群看到很多朋友谈论进藏边防证的问题,其中有些说法是不正确的,我根据自己的经验向大家解释一下,希望大家少走弯路。其实不仅仅是进藏,去中国任何边境管理区,以下的解释均有效!

(误传一)有了护照就可以替代边防证,去哪都可以。

其实护照是不解决什么问题的,如果护照上没有签注,就什么用都没有。护照是给中国公民在国外用的,在国内还是用身份证。

之所以有很多谣传说带护照就哪都能去,原因大概是起于在拉萨用护照办理了尼泊尔签 证的话,可以通过去后藏地区的鲁鲁检查站。该检查站检查边防通行证,不过要是你已经有尼泊尔签证则可以通过。这样如果只是去珠峰及尼泊尔的朋友,理论上可 以用护照通过(注意:护照上要有尼泊尔签证,否则没用)

但是就算你有护照和尼泊尔签证,进入后藏(阿里地区等)过检查站一样需要通行证,因为虽然在阿里普兰也可以去尼泊尔,但是普兰口岸的尼泊尔签证是特别签证,也就是说你要是想在普兰出境,在签证上要表明出境口岸是普兰,没有标注的只能从樟木出境。

前些年(现在也可能有这样的情况)很多游客在通往后藏地区的检查站用护照可以通 过,那是因为很多站岗的小武警不懂英文还有对业务不熟悉,所以看到游客有护照并说去尼泊尔就放行,比如我之前的护照上有各个国家十几个签证,武警看着也分 不清楚。但是现在武警的业务水平也在提高,特别要是遇到军官,仔细看一下你就很可能被挡回,不允许通过。

最近看到很多朋友在说去西藏带本护照就一切搞定,在此再重复一次,去边境管理区,一定要办理边境通行证,当地办不了的可以到拉萨或者日喀则办理。除非你只是从拉萨去珠峰及尼泊尔,这样可以只是用护照(主意要有尼泊尔签证)。

当然就算现在用护照也有机会蒙混过关,不过我想我们这么远跑去没有必要冒这个风险吧,办个证也就十几块钱,特别是西藏办理的边防证还有藏文,留作纪念也好。

西藏我去过七八次了,特此说明一下,说的不对的地方请指正!

户籍所在地停止办理边防证怎么办?

可以到拉萨办理

具体办理办法:

1,先去拉萨市公安局去办一个开边防证的介绍信,要身份证,最好还能有单位的介绍信,能少点麻烦。注意:公安局开的办理边防证的介绍信一张最多可以填8个人,每张收费都是10元,所以人多可以一起去,节省些费用。

2,拿到介绍信就去武警边防总队,总队大门旁就有办理的窗口,不用进院子(日喀则武警总队也是如此)办证费用8元,不需要照片,和身份证一起使用。押金是30元,不过除非你还回拉萨,不然押金一般是退不回来的了,就算30元买个纪念品了,因为上面有藏文。



1、西藏办事效率低,在西藏办边防证是非常麻烦的,办边防证最好的地点是在身份证所在地办,那样不仅手续简单,费用也很低廉。边防证上,可以写上以下地方:"西藏自治区定日县、樟木、阿里地区、墨脱县、米林县"等等。

2、如果有有效的出国护照的话,可以免办理边防证,凭护照通行。

3、具体程序
(1)、先去户口所在地的派出所,拿一张边防证办理的申请表
(2)、自己填写个人资料,最重要的是具体写明将要到达的西藏的哪个地区,然后让单位保卫科写意见,盖印章
(3)、拿回户口所在地派出所,盖印章
(4)、到户口所在地管辖区的公安分局边防证办理处(如XX区公安分局)办理边防证(免费)
(5)办证全程需要个人身份证及户口本,以及免冠大一寸相片二张,其余免

4、当地办理方法
(1)、如果你没有事先办理边防证,在拉萨办要比在日喀则办便宜得多,一个证在这里各种费用加起来是30多元,到了日喀则据说就到了100多。不过如果你还能回到当地的话,这笔钱还可以退回来,因为其中最大的部分是押金。此外,在西藏办理需要持驻藏单位的介绍信。
(2)、在拉萨办边防证要去两个地方。先去拉萨市公安局,在小昭寺路北端的十字路口向东看可见拉萨市公安局,凭你的身份证及介绍信去办一个开边防证的介绍 信。然后出来去武警边防总队。在边防总队大门边有一小房间专门办边防证,办证费用八元,另交押金三十元,在规定时间内返回可退押金。

5、:切莫以为边境通行证申请表即为边境证,不要草草了事,不然当你被边境检查站的哨兵卡住时,有得你懊恼的。所以办理时一定要问个清楚。

Wednesday, May 7, 2008

最近试用软件汇总(5.5)

1.为了毕业论文能跟几个童鞋用qq/tm沟通,不得不下载了一个vmware workstation来用 -- 与vmware server还是有一定区别d。
不过一般都是启动vmware player来用win(装了个精简版的win,还行,问题是竟然把智能卡服务模块删了……结果网银不能用)
期待workstation 6.0 正式版的seamless功能!

2.装上了scim-python,只因fcitx用起来不知干嘛老是觉得有lag(话说以前在fedora时不会的...),而且老是跟evaqq,qterm等有冲突.
"巨蟒拼音",正在试用中!

3.octave+gnuplot很强大,其中octave可以当matlab来使用,做矩阵乘法时真的特别爽.gnuplot用来绘制一些二维/三维数据很方便,而且可定制性也不错,可以画出非常专业pp的图.

4.终于忍受不了sb的dolphin了,决定用回konqueror. 方法是: 在kcontrol的file association里面,更改inode->folder的默认打开方式为konqueror(我这里不知干嘛改来改去都是dolphin跑到前头,索性直接delete掉它)

5.摄像头终于被我这个勇敢的人通过自行拆解笔记本修好了(acer wxmi系列的通病:摄像头的内部线易扭断)
使用camstreams可以很方便地录制视频与拍摄图像.

6.装了个ksensors来监视cpu温度...

7.使用screenkast+vncserver+vncclient可以方便地录制视频.

8.battle for wesnoth 没说的,很不错.可惜我还是不大喜欢玩战棋耶.......

Monday, May 5, 2008

近期to do list

学术相关:
0.等待毕业证 && 办理 student visa -- 等待中
1.ACM/ICPC省赛 -- 已完成,省赛一等奖第九名
2.CUHK宿舍申请 -- 等待结果中
3.毕业论文答辩 -- 准备中
4.参加baidu之星受鄙视 -- 认命中
5.研究多核架构编程模型与OS变革 -- 遥不可望中
6.研究CUHK老师方向 -- 等待网速变好中
7.啃完算法导论与SICP -- yy中

旅行相关:
0.班级毕业旅行 -- 犹豫中
1.办理passport && 边防证 -- 进行中
2.计划去tibet && nepal 旅游一次 -- 筹划中
3.回家 && 顺便拐带某mm去丹霞山一次 -- yy中
4.计划跟某mm去河源一次 -- yy中

腐败相关:
0.与99级大师兄吃餐饭 -- 等ing他回广州
1.跟家姐撑台脚 && 唱K -- 期待中!
2.参加argo篮球版版友赛 -- 备战中
3.bg argo bbs站务 -- 排期中
4.bg 篮球版众 -- 找时间中

胡思乱想相关:
0.毕业典礼 && 毕业照邀请list -- 准备中
1.学会gdb与kdevelop使用与开发 -- 渴望中
2.进一步熟悉shell 编程 -- 日常应用中
3.临走前去献血一次 -- 可能
4.负责集体购买开源文化衫 -- 进行中
5.学会演唱30首近期港台流行曲 -- 被鄙视中...

Sunday, May 4, 2008

线性代数重要概念

determinant of a matrix:矩阵的行列式

rank of a matrix: 矩阵的秩
定理:矩阵的行秩(行向量组的秩)与列秩相同.
因此不区分它们,统称为矩阵的秩.

rank of a vector: 向量的秩
一个向量组的极大线性无关组所含向量的个数称为这个向量组的秩.
因为线性无关的向量组就是它本身的极大线性无关组,因此一向量组线性无关的充要条件就是:它的秩与它所含向量个数相同.

每一向量组都与它的极大线性无关组等价(所谓等价就是它们可以互相线性表出),并且由秩的定义可得,它们的秩相同.
由等价的传递性可知,任意两个等价向量组的极大线性无关组也等价.因此等价的向量组必有相同的秩.

向量组的极大线性无关组不是唯一的,但是每一个极大线性无关组都与向量组本身等价.
因此一个向量组的任意两个极大线性无关组都是等价的.

linear express: 线性表出
若向量b可以表示为
b=k1a2+k2a2+...knan,
则称b是向量组a1,a2,...,an的一个线性组合或说b由向量组a1,a2,...,an线性表出.
注意: 零向量是任一向量组的线性组合(系数全0即可)

若向量组a1,a2,...,as的每一个向量都可以经向量组b1,b2,...,bt线性表出,
则向量组a1,a2,...,as称为由向量组b1,b2,...,bt线性表出.
如果两个向量组互相可以线性表出,则它们称为等价(equivalent)

linear dependent: 线性相关
如果向量组a1,a2,...,as(s>=2)中有一个向量可以由其余向量线性表出,则这个向量组称为线性相关的.
注意:任意一个包含零向量的向量组一定是线性相关的.

另一种表达方法: 若,k1a1+k2a2+...+knan=0,当ki不全为0,则a1,a2,...,an线性相关.
因为按照上面线性相关的定义,假设an可以由其余向量表出,即:
an=k1a1+k2a2+...+k(n-1)a(n-1)
因此显然经过移项有:
k1a1+k2a2+...+k(n-1)a(n-1)+(-1)an=0
反之亦然.
因此上面两个定义是等价的.

特别地,根据上面定义,只有一个向量(记为a)的向量组线性相关的充要条件是a是零向量.
(ka=0)

linear independent: 线性无关
按照上述线性相关的定义,若不存在全为零的数k1,k2,...,kn,使得向量组a1,a2,...,an:
k1a1+k2a2+...+knan=0
则向量组a1,a2,...,an线性无关.
同时说明,向量组中任意一个向量ai都不可以被其他元素线性表出.

推理易得: 该向量组中任意一个向量子集都是线性无关的.

Homogeneous Linear Equation : 齐次线性方程组
常数项全为0的线性方程组称为齐次线性方程组(它的行列数不一定相等),它总是有解的(全为0.称为零解).
按照Cramer's Rule,如果它的系数行列式不为0,则它只有一个解,因此它是零解.
反过来说,如果齐次线性方程组有非零解,则必有|A|=0

Cramer's Rule: 克拉默法则
如果线性方程组:
AX=B
的系数矩阵A的行列式d=|A|不为0,则该线性方程组有解,且解是唯一的.

推论:如果一个n*n的齐次线性方程组的系数矩阵的行列式|A|不为0,则它只有一个解,
又由于它必定有零解,因此综上得到结论:它有且只有零解


推论的逆否命题:如果一个n*n的齐次线性方程组有非零解,那么必有|A|=0.

矩阵的秩(rank),行列式(determinant)与线性方程组的解(solution)的一些关系(理解)
0.
如果齐次线性方程组的系数矩阵为A,行列数为s*n,如果s<n,则它必有非零解.
使用消元法把A化成阶梯状,方程的个数不会超过原方程组中方程的个数,
因此r<=s<n,则r<n,因此解不是唯一的(无穷),因此有非零解.

1.如果齐次线性方程组的系数矩阵为A,行列数为s*n的行秩r<n,则它有非零解.
由于A的行秩r<n,则说明A的行向量组线性相关,假设a1,a2,...,ar是一个极大线性无关组,组成一个新矩阵为A'
那么它与A的行向量组a1,a2,...,an是等价的.因此,系数矩阵为A'的方程组与原方程组互相线性表出,
它们是同解的,应用0,可得结论.

2.n*n矩阵A的行列式为零的充分必要条件是A的秩小于n
(即A的行向量组不是线性无关的).

充分性:由于A的秩小于n,说明A的n个行向量线性相关.
若n=1,则该向量为零向量.因此|A|=0.
若n>1,则A中有一个行向量可以由其余行线性表出.
因此从这一行减去其他行相应的倍数,即可以类似消元地消去这一行的系数,使得这行变为0.
因此根据行列式的性质,|A|=0.

必要性:用数学归纳法来证明.(此处略...)

3.系数为A,次数为n*n的齐次线性方程组有非零解的充分必要条件是它的系数矩阵A的行列式等于零.
充分性:由1,2可得.
根据1,|A|=0,则说明A的秩小于n;根据2,A的秩r小于n,则该方程有非零解.
必要性:克拉默法则的推论的逆否命题可知.
由于方程有非零解,则|A|=0.

矩阵(方阵)的逆与行列式的关系
首先,定义:
如果n*n矩阵A的行列式|A|不等于0,则称A为非退化的(non-degenerative).否则是退化的(generative)
而存在逆阵的矩阵称为非奇异的矩阵(non-singular matrix),否则称为奇异矩阵(singular matrix).

矩阵A是可逆的充要条件是A非退化,或说:
|A| != 0   <=>  矩阵A非退化  <=>  矩阵A是非奇异矩阵  <=>  矩阵A满秩(根据以上定理2)


---------------------------------------------
附:中英文对照表
向量代数

(向量(vector)),(向量的长度(模)),(零向量(zero vector)),(负向量),(向量的加法(addition)),(三角形法则),(平行四边形法则),(多边形法则),(减法),(向量的标量乘积(scalar multiplication)),(向量的线性运算),线性组合(linear combination),线性表示,线性相关(linearly dependent),线性无关(linearly independent),(原点(origin)),(位置向量(position vector)),(线性流形(linear manifold)),(线性子空间(linear subspace));基(basis),仿射坐标(affine coordinates),仿射标架(affine frame),仿射坐标系(affine coordinate system),(坐标轴(coordinate axis)),(坐标平面),(卦限(octant)),(右手系),(左手系),(定比分点);(线性方程组(system of linear equations)),(齐次线性方程组(system of homogeneous linear equations)),(行列式(determinant));维向量,向量的分量(component),向量的相等,和向量,零向量,负向量,标量乘积, 维向量空间(vector space),自然基,(行向量(row vector)),(列向量(column vector));单位向量(unit vector),直角坐标系(rectangular coordinate system),直角坐标(rectangular coordinates),射影(projection),向量在某方向上的分量,(正交分解),(向量的夹角),内积(inner product),标量积(scalar product),(数量积),(方向的方向角),(方向的方向余弦);外积(exterior product),向量积(cross product),(二重外积);混合积(mixed product,scalar triple product)


行列式

(映射(mapping)),(象(image)),(一个原象(preimage)),(定义域(domain)),(值域(range)),(变换(transformation)),(单射(injection)),(象集),(满射(surjection)),(一一映射,双射(bijection)),(原象),(映射的复合,映射的乘积),(恒同映射,恒同变换(identity mapping)),(逆映射(inverse mapping));(置换(permutation)),( 阶对称群(symmetric group)),(对换(transposition)),(逆序对),(逆序数),(置换的符号(sign)),(偶置换(even permutation)),(奇置换(odd permutation));行列式(determinant),矩阵(matrix),矩阵的元(entry),(方阵(square matrix)),(零矩阵(zero matrix)),(对角元),(上三角形矩阵(upper triangular matrix)),(下三角形矩阵(lower triangular matrix)),(对角矩阵(diagonal matrix)),(单位矩阵(identity matrix)),转置矩阵(transpose matrix),初等行变换(elementary row transformation),初等列变换(elementary column transformation);(反称矩阵(skew-symmetric matrix));子矩阵(submatrix),子式(minor),余子式(cofactor),代数余子式(algebraic cofactor),(范德蒙德行列式(Vandermonde determinant));(未知量),(方程的系数(coefficient)),(常数项(constant)),(线性方程组的解(solution)),(系数矩阵),(增广矩阵(augmented matrix)),(零解);子式的余子式,子式的代数余子式


线性方程组与线性子空间


(阶梯形方程组),(方程组的初等变换),行阶梯矩阵(row echelon matrix),主元,简化行阶梯矩阵(reduced row echelon matrix),(高斯消元法(Gauss elimination)),(解向量),(同解),(自反性(reflexivity)),(对称性(symmetry)),(传递性(transitivity)),(等价关系(equivalence));(主变量),(自由位置量),(一般解),(齐次线性方程组的秩(rank));向量组线性相关,向量组线性无关,线性组合,线性表示,线性组合的系数,(向量组的延伸组);线性子空间,由向量组张成的线性子空间;基,坐标,(自然基),线性子空间的维数(dimension),向量组的秩;(解空间),齐次线性方程组的基础解系(fundamental system of solutions);(导出组),线性流形,(方向子空间),(线性流形的维数),(方程组的特解);(方程组的零点),(方程组的图象),(平面的一般方程),(平面的三点式方程),(平面的截距式方程),(平面的参数方程),(参数),(方向向量);(直线的方向向量),(直线的参数方程),(直线的标准方程),(直线的方向系数),(直线的两点式方程),(直线的一般方程);(平面束(pencil of planes))


矩阵的秩与矩阵的运算

线性表示,线性等价,极大线性无关组;(行空间,列空间),行秩(row rank),列秩(column rank),秩,满秩矩阵,行满秩矩阵,列满秩矩阵;线性映射(linear mapping),线性变换(linear transformation),线性函数(linear function);(零映射),(负映射),(矩阵的和),(负矩阵),(线性映射的标量乘积),(矩阵的标量乘积),(矩阵的乘积),(零因子),(标量矩阵(scalar matrix)),(矩阵的多项式);(退化的(degenerate)方阵),(非退化的(non-degenerate)方阵),(退化的线性变换),(非退化的线性变换),(逆矩阵(inverse matrix)),(可逆的(invertible),(伴随矩阵(adjoint matrix));(分块矩阵(block matrix)),(分块对角矩阵(block diagonal matrix));初等矩阵(elementary matrix),等价(equivalent);(象空间),(核空间(kernel)),(线性映射的秩),(零化度(nullity))